diff options
author | Zach Brown <zach.brown@oracle.com> | 2006-10-11 04:21:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-10-11 14:14:16 -0400 |
commit | b517bea1c74e4773482b3f41b3f493522a8c8e30 (patch) | |
tree | 1e98110151f1a35cde84ce1a2cb6f0cb871e9551 | |
parent | d0d856e8bd6e697cb44b2b4dd038f3bec576a70e (diff) |
[PATCH] 64-bit jbd2 core
Here is the patch to JBD to handle 64-bit block numbers, originally from Zach
Brown. This patch is useful only after adding support for 64-bit block
numbers in the filesystem.
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Zach Brown <zach.brown@oracle.com>
Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/jbd2/commit.c | 17 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 11 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 43 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 14 | ||||
-rw-r--r-- | include/linux/jbd2.h | 14 |
5 files changed, 77 insertions, 22 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b1a4eafc1541..44d68a113c73 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -271,6 +271,14 @@ write_out_data: | |||
271 | journal_do_submit_data(wbuf, bufs); | 271 | journal_do_submit_data(wbuf, bufs); |
272 | } | 272 | } |
273 | 273 | ||
274 | static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | ||
275 | sector_t block) | ||
276 | { | ||
277 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); | ||
278 | if (tag_bytes > JBD_TAG_SIZE32) | ||
279 | tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); | ||
280 | } | ||
281 | |||
274 | /* | 282 | /* |
275 | * jbd2_journal_commit_transaction | 283 | * jbd2_journal_commit_transaction |
276 | * | 284 | * |
@@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
293 | int first_tag = 0; | 301 | int first_tag = 0; |
294 | int tag_flag; | 302 | int tag_flag; |
295 | int i; | 303 | int i; |
304 | int tag_bytes = journal_tag_bytes(journal); | ||
296 | 305 | ||
297 | /* | 306 | /* |
298 | * First job: lock down the current transaction and wait for | 307 | * First job: lock down the current transaction and wait for |
@@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
597 | tag_flag |= JBD2_FLAG_SAME_UUID; | 606 | tag_flag |= JBD2_FLAG_SAME_UUID; |
598 | 607 | ||
599 | tag = (journal_block_tag_t *) tagp; | 608 | tag = (journal_block_tag_t *) tagp; |
600 | tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); | 609 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
601 | tag->t_flags = cpu_to_be32(tag_flag); | 610 | tag->t_flags = cpu_to_be32(tag_flag); |
602 | tagp += sizeof(journal_block_tag_t); | 611 | tagp += tag_bytes; |
603 | space_left -= sizeof(journal_block_tag_t); | 612 | space_left -= tag_bytes; |
604 | 613 | ||
605 | if (first_tag) { | 614 | if (first_tag) { |
606 | memcpy (tagp, journal->j_uuid, 16); | 615 | memcpy (tagp, journal->j_uuid, 16); |
@@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
614 | 623 | ||
615 | if (bufs == journal->j_wbufsize || | 624 | if (bufs == journal->j_wbufsize || |
616 | commit_transaction->t_buffers == NULL || | 625 | commit_transaction->t_buffers == NULL || |
617 | space_left < sizeof(journal_block_tag_t) + 16) { | 626 | space_left < tag_bytes + 16) { |
618 | 627 | ||
619 | jbd_debug(4, "JBD: Submit %d IOs\n", bufs); | 628 | jbd_debug(4, "JBD: Submit %d IOs\n", bufs); |
620 | 629 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8d0f71e562fe..926ebcbf8a7a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1610,6 +1610,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode) | |||
1610 | } | 1610 | } |
1611 | 1611 | ||
1612 | /* | 1612 | /* |
1613 | * helper functions to deal with 32 or 64bit block numbers. | ||
1614 | */ | ||
1615 | size_t journal_tag_bytes(journal_t *journal) | ||
1616 | { | ||
1617 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) | ||
1618 | return JBD_TAG_SIZE64; | ||
1619 | else | ||
1620 | return JBD_TAG_SIZE32; | ||
1621 | } | ||
1622 | |||
1623 | /* | ||
1613 | * Simple support for retrying memory allocations. Introduced to help to | 1624 | * Simple support for retrying memory allocations. Introduced to help to |
1614 | * debug different VM deadlock avoidance strategies. | 1625 | * debug different VM deadlock avoidance strategies. |
1615 | */ | 1626 | */ |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index b2012d112432..2486843adda0 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal, | |||
178 | * Count the number of in-use tags in a journal descriptor block. | 178 | * Count the number of in-use tags in a journal descriptor block. |
179 | */ | 179 | */ |
180 | 180 | ||
181 | static int count_tags(struct buffer_head *bh, int size) | 181 | static int count_tags(journal_t *journal, struct buffer_head *bh) |
182 | { | 182 | { |
183 | char * tagp; | 183 | char * tagp; |
184 | journal_block_tag_t * tag; | 184 | journal_block_tag_t * tag; |
185 | int nr = 0; | 185 | int nr = 0, size = journal->j_blocksize; |
186 | int tag_bytes = journal_tag_bytes(journal); | ||
186 | 187 | ||
187 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 188 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
188 | 189 | ||
189 | while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { | 190 | while ((tagp - bh->b_data + tag_bytes) <= size) { |
190 | tag = (journal_block_tag_t *) tagp; | 191 | tag = (journal_block_tag_t *) tagp; |
191 | 192 | ||
192 | nr++; | 193 | nr++; |
193 | tagp += sizeof(journal_block_tag_t); | 194 | tagp += tag_bytes; |
194 | if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) | 195 | if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) |
195 | tagp += 16; | 196 | tagp += 16; |
196 | 197 | ||
@@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal) | |||
307 | return err; | 308 | return err; |
308 | } | 309 | } |
309 | 310 | ||
311 | static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag) | ||
312 | { | ||
313 | sector_t block = be32_to_cpu(tag->t_blocknr); | ||
314 | if (tag_bytes > JBD_TAG_SIZE32) | ||
315 | block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; | ||
316 | return block; | ||
317 | } | ||
318 | |||
310 | static int do_one_pass(journal_t *journal, | 319 | static int do_one_pass(journal_t *journal, |
311 | struct recovery_info *info, enum passtype pass) | 320 | struct recovery_info *info, enum passtype pass) |
312 | { | 321 | { |
@@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal, | |||
318 | struct buffer_head * bh; | 327 | struct buffer_head * bh; |
319 | unsigned int sequence; | 328 | unsigned int sequence; |
320 | int blocktype; | 329 | int blocktype; |
330 | int tag_bytes = journal_tag_bytes(journal); | ||
321 | 331 | ||
322 | /* Precompute the maximum metadata descriptors in a descriptor block */ | 332 | /* Precompute the maximum metadata descriptors in a descriptor block */ |
323 | int MAX_BLOCKS_PER_DESC; | 333 | int MAX_BLOCKS_PER_DESC; |
324 | MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) | 334 | MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) |
325 | / sizeof(journal_block_tag_t)); | 335 | / tag_bytes); |
326 | 336 | ||
327 | /* | 337 | /* |
328 | * First thing is to establish what we expect to find in the log | 338 | * First thing is to establish what we expect to find in the log |
@@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal, | |||
412 | * in pass REPLAY; otherwise, just skip over the | 422 | * in pass REPLAY; otherwise, just skip over the |
413 | * blocks it describes. */ | 423 | * blocks it describes. */ |
414 | if (pass != PASS_REPLAY) { | 424 | if (pass != PASS_REPLAY) { |
415 | next_log_block += | 425 | next_log_block += count_tags(journal, bh); |
416 | count_tags(bh, journal->j_blocksize); | ||
417 | wrap(journal, next_log_block); | 426 | wrap(journal, next_log_block); |
418 | brelse(bh); | 427 | brelse(bh); |
419 | continue; | 428 | continue; |
@@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal, | |||
424 | * getting done here! */ | 433 | * getting done here! */ |
425 | 434 | ||
426 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 435 | tagp = &bh->b_data[sizeof(journal_header_t)]; |
427 | while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) | 436 | while ((tagp - bh->b_data + tag_bytes) |
428 | <= journal->j_blocksize) { | 437 | <= journal->j_blocksize) { |
429 | unsigned long io_block; | 438 | unsigned long io_block; |
430 | 439 | ||
@@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal, | |||
446 | unsigned long blocknr; | 455 | unsigned long blocknr; |
447 | 456 | ||
448 | J_ASSERT(obh != NULL); | 457 | J_ASSERT(obh != NULL); |
449 | blocknr = be32_to_cpu(tag->t_blocknr); | 458 | blocknr = read_tag_block(tag_bytes, |
459 | tag); | ||
450 | 460 | ||
451 | /* If the block has been | 461 | /* If the block has been |
452 | * revoked, then we're all done | 462 | * revoked, then we're all done |
@@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal, | |||
494 | } | 504 | } |
495 | 505 | ||
496 | skip_write: | 506 | skip_write: |
497 | tagp += sizeof(journal_block_tag_t); | 507 | tagp += tag_bytes; |
498 | if (!(flags & JBD2_FLAG_SAME_UUID)) | 508 | if (!(flags & JBD2_FLAG_SAME_UUID)) |
499 | tagp += 16; | 509 | tagp += 16; |
500 | 510 | ||
@@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, | |||
572 | { | 582 | { |
573 | jbd2_journal_revoke_header_t *header; | 583 | jbd2_journal_revoke_header_t *header; |
574 | int offset, max; | 584 | int offset, max; |
585 | int record_len = 4; | ||
575 | 586 | ||
576 | header = (jbd2_journal_revoke_header_t *) bh->b_data; | 587 | header = (jbd2_journal_revoke_header_t *) bh->b_data; |
577 | offset = sizeof(jbd2_journal_revoke_header_t); | 588 | offset = sizeof(jbd2_journal_revoke_header_t); |
578 | max = be32_to_cpu(header->r_count); | 589 | max = be32_to_cpu(header->r_count); |
579 | 590 | ||
580 | while (offset < max) { | 591 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) |
592 | record_len = 8; | ||
593 | |||
594 | while (offset + record_len <= max) { | ||
581 | unsigned long blocknr; | 595 | unsigned long blocknr; |
582 | int err; | 596 | int err; |
583 | 597 | ||
584 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); | 598 | if (record_len == 4) |
585 | offset += 4; | 599 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); |
600 | else | ||
601 | blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); | ||
602 | offset += record_len; | ||
586 | err = jbd2_journal_set_revoke(journal, blocknr, sequence); | 603 | err = jbd2_journal_set_revoke(journal, blocknr, sequence); |
587 | if (err) | 604 | if (err) |
588 | return err; | 605 | return err; |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 5820a0c5ad26..8aac875bd301 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal, | |||
584 | *descriptorp = descriptor; | 584 | *descriptorp = descriptor; |
585 | } | 585 | } |
586 | 586 | ||
587 | * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = | 587 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { |
588 | cpu_to_be32(record->blocknr); | 588 | * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = |
589 | offset += 4; | 589 | cpu_to_be64(record->blocknr); |
590 | offset += 8; | ||
591 | |||
592 | } else { | ||
593 | * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = | ||
594 | cpu_to_be32(record->blocknr); | ||
595 | offset += 4; | ||
596 | } | ||
597 | |||
590 | *offsetp = offset; | 598 | *offsetp = offset; |
591 | } | 599 | } |
592 | 600 | ||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3251f7abb57d..5e5aa64f1261 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -150,14 +150,21 @@ typedef struct journal_header_s | |||
150 | 150 | ||
151 | 151 | ||
152 | /* | 152 | /* |
153 | * The block tag: used to describe a single buffer in the journal | 153 | * The block tag: used to describe a single buffer in the journal. |
154 | * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this | ||
155 | * raw struct shouldn't be used for pointer math or sizeof() - use | ||
156 | * journal_tag_bytes(journal) instead to compute this. | ||
154 | */ | 157 | */ |
155 | typedef struct journal_block_tag_s | 158 | typedef struct journal_block_tag_s |
156 | { | 159 | { |
157 | __be32 t_blocknr; /* The on-disk block number */ | 160 | __be32 t_blocknr; /* The on-disk block number */ |
158 | __be32 t_flags; /* See below */ | 161 | __be32 t_flags; /* See below */ |
162 | __be32 t_blocknr_high; /* most-significant high 32bits. */ | ||
159 | } journal_block_tag_t; | 163 | } journal_block_tag_t; |
160 | 164 | ||
165 | #define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) | ||
166 | #define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) | ||
167 | |||
161 | /* | 168 | /* |
162 | * The revoke descriptor: used on disk to describe a series of blocks to | 169 | * The revoke descriptor: used on disk to describe a series of blocks to |
163 | * be revoked from the log | 170 | * be revoked from the log |
@@ -235,11 +242,13 @@ typedef struct journal_superblock_s | |||
235 | ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) | 242 | ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) |
236 | 243 | ||
237 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 | 244 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 |
245 | #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 | ||
238 | 246 | ||
239 | /* Features known to this kernel version: */ | 247 | /* Features known to this kernel version: */ |
240 | #define JBD2_KNOWN_COMPAT_FEATURES 0 | 248 | #define JBD2_KNOWN_COMPAT_FEATURES 0 |
241 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 | 249 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 |
242 | #define JBD2_KNOWN_INCOMPAT_FEATURES JBD2_FEATURE_INCOMPAT_REVOKE | 250 | #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ |
251 | JBD2_FEATURE_INCOMPAT_64BIT) | ||
243 | 252 | ||
244 | #ifdef __KERNEL__ | 253 | #ifdef __KERNEL__ |
245 | 254 | ||
@@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y) | |||
1052 | } | 1061 | } |
1053 | 1062 | ||
1054 | extern int jbd2_journal_blocks_per_page(struct inode *inode); | 1063 | extern int jbd2_journal_blocks_per_page(struct inode *inode); |
1064 | extern size_t journal_tag_bytes(journal_t *journal); | ||
1055 | 1065 | ||
1056 | /* | 1066 | /* |
1057 | * Return the minimum number of blocks which must be free in the journal | 1067 | * Return the minimum number of blocks which must be free in the journal |