diff options
| author | Jan Kara <jack@suse.cz> | 2008-07-11 19:27:31 -0400 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2008-07-11 19:27:31 -0400 |
| commit | 87c89c232c8f7b3820c33c3b9bc803e9358027da (patch) | |
| tree | 9a714242513ec3e5e1c28fad1bfff852efd033d5 /fs/jbd2 | |
| parent | 678aaf481496b01473b778685eca231d6784098b (diff) | |
jbd2: Remove data=ordered mode support using jbd buffer heads
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd2')
| -rw-r--r-- | fs/jbd2/checkpoint.c | 1 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 221 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 1 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 217 |
4 files changed, 14 insertions, 426 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6914598022ce..91389c8aee8a 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
| 688 | 688 | ||
| 689 | J_ASSERT(transaction->t_state == T_FINISHED); | 689 | J_ASSERT(transaction->t_state == T_FINISHED); |
| 690 | J_ASSERT(transaction->t_buffers == NULL); | 690 | J_ASSERT(transaction->t_buffers == NULL); |
| 691 | J_ASSERT(transaction->t_sync_datalist == NULL); | ||
| 692 | J_ASSERT(transaction->t_forget == NULL); | 691 | J_ASSERT(transaction->t_forget == NULL); |
| 693 | J_ASSERT(transaction->t_iobuf_list == NULL); | 692 | J_ASSERT(transaction->t_iobuf_list == NULL); |
| 694 | J_ASSERT(transaction->t_shadow_list == NULL); | 693 | J_ASSERT(transaction->t_shadow_list == NULL); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3ca107b5c86b..483183d15ed5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -37,8 +37,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |||
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | /* | 39 | /* |
| 40 | * When an ext3-ordered file is truncated, it is possible that many pages are | 40 | * When an ext4 file is truncated, it is possible that some pages are not |
| 41 | * not sucessfully freed, because they are attached to a committing transaction. | 41 | * successfully freed, because they are attached to a committing transaction. |
| 42 | * After the transaction commits, these pages are left on the LRU, with no | 42 | * After the transaction commits, these pages are left on the LRU, with no |
| 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable | 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable |
| 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes | 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes |
| @@ -80,21 +80,6 @@ nope: | |||
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | /* | 82 | /* |
| 83 | * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is | ||
| 84 | * held. For ranking reasons we must trylock. If we lose, schedule away and | ||
| 85 | * return 0. j_list_lock is dropped in this case. | ||
| 86 | */ | ||
| 87 | static int inverted_lock(journal_t *journal, struct buffer_head *bh) | ||
| 88 | { | ||
| 89 | if (!jbd_trylock_bh_state(bh)) { | ||
| 90 | spin_unlock(&journal->j_list_lock); | ||
| 91 | schedule(); | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | return 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Done it all: now submit the commit record. We should have | 83 | * Done it all: now submit the commit record. We should have |
| 99 | * cleaned up our previous buffers by now, so if we are in abort | 84 | * cleaned up our previous buffers by now, so if we are in abort |
| 100 | * mode we can now just skip the rest of the journal write | 85 | * mode we can now just skip the rest of the journal write |
| @@ -200,162 +185,6 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
| 200 | } | 185 | } |
| 201 | 186 | ||
| 202 | /* | 187 | /* |
| 203 | * Wait for all submitted IO to complete. | ||
| 204 | */ | ||
| 205 | static int journal_wait_on_locked_list(journal_t *journal, | ||
| 206 | transaction_t *commit_transaction) | ||
| 207 | { | ||
| 208 | int ret = 0; | ||
| 209 | struct journal_head *jh; | ||
| 210 | |||
| 211 | while (commit_transaction->t_locked_list) { | ||
| 212 | struct buffer_head *bh; | ||
| 213 | |||
| 214 | jh = commit_transaction->t_locked_list->b_tprev; | ||
| 215 | bh = jh2bh(jh); | ||
| 216 | get_bh(bh); | ||
| 217 | if (buffer_locked(bh)) { | ||
| 218 | spin_unlock(&journal->j_list_lock); | ||
| 219 | wait_on_buffer(bh); | ||
| 220 | if (unlikely(!buffer_uptodate(bh))) | ||
| 221 | ret = -EIO; | ||
| 222 | spin_lock(&journal->j_list_lock); | ||
| 223 | } | ||
| 224 | if (!inverted_lock(journal, bh)) { | ||
| 225 | put_bh(bh); | ||
| 226 | spin_lock(&journal->j_list_lock); | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
| 230 | __jbd2_journal_unfile_buffer(jh); | ||
| 231 | jbd_unlock_bh_state(bh); | ||
| 232 | jbd2_journal_remove_journal_head(bh); | ||
| 233 | put_bh(bh); | ||
| 234 | } else { | ||
| 235 | jbd_unlock_bh_state(bh); | ||
| 236 | } | ||
| 237 | put_bh(bh); | ||
| 238 | cond_resched_lock(&journal->j_list_lock); | ||
| 239 | } | ||
| 240 | return ret; | ||
| 241 | } | ||
| 242 | |||
| 243 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | ||
| 244 | { | ||
| 245 | int i; | ||
| 246 | |||
| 247 | for (i = 0; i < bufs; i++) { | ||
| 248 | wbuf[i]->b_end_io = end_buffer_write_sync; | ||
| 249 | /* We use-up our safety reference in submit_bh() */ | ||
| 250 | submit_bh(WRITE, wbuf[i]); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | /* | ||
| 255 | * Submit all the data buffers to disk | ||
| 256 | */ | ||
| 257 | static void journal_submit_data_buffers(journal_t *journal, | ||
| 258 | transaction_t *commit_transaction) | ||
| 259 | { | ||
| 260 | struct journal_head *jh; | ||
| 261 | struct buffer_head *bh; | ||
| 262 | int locked; | ||
| 263 | int bufs = 0; | ||
| 264 | struct buffer_head **wbuf = journal->j_wbuf; | ||
| 265 | |||
| 266 | /* | ||
| 267 | * Whenever we unlock the journal and sleep, things can get added | ||
| 268 | * onto ->t_sync_datalist, so we have to keep looping back to | ||
| 269 | * write_out_data until we *know* that the list is empty. | ||
| 270 | * | ||
| 271 | * Cleanup any flushed data buffers from the data list. Even in | ||
| 272 | * abort mode, we want to flush this out as soon as possible. | ||
| 273 | */ | ||
| 274 | write_out_data: | ||
| 275 | cond_resched(); | ||
| 276 | spin_lock(&journal->j_list_lock); | ||
| 277 | |||
| 278 | while (commit_transaction->t_sync_datalist) { | ||
| 279 | jh = commit_transaction->t_sync_datalist; | ||
| 280 | bh = jh2bh(jh); | ||
| 281 | locked = 0; | ||
| 282 | |||
| 283 | /* Get reference just to make sure buffer does not disappear | ||
| 284 | * when we are forced to drop various locks */ | ||
| 285 | get_bh(bh); | ||
| 286 | /* If the buffer is dirty, we need to submit IO and hence | ||
| 287 | * we need the buffer lock. We try to lock the buffer without | ||
| 288 | * blocking. If we fail, we need to drop j_list_lock and do | ||
| 289 | * blocking lock_buffer(). | ||
| 290 | */ | ||
| 291 | if (buffer_dirty(bh)) { | ||
| 292 | if (test_set_buffer_locked(bh)) { | ||
| 293 | BUFFER_TRACE(bh, "needs blocking lock"); | ||
| 294 | spin_unlock(&journal->j_list_lock); | ||
| 295 | /* Write out all data to prevent deadlocks */ | ||
| 296 | journal_do_submit_data(wbuf, bufs); | ||
| 297 | bufs = 0; | ||
| 298 | lock_buffer(bh); | ||
| 299 | spin_lock(&journal->j_list_lock); | ||
| 300 | } | ||
| 301 | locked = 1; | ||
| 302 | } | ||
| 303 | /* We have to get bh_state lock. Again out of order, sigh. */ | ||
| 304 | if (!inverted_lock(journal, bh)) { | ||
| 305 | jbd_lock_bh_state(bh); | ||
| 306 | spin_lock(&journal->j_list_lock); | ||
| 307 | } | ||
| 308 | /* Someone already cleaned up the buffer? */ | ||
| 309 | if (!buffer_jbd(bh) | ||
| 310 | || jh->b_transaction != commit_transaction | ||
| 311 | || jh->b_jlist != BJ_SyncData) { | ||
| 312 | jbd_unlock_bh_state(bh); | ||
| 313 | if (locked) | ||
| 314 | unlock_buffer(bh); | ||
| 315 | BUFFER_TRACE(bh, "already cleaned up"); | ||
| 316 | put_bh(bh); | ||
| 317 | continue; | ||
| 318 | } | ||
| 319 | if (locked && test_clear_buffer_dirty(bh)) { | ||
| 320 | BUFFER_TRACE(bh, "needs writeout, adding to array"); | ||
| 321 | wbuf[bufs++] = bh; | ||
| 322 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 323 | BJ_Locked); | ||
| 324 | jbd_unlock_bh_state(bh); | ||
| 325 | if (bufs == journal->j_wbufsize) { | ||
| 326 | spin_unlock(&journal->j_list_lock); | ||
| 327 | journal_do_submit_data(wbuf, bufs); | ||
| 328 | bufs = 0; | ||
| 329 | goto write_out_data; | ||
| 330 | } | ||
| 331 | } else if (!locked && buffer_locked(bh)) { | ||
| 332 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 333 | BJ_Locked); | ||
| 334 | jbd_unlock_bh_state(bh); | ||
| 335 | put_bh(bh); | ||
| 336 | } else { | ||
| 337 | BUFFER_TRACE(bh, "writeout complete: unfile"); | ||
| 338 | __jbd2_journal_unfile_buffer(jh); | ||
| 339 | jbd_unlock_bh_state(bh); | ||
| 340 | if (locked) | ||
| 341 | unlock_buffer(bh); | ||
| 342 | jbd2_journal_remove_journal_head(bh); | ||
| 343 | /* Once for our safety reference, once for | ||
| 344 | * jbd2_journal_remove_journal_head() */ | ||
| 345 | put_bh(bh); | ||
| 346 | put_bh(bh); | ||
| 347 | } | ||
| 348 | |||
| 349 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { | ||
| 350 | spin_unlock(&journal->j_list_lock); | ||
| 351 | goto write_out_data; | ||
| 352 | } | ||
| 353 | } | ||
| 354 | spin_unlock(&journal->j_list_lock); | ||
| 355 | journal_do_submit_data(wbuf, bufs); | ||
| 356 | } | ||
| 357 | |||
| 358 | /* | ||
| 359 | * Submit all the data buffers of inode associated with the transaction to | 188 | * Submit all the data buffers of inode associated with the transaction to |
| 360 | * disk. | 189 | * disk. |
| 361 | * | 190 | * |
| @@ -602,42 +431,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 602 | * Now start flushing things to disk, in the order they appear | 431 | * Now start flushing things to disk, in the order they appear |
| 603 | * on the transaction lists. Data blocks go first. | 432 | * on the transaction lists. Data blocks go first. |
| 604 | */ | 433 | */ |
| 605 | err = 0; | ||
| 606 | journal_submit_data_buffers(journal, commit_transaction); | ||
| 607 | err = journal_submit_inode_data_buffers(journal, commit_transaction); | 434 | err = journal_submit_inode_data_buffers(journal, commit_transaction); |
| 608 | if (err) | 435 | if (err) |
| 609 | jbd2_journal_abort(journal, err); | 436 | jbd2_journal_abort(journal, err); |
| 610 | 437 | ||
| 611 | /* | ||
| 612 | * Wait for all previously submitted IO to complete if commit | ||
| 613 | * record is to be written synchronously. | ||
| 614 | */ | ||
| 615 | spin_lock(&journal->j_list_lock); | ||
| 616 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
| 617 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | ||
| 618 | err = journal_wait_on_locked_list(journal, | ||
| 619 | commit_transaction); | ||
| 620 | |||
| 621 | spin_unlock(&journal->j_list_lock); | ||
| 622 | |||
| 623 | if (err) | ||
| 624 | jbd2_journal_abort(journal, err); | ||
| 625 | |||
| 626 | jbd2_journal_write_revoke_records(journal, commit_transaction); | 438 | jbd2_journal_write_revoke_records(journal, commit_transaction); |
| 627 | 439 | ||
| 628 | jbd_debug(3, "JBD: commit phase 2\n"); | 440 | jbd_debug(3, "JBD: commit phase 2\n"); |
| 629 | 441 | ||
| 630 | /* | 442 | /* |
| 631 | * If we found any dirty or locked buffers, then we should have | ||
| 632 | * looped back up to the write_out_data label. If there weren't | ||
| 633 | * any then journal_clean_data_list should have wiped the list | ||
| 634 | * clean by now, so check that it is in fact empty. | ||
| 635 | */ | ||
| 636 | J_ASSERT (commit_transaction->t_sync_datalist == NULL); | ||
| 637 | |||
| 638 | jbd_debug (3, "JBD: commit phase 3\n"); | ||
| 639 | |||
| 640 | /* | ||
| 641 | * Way to go: we have now written out all of the data for a | 443 | * Way to go: we have now written out all of the data for a |
| 642 | * transaction! Now comes the tricky part: we need to write out | 444 | * transaction! Now comes the tricky part: we need to write out |
| 643 | * metadata. Loop over the transaction's entire buffer list: | 445 | * metadata. Loop over the transaction's entire buffer list: |
| @@ -655,6 +457,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 655 | J_ASSERT(commit_transaction->t_nr_buffers <= | 457 | J_ASSERT(commit_transaction->t_nr_buffers <= |
| 656 | commit_transaction->t_outstanding_credits); | 458 | commit_transaction->t_outstanding_credits); |
| 657 | 459 | ||
| 460 | err = 0; | ||
| 658 | descriptor = NULL; | 461 | descriptor = NULL; |
| 659 | bufs = 0; | 462 | bufs = 0; |
| 660 | while (commit_transaction->t_buffers) { | 463 | while (commit_transaction->t_buffers) { |
| @@ -829,13 +632,6 @@ start_journal_io: | |||
| 829 | &cbh, crc32_sum); | 632 | &cbh, crc32_sum); |
| 830 | if (err) | 633 | if (err) |
| 831 | __jbd2_journal_abort_hard(journal); | 634 | __jbd2_journal_abort_hard(journal); |
| 832 | |||
| 833 | spin_lock(&journal->j_list_lock); | ||
| 834 | err = journal_wait_on_locked_list(journal, | ||
| 835 | commit_transaction); | ||
| 836 | spin_unlock(&journal->j_list_lock); | ||
| 837 | if (err) | ||
| 838 | __jbd2_journal_abort_hard(journal); | ||
| 839 | } | 635 | } |
| 840 | 636 | ||
| 841 | /* | 637 | /* |
| @@ -860,7 +656,7 @@ start_journal_io: | |||
| 860 | so we incur less scheduling load. | 656 | so we incur less scheduling load. |
| 861 | */ | 657 | */ |
| 862 | 658 | ||
| 863 | jbd_debug(3, "JBD: commit phase 4\n"); | 659 | jbd_debug(3, "JBD: commit phase 3\n"); |
| 864 | 660 | ||
| 865 | /* | 661 | /* |
| 866 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 662 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
| @@ -919,7 +715,7 @@ wait_for_iobuf: | |||
| 919 | 715 | ||
| 920 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 716 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
| 921 | 717 | ||
| 922 | jbd_debug(3, "JBD: commit phase 5\n"); | 718 | jbd_debug(3, "JBD: commit phase 4\n"); |
| 923 | 719 | ||
| 924 | /* Here we wait for the revoke record and descriptor record buffers */ | 720 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 925 | wait_for_ctlbuf: | 721 | wait_for_ctlbuf: |
| @@ -946,7 +742,7 @@ wait_for_iobuf: | |||
| 946 | /* AKPM: bforget here */ | 742 | /* AKPM: bforget here */ |
| 947 | } | 743 | } |
| 948 | 744 | ||
| 949 | jbd_debug(3, "JBD: commit phase 6\n"); | 745 | jbd_debug(3, "JBD: commit phase 5\n"); |
| 950 | 746 | ||
| 951 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 747 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 952 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 748 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| @@ -966,9 +762,8 @@ wait_for_iobuf: | |||
| 966 | transaction can be removed from any checkpoint list it was on | 762 | transaction can be removed from any checkpoint list it was on |
| 967 | before. */ | 763 | before. */ |
| 968 | 764 | ||
| 969 | jbd_debug(3, "JBD: commit phase 7\n"); | 765 | jbd_debug(3, "JBD: commit phase 6\n"); |
| 970 | 766 | ||
| 971 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | ||
| 972 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 767 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 973 | J_ASSERT(commit_transaction->t_buffers == NULL); | 768 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| 974 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 769 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
| @@ -1090,7 +885,7 @@ restart_loop: | |||
| 1090 | 885 | ||
| 1091 | /* Done with this transaction! */ | 886 | /* Done with this transaction! */ |
| 1092 | 887 | ||
| 1093 | jbd_debug(3, "JBD: commit phase 8\n"); | 888 | jbd_debug(3, "JBD: commit phase 7\n"); |
| 1094 | 889 | ||
| 1095 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 890 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
| 1096 | 891 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 78cf7bd7f604..b26c6d9fe6ae 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates); | |||
| 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); | 50 | EXPORT_SYMBOL(jbd2_journal_get_write_access); |
| 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); | 51 | EXPORT_SYMBOL(jbd2_journal_get_create_access); |
| 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); | 52 | EXPORT_SYMBOL(jbd2_journal_get_undo_access); |
| 53 | EXPORT_SYMBOL(jbd2_journal_dirty_data); | ||
| 54 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); | 53 | EXPORT_SYMBOL(jbd2_journal_dirty_metadata); |
| 55 | EXPORT_SYMBOL(jbd2_journal_release_buffer); | 54 | EXPORT_SYMBOL(jbd2_journal_release_buffer); |
| 56 | EXPORT_SYMBOL(jbd2_journal_forget); | 55 | EXPORT_SYMBOL(jbd2_journal_forget); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 98b596d23705..4f7cadbb19fa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -943,183 +943,6 @@ out: | |||
| 943 | } | 943 | } |
| 944 | 944 | ||
| 945 | /** | 945 | /** |
| 946 | * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which | ||
| 947 | * needs to be flushed before we can commit the | ||
| 948 | * current transaction. | ||
| 949 | * @handle: transaction | ||
| 950 | * @bh: bufferhead to mark | ||
| 951 | * | ||
| 952 | * The buffer is placed on the transaction's data list and is marked as | ||
| 953 | * belonging to the transaction. | ||
| 954 | * | ||
| 955 | * Returns error number or 0 on success. | ||
| 956 | * | ||
| 957 | * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage | ||
| 958 | * by kswapd. | ||
| 959 | */ | ||
| 960 | int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) | ||
| 961 | { | ||
| 962 | journal_t *journal = handle->h_transaction->t_journal; | ||
| 963 | int need_brelse = 0; | ||
| 964 | struct journal_head *jh; | ||
| 965 | |||
| 966 | if (is_handle_aborted(handle)) | ||
| 967 | return 0; | ||
| 968 | |||
| 969 | jh = jbd2_journal_add_journal_head(bh); | ||
| 970 | JBUFFER_TRACE(jh, "entry"); | ||
| 971 | |||
| 972 | /* | ||
| 973 | * The buffer could *already* be dirty. Writeout can start | ||
| 974 | * at any time. | ||
| 975 | */ | ||
| 976 | jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); | ||
| 977 | |||
| 978 | /* | ||
| 979 | * What if the buffer is already part of a running transaction? | ||
| 980 | * | ||
| 981 | * There are two cases: | ||
| 982 | * 1) It is part of the current running transaction. Refile it, | ||
| 983 | * just in case we have allocated it as metadata, deallocated | ||
| 984 | * it, then reallocated it as data. | ||
| 985 | * 2) It is part of the previous, still-committing transaction. | ||
| 986 | * If all we want to do is to guarantee that the buffer will be | ||
| 987 | * written to disk before this new transaction commits, then | ||
| 988 | * being sure that the *previous* transaction has this same | ||
| 989 | * property is sufficient for us! Just leave it on its old | ||
| 990 | * transaction. | ||
| 991 | * | ||
| 992 | * In case (2), the buffer must not already exist as metadata | ||
| 993 | * --- that would violate write ordering (a transaction is free | ||
| 994 | * to write its data at any point, even before the previous | ||
| 995 | * committing transaction has committed). The caller must | ||
| 996 | * never, ever allow this to happen: there's nothing we can do | ||
| 997 | * about it in this layer. | ||
| 998 | */ | ||
| 999 | jbd_lock_bh_state(bh); | ||
| 1000 | spin_lock(&journal->j_list_lock); | ||
| 1001 | |||
| 1002 | /* Now that we have bh_state locked, are we really still mapped? */ | ||
| 1003 | if (!buffer_mapped(bh)) { | ||
| 1004 | JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); | ||
| 1005 | goto no_journal; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | if (jh->b_transaction) { | ||
| 1009 | JBUFFER_TRACE(jh, "has transaction"); | ||
| 1010 | if (jh->b_transaction != handle->h_transaction) { | ||
| 1011 | JBUFFER_TRACE(jh, "belongs to older transaction"); | ||
| 1012 | J_ASSERT_JH(jh, jh->b_transaction == | ||
| 1013 | journal->j_committing_transaction); | ||
| 1014 | |||
| 1015 | /* @@@ IS THIS TRUE ? */ | ||
| 1016 | /* | ||
| 1017 | * Not any more. Scenario: someone does a write() | ||
| 1018 | * in data=journal mode. The buffer's transaction has | ||
| 1019 | * moved into commit. Then someone does another | ||
| 1020 | * write() to the file. We do the frozen data copyout | ||
| 1021 | * and set b_next_transaction to point to j_running_t. | ||
| 1022 | * And while we're in that state, someone does a | ||
| 1023 | * writepage() in an attempt to pageout the same area | ||
| 1024 | * of the file via a shared mapping. At present that | ||
| 1025 | * calls jbd2_journal_dirty_data(), and we get right here. | ||
| 1026 | * It may be too late to journal the data. Simply | ||
| 1027 | * falling through to the next test will suffice: the | ||
| 1028 | * data will be dirty and wil be checkpointed. The | ||
| 1029 | * ordering comments in the next comment block still | ||
| 1030 | * apply. | ||
| 1031 | */ | ||
| 1032 | //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); | ||
| 1033 | |||
| 1034 | /* | ||
| 1035 | * If we're journalling data, and this buffer was | ||
| 1036 | * subject to a write(), it could be metadata, forget | ||
| 1037 | * or shadow against the committing transaction. Now, | ||
| 1038 | * someone has dirtied the same darn page via a mapping | ||
| 1039 | * and it is being writepage()'d. | ||
| 1040 | * We *could* just steal the page from commit, with some | ||
| 1041 | * fancy locking there. Instead, we just skip it - | ||
| 1042 | * don't tie the page's buffers to the new transaction | ||
| 1043 | * at all. | ||
| 1044 | * Implication: if we crash before the writepage() data | ||
| 1045 | * is written into the filesystem, recovery will replay | ||
| 1046 | * the write() data. | ||
| 1047 | */ | ||
| 1048 | if (jh->b_jlist != BJ_None && | ||
| 1049 | jh->b_jlist != BJ_SyncData && | ||
| 1050 | jh->b_jlist != BJ_Locked) { | ||
| 1051 | JBUFFER_TRACE(jh, "Not stealing"); | ||
| 1052 | goto no_journal; | ||
| 1053 | } | ||
| 1054 | |||
| 1055 | /* | ||
| 1056 | * This buffer may be undergoing writeout in commit. We | ||
| 1057 | * can't return from here and let the caller dirty it | ||
| 1058 | * again because that can cause the write-out loop in | ||
| 1059 | * commit to never terminate. | ||
| 1060 | */ | ||
| 1061 | if (buffer_dirty(bh)) { | ||
| 1062 | get_bh(bh); | ||
| 1063 | spin_unlock(&journal->j_list_lock); | ||
| 1064 | jbd_unlock_bh_state(bh); | ||
| 1065 | need_brelse = 1; | ||
| 1066 | sync_dirty_buffer(bh); | ||
| 1067 | jbd_lock_bh_state(bh); | ||
| 1068 | spin_lock(&journal->j_list_lock); | ||
| 1069 | /* Since we dropped the lock... */ | ||
| 1070 | if (!buffer_mapped(bh)) { | ||
| 1071 | JBUFFER_TRACE(jh, "buffer got unmapped"); | ||
| 1072 | goto no_journal; | ||
| 1073 | } | ||
| 1074 | /* The buffer may become locked again at any | ||
| 1075 | time if it is redirtied */ | ||
| 1076 | } | ||
| 1077 | |||
| 1078 | /* journal_clean_data_list() may have got there first */ | ||
| 1079 | if (jh->b_transaction != NULL) { | ||
| 1080 | JBUFFER_TRACE(jh, "unfile from commit"); | ||
| 1081 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1082 | /* It still points to the committing | ||
| 1083 | * transaction; move it to this one so | ||
| 1084 | * that the refile assert checks are | ||
| 1085 | * happy. */ | ||
| 1086 | jh->b_transaction = handle->h_transaction; | ||
| 1087 | } | ||
| 1088 | /* The buffer will be refiled below */ | ||
| 1089 | |||
| 1090 | } | ||
| 1091 | /* | ||
| 1092 | * Special case --- the buffer might actually have been | ||
| 1093 | * allocated and then immediately deallocated in the previous, | ||
| 1094 | * committing transaction, so might still be left on that | ||
| 1095 | * transaction's metadata lists. | ||
| 1096 | */ | ||
| 1097 | if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { | ||
| 1098 | JBUFFER_TRACE(jh, "not on correct data list: unfile"); | ||
| 1099 | J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); | ||
| 1100 | __jbd2_journal_temp_unlink_buffer(jh); | ||
| 1101 | jh->b_transaction = handle->h_transaction; | ||
| 1102 | JBUFFER_TRACE(jh, "file as data"); | ||
| 1103 | __jbd2_journal_file_buffer(jh, handle->h_transaction, | ||
| 1104 | BJ_SyncData); | ||
| 1105 | } | ||
| 1106 | } else { | ||
| 1107 | JBUFFER_TRACE(jh, "not on a transaction"); | ||
| 1108 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); | ||
| 1109 | } | ||
| 1110 | no_journal: | ||
| 1111 | spin_unlock(&journal->j_list_lock); | ||
| 1112 | jbd_unlock_bh_state(bh); | ||
| 1113 | if (need_brelse) { | ||
| 1114 | BUFFER_TRACE(bh, "brelse"); | ||
| 1115 | __brelse(bh); | ||
| 1116 | } | ||
| 1117 | JBUFFER_TRACE(jh, "exit"); | ||
| 1118 | jbd2_journal_put_journal_head(jh); | ||
| 1119 | return 0; | ||
| 1120 | } | ||
| 1121 | |||
| 1122 | /** | ||
| 1123 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata | 946 | * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata |
| 1124 | * @handle: transaction to add buffer to. | 947 | * @handle: transaction to add buffer to. |
| 1125 | * @bh: buffer to mark | 948 | * @bh: buffer to mark |
| @@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
| 1541 | * Remove a buffer from the appropriate transaction list. | 1364 | * Remove a buffer from the appropriate transaction list. |
| 1542 | * | 1365 | * |
| 1543 | * Note that this function can *change* the value of | 1366 | * Note that this function can *change* the value of |
| 1544 | * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, | 1367 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, |
| 1545 | * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller | 1368 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one |
| 1546 | * is holding onto a copy of one of thee pointers, it could go bad. | 1369 | * of these pointers, it could go bad. Generally the caller needs to re-read |
| 1547 | * Generally the caller needs to re-read the pointer from the transaction_t. | 1370 | * the pointer from the transaction_t. |
| 1548 | * | 1371 | * |
| 1549 | * Called under j_list_lock. The journal may not be locked. | 1372 | * Called under j_list_lock. The journal may not be locked. |
| 1550 | */ | 1373 | */ |
| @@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1566 | switch (jh->b_jlist) { | 1389 | switch (jh->b_jlist) { |
| 1567 | case BJ_None: | 1390 | case BJ_None: |
| 1568 | return; | 1391 | return; |
| 1569 | case BJ_SyncData: | ||
| 1570 | list = &transaction->t_sync_datalist; | ||
| 1571 | break; | ||
| 1572 | case BJ_Metadata: | 1392 | case BJ_Metadata: |
| 1573 | transaction->t_nr_buffers--; | 1393 | transaction->t_nr_buffers--; |
| 1574 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); | 1394 | J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); |
| @@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1589 | case BJ_Reserved: | 1409 | case BJ_Reserved: |
| 1590 | list = &transaction->t_reserved_list; | 1410 | list = &transaction->t_reserved_list; |
| 1591 | break; | 1411 | break; |
| 1592 | case BJ_Locked: | ||
| 1593 | list = &transaction->t_locked_list; | ||
| 1594 | break; | ||
| 1595 | } | 1412 | } |
| 1596 | 1413 | ||
| 1597 | __blist_del_buffer(list, jh); | 1414 | __blist_del_buffer(list, jh); |
| @@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1634 | goto out; | 1451 | goto out; |
| 1635 | 1452 | ||
| 1636 | spin_lock(&journal->j_list_lock); | 1453 | spin_lock(&journal->j_list_lock); |
| 1637 | if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { | 1454 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { |
| 1638 | if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { | ||
| 1639 | /* A written-back ordered data buffer */ | ||
| 1640 | JBUFFER_TRACE(jh, "release data"); | ||
| 1641 | __jbd2_journal_unfile_buffer(jh); | ||
| 1642 | jbd2_journal_remove_journal_head(bh); | ||
| 1643 | __brelse(bh); | ||
| 1644 | } | ||
| 1645 | } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | ||
| 1646 | /* written-back checkpointed metadata buffer */ | 1455 | /* written-back checkpointed metadata buffer */ |
| 1647 | if (jh->b_jlist == BJ_None) { | 1456 | if (jh->b_jlist == BJ_None) { |
| 1648 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1457 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| @@ -1878,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1878 | if (!buffer_jbd(bh)) | 1687 | if (!buffer_jbd(bh)) |
| 1879 | goto zap_buffer_unlocked; | 1688 | goto zap_buffer_unlocked; |
| 1880 | 1689 | ||
| 1690 | /* OK, we have data buffer in journaled mode */ | ||
| 1881 | spin_lock(&journal->j_state_lock); | 1691 | spin_lock(&journal->j_state_lock); |
| 1882 | jbd_lock_bh_state(bh); | 1692 | jbd_lock_bh_state(bh); |
| 1883 | spin_lock(&journal->j_list_lock); | 1693 | spin_lock(&journal->j_list_lock); |
| @@ -1941,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) | |||
| 1941 | } | 1751 | } |
| 1942 | } else if (transaction == journal->j_committing_transaction) { | 1752 | } else if (transaction == journal->j_committing_transaction) { |
| 1943 | JBUFFER_TRACE(jh, "on committing transaction"); | 1753 | JBUFFER_TRACE(jh, "on committing transaction"); |
| 1944 | if (jh->b_jlist == BJ_Locked) { | ||
| 1945 | /* | ||
| 1946 | * The buffer is on the committing transaction's locked | ||
| 1947 | * list. We have the buffer locked, so I/O has | ||
| 1948 | * completed. So we can nail the buffer now. | ||
| 1949 | */ | ||
| 1950 | may_free = __dispose_buffer(jh, transaction); | ||
| 1951 | goto zap_buffer; | ||
| 1952 | } | ||
| 1953 | /* | 1754 | /* |
| 1954 | * If it is committing, we simply cannot touch it. We | 1755 | * If it is committing, we simply cannot touch it. We |
| 1955 | * can remove its next_transaction pointer from the | 1756 | * can remove its next_transaction pointer from the |
| @@ -2082,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2082 | J_ASSERT_JH(jh, !jh->b_committed_data); | 1883 | J_ASSERT_JH(jh, !jh->b_committed_data); |
| 2083 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1884 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
| 2084 | return; | 1885 | return; |
| 2085 | case BJ_SyncData: | ||
| 2086 | list = &transaction->t_sync_datalist; | ||
| 2087 | break; | ||
| 2088 | case BJ_Metadata: | 1886 | case BJ_Metadata: |
| 2089 | transaction->t_nr_buffers++; | 1887 | transaction->t_nr_buffers++; |
| 2090 | list = &transaction->t_buffers; | 1888 | list = &transaction->t_buffers; |
| @@ -2104,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2104 | case BJ_Reserved: | 1902 | case BJ_Reserved: |
| 2105 | list = &transaction->t_reserved_list; | 1903 | list = &transaction->t_reserved_list; |
| 2106 | break; | 1904 | break; |
| 2107 | case BJ_Locked: | ||
| 2108 | list = &transaction->t_locked_list; | ||
| 2109 | break; | ||
| 2110 | } | 1905 | } |
| 2111 | 1906 | ||
| 2112 | __blist_add_buffer(list, jh); | 1907 | __blist_add_buffer(list, jh); |
