diff options
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- | fs/jbd2/commit.c | 221 |
1 files changed, 8 insertions, 213 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3ca107b5c86b..483183d15ed5 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -37,8 +37,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |||
37 | } | 37 | } |
38 | 38 | ||
39 | /* | 39 | /* |
40 | * When an ext3-ordered file is truncated, it is possible that many pages are | 40 | * When an ext4 file is truncated, it is possible that some pages are not |
41 | * not sucessfully freed, because they are attached to a committing transaction. | 41 | * successfully freed, because they are attached to a committing transaction. |
42 | * After the transaction commits, these pages are left on the LRU, with no | 42 | * After the transaction commits, these pages are left on the LRU, with no |
43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable | 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable |
44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes | 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes |
@@ -80,21 +80,6 @@ nope: | |||
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is | ||
84 | * held. For ranking reasons we must trylock. If we lose, schedule away and | ||
85 | * return 0. j_list_lock is dropped in this case. | ||
86 | */ | ||
87 | static int inverted_lock(journal_t *journal, struct buffer_head *bh) | ||
88 | { | ||
89 | if (!jbd_trylock_bh_state(bh)) { | ||
90 | spin_unlock(&journal->j_list_lock); | ||
91 | schedule(); | ||
92 | return 0; | ||
93 | } | ||
94 | return 1; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Done it all: now submit the commit record. We should have | 83 | * Done it all: now submit the commit record. We should have |
99 | * cleaned up our previous buffers by now, so if we are in abort | 84 | * cleaned up our previous buffers by now, so if we are in abort |
100 | * mode we can now just skip the rest of the journal write | 85 | * mode we can now just skip the rest of the journal write |
@@ -200,162 +185,6 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
200 | } | 185 | } |
201 | 186 | ||
202 | /* | 187 | /* |
203 | * Wait for all submitted IO to complete. | ||
204 | */ | ||
205 | static int journal_wait_on_locked_list(journal_t *journal, | ||
206 | transaction_t *commit_transaction) | ||
207 | { | ||
208 | int ret = 0; | ||
209 | struct journal_head *jh; | ||
210 | |||
211 | while (commit_transaction->t_locked_list) { | ||
212 | struct buffer_head *bh; | ||
213 | |||
214 | jh = commit_transaction->t_locked_list->b_tprev; | ||
215 | bh = jh2bh(jh); | ||
216 | get_bh(bh); | ||
217 | if (buffer_locked(bh)) { | ||
218 | spin_unlock(&journal->j_list_lock); | ||
219 | wait_on_buffer(bh); | ||
220 | if (unlikely(!buffer_uptodate(bh))) | ||
221 | ret = -EIO; | ||
222 | spin_lock(&journal->j_list_lock); | ||
223 | } | ||
224 | if (!inverted_lock(journal, bh)) { | ||
225 | put_bh(bh); | ||
226 | spin_lock(&journal->j_list_lock); | ||
227 | continue; | ||
228 | } | ||
229 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
230 | __jbd2_journal_unfile_buffer(jh); | ||
231 | jbd_unlock_bh_state(bh); | ||
232 | jbd2_journal_remove_journal_head(bh); | ||
233 | put_bh(bh); | ||
234 | } else { | ||
235 | jbd_unlock_bh_state(bh); | ||
236 | } | ||
237 | put_bh(bh); | ||
238 | cond_resched_lock(&journal->j_list_lock); | ||
239 | } | ||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | ||
244 | { | ||
245 | int i; | ||
246 | |||
247 | for (i = 0; i < bufs; i++) { | ||
248 | wbuf[i]->b_end_io = end_buffer_write_sync; | ||
249 | /* We use-up our safety reference in submit_bh() */ | ||
250 | submit_bh(WRITE, wbuf[i]); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Submit all the data buffers to disk | ||
256 | */ | ||
257 | static void journal_submit_data_buffers(journal_t *journal, | ||
258 | transaction_t *commit_transaction) | ||
259 | { | ||
260 | struct journal_head *jh; | ||
261 | struct buffer_head *bh; | ||
262 | int locked; | ||
263 | int bufs = 0; | ||
264 | struct buffer_head **wbuf = journal->j_wbuf; | ||
265 | |||
266 | /* | ||
267 | * Whenever we unlock the journal and sleep, things can get added | ||
268 | * onto ->t_sync_datalist, so we have to keep looping back to | ||
269 | * write_out_data until we *know* that the list is empty. | ||
270 | * | ||
271 | * Cleanup any flushed data buffers from the data list. Even in | ||
272 | * abort mode, we want to flush this out as soon as possible. | ||
273 | */ | ||
274 | write_out_data: | ||
275 | cond_resched(); | ||
276 | spin_lock(&journal->j_list_lock); | ||
277 | |||
278 | while (commit_transaction->t_sync_datalist) { | ||
279 | jh = commit_transaction->t_sync_datalist; | ||
280 | bh = jh2bh(jh); | ||
281 | locked = 0; | ||
282 | |||
283 | /* Get reference just to make sure buffer does not disappear | ||
284 | * when we are forced to drop various locks */ | ||
285 | get_bh(bh); | ||
286 | /* If the buffer is dirty, we need to submit IO and hence | ||
287 | * we need the buffer lock. We try to lock the buffer without | ||
288 | * blocking. If we fail, we need to drop j_list_lock and do | ||
289 | * blocking lock_buffer(). | ||
290 | */ | ||
291 | if (buffer_dirty(bh)) { | ||
292 | if (test_set_buffer_locked(bh)) { | ||
293 | BUFFER_TRACE(bh, "needs blocking lock"); | ||
294 | spin_unlock(&journal->j_list_lock); | ||
295 | /* Write out all data to prevent deadlocks */ | ||
296 | journal_do_submit_data(wbuf, bufs); | ||
297 | bufs = 0; | ||
298 | lock_buffer(bh); | ||
299 | spin_lock(&journal->j_list_lock); | ||
300 | } | ||
301 | locked = 1; | ||
302 | } | ||
303 | /* We have to get bh_state lock. Again out of order, sigh. */ | ||
304 | if (!inverted_lock(journal, bh)) { | ||
305 | jbd_lock_bh_state(bh); | ||
306 | spin_lock(&journal->j_list_lock); | ||
307 | } | ||
308 | /* Someone already cleaned up the buffer? */ | ||
309 | if (!buffer_jbd(bh) | ||
310 | || jh->b_transaction != commit_transaction | ||
311 | || jh->b_jlist != BJ_SyncData) { | ||
312 | jbd_unlock_bh_state(bh); | ||
313 | if (locked) | ||
314 | unlock_buffer(bh); | ||
315 | BUFFER_TRACE(bh, "already cleaned up"); | ||
316 | put_bh(bh); | ||
317 | continue; | ||
318 | } | ||
319 | if (locked && test_clear_buffer_dirty(bh)) { | ||
320 | BUFFER_TRACE(bh, "needs writeout, adding to array"); | ||
321 | wbuf[bufs++] = bh; | ||
322 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
323 | BJ_Locked); | ||
324 | jbd_unlock_bh_state(bh); | ||
325 | if (bufs == journal->j_wbufsize) { | ||
326 | spin_unlock(&journal->j_list_lock); | ||
327 | journal_do_submit_data(wbuf, bufs); | ||
328 | bufs = 0; | ||
329 | goto write_out_data; | ||
330 | } | ||
331 | } else if (!locked && buffer_locked(bh)) { | ||
332 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
333 | BJ_Locked); | ||
334 | jbd_unlock_bh_state(bh); | ||
335 | put_bh(bh); | ||
336 | } else { | ||
337 | BUFFER_TRACE(bh, "writeout complete: unfile"); | ||
338 | __jbd2_journal_unfile_buffer(jh); | ||
339 | jbd_unlock_bh_state(bh); | ||
340 | if (locked) | ||
341 | unlock_buffer(bh); | ||
342 | jbd2_journal_remove_journal_head(bh); | ||
343 | /* Once for our safety reference, once for | ||
344 | * jbd2_journal_remove_journal_head() */ | ||
345 | put_bh(bh); | ||
346 | put_bh(bh); | ||
347 | } | ||
348 | |||
349 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { | ||
350 | spin_unlock(&journal->j_list_lock); | ||
351 | goto write_out_data; | ||
352 | } | ||
353 | } | ||
354 | spin_unlock(&journal->j_list_lock); | ||
355 | journal_do_submit_data(wbuf, bufs); | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * Submit all the data buffers of inode associated with the transaction to | 188 | * Submit all the data buffers of inode associated with the transaction to |
360 | * disk. | 189 | * disk. |
361 | * | 190 | * |
@@ -602,42 +431,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
602 | * Now start flushing things to disk, in the order they appear | 431 | * Now start flushing things to disk, in the order they appear |
603 | * on the transaction lists. Data blocks go first. | 432 | * on the transaction lists. Data blocks go first. |
604 | */ | 433 | */ |
605 | err = 0; | ||
606 | journal_submit_data_buffers(journal, commit_transaction); | ||
607 | err = journal_submit_inode_data_buffers(journal, commit_transaction); | 434 | err = journal_submit_inode_data_buffers(journal, commit_transaction); |
608 | if (err) | 435 | if (err) |
609 | jbd2_journal_abort(journal, err); | 436 | jbd2_journal_abort(journal, err); |
610 | 437 | ||
611 | /* | ||
612 | * Wait for all previously submitted IO to complete if commit | ||
613 | * record is to be written synchronously. | ||
614 | */ | ||
615 | spin_lock(&journal->j_list_lock); | ||
616 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
617 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | ||
618 | err = journal_wait_on_locked_list(journal, | ||
619 | commit_transaction); | ||
620 | |||
621 | spin_unlock(&journal->j_list_lock); | ||
622 | |||
623 | if (err) | ||
624 | jbd2_journal_abort(journal, err); | ||
625 | |||
626 | jbd2_journal_write_revoke_records(journal, commit_transaction); | 438 | jbd2_journal_write_revoke_records(journal, commit_transaction); |
627 | 439 | ||
628 | jbd_debug(3, "JBD: commit phase 2\n"); | 440 | jbd_debug(3, "JBD: commit phase 2\n"); |
629 | 441 | ||
630 | /* | 442 | /* |
631 | * If we found any dirty or locked buffers, then we should have | ||
632 | * looped back up to the write_out_data label. If there weren't | ||
633 | * any then journal_clean_data_list should have wiped the list | ||
634 | * clean by now, so check that it is in fact empty. | ||
635 | */ | ||
636 | J_ASSERT (commit_transaction->t_sync_datalist == NULL); | ||
637 | |||
638 | jbd_debug (3, "JBD: commit phase 3\n"); | ||
639 | |||
640 | /* | ||
641 | * Way to go: we have now written out all of the data for a | 443 | * Way to go: we have now written out all of the data for a |
642 | * transaction! Now comes the tricky part: we need to write out | 444 | * transaction! Now comes the tricky part: we need to write out |
643 | * metadata. Loop over the transaction's entire buffer list: | 445 | * metadata. Loop over the transaction's entire buffer list: |
@@ -655,6 +457,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
655 | J_ASSERT(commit_transaction->t_nr_buffers <= | 457 | J_ASSERT(commit_transaction->t_nr_buffers <= |
656 | commit_transaction->t_outstanding_credits); | 458 | commit_transaction->t_outstanding_credits); |
657 | 459 | ||
460 | err = 0; | ||
658 | descriptor = NULL; | 461 | descriptor = NULL; |
659 | bufs = 0; | 462 | bufs = 0; |
660 | while (commit_transaction->t_buffers) { | 463 | while (commit_transaction->t_buffers) { |
@@ -829,13 +632,6 @@ start_journal_io: | |||
829 | &cbh, crc32_sum); | 632 | &cbh, crc32_sum); |
830 | if (err) | 633 | if (err) |
831 | __jbd2_journal_abort_hard(journal); | 634 | __jbd2_journal_abort_hard(journal); |
832 | |||
833 | spin_lock(&journal->j_list_lock); | ||
834 | err = journal_wait_on_locked_list(journal, | ||
835 | commit_transaction); | ||
836 | spin_unlock(&journal->j_list_lock); | ||
837 | if (err) | ||
838 | __jbd2_journal_abort_hard(journal); | ||
839 | } | 635 | } |
840 | 636 | ||
841 | /* | 637 | /* |
@@ -860,7 +656,7 @@ start_journal_io: | |||
860 | so we incur less scheduling load. | 656 | so we incur less scheduling load. |
861 | */ | 657 | */ |
862 | 658 | ||
863 | jbd_debug(3, "JBD: commit phase 4\n"); | 659 | jbd_debug(3, "JBD: commit phase 3\n"); |
864 | 660 | ||
865 | /* | 661 | /* |
866 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 662 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
@@ -919,7 +715,7 @@ wait_for_iobuf: | |||
919 | 715 | ||
920 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 716 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
921 | 717 | ||
922 | jbd_debug(3, "JBD: commit phase 5\n"); | 718 | jbd_debug(3, "JBD: commit phase 4\n"); |
923 | 719 | ||
924 | /* Here we wait for the revoke record and descriptor record buffers */ | 720 | /* Here we wait for the revoke record and descriptor record buffers */ |
925 | wait_for_ctlbuf: | 721 | wait_for_ctlbuf: |
@@ -946,7 +742,7 @@ wait_for_iobuf: | |||
946 | /* AKPM: bforget here */ | 742 | /* AKPM: bforget here */ |
947 | } | 743 | } |
948 | 744 | ||
949 | jbd_debug(3, "JBD: commit phase 6\n"); | 745 | jbd_debug(3, "JBD: commit phase 5\n"); |
950 | 746 | ||
951 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 747 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
952 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 748 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
@@ -966,9 +762,8 @@ wait_for_iobuf: | |||
966 | transaction can be removed from any checkpoint list it was on | 762 | transaction can be removed from any checkpoint list it was on |
967 | before. */ | 763 | before. */ |
968 | 764 | ||
969 | jbd_debug(3, "JBD: commit phase 7\n"); | 765 | jbd_debug(3, "JBD: commit phase 6\n"); |
970 | 766 | ||
971 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | ||
972 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 767 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
973 | J_ASSERT(commit_transaction->t_buffers == NULL); | 768 | J_ASSERT(commit_transaction->t_buffers == NULL); |
974 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 769 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
@@ -1090,7 +885,7 @@ restart_loop: | |||
1090 | 885 | ||
1091 | /* Done with this transaction! */ | 886 | /* Done with this transaction! */ |
1092 | 887 | ||
1093 | jbd_debug(3, "JBD: commit phase 8\n"); | 888 | jbd_debug(3, "JBD: commit phase 7\n"); |
1094 | 889 | ||
1095 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 890 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
1096 | 891 | ||