aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2/commit.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r-- fs/jbd2/commit.c 221
1 files changed, 8 insertions, 213 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 3ca107b5c86b..483183d15ed5 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -37,8 +37,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
37} 37}
38 38
39/* 39/*
40 * When an ext3-ordered file is truncated, it is possible that many pages are 40 * When an ext4 file is truncated, it is possible that some pages are not
41 * not sucessfully freed, because they are attached to a committing transaction. 41 * successfully freed, because they are attached to a committing transaction.
42 * After the transaction commits, these pages are left on the LRU, with no 42 * After the transaction commits, these pages are left on the LRU, with no
43 * ->mapping, and with attached buffers. These pages are trivially reclaimable 43 * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 * by the VM, but their apparent absence upsets the VM accounting, and it makes 44 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -80,21 +80,6 @@ nope:
80} 80}
81 81
82/* 82/*
83 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
84 * held. For ranking reasons we must trylock. If we lose, schedule away and
85 * return 0. j_list_lock is dropped in this case.
86 */
87static int inverted_lock(journal_t *journal, struct buffer_head *bh)
88{
89 if (!jbd_trylock_bh_state(bh)) {
90 spin_unlock(&journal->j_list_lock);
91 schedule();
92 return 0;
93 }
94 return 1;
95}
96
97/*
98 * Done it all: now submit the commit record. We should have 83 * Done it all: now submit the commit record. We should have
99 * cleaned up our previous buffers by now, so if we are in abort 84 * cleaned up our previous buffers by now, so if we are in abort
100 * mode we can now just skip the rest of the journal write 85 * mode we can now just skip the rest of the journal write
@@ -200,162 +185,6 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
200} 185}
201 186
202/* 187/*
203 * Wait for all submitted IO to complete.
204 */
205static int journal_wait_on_locked_list(journal_t *journal,
206 transaction_t *commit_transaction)
207{
208 int ret = 0;
209 struct journal_head *jh;
210
211 while (commit_transaction->t_locked_list) {
212 struct buffer_head *bh;
213
214 jh = commit_transaction->t_locked_list->b_tprev;
215 bh = jh2bh(jh);
216 get_bh(bh);
217 if (buffer_locked(bh)) {
218 spin_unlock(&journal->j_list_lock);
219 wait_on_buffer(bh);
220 if (unlikely(!buffer_uptodate(bh)))
221 ret = -EIO;
222 spin_lock(&journal->j_list_lock);
223 }
224 if (!inverted_lock(journal, bh)) {
225 put_bh(bh);
226 spin_lock(&journal->j_list_lock);
227 continue;
228 }
229 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
230 __jbd2_journal_unfile_buffer(jh);
231 jbd_unlock_bh_state(bh);
232 jbd2_journal_remove_journal_head(bh);
233 put_bh(bh);
234 } else {
235 jbd_unlock_bh_state(bh);
236 }
237 put_bh(bh);
238 cond_resched_lock(&journal->j_list_lock);
239 }
240 return ret;
241 }
242
243static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
244{
245 int i;
246
247 for (i = 0; i < bufs; i++) {
248 wbuf[i]->b_end_io = end_buffer_write_sync;
249 /* We use-up our safety reference in submit_bh() */
250 submit_bh(WRITE, wbuf[i]);
251 }
252}
253
254/*
255 * Submit all the data buffers to disk
256 */
257static void journal_submit_data_buffers(journal_t *journal,
258 transaction_t *commit_transaction)
259{
260 struct journal_head *jh;
261 struct buffer_head *bh;
262 int locked;
263 int bufs = 0;
264 struct buffer_head **wbuf = journal->j_wbuf;
265
266 /*
267 * Whenever we unlock the journal and sleep, things can get added
268 * onto ->t_sync_datalist, so we have to keep looping back to
269 * write_out_data until we *know* that the list is empty.
270 *
271 * Cleanup any flushed data buffers from the data list. Even in
272 * abort mode, we want to flush this out as soon as possible.
273 */
274write_out_data:
275 cond_resched();
276 spin_lock(&journal->j_list_lock);
277
278 while (commit_transaction->t_sync_datalist) {
279 jh = commit_transaction->t_sync_datalist;
280 bh = jh2bh(jh);
281 locked = 0;
282
283 /* Get reference just to make sure buffer does not disappear
284 * when we are forced to drop various locks */
285 get_bh(bh);
286 /* If the buffer is dirty, we need to submit IO and hence
287 * we need the buffer lock. We try to lock the buffer without
288 * blocking. If we fail, we need to drop j_list_lock and do
289 * blocking lock_buffer().
290 */
291 if (buffer_dirty(bh)) {
292 if (test_set_buffer_locked(bh)) {
293 BUFFER_TRACE(bh, "needs blocking lock");
294 spin_unlock(&journal->j_list_lock);
295 /* Write out all data to prevent deadlocks */
296 journal_do_submit_data(wbuf, bufs);
297 bufs = 0;
298 lock_buffer(bh);
299 spin_lock(&journal->j_list_lock);
300 }
301 locked = 1;
302 }
303 /* We have to get bh_state lock. Again out of order, sigh. */
304 if (!inverted_lock(journal, bh)) {
305 jbd_lock_bh_state(bh);
306 spin_lock(&journal->j_list_lock);
307 }
308 /* Someone already cleaned up the buffer? */
309 if (!buffer_jbd(bh)
310 || jh->b_transaction != commit_transaction
311 || jh->b_jlist != BJ_SyncData) {
312 jbd_unlock_bh_state(bh);
313 if (locked)
314 unlock_buffer(bh);
315 BUFFER_TRACE(bh, "already cleaned up");
316 put_bh(bh);
317 continue;
318 }
319 if (locked && test_clear_buffer_dirty(bh)) {
320 BUFFER_TRACE(bh, "needs writeout, adding to array");
321 wbuf[bufs++] = bh;
322 __jbd2_journal_file_buffer(jh, commit_transaction,
323 BJ_Locked);
324 jbd_unlock_bh_state(bh);
325 if (bufs == journal->j_wbufsize) {
326 spin_unlock(&journal->j_list_lock);
327 journal_do_submit_data(wbuf, bufs);
328 bufs = 0;
329 goto write_out_data;
330 }
331 } else if (!locked && buffer_locked(bh)) {
332 __jbd2_journal_file_buffer(jh, commit_transaction,
333 BJ_Locked);
334 jbd_unlock_bh_state(bh);
335 put_bh(bh);
336 } else {
337 BUFFER_TRACE(bh, "writeout complete: unfile");
338 __jbd2_journal_unfile_buffer(jh);
339 jbd_unlock_bh_state(bh);
340 if (locked)
341 unlock_buffer(bh);
342 jbd2_journal_remove_journal_head(bh);
343 /* Once for our safety reference, once for
344 * jbd2_journal_remove_journal_head() */
345 put_bh(bh);
346 put_bh(bh);
347 }
348
349 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
350 spin_unlock(&journal->j_list_lock);
351 goto write_out_data;
352 }
353 }
354 spin_unlock(&journal->j_list_lock);
355 journal_do_submit_data(wbuf, bufs);
356}
357
358/*
359 * Submit all the data buffers of inode associated with the transaction to 188 * Submit all the data buffers of inode associated with the transaction to
360 * disk. 189 * disk.
361 * 190 *
@@ -602,42 +431,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
602 * Now start flushing things to disk, in the order they appear 431 * Now start flushing things to disk, in the order they appear
603 * on the transaction lists. Data blocks go first. 432 * on the transaction lists. Data blocks go first.
604 */ 433 */
605 err = 0;
606 journal_submit_data_buffers(journal, commit_transaction);
607 err = journal_submit_inode_data_buffers(journal, commit_transaction); 434 err = journal_submit_inode_data_buffers(journal, commit_transaction);
608 if (err) 435 if (err)
609 jbd2_journal_abort(journal, err); 436 jbd2_journal_abort(journal, err);
610 437
611 /*
612 * Wait for all previously submitted IO to complete if commit
613 * record is to be written synchronously.
614 */
615 spin_lock(&journal->j_list_lock);
616 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
617 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
618 err = journal_wait_on_locked_list(journal,
619 commit_transaction);
620
621 spin_unlock(&journal->j_list_lock);
622
623 if (err)
624 jbd2_journal_abort(journal, err);
625
626 jbd2_journal_write_revoke_records(journal, commit_transaction); 438 jbd2_journal_write_revoke_records(journal, commit_transaction);
627 439
628 jbd_debug(3, "JBD: commit phase 2\n"); 440 jbd_debug(3, "JBD: commit phase 2\n");
629 441
630 /* 442 /*
631 * If we found any dirty or locked buffers, then we should have
632 * looped back up to the write_out_data label. If there weren't
633 * any then journal_clean_data_list should have wiped the list
634 * clean by now, so check that it is in fact empty.
635 */
636 J_ASSERT (commit_transaction->t_sync_datalist == NULL);
637
638 jbd_debug (3, "JBD: commit phase 3\n");
639
640 /*
641 * Way to go: we have now written out all of the data for a 443 * Way to go: we have now written out all of the data for a
642 * transaction! Now comes the tricky part: we need to write out 444 * transaction! Now comes the tricky part: we need to write out
643 * metadata. Loop over the transaction's entire buffer list: 445 * metadata. Loop over the transaction's entire buffer list:
@@ -655,6 +457,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
655 J_ASSERT(commit_transaction->t_nr_buffers <= 457 J_ASSERT(commit_transaction->t_nr_buffers <=
656 commit_transaction->t_outstanding_credits); 458 commit_transaction->t_outstanding_credits);
657 459
460 err = 0;
658 descriptor = NULL; 461 descriptor = NULL;
659 bufs = 0; 462 bufs = 0;
660 while (commit_transaction->t_buffers) { 463 while (commit_transaction->t_buffers) {
@@ -829,13 +632,6 @@ start_journal_io:
829 &cbh, crc32_sum); 632 &cbh, crc32_sum);
830 if (err) 633 if (err)
831 __jbd2_journal_abort_hard(journal); 634 __jbd2_journal_abort_hard(journal);
832
833 spin_lock(&journal->j_list_lock);
834 err = journal_wait_on_locked_list(journal,
835 commit_transaction);
836 spin_unlock(&journal->j_list_lock);
837 if (err)
838 __jbd2_journal_abort_hard(journal);
839 } 635 }
840 636
841 /* 637 /*
@@ -860,7 +656,7 @@ start_journal_io:
860 so we incur less scheduling load. 656 so we incur less scheduling load.
861 */ 657 */
862 658
863 jbd_debug(3, "JBD: commit phase 4\n"); 659 jbd_debug(3, "JBD: commit phase 3\n");
864 660
865 /* 661 /*
866 * akpm: these are BJ_IO, and j_list_lock is not needed. 662 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -919,7 +715,7 @@ wait_for_iobuf:
919 715
920 J_ASSERT (commit_transaction->t_shadow_list == NULL); 716 J_ASSERT (commit_transaction->t_shadow_list == NULL);
921 717
922 jbd_debug(3, "JBD: commit phase 5\n"); 718 jbd_debug(3, "JBD: commit phase 4\n");
923 719
924 /* Here we wait for the revoke record and descriptor record buffers */ 720 /* Here we wait for the revoke record and descriptor record buffers */
925 wait_for_ctlbuf: 721 wait_for_ctlbuf:
@@ -946,7 +742,7 @@ wait_for_iobuf:
946 /* AKPM: bforget here */ 742 /* AKPM: bforget here */
947 } 743 }
948 744
949 jbd_debug(3, "JBD: commit phase 6\n"); 745 jbd_debug(3, "JBD: commit phase 5\n");
950 746
951 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 747 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
952 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 748 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -966,9 +762,8 @@ wait_for_iobuf:
966 transaction can be removed from any checkpoint list it was on 762 transaction can be removed from any checkpoint list it was on
967 before. */ 763 before. */
968 764
969 jbd_debug(3, "JBD: commit phase 7\n"); 765 jbd_debug(3, "JBD: commit phase 6\n");
970 766
971 J_ASSERT(commit_transaction->t_sync_datalist == NULL);
972 J_ASSERT(list_empty(&commit_transaction->t_inode_list)); 767 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
973 J_ASSERT(commit_transaction->t_buffers == NULL); 768 J_ASSERT(commit_transaction->t_buffers == NULL);
974 J_ASSERT(commit_transaction->t_checkpoint_list == NULL); 769 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
@@ -1090,7 +885,7 @@ restart_loop:
1090 885
1091 /* Done with this transaction! */ 886 /* Done with this transaction! */
1092 887
1093 jbd_debug(3, "JBD: commit phase 8\n"); 888 jbd_debug(3, "JBD: commit phase 7\n");
1094 889
1095 J_ASSERT(commit_transaction->t_state == T_COMMIT); 890 J_ASSERT(commit_transaction->t_state == T_COMMIT);
1096 891