author		Jan Kara <jack@suse.cz>		2015-06-18 10:52:29 -0400
committer	Jan Kara <jack@suse.com>	2015-07-23 14:59:40 -0400
commit		c290ea01abb7907fde602f3ba55905ef10a37477 (patch)
tree		67b3f47105259178034ef42d096bb5accd9407a3 /fs/jbd/checkpoint.c
parent		82ff50b222d8ac645cdeba974c612c9eef01c3dd (diff)
fs: Remove ext3 filesystem driver

The functionality of ext3 is fully supported by the ext4 driver. Major distributions (SUSE, RedHat) have been using the ext4 driver to handle ext3 filesystems for quite some time. There is some ugliness in mm resulting from jbd cleaning buffers in a dirty page without cleaning the page's dirty bit, and support for buffer bouncing in the block layer when stable pages are required exists only because of jbd. So let's remove the ext3 driver. This saves us some 28k lines of duplicated code.

Acked-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/jbd/checkpoint.c')
-rw-r--r--	fs/jbd/checkpoint.c	782
1 file changed, 0 insertions(+), 782 deletions(-)
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
deleted file mode 100644
index 08c03044abdd..000000000000
--- a/fs/jbd/checkpoint.c
+++ /dev/null
@@ -1,782 +0,0 @@
/*
 * linux/fs/jbd/checkpoint.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
 *
 * Copyright 1999 Red Hat Software --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Checkpoint routines for the generic filesystem journaling code.
 * Part of the ext2fs journaling system.
 *
 * Checkpointing is the process of ensuring that a section of the log is
 * committed fully to disk, so that that portion of the log can be
 * reused.
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <trace/events/jbd.h>

/*
 * Unlink a buffer from a transaction checkpoint list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink_first(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	jh->b_cpnext->b_cpprev = jh->b_cpprev;
	jh->b_cpprev->b_cpnext = jh->b_cpnext;
	if (transaction->t_checkpoint_list == jh) {
		transaction->t_checkpoint_list = jh->b_cpnext;
		if (transaction->t_checkpoint_list == jh)
			transaction->t_checkpoint_list = NULL;
	}
}
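
/*
 * The checkpoint lists are circular and doubly linked, so a list with a
 * single element points back at itself through b_cpnext/b_cpprev. That
 * is why the inner test above works: if jh was the last element,
 * jh->b_cpnext is jh itself and the list head is reset to NULL. For an
 * illustrative list A <-> B <-> C with the head at A, unlinking A
 * splices B <-> C together and moves the head to B.
 */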

/*
 * Unlink a buffer from a transaction checkpoint(io) list.
 *
 * Called with j_list_lock held.
 */
static inline void __buffer_unlink(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	__buffer_unlink_first(jh);
	if (transaction->t_checkpoint_io_list == jh) {
		transaction->t_checkpoint_io_list = jh->b_cpnext;
		if (transaction->t_checkpoint_io_list == jh)
			transaction->t_checkpoint_io_list = NULL;
	}
}

/*
 * Move a buffer from the checkpoint list to the checkpoint io list
 *
 * Called with j_list_lock held
 */
static inline void __buffer_relink_io(struct journal_head *jh)
{
	transaction_t *transaction = jh->b_cp_transaction;

	__buffer_unlink_first(jh);

	if (!transaction->t_checkpoint_io_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_io_list;
		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_io_list = jh;
}
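
/*
 * Because each relinked buffer becomes the new head of
 * t_checkpoint_io_list, the io list is ordered newest-first, and
 * __wait_cp_io() below ends up draining buffers in the reverse of
 * their submission order, as its comment notes.
 */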

/*
 * Try to release a checkpointed buffer from its transaction.
 * Returns 1 if we released it and 2 if we also released the
 * whole transaction.
 *
 * Requires j_list_lock
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __try_to_free_cp_buf(struct journal_head *jh)
{
	int ret = 0;
	struct buffer_head *bh = jh2bh(jh);

	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
		/*
		 * Get our reference so that bh cannot be freed before
		 * we unlock it
		 */
		get_bh(bh);
		JBUFFER_TRACE(jh, "remove from checkpoint list");
		ret = __journal_remove_checkpoint(jh) + 1;
		jbd_unlock_bh_state(bh);
		BUFFER_TRACE(bh, "release");
		__brelse(bh);
	} else {
		jbd_unlock_bh_state(bh);
	}
	return ret;
}

/*
 * __log_wait_for_space: wait until there is space in the journal.
 *
 * Called under j_state_lock *only*. It will be unlocked if we have to wait
 * for a checkpoint to free up some space in the log.
 */
void __log_wait_for_space(journal_t *journal)
{
	int nblocks, space_left;
	assert_spin_locked(&journal->j_state_lock);

	nblocks = jbd_space_needed(journal);
	while (__log_space_left(journal) < nblocks) {
		if (journal->j_flags & JFS_ABORT)
			return;
		spin_unlock(&journal->j_state_lock);
		mutex_lock(&journal->j_checkpoint_mutex);

		/*
		 * Test again, another process may have checkpointed while we
		 * were waiting for the checkpoint lock. If there are no
		 * transactions ready to be checkpointed, try to recover
		 * journal space by calling cleanup_journal_tail(), and if
		 * that doesn't work, by waiting for the currently committing
		 * transaction to complete. If there is absolutely no way
		 * to make progress, this is either a BUG or corrupted
		 * filesystem, so abort the journal and leave a stack
		 * trace for forensic evidence.
		 */
		spin_lock(&journal->j_state_lock);
		spin_lock(&journal->j_list_lock);
		nblocks = jbd_space_needed(journal);
		space_left = __log_space_left(journal);
		if (space_left < nblocks) {
			int chkpt = journal->j_checkpoint_transactions != NULL;
			tid_t tid = 0;

			if (journal->j_committing_transaction)
				tid = journal->j_committing_transaction->t_tid;
			spin_unlock(&journal->j_list_lock);
			spin_unlock(&journal->j_state_lock);
			if (chkpt) {
				log_do_checkpoint(journal);
			} else if (cleanup_journal_tail(journal) == 0) {
				/* We were able to recover space; yay! */
				;
			} else if (tid) {
				log_wait_commit(journal, tid);
			} else {
				printk(KERN_ERR "%s: needed %d blocks and "
				       "only had %d space available\n",
				       __func__, nblocks, space_left);
				printk(KERN_ERR "%s: no way to get more "
				       "journal space\n", __func__);
				WARN_ON(1);
				journal_abort(journal, 0);
			}
			spin_lock(&journal->j_state_lock);
		} else {
			spin_unlock(&journal->j_list_lock);
		}
		mutex_unlock(&journal->j_checkpoint_mutex);
	}
}
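
/*
 * The escalation order above: checkpoint if there is anything to
 * checkpoint, otherwise try to advance the log tail, otherwise wait for
 * the committing transaction, and when none of that can free space,
 * warn and abort the journal since no further progress is possible.
 */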

/*
 * We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
 * The caller must restart a list walk. Wait for someone else to run
 * jbd_unlock_bh_state().
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
	__releases(journal->j_list_lock)
{
	get_bh(bh);
	spin_unlock(&journal->j_list_lock);
	jbd_lock_bh_state(bh);
	jbd_unlock_bh_state(bh);
	put_bh(bh);
}
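
/*
 * Taking and immediately dropping the bh_state lock is sufficient here:
 * once jbd_lock_bh_state() returns, whoever held the lock inside
 * j_list_lock has finished with the buffer, so the caller can retake
 * j_list_lock and restart its walk.
 */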

/*
 * Clean up transaction's list of buffers submitted for io.
 * We wait for any pending IO to complete and remove any clean
 * buffers. Note that we take the buffers in the opposite ordering
 * from the one in which they were submitted for IO.
 *
 * Return 0 on success, and return <0 if some buffers have failed
 * to be written out.
 *
 * Called with j_list_lock held.
 */
static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
{
	struct journal_head *jh;
	struct buffer_head *bh;
	tid_t this_tid;
	int released = 0;
	int ret = 0;

	this_tid = transaction->t_tid;
restart:
	/* Did somebody clean up the transaction in the meanwhile? */
	if (journal->j_checkpoint_transactions != transaction ||
	    transaction->t_tid != this_tid)
		return ret;
	while (!released && transaction->t_checkpoint_io_list) {
		jh = transaction->t_checkpoint_io_list;
		bh = jh2bh(jh);
		if (!jbd_trylock_bh_state(bh)) {
			jbd_sync_bh(journal, bh);
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		get_bh(bh);
		if (buffer_locked(bh)) {
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			wait_on_buffer(bh);
			/* the journal_head may have gone by now */
			BUFFER_TRACE(bh, "brelse");
			__brelse(bh);
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		if (unlikely(buffer_write_io_error(bh)))
			ret = -EIO;

		/*
		 * Now in whatever state the buffer currently is, we know that
		 * it has been written out and so we can drop it from the list
		 */
		released = __journal_remove_checkpoint(jh);
		jbd_unlock_bh_state(bh);
		__brelse(bh);
	}

	return ret;
}

#define NR_BATCH	64

static void
__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
	int i;
	struct blk_plug plug;

	blk_start_plug(&plug);
	for (i = 0; i < *batch_count; i++)
		write_dirty_buffer(bhs[i], WRITE_SYNC);
	blk_finish_plug(&plug);

	for (i = 0; i < *batch_count; i++) {
		struct buffer_head *bh = bhs[i];
		clear_buffer_jwrite(bh);
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	*batch_count = 0;
}
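
/*
 * The blk_plug around the submission loop holds the queued writes back
 * until blk_finish_plug(), so the block layer sees the whole batch at
 * once and can merge and sort up to NR_BATCH requests instead of
 * dispatching them one at a time.
 */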

/*
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
 * scan of the checkpoint list. Return <0 if the buffer has failed to
 * be written out.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
 */
static int __process_buffer(journal_t *journal, struct journal_head *jh,
			    struct buffer_head **bhs, int *batch_count)
{
	struct buffer_head *bh = jh2bh(jh);
	int ret = 0;

	if (buffer_locked(bh)) {
		get_bh(bh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		wait_on_buffer(bh);
		/* the journal_head may have gone by now */
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
		ret = 1;
	} else if (jh->b_transaction != NULL) {
		transaction_t *t = jh->b_transaction;
		tid_t tid = t->t_tid;

		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		log_start_commit(journal, tid);
		log_wait_commit(journal, tid);
		ret = 1;
	} else if (!buffer_dirty(bh)) {
		ret = 1;
		if (unlikely(buffer_write_io_error(bh)))
			ret = -EIO;
		get_bh(bh);
		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
		BUFFER_TRACE(bh, "remove from checkpoint");
		__journal_remove_checkpoint(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		__brelse(bh);
	} else {
		/*
		 * Important: we are about to write the buffer, and
		 * possibly block, while still holding the journal lock.
		 * We cannot afford to let the transaction logic start
		 * messing around with this buffer before we write it to
		 * disk, as that would break recoverability.
		 */
		BUFFER_TRACE(bh, "queue");
		get_bh(bh);
		J_ASSERT_BH(bh, !buffer_jwrite(bh));
		set_buffer_jwrite(bh);
		bhs[*batch_count] = bh;
		__buffer_relink_io(jh);
		jbd_unlock_bh_state(bh);
		(*batch_count)++;
		if (*batch_count == NR_BATCH) {
			spin_unlock(&journal->j_list_lock);
			__flush_batch(journal, bhs, batch_count);
			ret = 1;
		}
	}
	return ret;
}
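
/*
 * Summary of the four cases above: a locked buffer is waited on; a
 * buffer still attached to a transaction forces that transaction to
 * commit; a clean buffer is dropped from the checkpoint list on the
 * spot; and a dirty, unattached buffer is queued into the batch and
 * moved to the io list, the batch being flushed once it reaches
 * NR_BATCH entries.
 */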

/*
 * Perform an actual checkpoint. We take the first transaction on the
 * list of transactions to be checkpointed and send all its buffers
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
 * Called with j_checkpoint_mutex held.
 */
int log_do_checkpoint(journal_t *journal)
{
	transaction_t *transaction;
	tid_t this_tid;
	int result;

	jbd_debug(1, "Start checkpoint\n");

	/*
	 * First thing: if there are any transactions in the log which
	 * don't need checkpointing, just eliminate them from the
	 * journal straight away.
	 */
	result = cleanup_journal_tail(journal);
	trace_jbd_checkpoint(journal, result);
	jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
	if (result <= 0)
		return result;

	/*
	 * OK, we need to start writing disk blocks. Take one transaction
	 * and write it.
	 */
	result = 0;
	spin_lock(&journal->j_list_lock);
	if (!journal->j_checkpoint_transactions)
		goto out;
	transaction = journal->j_checkpoint_transactions;
	this_tid = transaction->t_tid;
restart:
	/*
	 * If someone cleaned up this transaction while we slept, we're
	 * done (maybe it's a new transaction, but it fell at the same
	 * address).
	 */
	if (journal->j_checkpoint_transactions == transaction &&
	    transaction->t_tid == this_tid) {
		int batch_count = 0;
		struct buffer_head *bhs[NR_BATCH];
		struct journal_head *jh;
		int retry = 0, err;

		while (!retry && transaction->t_checkpoint_list) {
			struct buffer_head *bh;

			jh = transaction->t_checkpoint_list;
			bh = jh2bh(jh);
			if (!jbd_trylock_bh_state(bh)) {
				jbd_sync_bh(journal, bh);
				retry = 1;
				break;
			}
			retry = __process_buffer(journal, jh, bhs, &batch_count);
			if (retry < 0 && !result)
				result = retry;
			if (!retry && (need_resched() ||
				       spin_needbreak(&journal->j_list_lock))) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
				break;
			}
		}

		if (batch_count) {
			if (!retry) {
				spin_unlock(&journal->j_list_lock);
				retry = 1;
			}
			__flush_batch(journal, bhs, &batch_count);
		}

		if (retry) {
			spin_lock(&journal->j_list_lock);
			goto restart;
		}
		/*
		 * Now we have cleaned up the first transaction's checkpoint
		 * list. Let's clean up the second one
		 */
		err = __wait_cp_io(journal, transaction);
		if (!result)
			result = err;
	}
out:
	spin_unlock(&journal->j_list_lock);
	if (result < 0)
		journal_abort(journal, result);
	else
		result = cleanup_journal_tail(journal);

	return (result < 0) ? result : 0;
}

/*
 * Check the list of checkpoint transactions for the journal to see if
 * we have already got rid of any since the last update of the log tail
 * in the journal superblock. If so, we can instantly roll the
 * superblock forward to remove those transactions from the log.
 *
 * Return <0 on error, 0 on success, 1 if there was nothing to clean up.
 *
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts. Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
 * even in abort state, but we must not update the super block if
 * checkpointing may have failed. Otherwise, we would lose some metadata
 * buffers which should be written-back to the filesystem.
 */

int cleanup_journal_tail(journal_t *journal)
{
	transaction_t *transaction;
	tid_t first_tid;
	unsigned int blocknr, freed;

	if (is_journal_aborted(journal))
		return 1;

	/*
	 * OK, work out the oldest transaction remaining in the log, and
	 * the log block it starts at.
	 *
	 * If the log is now empty, we need to work out which is the
	 * next transaction ID we will write, and where it will
	 * start.
	 */
	spin_lock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	transaction = journal->j_checkpoint_transactions;
	if (transaction) {
		first_tid = transaction->t_tid;
		blocknr = transaction->t_log_start;
	} else if ((transaction = journal->j_committing_transaction) != NULL) {
		first_tid = transaction->t_tid;
		blocknr = transaction->t_log_start;
	} else if ((transaction = journal->j_running_transaction) != NULL) {
		first_tid = transaction->t_tid;
		blocknr = journal->j_head;
	} else {
		first_tid = journal->j_transaction_sequence;
		blocknr = journal->j_head;
	}
	spin_unlock(&journal->j_list_lock);
	J_ASSERT(blocknr != 0);

	/* If the oldest pinned transaction is at the tail of the log
	   already then there's not much we can do right now. */
	if (journal->j_tail_sequence == first_tid) {
		spin_unlock(&journal->j_state_lock);
		return 1;
	}
	spin_unlock(&journal->j_state_lock);

	/*
	 * We need to make sure that any blocks that were recently written out
	 * --- perhaps by log_do_checkpoint() --- are flushed out before we
	 * drop the transactions from the journal. Similarly we need to be
	 * sure the superblock makes it to disk before the next transaction
	 * starts reusing freed space (otherwise we could replay some blocks
	 * of the new transaction thinking they belong to the old one). So we
	 * use WRITE_FLUSH_FUA. It's unlikely this will be necessary,
	 * especially with an appropriately sized journal, but we need this
	 * to guarantee correctness. Fortunately cleanup_journal_tail()
	 * doesn't get called all that often.
	 */
	journal_update_sb_log_tail(journal, first_tid, blocknr,
				   WRITE_FLUSH_FUA);

	spin_lock(&journal->j_state_lock);
	/* OK, update the superblock to recover the freed space.
	 * Physical blocks come first: have we wrapped beyond the end of
	 * the log? */
	freed = blocknr - journal->j_tail;
	if (blocknr < journal->j_tail)
		freed = freed + journal->j_last - journal->j_first;

	trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed);
	jbd_debug(1,
		  "Cleaning journal tail from %d to %d (offset %u), "
		  "freeing %u\n",
		  journal->j_tail_sequence, first_tid, blocknr, freed);

	journal->j_free += freed;
	journal->j_tail_sequence = first_tid;
	journal->j_tail = blocknr;
	spin_unlock(&journal->j_state_lock);
	return 0;
}
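
/*
 * A worked example of the wrap-around arithmetic above, with
 * illustrative numbers and assuming j_last is one past the last usable
 * block: for j_first == 1, j_last == 1024, j_tail == 1000 and
 * blocknr == 10, the freed region wraps around the end of the log and
 * covers blocks 1000..1023 plus 1..9, i.e. 10 - 1000 + (1024 - 1) == 33
 * blocks.
 */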


/* Checkpoint list management */

/*
 * journal_clean_one_cp_list
 *
 * Find all the written-back checkpoint buffers in the given list and release
 * them.
 *
 * Called with j_list_lock held.
 * Returns number of buffers reaped (for debug)
 */
static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
{
	struct journal_head *last_jh;
	struct journal_head *next_jh = jh;
	int ret, freed = 0;

	*released = 0;
	if (!jh)
		return 0;

	last_jh = jh->b_cpprev;
	do {
		jh = next_jh;
		next_jh = jh->b_cpnext;
		/* Use trylock because of the ranking */
		if (jbd_trylock_bh_state(jh2bh(jh))) {
			ret = __try_to_free_cp_buf(jh);
			if (ret) {
				freed++;
				if (ret == 2) {
					*released = 1;
					return freed;
				}
			}
		}
		/*
		 * This function only frees up some memory
		 * if possible so we don't have an obligation
		 * to finish processing. Bail out if preemption
		 * requested:
		 */
		if (need_resched())
			return freed;
	} while (jh != last_jh);

	return freed;
}

/*
 * journal_clean_checkpoint_list
 *
 * Find all the written-back checkpoint buffers in the journal and release them.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 * Returns number of buffers reaped (for debug)
 */
int __journal_clean_checkpoint_list(journal_t *journal)
{
	transaction_t *transaction, *last_transaction, *next_transaction;
	int ret = 0;
	int released;

	transaction = journal->j_checkpoint_transactions;
	if (!transaction)
		goto out;

	last_transaction = transaction->t_cpprev;
	next_transaction = transaction;
	do {
		transaction = next_transaction;
		next_transaction = transaction->t_cpnext;
		ret += journal_clean_one_cp_list(transaction->t_checkpoint_list,
						 &released);
		/*
		 * This function only frees up some memory if possible so we
		 * don't have an obligation to finish processing. Bail out if
		 * preemption requested:
		 */
		if (need_resched())
			goto out;
		if (released)
			continue;
		/*
		 * It is essential that we are as careful as in the case of
		 * t_checkpoint_list with removing the buffer from the list as
		 * we can possibly see not yet submitted buffers on io_list
		 */
		ret += journal_clean_one_cp_list(transaction->t_checkpoint_io_list,
						 &released);
		if (need_resched())
			goto out;
	} while (transaction != last_transaction);
out:
	return ret;
}

/*
 * journal_remove_checkpoint: called after a buffer has been committed
 * to disk (either by being write-back flushed to disk, or being
 * committed to the log).
 *
 * We cannot safely clean a transaction out of the log until all of the
 * buffer updates committed in that transaction have safely been stored
 * elsewhere on disk. To achieve this, all of the buffers in a
 * transaction need to be maintained on the transaction's checkpoint
 * lists until they have been rewritten, at which point this function is
 * called to remove the buffer from the existing transaction's
 * checkpoint lists.
 *
 * The function returns 1 if it frees the transaction, 0 otherwise.
 * The function can free jh and bh.
 *
 * This function is called with j_list_lock held.
 * This function is called with jbd_lock_bh_state(jh2bh(jh))
 */
int __journal_remove_checkpoint(struct journal_head *jh)
{
	transaction_t *transaction;
	journal_t *journal;
	int ret = 0;

	JBUFFER_TRACE(jh, "entry");

	if ((transaction = jh->b_cp_transaction) == NULL) {
		JBUFFER_TRACE(jh, "not on transaction");
		goto out;
	}
	journal = transaction->t_journal;

	JBUFFER_TRACE(jh, "removing from transaction");
	__buffer_unlink(jh);
	jh->b_cp_transaction = NULL;
	journal_put_journal_head(jh);

	if (transaction->t_checkpoint_list != NULL ||
	    transaction->t_checkpoint_io_list != NULL)
		goto out;

	/*
	 * There is one special case to worry about: if we have just pulled the
	 * buffer off a running or committing transaction's checkpoint list,
	 * then even if the checkpoint list is empty, the transaction obviously
	 * cannot be dropped!
	 *
	 * The locking here around t_state is a bit sleazy.
	 * See the comment at the end of journal_commit_transaction().
	 */
	if (transaction->t_state != T_FINISHED)
		goto out;

	/* OK, that was the last buffer for the transaction: we can now
	   safely remove this transaction from the log */
	__journal_drop_transaction(journal, transaction);

	/* Just in case anybody was waiting for more transactions to be
	   checkpointed... */
	wake_up(&journal->j_wait_logspace);
	ret = 1;
out:
	return ret;
}

/*
 * journal_insert_checkpoint: put a committed buffer onto a checkpoint
 * list so that we know when it is safe to clean the transaction out of
 * the log.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __journal_insert_checkpoint(struct journal_head *jh,
				 transaction_t *transaction)
{
	JBUFFER_TRACE(jh, "entry");
	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);

	/* Get reference for checkpointing transaction */
	journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	if (!transaction->t_checkpoint_list) {
		jh->b_cpnext = jh->b_cpprev = jh;
	} else {
		jh->b_cpnext = transaction->t_checkpoint_list;
		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
		jh->b_cpprev->b_cpnext = jh;
		jh->b_cpnext->b_cpprev = jh;
	}
	transaction->t_checkpoint_list = jh;
}
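
/*
 * The singleton case above (jh->b_cpnext = jh->b_cpprev = jh)
 * establishes the self-referencing shape that __buffer_unlink_first()
 * relies on when it tests whether it just removed the last element.
 */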

/*
 * We've finished with this transaction structure: adios...
 *
 * The transaction must have no links except for the checkpoint by this
 * point.
 *
 * Called with the journal locked.
 * Called with j_list_lock held.
 */
void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
{
	assert_spin_locked(&journal->j_list_lock);
	if (transaction->t_cpnext) {
		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions =
				transaction->t_cpnext;
		if (journal->j_checkpoint_transactions == transaction)
			journal->j_checkpoint_transactions = NULL;
	}

	J_ASSERT(transaction->t_state == T_FINISHED);
	J_ASSERT(transaction->t_buffers == NULL);
	J_ASSERT(transaction->t_sync_datalist == NULL);
	J_ASSERT(transaction->t_forget == NULL);
	J_ASSERT(transaction->t_iobuf_list == NULL);
	J_ASSERT(transaction->t_shadow_list == NULL);
	J_ASSERT(transaction->t_log_list == NULL);
	J_ASSERT(transaction->t_checkpoint_list == NULL);
	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
	J_ASSERT(transaction->t_updates == 0);
	J_ASSERT(journal->j_committing_transaction != transaction);
	J_ASSERT(journal->j_running_transaction != transaction);

	trace_jbd_drop_transaction(journal, transaction);
	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
	kfree(transaction);
}