aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2006-01-06 03:19:55 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:33:59 -0500
commit f93ea411b73594f7d144855fd34278bcf34a9afc (patch)
tree 50419ba9250be6f923470b4eff73370512c00267
parent 6fe2e70bbed3995d930f39452fb6ce3be7dc47dc (diff)
[PATCH] jbd: split checkpoint lists
Split the checkpoint list of the transaction into two lists. In the first list we keep the buffers that need to be submitted for IO. In the second list we keep the buffers that were already submitted, for which we just have to wait for the IO to complete. This should simplify the handling of checkpoint lists a bit and may eventually also bring a performance gain.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/jbd/checkpoint.c 418
-rw-r--r-- include/linux/jbd.h 8
2 files changed, 248 insertions, 178 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 014a51fd00d7..cb3cef525c3b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,75 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction. 27 * Unlink a buffer from a transaction checkpoint list.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31
32static inline void __buffer_unlink(struct journal_head *jh) 32static void __buffer_unlink_first(struct journal_head *jh)
33{ 33{
34 transaction_t *transaction; 34 transaction_t *transaction;
35 35
36 transaction = jh->b_cp_transaction; 36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
38 37
39 jh->b_cpnext->b_cpprev = jh->b_cpprev; 38 jh->b_cpnext->b_cpprev = jh->b_cpprev;
40 jh->b_cpprev->b_cpnext = jh->b_cpnext; 39 jh->b_cpprev->b_cpnext = jh->b_cpnext;
41 if (transaction->t_checkpoint_list == jh) 40 if (transaction->t_checkpoint_list == jh) {
42 transaction->t_checkpoint_list = jh->b_cpnext; 41 transaction->t_checkpoint_list = jh->b_cpnext;
43 if (transaction->t_checkpoint_list == jh) 42 if (transaction->t_checkpoint_list == jh)
44 transaction->t_checkpoint_list = NULL; 43 transaction->t_checkpoint_list = NULL;
44 }
45}
46
47/*
48 * Unlink a buffer from a transaction checkpoint(io) list.
49 *
50 * Called with j_list_lock held.
51 */
52
53static inline void __buffer_unlink(struct journal_head *jh)
54{
55 transaction_t *transaction;
56
57 transaction = jh->b_cp_transaction;
58
59 __buffer_unlink_first(jh);
60 if (transaction->t_checkpoint_io_list == jh) {
61 transaction->t_checkpoint_io_list = jh->b_cpnext;
62 if (transaction->t_checkpoint_io_list == jh)
63 transaction->t_checkpoint_io_list = NULL;
64 }
65}
66
67/*
68 * Move a buffer from the checkpoint list to the checkpoint io list
69 *
70 * Called with j_list_lock held
71 */
72
73static inline void __buffer_relink_io(struct journal_head *jh)
74{
75 transaction_t *transaction;
76
77 transaction = jh->b_cp_transaction;
78 __buffer_unlink_first(jh);
79
80 if (!transaction->t_checkpoint_io_list) {
81 jh->b_cpnext = jh->b_cpprev = jh;
82 } else {
83 jh->b_cpnext = transaction->t_checkpoint_io_list;
84 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
85 jh->b_cpprev->b_cpnext = jh;
86 jh->b_cpnext->b_cpprev = jh;
87 }
88 transaction->t_checkpoint_io_list = jh;
45} 89}
46 90
47/* 91/*
48 * Try to release a checkpointed buffer from its transaction. 92 * Try to release a checkpointed buffer from its transaction.
49 * Returns 1 if we released it. 93 * Returns 1 if we released it and 2 if we also released the
94 * whole transaction.
95 *
50 * Requires j_list_lock 96 * Requires j_list_lock
51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 97 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
52 */ 98 */
@@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
57 103
58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 104 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
59 JBUFFER_TRACE(jh, "remove from checkpoint list"); 105 JBUFFER_TRACE(jh, "remove from checkpoint list");
60 __journal_remove_checkpoint(jh); 106 ret = __journal_remove_checkpoint(jh) + 1;
61 jbd_unlock_bh_state(bh); 107 jbd_unlock_bh_state(bh);
62 journal_remove_journal_head(bh); 108 journal_remove_journal_head(bh);
63 BUFFER_TRACE(bh, "release"); 109 BUFFER_TRACE(bh, "release");
64 __brelse(bh); 110 __brelse(bh);
65 ret = 1;
66 } else { 111 } else {
67 jbd_unlock_bh_state(bh); 112 jbd_unlock_bh_state(bh);
68 } 113 }
@@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
117} 162}
118 163
119/* 164/*
120 * Clean up a transaction's checkpoint list. 165 * Clean up transaction's list of buffers submitted for io.
121 * 166 * We wait for any pending IO to complete and remove any clean
122 * We wait for any pending IO to complete and make sure any clean 167 * buffers. Note that we take the buffers in the opposite ordering
123 * buffers are removed from the transaction. 168 * from the one in which they were submitted for IO.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
128 * 169 *
129 * Called with j_list_lock held. 170 * Called with j_list_lock held.
130 */ 171 */
131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) 172
173static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
132{ 174{
133 struct journal_head *jh, *next_jh, *last_jh; 175 struct journal_head *jh;
134 struct buffer_head *bh; 176 struct buffer_head *bh;
135 int ret = 0; 177 tid_t this_tid;
136 178 int released = 0;
137 assert_spin_locked(&journal->j_list_lock); 179
138 jh = transaction->t_checkpoint_list; 180 this_tid = transaction->t_tid;
139 if (!jh) 181restart:
140 return 0; 182 /* Didn't somebody clean up the transaction in the meanwhile */
141 183 if (journal->j_checkpoint_transactions != transaction ||
142 last_jh = jh->b_cpprev; 184 transaction->t_tid != this_tid)
143 next_jh = jh; 185 return;
144 do { 186 while (!released && transaction->t_checkpoint_io_list) {
145 jh = next_jh; 187 jh = transaction->t_checkpoint_io_list;
146 bh = jh2bh(jh); 188 bh = jh2bh(jh);
189 if (!jbd_trylock_bh_state(bh)) {
190 jbd_sync_bh(journal, bh);
191 spin_lock(&journal->j_list_lock);
192 goto restart;
193 }
147 if (buffer_locked(bh)) { 194 if (buffer_locked(bh)) {
148 atomic_inc(&bh->b_count); 195 atomic_inc(&bh->b_count);
149 spin_unlock(&journal->j_list_lock); 196 spin_unlock(&journal->j_list_lock);
197 jbd_unlock_bh_state(bh);
150 wait_on_buffer(bh); 198 wait_on_buffer(bh);
151 /* the journal_head may have gone by now */ 199 /* the journal_head may have gone by now */
152 BUFFER_TRACE(bh, "brelse"); 200 BUFFER_TRACE(bh, "brelse");
153 __brelse(bh); 201 __brelse(bh);
154 goto out_return_1; 202 spin_lock(&journal->j_list_lock);
155 } 203 goto restart;
156
157 /*
158 * This is foul
159 */
160 if (!jbd_trylock_bh_state(bh)) {
161 jbd_sync_bh(journal, bh);
162 goto out_return_1;
163 } 204 }
164
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /* 205 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it 206 * Now in whatever state the buffer currently is, we know that
178 * shouldn't have NULL b_transaction? 207 * it has been written out and so we can drop it from the list
179 */ 208 */
180 next_jh = jh->b_cpnext; 209 released = __journal_remove_checkpoint(jh);
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { 210 jbd_unlock_bh_state(bh);
182 BUFFER_TRACE(bh, "remove from checkpoint"); 211 }
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
197} 212}
198 213
199#define NR_BATCH 64 214#define NR_BATCH 64
@@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
203{ 218{
204 int i; 219 int i;
205 220
206 spin_unlock(&journal->j_list_lock);
207 ll_rw_block(SWRITE, *batch_count, bhs); 221 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
209 for (i = 0; i < *batch_count; i++) { 222 for (i = 0; i < *batch_count; i++) {
210 struct buffer_head *bh = bhs[i]; 223 struct buffer_head *bh = bhs[i];
211 clear_buffer_jwrite(bh); 224 clear_buffer_jwrite(bh);
@@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
221 * Return 1 if something happened which requires us to abort the current 234 * Return 1 if something happened which requires us to abort the current
222 * scan of the checkpoint list. 235 * scan of the checkpoint list.
223 * 236 *
224 * Called with j_list_lock held. 237 * Called with j_list_lock held and drops it if 1 is returned
225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 238 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
226 */ 239 */
227static int __flush_buffer(journal_t *journal, struct journal_head *jh, 240static int __process_buffer(journal_t *journal, struct journal_head *jh,
228 struct buffer_head **bhs, int *batch_count, 241 struct buffer_head **bhs, int *batch_count)
229 int *drop_count)
230{ 242{
231 struct buffer_head *bh = jh2bh(jh); 243 struct buffer_head *bh = jh2bh(jh);
232 int ret = 0; 244 int ret = 0;
233 245
234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { 246 if (buffer_locked(bh)) {
235 J_ASSERT_JH(jh, jh->b_transaction == NULL); 247 get_bh(bh);
248 spin_unlock(&journal->j_list_lock);
249 jbd_unlock_bh_state(bh);
250 wait_on_buffer(bh);
251 /* the journal_head may have gone by now */
252 BUFFER_TRACE(bh, "brelse");
253 put_bh(bh);
254 ret = 1;
255 }
256 else if (jh->b_transaction != NULL) {
257 transaction_t *t = jh->b_transaction;
258 tid_t tid = t->t_tid;
236 259
260 spin_unlock(&journal->j_list_lock);
261 jbd_unlock_bh_state(bh);
262 log_start_commit(journal, tid);
263 log_wait_commit(journal, tid);
264 ret = 1;
265 }
266 else if (!buffer_dirty(bh)) {
267 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
268 BUFFER_TRACE(bh, "remove from checkpoint");
269 __journal_remove_checkpoint(jh);
270 spin_unlock(&journal->j_list_lock);
271 jbd_unlock_bh_state(bh);
272 journal_remove_journal_head(bh);
273 put_bh(bh);
274 ret = 1;
275 }
276 else {
237 /* 277 /*
238 * Important: we are about to write the buffer, and 278 * Important: we are about to write the buffer, and
239 * possibly block, while still holding the journal lock. 279 * possibly block, while still holding the journal lock.
@@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
246 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 286 J_ASSERT_BH(bh, !buffer_jwrite(bh));
247 set_buffer_jwrite(bh); 287 set_buffer_jwrite(bh);
248 bhs[*batch_count] = bh; 288 bhs[*batch_count] = bh;
289 __buffer_relink_io(jh);
249 jbd_unlock_bh_state(bh); 290 jbd_unlock_bh_state(bh);
250 (*batch_count)++; 291 (*batch_count)++;
251 if (*batch_count == NR_BATCH) { 292 if (*batch_count == NR_BATCH) {
293 spin_unlock(&journal->j_list_lock);
252 __flush_batch(journal, bhs, batch_count); 294 __flush_batch(journal, bhs, batch_count);
253 ret = 1; 295 ret = 1;
254 } 296 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
267 } 297 }
268 return ret; 298 return ret;
269} 299}
270 300
271/* 301/*
272 * Perform an actual checkpoint. We don't write out only enough to 302 * Perform an actual checkpoint. We take the first transaction on the
273 * satisfy the current blocked requests: rather we submit a reasonably 303 * list of transactions to be checkpointed and send all its buffers
274 * sized chunk of the outstanding data to disk at once for 304 * to disk. We submit larger chunks of data at once.
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
276 * 305 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
281 * The journal should be locked before calling this function. 306 * The journal should be locked before calling this function.
282 */ 307 */
283int log_do_checkpoint(journal_t *journal) 308int log_do_checkpoint(journal_t *journal)
284{ 309{
310 transaction_t *transaction;
311 tid_t this_tid;
285 int result; 312 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
288 313
289 jbd_debug(1, "Start checkpoint\n"); 314 jbd_debug(1, "Start checkpoint\n");
290 315
@@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal)
299 return result; 324 return result;
300 325
301 /* 326 /*
302 * OK, we need to start writing disk blocks. Try to free up a 327 * OK, we need to start writing disk blocks. Take one transaction
303 * quarter of the log in a single checkpoint if we can. 328 * and write it.
304 */ 329 */
330 spin_lock(&journal->j_list_lock);
331 if (!journal->j_checkpoint_transactions)
332 goto out;
333 transaction = journal->j_checkpoint_transactions;
334 this_tid = transaction->t_tid;
335restart:
305 /* 336 /*
306 * AKPM: check this code. I had a feeling a while back that it 337 * If someone cleaned up this transaction while we slept, we're
307 * degenerates into a busy loop at unmount time. 338 * done (maybe it's a new transaction, but it fell at the same
339 * address).
308 */ 340 */
309 spin_lock(&journal->j_list_lock); 341 if (journal->j_checkpoint_transactions == transaction ||
310 while (journal->j_checkpoint_transactions) { 342 transaction->t_tid == this_tid) {
311 transaction_t *transaction; 343 int batch_count = 0;
312 struct journal_head *jh, *last_jh, *next_jh; 344 struct buffer_head *bhs[NR_BATCH];
313 int drop_count = 0; 345 struct journal_head *jh;
314 int cleanup_ret, retry = 0; 346 int retry = 0;
315 tid_t this_tid; 347
316 348 while (!retry && transaction->t_checkpoint_list) {
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
323 struct buffer_head *bh; 349 struct buffer_head *bh;
324 350
325 jh = next_jh; 351 jh = transaction->t_checkpoint_list;
326 next_jh = jh->b_cpnext;
327 bh = jh2bh(jh); 352 bh = jh2bh(jh);
328 if (!jbd_trylock_bh_state(bh)) { 353 if (!jbd_trylock_bh_state(bh)) {
329 jbd_sync_bh(journal, bh); 354 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
331 retry = 1; 355 retry = 1;
332 break; 356 break;
333 } 357 }
334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); 358 retry = __process_buffer(journal, jh, bhs,
335 if (cond_resched_lock(&journal->j_list_lock)) { 359 &batch_count);
360 if (!retry &&
361 lock_need_resched(&journal->j_list_lock)) {
362 spin_unlock(&journal->j_list_lock);
336 retry = 1; 363 retry = 1;
337 break; 364 break;
338 } 365 }
339 } while (jh != last_jh && !retry); 366 }
340 367
341 if (batch_count) { 368 if (batch_count) {
369 if (!retry) {
370 spin_unlock(&journal->j_list_lock);
371 retry = 1;
372 }
342 __flush_batch(journal, bhs, &batch_count); 373 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
344 } 374 }
345 375
376 if (retry) {
377 spin_lock(&journal->j_list_lock);
378 goto restart;
379 }
346 /* 380 /*
347 * If someone cleaned up this transaction while we slept, we're 381 * Now we have cleaned up the first transaction's checkpoint
348 * done 382 * list. Let's clean up the second one.
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
357 */
358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */ 383 */
365 cleanup_ret = __cleanup_transaction(journal, transaction); 384 __wait_cp_io(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
369 } 385 }
386out:
370 spin_unlock(&journal->j_list_lock); 387 spin_unlock(&journal->j_list_lock);
371 result = cleanup_journal_tail(journal); 388 result = cleanup_journal_tail(journal);
372 if (result < 0) 389 if (result < 0)
373 return result; 390 return result;
374
375 return 0; 391 return 0;
376} 392}
377 393
@@ -456,52 +472,91 @@ int cleanup_journal_tail(journal_t *journal)
456/* Checkpoint list management */ 472/* Checkpoint list management */
457 473
458/* 474/*
475 * journal_clean_one_cp_list
476 *
477 * Find all the written-back checkpoint buffers in the given list and release them.
478 *
479 * Called with the journal locked.
480 * Called with j_list_lock held.
 481 * Returns number of buffers reaped (for debug)
482 */
483
484static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
485{
486 struct journal_head *last_jh;
487 struct journal_head *next_jh = jh;
488 int ret, freed = 0;
489
490 *released = 0;
491 if (!jh)
492 return 0;
493
494 last_jh = jh->b_cpprev;
495 do {
496 jh = next_jh;
497 next_jh = jh->b_cpnext;
498 /* Use trylock because of the ranking */
499 if (jbd_trylock_bh_state(jh2bh(jh))) {
500 ret = __try_to_free_cp_buf(jh);
501 if (ret) {
502 freed++;
503 if (ret == 2) {
504 *released = 1;
505 return freed;
506 }
507 }
508 }
509 /*
510 * This function only frees up some memory if possible so we
511 * dont have an obligation to finish processing. Bail out if
512 * preemption requested:
513 */
514 if (need_resched())
515 return freed;
516 } while (jh != last_jh);
517
518 return freed;
519}
520
521/*
459 * journal_clean_checkpoint_list 522 * journal_clean_checkpoint_list
460 * 523 *
461 * Find all the written-back checkpoint buffers in the journal and release them. 524 * Find all the written-back checkpoint buffers in the journal and release them.
462 * 525 *
463 * Called with the journal locked. 526 * Called with the journal locked.
464 * Called with j_list_lock held. 527 * Called with j_list_lock held.
465 * Returns number of bufers reaped (for debug) 528 * Returns number of buffers reaped (for debug)
466 */ 529 */
467 530
468int __journal_clean_checkpoint_list(journal_t *journal) 531int __journal_clean_checkpoint_list(journal_t *journal)
469{ 532{
470 transaction_t *transaction, *last_transaction, *next_transaction; 533 transaction_t *transaction, *last_transaction, *next_transaction;
471 int ret = 0; 534 int ret = 0, released;
472 535
473 transaction = journal->j_checkpoint_transactions; 536 transaction = journal->j_checkpoint_transactions;
474 if (transaction == 0) 537 if (!transaction)
475 goto out; 538 goto out;
476 539
477 last_transaction = transaction->t_cpprev; 540 last_transaction = transaction->t_cpprev;
478 next_transaction = transaction; 541 next_transaction = transaction;
479 do { 542 do {
480 struct journal_head *jh;
481
482 transaction = next_transaction; 543 transaction = next_transaction;
483 next_transaction = transaction->t_cpnext; 544 next_transaction = transaction->t_cpnext;
484 jh = transaction->t_checkpoint_list; 545 ret += journal_clean_one_cp_list(transaction->
485 if (jh) { 546 t_checkpoint_list, &released);
486 struct journal_head *last_jh = jh->b_cpprev; 547 if (need_resched())
487 struct journal_head *next_jh = jh; 548 goto out;
488 549 if (released)
489 do { 550 continue;
490 jh = next_jh; 551 /*
491 next_jh = jh->b_cpnext; 552 * It is essential that we are as careful as in the case of
492 /* Use trylock because of the ranknig */ 553 * t_checkpoint_list with removing the buffer from the list as
493 if (jbd_trylock_bh_state(jh2bh(jh))) 554 * we can possibly see not yet submitted buffers on io_list
494 ret += __try_to_free_cp_buf(jh); 555 */
495 /* 556 ret += journal_clean_one_cp_list(transaction->
496 * This function only frees up some memory 557 t_checkpoint_io_list, &released);
497 * if possible so we dont have an obligation 558 if (need_resched())
498 * to finish processing. Bail out if preemption 559 goto out;
499 * requested:
500 */
501 if (need_resched())
502 goto out;
503 } while (jh != last_jh);
504 }
505 } while (transaction != last_transaction); 560 } while (transaction != last_transaction);
506out: 561out:
507 return ret; 562 return ret;
@@ -516,18 +571,22 @@ out:
516 * buffer updates committed in that transaction have safely been stored 571 * buffer updates committed in that transaction have safely been stored
517 * elsewhere on disk. To achieve this, all of the buffers in a 572 * elsewhere on disk. To achieve this, all of the buffers in a
518 * transaction need to be maintained on the transaction's checkpoint 573 * transaction need to be maintained on the transaction's checkpoint
519 * list until they have been rewritten, at which point this function is 574 * lists until they have been rewritten, at which point this function is
520 * called to remove the buffer from the existing transaction's 575 * called to remove the buffer from the existing transaction's
521 * checkpoint list. 576 * checkpoint lists.
577 *
578 * The function returns 1 if it frees the transaction, 0 otherwise.
522 * 579 *
523 * This function is called with the journal locked. 580 * This function is called with the journal locked.
524 * This function is called with j_list_lock held. 581 * This function is called with j_list_lock held.
582 * This function is called with jbd_lock_bh_state(jh2bh(jh))
525 */ 583 */
526 584
527void __journal_remove_checkpoint(struct journal_head *jh) 585int __journal_remove_checkpoint(struct journal_head *jh)
528{ 586{
529 transaction_t *transaction; 587 transaction_t *transaction;
530 journal_t *journal; 588 journal_t *journal;
589 int ret = 0;
531 590
532 JBUFFER_TRACE(jh, "entry"); 591 JBUFFER_TRACE(jh, "entry");
533 592
@@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
538 journal = transaction->t_journal; 597 journal = transaction->t_journal;
539 598
540 __buffer_unlink(jh); 599 __buffer_unlink(jh);
600 jh->b_cp_transaction = NULL;
541 601
542 if (transaction->t_checkpoint_list != NULL) 602 if (transaction->t_checkpoint_list != NULL ||
603 transaction->t_checkpoint_io_list != NULL)
543 goto out; 604 goto out;
544 JBUFFER_TRACE(jh, "transaction has no more buffers"); 605 JBUFFER_TRACE(jh, "transaction has no more buffers");
545 606
@@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
565 /* Just in case anybody was waiting for more transactions to be 626 /* Just in case anybody was waiting for more transactions to be
566 checkpointed... */ 627 checkpointed... */
567 wake_up(&journal->j_wait_logspace); 628 wake_up(&journal->j_wait_logspace);
629 ret = 1;
568out: 630out:
569 JBUFFER_TRACE(jh, "exit"); 631 JBUFFER_TRACE(jh, "exit");
632 return ret;
570} 633}
571 634
572/* 635/*
@@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
628 J_ASSERT(transaction->t_shadow_list == NULL); 691 J_ASSERT(transaction->t_shadow_list == NULL);
629 J_ASSERT(transaction->t_log_list == NULL); 692 J_ASSERT(transaction->t_log_list == NULL);
630 J_ASSERT(transaction->t_checkpoint_list == NULL); 693 J_ASSERT(transaction->t_checkpoint_list == NULL);
694 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
631 J_ASSERT(transaction->t_updates == 0); 695 J_ASSERT(transaction->t_updates == 0);
632 J_ASSERT(journal->j_committing_transaction != transaction); 696 J_ASSERT(journal->j_committing_transaction != transaction);
633 J_ASSERT(journal->j_running_transaction != transaction); 697 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index dcde7adfdce5..558cb4c26ec9 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -498,6 +498,12 @@ struct transaction_s
498 struct journal_head *t_checkpoint_list; 498 struct journal_head *t_checkpoint_list;
499 499
500 /* 500 /*
501 * Doubly-linked circular list of all buffers submitted for IO while
502 * checkpointing. [j_list_lock]
503 */
504 struct journal_head *t_checkpoint_io_list;
505
506 /*
501 * Doubly-linked circular list of temporary buffers currently undergoing 507 * Doubly-linked circular list of temporary buffers currently undergoing
502 * IO in the log [j_list_lock] 508 * IO in the log [j_list_lock]
503 */ 509 */
@@ -843,7 +849,7 @@ extern void journal_commit_transaction(journal_t *);
843 849
844/* Checkpoint list management */ 850/* Checkpoint list management */
845int __journal_clean_checkpoint_list(journal_t *journal); 851int __journal_clean_checkpoint_list(journal_t *journal);
846void __journal_remove_checkpoint(struct journal_head *); 852int __journal_remove_checkpoint(struct journal_head *);
847void __journal_insert_checkpoint(struct journal_head *, transaction_t *); 853void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
848 854
849/* Buffer IO */ 855/* Buffer IO */