aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2006-06-23 05:06:05 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-06-23 10:43:08 -0400
commit 78ce89c92bc6eaf5933b5664bff64253a7103bd7 (patch)
tree edaadf283127c1a74bfe3aa7b165c1cf45d14347
parent cdaad343b561cdeb38b0578bb038eb5e87ed5551 (diff)
[PATCH] JBD: split checkpoint lists
Split the checkpoint list of the transaction into two lists. In the first list we keep the buffers that need to be submitted for IO. In the second list we keep the buffers that were already submitted, for which we just have to wait for the IO to complete. This should simplify the handling of checkpoint lists a bit and may eventually also be a performance gain.

Signed-off-by: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Cc: "Stephen C. Tweedie" <sct@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- fs/jbd/checkpoint.c 419
-rw-r--r-- include/linux/jbd.h 8
2 files changed, 246 insertions, 181 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 3f5102b069db..47678a26c13b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,67 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25 25
26/* 26/*
27 * Unlink a buffer from a transaction. 27 * Unlink a buffer from a transaction checkpoint list.
28 * 28 *
29 * Called with j_list_lock held. 29 * Called with j_list_lock held.
30 */ 30 */
31 31static inline void __buffer_unlink_first(struct journal_head *jh)
32static inline void __buffer_unlink(struct journal_head *jh)
33{ 32{
34 transaction_t *transaction; 33 transaction_t *transaction = jh->b_cp_transaction;
35
36 transaction = jh->b_cp_transaction;
37 jh->b_cp_transaction = NULL;
38 34
39 jh->b_cpnext->b_cpprev = jh->b_cpprev; 35 jh->b_cpnext->b_cpprev = jh->b_cpprev;
40 jh->b_cpprev->b_cpnext = jh->b_cpnext; 36 jh->b_cpprev->b_cpnext = jh->b_cpnext;
41 if (transaction->t_checkpoint_list == jh) 37 if (transaction->t_checkpoint_list == jh) {
42 transaction->t_checkpoint_list = jh->b_cpnext; 38 transaction->t_checkpoint_list = jh->b_cpnext;
43 if (transaction->t_checkpoint_list == jh) 39 if (transaction->t_checkpoint_list == jh)
44 transaction->t_checkpoint_list = NULL; 40 transaction->t_checkpoint_list = NULL;
41 }
42}
43
44/*
45 * Unlink a buffer from a transaction checkpoint(io) list.
46 *
47 * Called with j_list_lock held.
48 */
49static inline void __buffer_unlink(struct journal_head *jh)
50{
51 transaction_t *transaction = jh->b_cp_transaction;
52
53 __buffer_unlink_first(jh);
54 if (transaction->t_checkpoint_io_list == jh) {
55 transaction->t_checkpoint_io_list = jh->b_cpnext;
56 if (transaction->t_checkpoint_io_list == jh)
57 transaction->t_checkpoint_io_list = NULL;
58 }
59}
60
61/*
62 * Move a buffer from the checkpoint list to the checkpoint io list
63 *
64 * Called with j_list_lock held
65 */
66static inline void __buffer_relink_io(struct journal_head *jh)
67{
68 transaction_t *transaction = jh->b_cp_transaction;
69
70 __buffer_unlink_first(jh);
71
72 if (!transaction->t_checkpoint_io_list) {
73 jh->b_cpnext = jh->b_cpprev = jh;
74 } else {
75 jh->b_cpnext = transaction->t_checkpoint_io_list;
76 jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
77 jh->b_cpprev->b_cpnext = jh;
78 jh->b_cpnext->b_cpprev = jh;
79 }
80 transaction->t_checkpoint_io_list = jh;
45} 81}
46 82
47/* 83/*
48 * Try to release a checkpointed buffer from its transaction. 84 * Try to release a checkpointed buffer from its transaction.
49 * Returns 1 if we released it. 85 * Returns 1 if we released it and 2 if we also released the
86 * whole transaction.
87 *
50 * Requires j_list_lock 88 * Requires j_list_lock
51 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 89 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
52 */ 90 */
@@ -57,12 +95,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
57 95
58 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
59 JBUFFER_TRACE(jh, "remove from checkpoint list"); 97 JBUFFER_TRACE(jh, "remove from checkpoint list");
60 __journal_remove_checkpoint(jh); 98 ret = __journal_remove_checkpoint(jh) + 1;
61 jbd_unlock_bh_state(bh); 99 jbd_unlock_bh_state(bh);
62 journal_remove_journal_head(bh); 100 journal_remove_journal_head(bh);
63 BUFFER_TRACE(bh, "release"); 101 BUFFER_TRACE(bh, "release");
64 __brelse(bh); 102 __brelse(bh);
65 ret = 1;
66 } else { 103 } else {
67 jbd_unlock_bh_state(bh); 104 jbd_unlock_bh_state(bh);
68 } 105 }
@@ -117,83 +154,54 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
117} 154}
118 155
119/* 156/*
120 * Clean up a transaction's checkpoint list. 157 * Clean up transaction's list of buffers submitted for io.
121 * 158 * We wait for any pending IO to complete and remove any clean
122 * We wait for any pending IO to complete and make sure any clean 159 * buffers. Note that we take the buffers in the opposite ordering
123 * buffers are removed from the transaction. 160 * from the one in which they were submitted for IO.
124 *
125 * Return 1 if we performed any actions which might have destroyed the
126 * checkpoint. (journal_remove_checkpoint() deletes the transaction when
127 * the last checkpoint buffer is cleansed)
128 * 161 *
129 * Called with j_list_lock held. 162 * Called with j_list_lock held.
130 */ 163 */
131static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) 164static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
132{ 165{
133 struct journal_head *jh, *next_jh, *last_jh; 166 struct journal_head *jh;
134 struct buffer_head *bh; 167 struct buffer_head *bh;
135 int ret = 0; 168 tid_t this_tid;
136 169 int released = 0;
137 assert_spin_locked(&journal->j_list_lock); 170
138 jh = transaction->t_checkpoint_list; 171 this_tid = transaction->t_tid;
139 if (!jh) 172restart:
140 return 0; 173 /* Did somebody clean up the transaction in the meanwhile? */
141 174 if (journal->j_checkpoint_transactions != transaction ||
142 last_jh = jh->b_cpprev; 175 transaction->t_tid != this_tid)
143 next_jh = jh; 176 return;
144 do { 177 while (!released && transaction->t_checkpoint_io_list) {
145 jh = next_jh; 178 jh = transaction->t_checkpoint_io_list;
146 bh = jh2bh(jh); 179 bh = jh2bh(jh);
180 if (!jbd_trylock_bh_state(bh)) {
181 jbd_sync_bh(journal, bh);
182 spin_lock(&journal->j_list_lock);
183 goto restart;
184 }
147 if (buffer_locked(bh)) { 185 if (buffer_locked(bh)) {
148 atomic_inc(&bh->b_count); 186 atomic_inc(&bh->b_count);
149 spin_unlock(&journal->j_list_lock); 187 spin_unlock(&journal->j_list_lock);
188 jbd_unlock_bh_state(bh);
150 wait_on_buffer(bh); 189 wait_on_buffer(bh);
151 /* the journal_head may have gone by now */ 190 /* the journal_head may have gone by now */
152 BUFFER_TRACE(bh, "brelse"); 191 BUFFER_TRACE(bh, "brelse");
153 __brelse(bh); 192 __brelse(bh);
154 goto out_return_1; 193 spin_lock(&journal->j_list_lock);
194 goto restart;
155 } 195 }
156
157 /* 196 /*
158 * This is foul 197 * Now in whatever state the buffer currently is, we know that
198 * it has been written out and so we can drop it from the list
159 */ 199 */
160 if (!jbd_trylock_bh_state(bh)) { 200 released = __journal_remove_checkpoint(jh);
161 jbd_sync_bh(journal, bh); 201 jbd_unlock_bh_state(bh);
162 goto out_return_1; 202 journal_remove_journal_head(bh);
163 } 203 __brelse(bh);
164 204 }
165 if (jh->b_transaction != NULL) {
166 transaction_t *t = jh->b_transaction;
167 tid_t tid = t->t_tid;
168
169 spin_unlock(&journal->j_list_lock);
170 jbd_unlock_bh_state(bh);
171 log_start_commit(journal, tid);
172 log_wait_commit(journal, tid);
173 goto out_return_1;
174 }
175
176 /*
177 * AKPM: I think the buffer_jbddirty test is redundant - it
178 * shouldn't have NULL b_transaction?
179 */
180 next_jh = jh->b_cpnext;
181 if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
182 BUFFER_TRACE(bh, "remove from checkpoint");
183 __journal_remove_checkpoint(jh);
184 jbd_unlock_bh_state(bh);
185 journal_remove_journal_head(bh);
186 __brelse(bh);
187 ret = 1;
188 } else {
189 jbd_unlock_bh_state(bh);
190 }
191 } while (jh != last_jh);
192
193 return ret;
194out_return_1:
195 spin_lock(&journal->j_list_lock);
196 return 1;
197} 205}
198 206
199#define NR_BATCH 64 207#define NR_BATCH 64
@@ -203,9 +211,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
203{ 211{
204 int i; 212 int i;
205 213
206 spin_unlock(&journal->j_list_lock);
207 ll_rw_block(SWRITE, *batch_count, bhs); 214 ll_rw_block(SWRITE, *batch_count, bhs);
208 spin_lock(&journal->j_list_lock);
209 for (i = 0; i < *batch_count; i++) { 215 for (i = 0; i < *batch_count; i++) {
210 struct buffer_head *bh = bhs[i]; 216 struct buffer_head *bh = bhs[i];
211 clear_buffer_jwrite(bh); 217 clear_buffer_jwrite(bh);
@@ -221,19 +227,43 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
221 * Return 1 if something happened which requires us to abort the current 227 * Return 1 if something happened which requires us to abort the current
222 * scan of the checkpoint list. 228 * scan of the checkpoint list.
223 * 229 *
224 * Called with j_list_lock held. 230 * Called with j_list_lock held and drops it if 1 is returned
225 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 231 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
226 */ 232 */
227static int __flush_buffer(journal_t *journal, struct journal_head *jh, 233static int __process_buffer(journal_t *journal, struct journal_head *jh,
228 struct buffer_head **bhs, int *batch_count, 234 struct buffer_head **bhs, int *batch_count)
229 int *drop_count)
230{ 235{
231 struct buffer_head *bh = jh2bh(jh); 236 struct buffer_head *bh = jh2bh(jh);
232 int ret = 0; 237 int ret = 0;
233 238
234 if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) { 239 if (buffer_locked(bh)) {
235 J_ASSERT_JH(jh, jh->b_transaction == NULL); 240 atomic_inc(&bh->b_count);
241 spin_unlock(&journal->j_list_lock);
242 jbd_unlock_bh_state(bh);
243 wait_on_buffer(bh);
244 /* the journal_head may have gone by now */
245 BUFFER_TRACE(bh, "brelse");
246 __brelse(bh);
247 ret = 1;
248 } else if (jh->b_transaction != NULL) {
249 transaction_t *t = jh->b_transaction;
250 tid_t tid = t->t_tid;
236 251
252 spin_unlock(&journal->j_list_lock);
253 jbd_unlock_bh_state(bh);
254 log_start_commit(journal, tid);
255 log_wait_commit(journal, tid);
256 ret = 1;
257 } else if (!buffer_dirty(bh)) {
258 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
259 BUFFER_TRACE(bh, "remove from checkpoint");
260 __journal_remove_checkpoint(jh);
261 spin_unlock(&journal->j_list_lock);
262 jbd_unlock_bh_state(bh);
263 journal_remove_journal_head(bh);
264 __brelse(bh);
265 ret = 1;
266 } else {
237 /* 267 /*
238 * Important: we are about to write the buffer, and 268 * Important: we are about to write the buffer, and
239 * possibly block, while still holding the journal lock. 269 * possibly block, while still holding the journal lock.
@@ -246,45 +276,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
246 J_ASSERT_BH(bh, !buffer_jwrite(bh)); 276 J_ASSERT_BH(bh, !buffer_jwrite(bh));
247 set_buffer_jwrite(bh); 277 set_buffer_jwrite(bh);
248 bhs[*batch_count] = bh; 278 bhs[*batch_count] = bh;
279 __buffer_relink_io(jh);
249 jbd_unlock_bh_state(bh); 280 jbd_unlock_bh_state(bh);
250 (*batch_count)++; 281 (*batch_count)++;
251 if (*batch_count == NR_BATCH) { 282 if (*batch_count == NR_BATCH) {
283 spin_unlock(&journal->j_list_lock);
252 __flush_batch(journal, bhs, batch_count); 284 __flush_batch(journal, bhs, batch_count);
253 ret = 1; 285 ret = 1;
254 } 286 }
255 } else {
256 int last_buffer = 0;
257 if (jh->b_cpnext == jh) {
258 /* We may be about to drop the transaction. Tell the
259 * caller that the lists have changed.
260 */
261 last_buffer = 1;
262 }
263 if (__try_to_free_cp_buf(jh)) {
264 (*drop_count)++;
265 ret = last_buffer;
266 }
267 } 287 }
268 return ret; 288 return ret;
269} 289}
270 290
271/* 291/*
272 * Perform an actual checkpoint. We don't write out only enough to 292 * Perform an actual checkpoint. We take the first transaction on the
273 * satisfy the current blocked requests: rather we submit a reasonably 293 * list of transactions to be checkpointed and send all its buffers
274 * sized chunk of the outstanding data to disk at once for 294 * to disk. We submit larger chunks of data at once.
275 * efficiency. __log_wait_for_space() will retry if we didn't free enough.
276 * 295 *
277 * However, we _do_ take into account the amount requested so that once
278 * the IO has been queued, we can return as soon as enough of it has
279 * completed to disk.
280 *
281 * The journal should be locked before calling this function. 296 * The journal should be locked before calling this function.
282 */ 297 */
283int log_do_checkpoint(journal_t *journal) 298int log_do_checkpoint(journal_t *journal)
284{ 299{
300 transaction_t *transaction;
301 tid_t this_tid;
285 int result; 302 int result;
286 int batch_count = 0;
287 struct buffer_head *bhs[NR_BATCH];
288 303
289 jbd_debug(1, "Start checkpoint\n"); 304 jbd_debug(1, "Start checkpoint\n");
290 305
@@ -299,79 +314,68 @@ int log_do_checkpoint(journal_t *journal)
299 return result; 314 return result;
300 315
301 /* 316 /*
302 * OK, we need to start writing disk blocks. Try to free up a 317 * OK, we need to start writing disk blocks. Take one transaction
303 * quarter of the log in a single checkpoint if we can. 318 * and write it.
304 */ 319 */
320 spin_lock(&journal->j_list_lock);
321 if (!journal->j_checkpoint_transactions)
322 goto out;
323 transaction = journal->j_checkpoint_transactions;
324 this_tid = transaction->t_tid;
325restart:
305 /* 326 /*
306 * AKPM: check this code. I had a feeling a while back that it 327 * If someone cleaned up this transaction while we slept, we're
307 * degenerates into a busy loop at unmount time. 328 * done (maybe it's a new transaction, but it fell at the same
329 * address).
308 */ 330 */
309 spin_lock(&journal->j_list_lock); 331 if (journal->j_checkpoint_transactions == transaction &&
310 while (journal->j_checkpoint_transactions) { 332 transaction->t_tid == this_tid) {
311 transaction_t *transaction; 333 int batch_count = 0;
312 struct journal_head *jh, *last_jh, *next_jh; 334 struct buffer_head *bhs[NR_BATCH];
313 int drop_count = 0; 335 struct journal_head *jh;
314 int cleanup_ret, retry = 0; 336 int retry = 0;
315 tid_t this_tid; 337
316 338 while (!retry && transaction->t_checkpoint_list) {
317 transaction = journal->j_checkpoint_transactions;
318 this_tid = transaction->t_tid;
319 jh = transaction->t_checkpoint_list;
320 last_jh = jh->b_cpprev;
321 next_jh = jh;
322 do {
323 struct buffer_head *bh; 339 struct buffer_head *bh;
324 340
325 jh = next_jh; 341 jh = transaction->t_checkpoint_list;
326 next_jh = jh->b_cpnext;
327 bh = jh2bh(jh); 342 bh = jh2bh(jh);
328 if (!jbd_trylock_bh_state(bh)) { 343 if (!jbd_trylock_bh_state(bh)) {
329 jbd_sync_bh(journal, bh); 344 jbd_sync_bh(journal, bh);
330 spin_lock(&journal->j_list_lock);
331 retry = 1; 345 retry = 1;
332 break; 346 break;
333 } 347 }
334 retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count); 348 retry = __process_buffer(journal, jh, bhs,&batch_count);
335 if (cond_resched_lock(&journal->j_list_lock)) { 349 if (!retry && lock_need_resched(&journal->j_list_lock)){
350 spin_unlock(&journal->j_list_lock);
336 retry = 1; 351 retry = 1;
337 break; 352 break;
338 } 353 }
339 } while (jh != last_jh && !retry); 354 }
340 355
341 if (batch_count) { 356 if (batch_count) {
357 if (!retry) {
358 spin_unlock(&journal->j_list_lock);
359 retry = 1;
360 }
342 __flush_batch(journal, bhs, &batch_count); 361 __flush_batch(journal, bhs, &batch_count);
343 retry = 1;
344 } 362 }
345 363
364 if (retry) {
365 spin_lock(&journal->j_list_lock);
366 goto restart;
367 }
346 /* 368 /*
347 * If someone cleaned up this transaction while we slept, we're 369 * Now we have cleaned up the first transaction's checkpoint
348 * done 370 * list. Let's clean up the second one
349 */
350 if (journal->j_checkpoint_transactions != transaction)
351 break;
352 if (retry)
353 continue;
354 /*
355 * Maybe it's a new transaction, but it fell at the same
356 * address
357 */
358 if (transaction->t_tid != this_tid)
359 continue;
360 /*
361 * We have walked the whole transaction list without
362 * finding anything to write to disk. We had better be
363 * able to make some progress or we are in trouble.
364 */ 371 */
365 cleanup_ret = __cleanup_transaction(journal, transaction); 372 __wait_cp_io(journal, transaction);
366 J_ASSERT(drop_count != 0 || cleanup_ret != 0);
367 if (journal->j_checkpoint_transactions != transaction)
368 break;
369 } 373 }
374out:
370 spin_unlock(&journal->j_list_lock); 375 spin_unlock(&journal->j_list_lock);
371 result = cleanup_journal_tail(journal); 376 result = cleanup_journal_tail(journal);
372 if (result < 0) 377 if (result < 0)
373 return result; 378 return result;
374
375 return 0; 379 return 0;
376} 380}
377 381
@@ -456,52 +460,98 @@ int cleanup_journal_tail(journal_t *journal)
456/* Checkpoint list management */ 460/* Checkpoint list management */
457 461
458/* 462/*
463 * journal_clean_one_cp_list
464 *
465 * Find all the written-back checkpoint buffers in the given list and release them.
466 *
467 * Called with the journal locked.
468 * Called with j_list_lock held.
469 * Returns number of bufers reaped (for debug)
470 */
471
472static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
473{
474 struct journal_head *last_jh;
475 struct journal_head *next_jh = jh;
476 int ret, freed = 0;
477
478 *released = 0;
479 if (!jh)
480 return 0;
481
482 last_jh = jh->b_cpprev;
483 do {
484 jh = next_jh;
485 next_jh = jh->b_cpnext;
486 /* Use trylock because of the ranking */
487 if (jbd_trylock_bh_state(jh2bh(jh))) {
488 ret = __try_to_free_cp_buf(jh);
489 if (ret) {
490 freed++;
491 if (ret == 2) {
492 *released = 1;
493 return freed;
494 }
495 }
496 }
497 /*
498 * This function only frees up some memory
499 * if possible so we dont have an obligation
500 * to finish processing. Bail out if preemption
501 * requested:
502 */
503 if (need_resched())
504 return freed;
505 } while (jh != last_jh);
506
507 return freed;
508}
509
510/*
459 * journal_clean_checkpoint_list 511 * journal_clean_checkpoint_list
460 * 512 *
461 * Find all the written-back checkpoint buffers in the journal and release them. 513 * Find all the written-back checkpoint buffers in the journal and release them.
462 * 514 *
463 * Called with the journal locked. 515 * Called with the journal locked.
464 * Called with j_list_lock held. 516 * Called with j_list_lock held.
465 * Returns number of bufers reaped (for debug) 517 * Returns number of buffers reaped (for debug)
466 */ 518 */
467 519
468int __journal_clean_checkpoint_list(journal_t *journal) 520int __journal_clean_checkpoint_list(journal_t *journal)
469{ 521{
470 transaction_t *transaction, *last_transaction, *next_transaction; 522 transaction_t *transaction, *last_transaction, *next_transaction;
471 int ret = 0; 523 int ret = 0;
524 int released;
472 525
473 transaction = journal->j_checkpoint_transactions; 526 transaction = journal->j_checkpoint_transactions;
474 if (transaction == 0) 527 if (!transaction)
475 goto out; 528 goto out;
476 529
477 last_transaction = transaction->t_cpprev; 530 last_transaction = transaction->t_cpprev;
478 next_transaction = transaction; 531 next_transaction = transaction;
479 do { 532 do {
480 struct journal_head *jh;
481
482 transaction = next_transaction; 533 transaction = next_transaction;
483 next_transaction = transaction->t_cpnext; 534 next_transaction = transaction->t_cpnext;
484 jh = transaction->t_checkpoint_list; 535 ret += journal_clean_one_cp_list(transaction->
485 if (jh) { 536 t_checkpoint_list, &released);
486 struct journal_head *last_jh = jh->b_cpprev; 537 /*
487 struct journal_head *next_jh = jh; 538 * This function only frees up some memory if possible so we
488 539 * dont have an obligation to finish processing. Bail out if
489 do { 540 * preemption requested:
490 jh = next_jh; 541 */
491 next_jh = jh->b_cpnext; 542 if (need_resched())
492 /* Use trylock because of the ranknig */ 543 goto out;
493 if (jbd_trylock_bh_state(jh2bh(jh))) 544 if (released)
494 ret += __try_to_free_cp_buf(jh); 545 continue;
495 /* 546 /*
496 * This function only frees up some memory 547 * It is essential that we are as careful as in the case of
497 * if possible so we dont have an obligation 548 * t_checkpoint_list with removing the buffer from the list as
498 * to finish processing. Bail out if preemption 549 * we can possibly see not yet submitted buffers on io_list
499 * requested: 550 */
500 */ 551 ret += journal_clean_one_cp_list(transaction->
501 if (need_resched()) 552 t_checkpoint_io_list, &released);
502 goto out; 553 if (need_resched())
503 } while (jh != last_jh); 554 goto out;
504 }
505 } while (transaction != last_transaction); 555 } while (transaction != last_transaction);
506out: 556out:
507 return ret; 557 return ret;
@@ -516,18 +566,22 @@ out:
516 * buffer updates committed in that transaction have safely been stored 566 * buffer updates committed in that transaction have safely been stored
517 * elsewhere on disk. To achieve this, all of the buffers in a 567 * elsewhere on disk. To achieve this, all of the buffers in a
518 * transaction need to be maintained on the transaction's checkpoint 568 * transaction need to be maintained on the transaction's checkpoint
519 * list until they have been rewritten, at which point this function is 569 * lists until they have been rewritten, at which point this function is
520 * called to remove the buffer from the existing transaction's 570 * called to remove the buffer from the existing transaction's
521 * checkpoint list. 571 * checkpoint lists.
572 *
573 * The function returns 1 if it frees the transaction, 0 otherwise.
522 * 574 *
523 * This function is called with the journal locked. 575 * This function is called with the journal locked.
524 * This function is called with j_list_lock held. 576 * This function is called with j_list_lock held.
577 * This function is called with jbd_lock_bh_state(jh2bh(jh))
525 */ 578 */
526 579
527void __journal_remove_checkpoint(struct journal_head *jh) 580int __journal_remove_checkpoint(struct journal_head *jh)
528{ 581{
529 transaction_t *transaction; 582 transaction_t *transaction;
530 journal_t *journal; 583 journal_t *journal;
584 int ret = 0;
531 585
532 JBUFFER_TRACE(jh, "entry"); 586 JBUFFER_TRACE(jh, "entry");
533 587
@@ -538,8 +592,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
538 journal = transaction->t_journal; 592 journal = transaction->t_journal;
539 593
540 __buffer_unlink(jh); 594 __buffer_unlink(jh);
595 jh->b_cp_transaction = NULL;
541 596
542 if (transaction->t_checkpoint_list != NULL) 597 if (transaction->t_checkpoint_list != NULL ||
598 transaction->t_checkpoint_io_list != NULL)
543 goto out; 599 goto out;
544 JBUFFER_TRACE(jh, "transaction has no more buffers"); 600 JBUFFER_TRACE(jh, "transaction has no more buffers");
545 601
@@ -565,8 +621,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
565 /* Just in case anybody was waiting for more transactions to be 621 /* Just in case anybody was waiting for more transactions to be
566 checkpointed... */ 622 checkpointed... */
567 wake_up(&journal->j_wait_logspace); 623 wake_up(&journal->j_wait_logspace);
624 ret = 1;
568out: 625out:
569 JBUFFER_TRACE(jh, "exit"); 626 JBUFFER_TRACE(jh, "exit");
627 return ret;
570} 628}
571 629
572/* 630/*
@@ -628,6 +686,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
628 J_ASSERT(transaction->t_shadow_list == NULL); 686 J_ASSERT(transaction->t_shadow_list == NULL);
629 J_ASSERT(transaction->t_log_list == NULL); 687 J_ASSERT(transaction->t_log_list == NULL);
630 J_ASSERT(transaction->t_checkpoint_list == NULL); 688 J_ASSERT(transaction->t_checkpoint_list == NULL);
689 J_ASSERT(transaction->t_checkpoint_io_list == NULL);
631 J_ASSERT(transaction->t_updates == 0); 690 J_ASSERT(transaction->t_updates == 0);
632 J_ASSERT(journal->j_committing_transaction != transaction); 691 J_ASSERT(journal->j_committing_transaction != transaction);
633 J_ASSERT(journal->j_running_transaction != transaction); 692 J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 6a425e370cb3..20eb34403d0c 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -501,6 +501,12 @@ struct transaction_s
501 struct journal_head *t_checkpoint_list; 501 struct journal_head *t_checkpoint_list;
502 502
503 /* 503 /*
504 * Doubly-linked circular list of all buffers submitted for IO while
505 * checkpointing. [j_list_lock]
506 */
507 struct journal_head *t_checkpoint_io_list;
508
509 /*
504 * Doubly-linked circular list of temporary buffers currently undergoing 510 * Doubly-linked circular list of temporary buffers currently undergoing
505 * IO in the log [j_list_lock] 511 * IO in the log [j_list_lock]
506 */ 512 */
@@ -849,7 +855,7 @@ extern void journal_commit_transaction(journal_t *);
849 855
850/* Checkpoint list management */ 856/* Checkpoint list management */
851int __journal_clean_checkpoint_list(journal_t *journal); 857int __journal_clean_checkpoint_list(journal_t *journal);
852void __journal_remove_checkpoint(struct journal_head *); 858int __journal_remove_checkpoint(struct journal_head *);
853void __journal_insert_checkpoint(struct journal_head *, transaction_t *); 859void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
854 860
855/* Buffer IO */ 861/* Buffer IO */