about | summary | refs | log | tree | commit | diff | stats
path: root/fs/jbd2/checkpoint.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/checkpoint.c')
-rw-r--r-- fs/jbd2/checkpoint.c 334
1 files changed, 136 insertions, 198 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 7f34f4716165..988b32ed4c87 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
96 96
97 if (jh->b_transaction == NULL && !buffer_locked(bh) && 97 if (jh->b_transaction == NULL && !buffer_locked(bh) &&
98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) { 98 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
99 /*
100 * Get our reference so that bh cannot be freed before
101 * we unlock it
102 */
103 get_bh(bh);
104 JBUFFER_TRACE(jh, "remove from checkpoint list"); 99 JBUFFER_TRACE(jh, "remove from checkpoint list");
105 ret = __jbd2_journal_remove_checkpoint(jh) + 1; 100 ret = __jbd2_journal_remove_checkpoint(jh) + 1;
106 BUFFER_TRACE(bh, "release");
107 __brelse(bh);
108 } 101 }
109 return ret; 102 return ret;
110} 103}
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
122 115
123 nblocks = jbd2_space_needed(journal); 116 nblocks = jbd2_space_needed(journal);
124 while (jbd2_log_space_left(journal) < nblocks) { 117 while (jbd2_log_space_left(journal) < nblocks) {
125 if (journal->j_flags & JBD2_ABORT)
126 return;
127 write_unlock(&journal->j_state_lock); 118 write_unlock(&journal->j_state_lock);
128 mutex_lock(&journal->j_checkpoint_mutex); 119 mutex_lock(&journal->j_checkpoint_mutex);
129 120
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal)
139 * trace for forensic evidence. 130 * trace for forensic evidence.
140 */ 131 */
141 write_lock(&journal->j_state_lock); 132 write_lock(&journal->j_state_lock);
133 if (journal->j_flags & JBD2_ABORT) {
134 mutex_unlock(&journal->j_checkpoint_mutex);
135 return;
136 }
142 spin_lock(&journal->j_list_lock); 137 spin_lock(&journal->j_list_lock);
143 nblocks = jbd2_space_needed(journal); 138 nblocks = jbd2_space_needed(journal);
144 space_left = jbd2_log_space_left(journal); 139 space_left = jbd2_log_space_left(journal);
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
183 } 178 }
184} 179}
185 180
186/*
187 * Clean up transaction's list of buffers submitted for io.
188 * We wait for any pending IO to complete and remove any clean
189 * buffers. Note that we take the buffers in the opposite ordering
190 * from the one in which they were submitted for IO.
191 *
192 * Return 0 on success, and return <0 if some buffers have failed
193 * to be written out.
194 *
195 * Called with j_list_lock held.
196 */
197static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
198{
199 struct journal_head *jh;
200 struct buffer_head *bh;
201 tid_t this_tid;
202 int released = 0;
203 int ret = 0;
204
205 this_tid = transaction->t_tid;
206restart:
207 /* Did somebody clean up the transaction in the meanwhile? */
208 if (journal->j_checkpoint_transactions != transaction ||
209 transaction->t_tid != this_tid)
210 return ret;
211 while (!released && transaction->t_checkpoint_io_list) {
212 jh = transaction->t_checkpoint_io_list;
213 bh = jh2bh(jh);
214 get_bh(bh);
215 if (buffer_locked(bh)) {
216 spin_unlock(&journal->j_list_lock);
217 wait_on_buffer(bh);
218 /* the journal_head may have gone by now */
219 BUFFER_TRACE(bh, "brelse");
220 __brelse(bh);
221 spin_lock(&journal->j_list_lock);
222 goto restart;
223 }
224 if (unlikely(buffer_write_io_error(bh)))
225 ret = -EIO;
226
227 /*
228 * Now in whatever state the buffer currently is, we know that
229 * it has been written out and so we can drop it from the list
230 */
231 released = __jbd2_journal_remove_checkpoint(jh);
232 __brelse(bh);
233 }
234
235 return ret;
236}
237
238static void 181static void
239__flush_batch(journal_t *journal, int *batch_count) 182__flush_batch(journal_t *journal, int *batch_count)
240{ 183{
@@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count)
255} 198}
256 199
257/* 200/*
258 * Try to flush one buffer from the checkpoint list to disk.
259 *
260 * Return 1 if something happened which requires us to abort the current
261 * scan of the checkpoint list. Return <0 if the buffer has failed to
262 * be written out.
263 *
264 * Called with j_list_lock held and drops it if 1 is returned
265 */
266static int __process_buffer(journal_t *journal, struct journal_head *jh,
267 int *batch_count, transaction_t *transaction)
268{
269 struct buffer_head *bh = jh2bh(jh);
270 int ret = 0;
271
272 if (buffer_locked(bh)) {
273 get_bh(bh);
274 spin_unlock(&journal->j_list_lock);
275 wait_on_buffer(bh);
276 /* the journal_head may have gone by now */
277 BUFFER_TRACE(bh, "brelse");
278 __brelse(bh);
279 ret = 1;
280 } else if (jh->b_transaction != NULL) {
281 transaction_t *t = jh->b_transaction;
282 tid_t tid = t->t_tid;
283
284 transaction->t_chp_stats.cs_forced_to_close++;
285 spin_unlock(&journal->j_list_lock);
286 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
287 /*
288 * The journal thread is dead; so starting and
289 * waiting for a commit to finish will cause
290 * us to wait for a _very_ long time.
291 */
292 printk(KERN_ERR "JBD2: %s: "
293 "Waiting for Godot: block %llu\n",
294 journal->j_devname,
295 (unsigned long long) bh->b_blocknr);
296 jbd2_log_start_commit(journal, tid);
297 jbd2_log_wait_commit(journal, tid);
298 ret = 1;
299 } else if (!buffer_dirty(bh)) {
300 ret = 1;
301 if (unlikely(buffer_write_io_error(bh)))
302 ret = -EIO;
303 get_bh(bh);
304 BUFFER_TRACE(bh, "remove from checkpoint");
305 __jbd2_journal_remove_checkpoint(jh);
306 spin_unlock(&journal->j_list_lock);
307 __brelse(bh);
308 } else {
309 /*
310 * Important: we are about to write the buffer, and
311 * possibly block, while still holding the journal lock.
312 * We cannot afford to let the transaction logic start
313 * messing around with this buffer before we write it to
314 * disk, as that would break recoverability.
315 */
316 BUFFER_TRACE(bh, "queue");
317 get_bh(bh);
318 J_ASSERT_BH(bh, !buffer_jwrite(bh));
319 journal->j_chkpt_bhs[*batch_count] = bh;
320 __buffer_relink_io(jh);
321 transaction->t_chp_stats.cs_written++;
322 (*batch_count)++;
323 if (*batch_count == JBD2_NR_BATCH) {
324 spin_unlock(&journal->j_list_lock);
325 __flush_batch(journal, batch_count);
326 ret = 1;
327 }
328 }
329 return ret;
330}
331
332/*
333 * Perform an actual checkpoint. We take the first transaction on the 201 * Perform an actual checkpoint. We take the first transaction on the
334 * list of transactions to be checkpointed and send all its buffers 202 * list of transactions to be checkpointed and send all its buffers
335 * to disk. We submit larger chunks of data at once. 203 * to disk. We submit larger chunks of data at once.
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
339 */ 207 */
340int jbd2_log_do_checkpoint(journal_t *journal) 208int jbd2_log_do_checkpoint(journal_t *journal)
341{ 209{
342 transaction_t *transaction; 210 struct journal_head *jh;
343 tid_t this_tid; 211 struct buffer_head *bh;
344 int result; 212 transaction_t *transaction;
213 tid_t this_tid;
214 int result, batch_count = 0;
345 215
346 jbd_debug(1, "Start checkpoint\n"); 216 jbd_debug(1, "Start checkpoint\n");
347 217
@@ -374,45 +244,117 @@ restart:
374 * done (maybe it's a new transaction, but it fell at the same 244 * done (maybe it's a new transaction, but it fell at the same
375 * address). 245 * address).
376 */ 246 */
377 if (journal->j_checkpoint_transactions == transaction && 247 if (journal->j_checkpoint_transactions != transaction ||
378 transaction->t_tid == this_tid) { 248 transaction->t_tid != this_tid)
379 int batch_count = 0; 249 goto out;
380 struct journal_head *jh; 250
381 int retry = 0, err; 251 /* checkpoint all of the transaction's buffers */
382 252 while (transaction->t_checkpoint_list) {
383 while (!retry && transaction->t_checkpoint_list) { 253 jh = transaction->t_checkpoint_list;
384 jh = transaction->t_checkpoint_list; 254 bh = jh2bh(jh);
385 retry = __process_buffer(journal, jh, &batch_count, 255
386 transaction); 256 if (buffer_locked(bh)) {
387 if (retry < 0 && !result) 257 spin_unlock(&journal->j_list_lock);
388 result = retry; 258 get_bh(bh);
389 if (!retry && (need_resched() || 259 wait_on_buffer(bh);
390 spin_needbreak(&journal->j_list_lock))) { 260 /* the journal_head may have gone by now */
391 spin_unlock(&journal->j_list_lock); 261 BUFFER_TRACE(bh, "brelse");
392 retry = 1; 262 __brelse(bh);
393 break; 263 goto retry;
394 }
395 } 264 }
265 if (jh->b_transaction != NULL) {
266 transaction_t *t = jh->b_transaction;
267 tid_t tid = t->t_tid;
396 268
397 if (batch_count) { 269 transaction->t_chp_stats.cs_forced_to_close++;
398 if (!retry) { 270 spin_unlock(&journal->j_list_lock);
399 spin_unlock(&journal->j_list_lock); 271 if (unlikely(journal->j_flags & JBD2_UNMOUNT))
400 retry = 1; 272 /*
401 } 273 * The journal thread is dead; so
402 __flush_batch(journal, &batch_count); 274 * starting and waiting for a commit
275 * to finish will cause us to wait for
276 * a _very_ long time.
277 */
278 printk(KERN_ERR
279 "JBD2: %s: Waiting for Godot: block %llu\n",
280 journal->j_devname, (unsigned long long) bh->b_blocknr);
281
282 jbd2_log_start_commit(journal, tid);
283 jbd2_log_wait_commit(journal, tid);
284 goto retry;
285 }
286 if (!buffer_dirty(bh)) {
287 if (unlikely(buffer_write_io_error(bh)) && !result)
288 result = -EIO;
289 BUFFER_TRACE(bh, "remove from checkpoint");
290 if (__jbd2_journal_remove_checkpoint(jh))
291 /* The transaction was released; we're done */
292 goto out;
293 continue;
403 } 294 }
295 /*
296 * Important: we are about to write the buffer, and
297 * possibly block, while still holding the journal
298 * lock. We cannot afford to let the transaction
299 * logic start messing around with this buffer before
300 * we write it to disk, as that would break
301 * recoverability.
302 */
303 BUFFER_TRACE(bh, "queue");
304 get_bh(bh);
305 J_ASSERT_BH(bh, !buffer_jwrite(bh));
306 journal->j_chkpt_bhs[batch_count++] = bh;
307 __buffer_relink_io(jh);
308 transaction->t_chp_stats.cs_written++;
309 if ((batch_count == JBD2_NR_BATCH) ||
310 need_resched() ||
311 spin_needbreak(&journal->j_list_lock))
312 goto unlock_and_flush;
313 }
404 314
405 if (retry) { 315 if (batch_count) {
316 unlock_and_flush:
317 spin_unlock(&journal->j_list_lock);
318 retry:
319 if (batch_count)
320 __flush_batch(journal, &batch_count);
406 spin_lock(&journal->j_list_lock); 321 spin_lock(&journal->j_list_lock);
407 goto restart; 322 goto restart;
323 }
324
325 /*
326 * Now we issued all of the transaction's buffers, let's deal
327 * with the buffers that are out for I/O.
328 */
329restart2:
330 /* Did somebody clean up the transaction in the meanwhile? */
331 if (journal->j_checkpoint_transactions != transaction ||
332 transaction->t_tid != this_tid)
333 goto out;
334
335 while (transaction->t_checkpoint_io_list) {
336 jh = transaction->t_checkpoint_io_list;
337 bh = jh2bh(jh);
338 if (buffer_locked(bh)) {
339 spin_unlock(&journal->j_list_lock);
340 get_bh(bh);
341 wait_on_buffer(bh);
342 /* the journal_head may have gone by now */
343 BUFFER_TRACE(bh, "brelse");
344 __brelse(bh);
345 spin_lock(&journal->j_list_lock);
346 goto restart2;
408 } 347 }
348 if (unlikely(buffer_write_io_error(bh)) && !result)
349 result = -EIO;
350
409 /* 351 /*
410 * Now we have cleaned up the first transaction's checkpoint 352 * Now in whatever state the buffer currently is, we
411 * list. Let's clean up the second one 353 * know that it has been written out and so we can
354 * drop it from the list
412 */ 355 */
413 err = __wait_cp_io(journal, transaction); 356 if (__jbd2_journal_remove_checkpoint(jh))
414 if (!result) 357 break;
415 result = err;
416 } 358 }
417out: 359out:
418 spin_unlock(&journal->j_list_lock); 360 spin_unlock(&journal->j_list_lock);
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
478 * Find all the written-back checkpoint buffers in the given list and 420 * Find all the written-back checkpoint buffers in the given list and
479 * release them. 421 * release them.
480 * 422 *
481 * Called with the journal locked.
482 * Called with j_list_lock held. 423 * Called with j_list_lock held.
483 * Returns number of buffers reaped (for debug) 424 * Returns 1 if we freed the transaction, 0 otherwise.
484 */ 425 */
485 426static int journal_clean_one_cp_list(struct journal_head *jh)
486static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
487{ 427{
488 struct journal_head *last_jh; 428 struct journal_head *last_jh;
489 struct journal_head *next_jh = jh; 429 struct journal_head *next_jh = jh;
490 int ret, freed = 0; 430 int ret;
431 int freed = 0;
491 432
492 *released = 0;
493 if (!jh) 433 if (!jh)
494 return 0; 434 return 0;
495 435
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
498 jh = next_jh; 438 jh = next_jh;
499 next_jh = jh->b_cpnext; 439 next_jh = jh->b_cpnext;
500 ret = __try_to_free_cp_buf(jh); 440 ret = __try_to_free_cp_buf(jh);
501 if (ret) { 441 if (!ret)
502 freed++; 442 return freed;
503 if (ret == 2) { 443 if (ret == 2)
504 *released = 1; 444 return 1;
505 return freed; 445 freed = 1;
506 }
507 }
508 /* 446 /*
509 * This function only frees up some memory 447 * This function only frees up some memory
510 * if possible so we dont have an obligation 448 * if possible so we dont have an obligation
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
523 * 461 *
524 * Find all the written-back checkpoint buffers in the journal and release them. 462 * Find all the written-back checkpoint buffers in the journal and release them.
525 * 463 *
526 * Called with the journal locked.
527 * Called with j_list_lock held. 464 * Called with j_list_lock held.
528 * Returns number of buffers reaped (for debug)
529 */ 465 */
530 466void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
531int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
532{ 467{
533 transaction_t *transaction, *last_transaction, *next_transaction; 468 transaction_t *transaction, *last_transaction, *next_transaction;
534 int ret = 0; 469 int ret;
535 int released;
536 470
537 transaction = journal->j_checkpoint_transactions; 471 transaction = journal->j_checkpoint_transactions;
538 if (!transaction) 472 if (!transaction)
539 goto out; 473 return;
540 474
541 last_transaction = transaction->t_cpprev; 475 last_transaction = transaction->t_cpprev;
542 next_transaction = transaction; 476 next_transaction = transaction;
543 do { 477 do {
544 transaction = next_transaction; 478 transaction = next_transaction;
545 next_transaction = transaction->t_cpnext; 479 next_transaction = transaction->t_cpnext;
546 ret += journal_clean_one_cp_list(transaction-> 480 ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
547 t_checkpoint_list, &released);
548 /* 481 /*
549 * This function only frees up some memory if possible so we 482 * This function only frees up some memory if possible so we
550 * dont have an obligation to finish processing. Bail out if 483 * dont have an obligation to finish processing. Bail out if
551 * preemption requested: 484 * preemption requested:
552 */ 485 */
553 if (need_resched()) 486 if (need_resched())
554 goto out; 487 return;
555 if (released) 488 if (ret)
556 continue; 489 continue;
557 /* 490 /*
558 * It is essential that we are as careful as in the case of 491 * It is essential that we are as careful as in the case of
559 * t_checkpoint_list with removing the buffer from the list as 492 * t_checkpoint_list with removing the buffer from the list as
560 * we can possibly see not yet submitted buffers on io_list 493 * we can possibly see not yet submitted buffers on io_list
561 */ 494 */
562 ret += journal_clean_one_cp_list(transaction-> 495 ret = journal_clean_one_cp_list(transaction->
563 t_checkpoint_io_list, &released); 496 t_checkpoint_io_list);
564 if (need_resched()) 497 if (need_resched())
565 goto out; 498 return;
499 /*
500 * Stop scanning if we couldn't free the transaction. This
501 * avoids pointless scanning of transactions which still
502 * weren't checkpointed.
503 */
504 if (!ret)
505 return;
566 } while (transaction != last_transaction); 506 } while (transaction != last_transaction);
567out:
568 return ret;
569} 507}
570 508
571/* 509/*