diff options
Diffstat (limited to 'fs/jbd2/checkpoint.c')
-rw-r--r-- | fs/jbd2/checkpoint.c | 334 |
1 files changed, 136 insertions, 198 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 7f34f4716165..988b32ed4c87 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c | |||
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
96 | 96 | ||
97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && | 97 | if (jh->b_transaction == NULL && !buffer_locked(bh) && |
98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { |
99 | /* | ||
100 | * Get our reference so that bh cannot be freed before | ||
101 | * we unlock it | ||
102 | */ | ||
103 | get_bh(bh); | ||
104 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
105 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
106 | BUFFER_TRACE(bh, "release"); | ||
107 | __brelse(bh); | ||
108 | } | 101 | } |
109 | return ret; | 102 | return ret; |
110 | } | 103 | } |
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
122 | 115 | ||
123 | nblocks = jbd2_space_needed(journal); | 116 | nblocks = jbd2_space_needed(journal); |
124 | while (jbd2_log_space_left(journal) < nblocks) { | 117 | while (jbd2_log_space_left(journal) < nblocks) { |
125 | if (journal->j_flags & JBD2_ABORT) | ||
126 | return; | ||
127 | write_unlock(&journal->j_state_lock); | 118 | write_unlock(&journal->j_state_lock); |
128 | mutex_lock(&journal->j_checkpoint_mutex); | 119 | mutex_lock(&journal->j_checkpoint_mutex); |
129 | 120 | ||
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
139 | * trace for forensic evidence. | 130 | * trace for forensic evidence. |
140 | */ | 131 | */ |
141 | write_lock(&journal->j_state_lock); | 132 | write_lock(&journal->j_state_lock); |
133 | if (journal->j_flags & JBD2_ABORT) { | ||
134 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
135 | return; | ||
136 | } | ||
142 | spin_lock(&journal->j_list_lock); | 137 | spin_lock(&journal->j_list_lock); |
143 | nblocks = jbd2_space_needed(journal); | 138 | nblocks = jbd2_space_needed(journal); |
144 | space_left = jbd2_log_space_left(journal); | 139 | space_left = jbd2_log_space_left(journal); |
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
183 | } | 178 | } |
184 | } | 179 | } |
185 | 180 | ||
186 | /* | ||
187 | * Clean up transaction's list of buffers submitted for io. | ||
188 | * We wait for any pending IO to complete and remove any clean | ||
189 | * buffers. Note that we take the buffers in the opposite ordering | ||
190 | * from the one in which they were submitted for IO. | ||
191 | * | ||
192 | * Return 0 on success, and return <0 if some buffers have failed | ||
193 | * to be written out. | ||
194 | * | ||
195 | * Called with j_list_lock held. | ||
196 | */ | ||
197 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) | ||
198 | { | ||
199 | struct journal_head *jh; | ||
200 | struct buffer_head *bh; | ||
201 | tid_t this_tid; | ||
202 | int released = 0; | ||
203 | int ret = 0; | ||
204 | |||
205 | this_tid = transaction->t_tid; | ||
206 | restart: | ||
207 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
208 | if (journal->j_checkpoint_transactions != transaction || | ||
209 | transaction->t_tid != this_tid) | ||
210 | return ret; | ||
211 | while (!released && transaction->t_checkpoint_io_list) { | ||
212 | jh = transaction->t_checkpoint_io_list; | ||
213 | bh = jh2bh(jh); | ||
214 | get_bh(bh); | ||
215 | if (buffer_locked(bh)) { | ||
216 | spin_unlock(&journal->j_list_lock); | ||
217 | wait_on_buffer(bh); | ||
218 | /* the journal_head may have gone by now */ | ||
219 | BUFFER_TRACE(bh, "brelse"); | ||
220 | __brelse(bh); | ||
221 | spin_lock(&journal->j_list_lock); | ||
222 | goto restart; | ||
223 | } | ||
224 | if (unlikely(buffer_write_io_error(bh))) | ||
225 | ret = -EIO; | ||
226 | |||
227 | /* | ||
228 | * Now in whatever state the buffer currently is, we know that | ||
229 | * it has been written out and so we can drop it from the list | ||
230 | */ | ||
231 | released = __jbd2_journal_remove_checkpoint(jh); | ||
232 | __brelse(bh); | ||
233 | } | ||
234 | |||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | static void | 181 | static void |
239 | __flush_batch(journal_t *journal, int *batch_count) | 182 | __flush_batch(journal_t *journal, int *batch_count) |
240 | { | 183 | { |
@@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count) | |||
255 | } | 198 | } |
256 | 199 | ||
257 | /* | 200 | /* |
258 | * Try to flush one buffer from the checkpoint list to disk. | ||
259 | * | ||
260 | * Return 1 if something happened which requires us to abort the current | ||
261 | * scan of the checkpoint list. Return <0 if the buffer has failed to | ||
262 | * be written out. | ||
263 | * | ||
264 | * Called with j_list_lock held and drops it if 1 is returned | ||
265 | */ | ||
266 | static int __process_buffer(journal_t *journal, struct journal_head *jh, | ||
267 | int *batch_count, transaction_t *transaction) | ||
268 | { | ||
269 | struct buffer_head *bh = jh2bh(jh); | ||
270 | int ret = 0; | ||
271 | |||
272 | if (buffer_locked(bh)) { | ||
273 | get_bh(bh); | ||
274 | spin_unlock(&journal->j_list_lock); | ||
275 | wait_on_buffer(bh); | ||
276 | /* the journal_head may have gone by now */ | ||
277 | BUFFER_TRACE(bh, "brelse"); | ||
278 | __brelse(bh); | ||
279 | ret = 1; | ||
280 | } else if (jh->b_transaction != NULL) { | ||
281 | transaction_t *t = jh->b_transaction; | ||
282 | tid_t tid = t->t_tid; | ||
283 | |||
284 | transaction->t_chp_stats.cs_forced_to_close++; | ||
285 | spin_unlock(&journal->j_list_lock); | ||
286 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) | ||
287 | /* | ||
288 | * The journal thread is dead; so starting and | ||
289 | * waiting for a commit to finish will cause | ||
290 | * us to wait for a _very_ long time. | ||
291 | */ | ||
292 | printk(KERN_ERR "JBD2: %s: " | ||
293 | "Waiting for Godot: block %llu\n", | ||
294 | journal->j_devname, | ||
295 | (unsigned long long) bh->b_blocknr); | ||
296 | jbd2_log_start_commit(journal, tid); | ||
297 | jbd2_log_wait_commit(journal, tid); | ||
298 | ret = 1; | ||
299 | } else if (!buffer_dirty(bh)) { | ||
300 | ret = 1; | ||
301 | if (unlikely(buffer_write_io_error(bh))) | ||
302 | ret = -EIO; | ||
303 | get_bh(bh); | ||
304 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
305 | __jbd2_journal_remove_checkpoint(jh); | ||
306 | spin_unlock(&journal->j_list_lock); | ||
307 | __brelse(bh); | ||
308 | } else { | ||
309 | /* | ||
310 | * Important: we are about to write the buffer, and | ||
311 | * possibly block, while still holding the journal lock. | ||
312 | * We cannot afford to let the transaction logic start | ||
313 | * messing around with this buffer before we write it to | ||
314 | * disk, as that would break recoverability. | ||
315 | */ | ||
316 | BUFFER_TRACE(bh, "queue"); | ||
317 | get_bh(bh); | ||
318 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
319 | journal->j_chkpt_bhs[*batch_count] = bh; | ||
320 | __buffer_relink_io(jh); | ||
321 | transaction->t_chp_stats.cs_written++; | ||
322 | (*batch_count)++; | ||
323 | if (*batch_count == JBD2_NR_BATCH) { | ||
324 | spin_unlock(&journal->j_list_lock); | ||
325 | __flush_batch(journal, batch_count); | ||
326 | ret = 1; | ||
327 | } | ||
328 | } | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Perform an actual checkpoint. We take the first transaction on the | 201 | * Perform an actual checkpoint. We take the first transaction on the |
334 | * list of transactions to be checkpointed and send all its buffers | 202 | * list of transactions to be checkpointed and send all its buffers |
335 | * to disk. We submit larger chunks of data at once. | 203 | * to disk. We submit larger chunks of data at once. |
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
339 | */ | 207 | */ |
340 | int jbd2_log_do_checkpoint(journal_t *journal) | 208 | int jbd2_log_do_checkpoint(journal_t *journal) |
341 | { | 209 | { |
342 | transaction_t *transaction; | 210 | struct journal_head *jh; |
343 | tid_t this_tid; | 211 | struct buffer_head *bh; |
344 | int result; | 212 | transaction_t *transaction; |
213 | tid_t this_tid; | ||
214 | int result, batch_count = 0; | ||
345 | 215 | ||
346 | jbd_debug(1, "Start checkpoint\n"); | 216 | jbd_debug(1, "Start checkpoint\n"); |
347 | 217 | ||
@@ -374,45 +244,117 @@ restart: | |||
374 | * done (maybe it's a new transaction, but it fell at the same | 244 | * done (maybe it's a new transaction, but it fell at the same |
375 | * address). | 245 | * address). |
376 | */ | 246 | */ |
377 | if (journal->j_checkpoint_transactions == transaction && | 247 | if (journal->j_checkpoint_transactions != transaction || |
378 | transaction->t_tid == this_tid) { | 248 | transaction->t_tid != this_tid) |
379 | int batch_count = 0; | 249 | goto out; |
380 | struct journal_head *jh; | 250 | |
381 | int retry = 0, err; | 251 | /* checkpoint all of the transaction's buffers */ |
382 | 252 | while (transaction->t_checkpoint_list) { | |
383 | while (!retry && transaction->t_checkpoint_list) { | 253 | jh = transaction->t_checkpoint_list; |
384 | jh = transaction->t_checkpoint_list; | 254 | bh = jh2bh(jh); |
385 | retry = __process_buffer(journal, jh, &batch_count, | 255 | |
386 | transaction); | 256 | if (buffer_locked(bh)) { |
387 | if (retry < 0 && !result) | 257 | spin_unlock(&journal->j_list_lock); |
388 | result = retry; | 258 | get_bh(bh); |
389 | if (!retry && (need_resched() || | 259 | wait_on_buffer(bh); |
390 | spin_needbreak(&journal->j_list_lock))) { | 260 | /* the journal_head may have gone by now */ |
391 | spin_unlock(&journal->j_list_lock); | 261 | BUFFER_TRACE(bh, "brelse"); |
392 | retry = 1; | 262 | __brelse(bh); |
393 | break; | 263 | goto retry; |
394 | } | ||
395 | } | 264 | } |
265 | if (jh->b_transaction != NULL) { | ||
266 | transaction_t *t = jh->b_transaction; | ||
267 | tid_t tid = t->t_tid; | ||
396 | 268 | ||
397 | if (batch_count) { | 269 | transaction->t_chp_stats.cs_forced_to_close++; |
398 | if (!retry) { | 270 | spin_unlock(&journal->j_list_lock); |
399 | spin_unlock(&journal->j_list_lock); | 271 | if (unlikely(journal->j_flags & JBD2_UNMOUNT)) |
400 | retry = 1; | 272 | /* |
401 | } | 273 | * The journal thread is dead; so |
402 | __flush_batch(journal, &batch_count); | 274 | * starting and waiting for a commit |
275 | * to finish will cause us to wait for | ||
276 | * a _very_ long time. | ||
277 | */ | ||
278 | printk(KERN_ERR | ||
279 | "JBD2: %s: Waiting for Godot: block %llu\n", | ||
280 | journal->j_devname, (unsigned long long) bh->b_blocknr); | ||
281 | |||
282 | jbd2_log_start_commit(journal, tid); | ||
283 | jbd2_log_wait_commit(journal, tid); | ||
284 | goto retry; | ||
285 | } | ||
286 | if (!buffer_dirty(bh)) { | ||
287 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
288 | result = -EIO; | ||
289 | BUFFER_TRACE(bh, "remove from checkpoint"); | ||
290 | if (__jbd2_journal_remove_checkpoint(jh)) | ||
291 | /* The transaction was released; we're done */ | ||
292 | goto out; | ||
293 | continue; | ||
403 | } | 294 | } |
295 | /* | ||
296 | * Important: we are about to write the buffer, and | ||
297 | * possibly block, while still holding the journal | ||
298 | * lock. We cannot afford to let the transaction | ||
299 | * logic start messing around with this buffer before | ||
300 | * we write it to disk, as that would break | ||
301 | * recoverability. | ||
302 | */ | ||
303 | BUFFER_TRACE(bh, "queue"); | ||
304 | get_bh(bh); | ||
305 | J_ASSERT_BH(bh, !buffer_jwrite(bh)); | ||
306 | journal->j_chkpt_bhs[batch_count++] = bh; | ||
307 | __buffer_relink_io(jh); | ||
308 | transaction->t_chp_stats.cs_written++; | ||
309 | if ((batch_count == JBD2_NR_BATCH) || | ||
310 | need_resched() || | ||
311 | spin_needbreak(&journal->j_list_lock)) | ||
312 | goto unlock_and_flush; | ||
313 | } | ||
404 | 314 | ||
405 | if (retry) { | 315 | if (batch_count) { |
316 | unlock_and_flush: | ||
317 | spin_unlock(&journal->j_list_lock); | ||
318 | retry: | ||
319 | if (batch_count) | ||
320 | __flush_batch(journal, &batch_count); | ||
406 | spin_lock(&journal->j_list_lock); | 321 | spin_lock(&journal->j_list_lock); |
407 | goto restart; | 322 | goto restart; |
323 | } | ||
324 | |||
325 | /* | ||
326 | * Now we issued all of the transaction's buffers, let's deal | ||
327 | * with the buffers that are out for I/O. | ||
328 | */ | ||
329 | restart2: | ||
330 | /* Did somebody clean up the transaction in the meanwhile? */ | ||
331 | if (journal->j_checkpoint_transactions != transaction || | ||
332 | transaction->t_tid != this_tid) | ||
333 | goto out; | ||
334 | |||
335 | while (transaction->t_checkpoint_io_list) { | ||
336 | jh = transaction->t_checkpoint_io_list; | ||
337 | bh = jh2bh(jh); | ||
338 | if (buffer_locked(bh)) { | ||
339 | spin_unlock(&journal->j_list_lock); | ||
340 | get_bh(bh); | ||
341 | wait_on_buffer(bh); | ||
342 | /* the journal_head may have gone by now */ | ||
343 | BUFFER_TRACE(bh, "brelse"); | ||
344 | __brelse(bh); | ||
345 | spin_lock(&journal->j_list_lock); | ||
346 | goto restart2; | ||
408 | } | 347 | } |
348 | if (unlikely(buffer_write_io_error(bh)) && !result) | ||
349 | result = -EIO; | ||
350 | |||
409 | /* | 351 | /* |
410 | * Now we have cleaned up the first transaction's checkpoint | 352 | * Now in whatever state the buffer currently is, we |
411 | * list. Let's clean up the second one | 353 | * know that it has been written out and so we can |
354 | * drop it from the list | ||
412 | */ | 355 | */ |
413 | err = __wait_cp_io(journal, transaction); | 356 | if (__jbd2_journal_remove_checkpoint(jh)) |
414 | if (!result) | 357 | break; |
415 | result = err; | ||
416 | } | 358 | } |
417 | out: | 359 | out: |
418 | spin_unlock(&journal->j_list_lock); | 360 | spin_unlock(&journal->j_list_lock); |
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
478 | * Find all the written-back checkpoint buffers in the given list and | 420 | * Find all the written-back checkpoint buffers in the given list and |
479 | * release them. | 421 | * release them. |
480 | * | 422 | * |
481 | * Called with the journal locked. | ||
482 | * Called with j_list_lock held. | 423 | * Called with j_list_lock held. |
483 | * Returns number of buffers reaped (for debug) | 424 | * Returns 1 if we freed the transaction, 0 otherwise. |
484 | */ | 425 | */ |
485 | 426 | static int journal_clean_one_cp_list(struct journal_head *jh) | |
486 | static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | ||
487 | { | 427 | { |
488 | struct journal_head *last_jh; | 428 | struct journal_head *last_jh; |
489 | struct journal_head *next_jh = jh; | 429 | struct journal_head *next_jh = jh; |
490 | int ret, freed = 0; | 430 | int ret; |
431 | int freed = 0; | ||
491 | 432 | ||
492 | *released = 0; | ||
493 | if (!jh) | 433 | if (!jh) |
494 | return 0; | 434 | return 0; |
495 | 435 | ||
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
498 | jh = next_jh; | 438 | jh = next_jh; |
499 | next_jh = jh->b_cpnext; | 439 | next_jh = jh->b_cpnext; |
500 | ret = __try_to_free_cp_buf(jh); | 440 | ret = __try_to_free_cp_buf(jh); |
501 | if (ret) { | 441 | if (!ret) |
502 | freed++; | 442 | return freed; |
503 | if (ret == 2) { | 443 | if (ret == 2) |
504 | *released = 1; | 444 | return 1; |
505 | return freed; | 445 | freed = 1; |
506 | } | ||
507 | } | ||
508 | /* | 446 | /* |
509 | * This function only frees up some memory | 447 | * This function only frees up some memory |
510 | * if possible so we dont have an obligation | 448 | * if possible so we dont have an obligation |
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) | |||
523 | * | 461 | * |
524 | * Find all the written-back checkpoint buffers in the journal and release them. | 462 | * Find all the written-back checkpoint buffers in the journal and release them. |
525 | * | 463 | * |
526 | * Called with the journal locked. | ||
527 | * Called with j_list_lock held. | 464 | * Called with j_list_lock held. |
528 | * Returns number of buffers reaped (for debug) | ||
529 | */ | 465 | */ |
530 | 466 | void __jbd2_journal_clean_checkpoint_list(journal_t *journal) | |
531 | int __jbd2_journal_clean_checkpoint_list(journal_t *journal) | ||
532 | { | 467 | { |
533 | transaction_t *transaction, *last_transaction, *next_transaction; | 468 | transaction_t *transaction, *last_transaction, *next_transaction; |
534 | int ret = 0; | 469 | int ret; |
535 | int released; | ||
536 | 470 | ||
537 | transaction = journal->j_checkpoint_transactions; | 471 | transaction = journal->j_checkpoint_transactions; |
538 | if (!transaction) | 472 | if (!transaction) |
539 | goto out; | 473 | return; |
540 | 474 | ||
541 | last_transaction = transaction->t_cpprev; | 475 | last_transaction = transaction->t_cpprev; |
542 | next_transaction = transaction; | 476 | next_transaction = transaction; |
543 | do { | 477 | do { |
544 | transaction = next_transaction; | 478 | transaction = next_transaction; |
545 | next_transaction = transaction->t_cpnext; | 479 | next_transaction = transaction->t_cpnext; |
546 | ret += journal_clean_one_cp_list(transaction-> | 480 | ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); |
547 | t_checkpoint_list, &released); | ||
548 | /* | 481 | /* |
549 | * This function only frees up some memory if possible so we | 482 | * This function only frees up some memory if possible so we |
550 | * dont have an obligation to finish processing. Bail out if | 483 | * dont have an obligation to finish processing. Bail out if |
551 | * preemption requested: | 484 | * preemption requested: |
552 | */ | 485 | */ |
553 | if (need_resched()) | 486 | if (need_resched()) |
554 | goto out; | 487 | return; |
555 | if (released) | 488 | if (ret) |
556 | continue; | 489 | continue; |
557 | /* | 490 | /* |
558 | * It is essential that we are as careful as in the case of | 491 | * It is essential that we are as careful as in the case of |
559 | * t_checkpoint_list with removing the buffer from the list as | 492 | * t_checkpoint_list with removing the buffer from the list as |
560 | * we can possibly see not yet submitted buffers on io_list | 493 | * we can possibly see not yet submitted buffers on io_list |
561 | */ | 494 | */ |
562 | ret += journal_clean_one_cp_list(transaction-> | 495 | ret = journal_clean_one_cp_list(transaction-> |
563 | t_checkpoint_io_list, &released); | 496 | t_checkpoint_io_list); |
564 | if (need_resched()) | 497 | if (need_resched()) |
565 | goto out; | 498 | return; |
499 | /* | ||
500 | * Stop scanning if we couldn't free the transaction. This | ||
501 | * avoids pointless scanning of transactions which still | ||
502 | * weren't checkpointed. | ||
503 | */ | ||
504 | if (!ret) | ||
505 | return; | ||
566 | } while (transaction != last_transaction); | 506 | } while (transaction != last_transaction); |
567 | out: | ||
568 | return ret; | ||
569 | } | 507 | } |
570 | 508 | ||
571 | /* | 509 | /* |