diff options
Diffstat (limited to 'fs/jbd2/checkpoint.c')
| -rw-r--r-- | fs/jbd2/checkpoint.c | 71 |
1 files changed, 57 insertions, 14 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..9203c3332f17 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
| 21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
| 22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
| 23 | #include <linux/marker.h> | ||
| 23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
| 24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 25 | 26 | ||
| @@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
| 93 | int ret = 0; | 94 | int ret = 0; |
| 94 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
| 95 | 96 | ||
| 96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
| 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
| 97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| 98 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
| 99 | jbd_unlock_bh_state(bh); | 101 | jbd_unlock_bh_state(bh); |
| @@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 126 | 128 | ||
| 127 | /* | 129 | /* |
| 128 | * Test again, another process may have checkpointed while we | 130 | * Test again, another process may have checkpointed while we |
| 129 | * were waiting for the checkpoint lock | 131 | * were waiting for the checkpoint lock. If there are no |
| 132 | * outstanding transactions there is nothing to checkpoint and | ||
| 133 | * we can't make progress. Abort the journal in this case. | ||
| 130 | */ | 134 | */ |
| 131 | spin_lock(&journal->j_state_lock); | 135 | spin_lock(&journal->j_state_lock); |
| 136 | spin_lock(&journal->j_list_lock); | ||
| 132 | nblocks = jbd_space_needed(journal); | 137 | nblocks = jbd_space_needed(journal); |
| 133 | if (__jbd2_log_space_left(journal) < nblocks) { | 138 | if (__jbd2_log_space_left(journal) < nblocks) { |
| 139 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
| 140 | |||
| 141 | spin_unlock(&journal->j_list_lock); | ||
| 134 | spin_unlock(&journal->j_state_lock); | 142 | spin_unlock(&journal->j_state_lock); |
| 135 | jbd2_log_do_checkpoint(journal); | 143 | if (chkpt) { |
| 144 | jbd2_log_do_checkpoint(journal); | ||
| 145 | } else { | ||
| 146 | printk(KERN_ERR "%s: no transactions\n", | ||
| 147 | __func__); | ||
| 148 | jbd2_journal_abort(journal, 0); | ||
| 149 | } | ||
| 150 | |||
| 136 | spin_lock(&journal->j_state_lock); | 151 | spin_lock(&journal->j_state_lock); |
| 152 | } else { | ||
| 153 | spin_unlock(&journal->j_list_lock); | ||
| 137 | } | 154 | } |
| 138 | mutex_unlock(&journal->j_checkpoint_mutex); | 155 | mutex_unlock(&journal->j_checkpoint_mutex); |
| 139 | } | 156 | } |
| @@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
| 160 | * buffers. Note that we take the buffers in the opposite ordering | 177 | * buffers. Note that we take the buffers in the opposite ordering |
| 161 | * from the one in which they were submitted for IO. | 178 | * from the one in which they were submitted for IO. |
| 162 | * | 179 | * |
| 180 | * Return 0 on success, and return <0 if some buffers have failed | ||
| 181 | * to be written out. | ||
| 182 | * | ||
| 163 | * Called with j_list_lock held. | 183 | * Called with j_list_lock held. |
| 164 | */ | 184 | */ |
| 165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 185 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
| 166 | { | 186 | { |
| 167 | struct journal_head *jh; | 187 | struct journal_head *jh; |
| 168 | struct buffer_head *bh; | 188 | struct buffer_head *bh; |
| 169 | tid_t this_tid; | 189 | tid_t this_tid; |
| 170 | int released = 0; | 190 | int released = 0; |
| 191 | int ret = 0; | ||
| 171 | 192 | ||
| 172 | this_tid = transaction->t_tid; | 193 | this_tid = transaction->t_tid; |
| 173 | restart: | 194 | restart: |
| 174 | /* Did somebody clean up the transaction in the meanwhile? */ | 195 | /* Did somebody clean up the transaction in the meanwhile? */ |
| 175 | if (journal->j_checkpoint_transactions != transaction || | 196 | if (journal->j_checkpoint_transactions != transaction || |
| 176 | transaction->t_tid != this_tid) | 197 | transaction->t_tid != this_tid) |
| 177 | return; | 198 | return ret; |
| 178 | while (!released && transaction->t_checkpoint_io_list) { | 199 | while (!released && transaction->t_checkpoint_io_list) { |
| 179 | jh = transaction->t_checkpoint_io_list; | 200 | jh = transaction->t_checkpoint_io_list; |
| 180 | bh = jh2bh(jh); | 201 | bh = jh2bh(jh); |
| @@ -194,6 +215,9 @@ restart: | |||
| 194 | spin_lock(&journal->j_list_lock); | 215 | spin_lock(&journal->j_list_lock); |
| 195 | goto restart; | 216 | goto restart; |
| 196 | } | 217 | } |
| 218 | if (unlikely(buffer_write_io_error(bh))) | ||
| 219 | ret = -EIO; | ||
| 220 | |||
| 197 | /* | 221 | /* |
| 198 | * Now in whatever state the buffer currently is, we know that | 222 | * Now in whatever state the buffer currently is, we know that |
| 199 | * it has been written out and so we can drop it from the list | 223 | * it has been written out and so we can drop it from the list |
| @@ -203,6 +227,8 @@ restart: | |||
| 203 | jbd2_journal_remove_journal_head(bh); | 227 | jbd2_journal_remove_journal_head(bh); |
| 204 | __brelse(bh); | 228 | __brelse(bh); |
| 205 | } | 229 | } |
| 230 | |||
| 231 | return ret; | ||
| 206 | } | 232 | } |
| 207 | 233 | ||
| 208 | #define NR_BATCH 64 | 234 | #define NR_BATCH 64 |
| @@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
| 226 | * Try to flush one buffer from the checkpoint list to disk. | 252 | * Try to flush one buffer from the checkpoint list to disk. |
| 227 | * | 253 | * |
| 228 | * Return 1 if something happened which requires us to abort the current | 254 | * Return 1 if something happened which requires us to abort the current |
| 229 | * scan of the checkpoint list. | 255 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
| 256 | * be written out. | ||
| 230 | * | 257 | * |
| 231 | * Called with j_list_lock held and drops it if 1 is returned | 258 | * Called with j_list_lock held and drops it if 1 is returned |
| 232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 259 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
| @@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 258 | jbd2_log_wait_commit(journal, tid); | 285 | jbd2_log_wait_commit(journal, tid); |
| 259 | ret = 1; | 286 | ret = 1; |
| 260 | } else if (!buffer_dirty(bh)) { | 287 | } else if (!buffer_dirty(bh)) { |
| 288 | ret = 1; | ||
| 289 | if (unlikely(buffer_write_io_error(bh))) | ||
| 290 | ret = -EIO; | ||
| 261 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 291 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
| 262 | BUFFER_TRACE(bh, "remove from checkpoint"); | 292 | BUFFER_TRACE(bh, "remove from checkpoint"); |
| 263 | __jbd2_journal_remove_checkpoint(jh); | 293 | __jbd2_journal_remove_checkpoint(jh); |
| @@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 265 | jbd_unlock_bh_state(bh); | 295 | jbd_unlock_bh_state(bh); |
| 266 | jbd2_journal_remove_journal_head(bh); | 296 | jbd2_journal_remove_journal_head(bh); |
| 267 | __brelse(bh); | 297 | __brelse(bh); |
| 268 | ret = 1; | ||
| 269 | } else { | 298 | } else { |
| 270 | /* | 299 | /* |
| 271 | * Important: we are about to write the buffer, and | 300 | * Important: we are about to write the buffer, and |
| @@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 298 | * to disk. We submit larger chunks of data at once. | 327 | * to disk. We submit larger chunks of data at once. |
| 299 | * | 328 | * |
| 300 | * The journal should be locked before calling this function. | 329 | * The journal should be locked before calling this function. |
| 330 | * Called with j_checkpoint_mutex held. | ||
| 301 | */ | 331 | */ |
| 302 | int jbd2_log_do_checkpoint(journal_t *journal) | 332 | int jbd2_log_do_checkpoint(journal_t *journal) |
| 303 | { | 333 | { |
| @@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
| 313 | * journal straight away. | 343 | * journal straight away. |
| 314 | */ | 344 | */ |
| 315 | result = jbd2_cleanup_journal_tail(journal); | 345 | result = jbd2_cleanup_journal_tail(journal); |
| 346 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
| 347 | journal->j_devname, result); | ||
| 316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 348 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
| 317 | if (result <= 0) | 349 | if (result <= 0) |
| 318 | return result; | 350 | return result; |
| @@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
| 321 | * OK, we need to start writing disk blocks. Take one transaction | 353 | * OK, we need to start writing disk blocks. Take one transaction |
| 322 | * and write it. | 354 | * and write it. |
| 323 | */ | 355 | */ |
| 356 | result = 0; | ||
| 324 | spin_lock(&journal->j_list_lock); | 357 | spin_lock(&journal->j_list_lock); |
| 325 | if (!journal->j_checkpoint_transactions) | 358 | if (!journal->j_checkpoint_transactions) |
| 326 | goto out; | 359 | goto out; |
| @@ -339,7 +372,7 @@ restart: | |||
| 339 | int batch_count = 0; | 372 | int batch_count = 0; |
| 340 | struct buffer_head *bhs[NR_BATCH]; | 373 | struct buffer_head *bhs[NR_BATCH]; |
| 341 | struct journal_head *jh; | 374 | struct journal_head *jh; |
| 342 | int retry = 0; | 375 | int retry = 0, err; |
| 343 | 376 | ||
| 344 | while (!retry && transaction->t_checkpoint_list) { | 377 | while (!retry && transaction->t_checkpoint_list) { |
| 345 | struct buffer_head *bh; | 378 | struct buffer_head *bh; |
| @@ -353,6 +386,8 @@ restart: | |||
| 353 | } | 386 | } |
| 354 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 387 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
| 355 | transaction); | 388 | transaction); |
| 389 | if (retry < 0 && !result) | ||
| 390 | result = retry; | ||
| 356 | if (!retry && (need_resched() || | 391 | if (!retry && (need_resched() || |
| 357 | spin_needbreak(&journal->j_list_lock))) { | 392 | spin_needbreak(&journal->j_list_lock))) { |
| 358 | spin_unlock(&journal->j_list_lock); | 393 | spin_unlock(&journal->j_list_lock); |
| @@ -377,14 +412,18 @@ restart: | |||
| 377 | * Now we have cleaned up the first transaction's checkpoint | 412 | * Now we have cleaned up the first transaction's checkpoint |
| 378 | * list. Let's clean up the second one | 413 | * list. Let's clean up the second one |
| 379 | */ | 414 | */ |
| 380 | __wait_cp_io(journal, transaction); | 415 | err = __wait_cp_io(journal, transaction); |
| 416 | if (!result) | ||
| 417 | result = err; | ||
| 381 | } | 418 | } |
| 382 | out: | 419 | out: |
| 383 | spin_unlock(&journal->j_list_lock); | 420 | spin_unlock(&journal->j_list_lock); |
| 384 | result = jbd2_cleanup_journal_tail(journal); | ||
| 385 | if (result < 0) | 421 | if (result < 0) |
| 386 | return result; | 422 | jbd2_journal_abort(journal, result); |
| 387 | return 0; | 423 | else |
| 424 | result = jbd2_cleanup_journal_tail(journal); | ||
| 425 | |||
| 426 | return (result < 0) ? result : 0; | ||
| 388 | } | 427 | } |
| 389 | 428 | ||
| 390 | /* | 429 | /* |
| @@ -400,8 +439,9 @@ out: | |||
| 400 | * This is the only part of the journaling code which really needs to be | 439 | * This is the only part of the journaling code which really needs to be |
| 401 | * aware of transaction aborts. Checkpointing involves writing to the | 440 | * aware of transaction aborts. Checkpointing involves writing to the |
| 402 | * main filesystem area rather than to the journal, so it can proceed | 441 | * main filesystem area rather than to the journal, so it can proceed |
| 403 | * even in abort state, but we must not update the journal superblock if | 442 | * even in abort state, but we must not update the super block if |
| 404 | * we have an abort error outstanding. | 443 | * checkpointing may have failed. Otherwise, we would lose some metadata |
| 444 | * buffers which should be written-back to the filesystem. | ||
| 405 | */ | 445 | */ |
| 406 | 446 | ||
| 407 | int jbd2_cleanup_journal_tail(journal_t *journal) | 447 | int jbd2_cleanup_journal_tail(journal_t *journal) |
| @@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
| 410 | tid_t first_tid; | 450 | tid_t first_tid; |
| 411 | unsigned long blocknr, freed; | 451 | unsigned long blocknr, freed; |
| 412 | 452 | ||
| 453 | if (is_journal_aborted(journal)) | ||
| 454 | return 1; | ||
| 455 | |||
| 413 | /* OK, work out the oldest transaction remaining in the log, and | 456 | /* OK, work out the oldest transaction remaining in the log, and |
| 414 | * the log block it starts at. | 457 | * the log block it starts at. |
| 415 | * | 458 | * |
