about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/jbd/checkpoint.c 49
-rw-r--r-- fs/jbd/journal.c 28
-rw-r--r-- fs/jbd/recovery.c 7
-rw-r--r-- include/linux/jbd.h 2
4 files changed, 65 insertions, 21 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb88..e29293501d42 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
93 int ret = 0; 93 int ret = 0;
94 struct buffer_head *bh = jh2bh(jh); 94 struct buffer_head *bh = jh2bh(jh);
95 95
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && buffer_uptodate(bh)) {
97 JBUFFER_TRACE(jh, "remove from checkpoint list"); 98 JBUFFER_TRACE(jh, "remove from checkpoint list");
98 ret = __journal_remove_checkpoint(jh) + 1; 99 ret = __journal_remove_checkpoint(jh) + 1;
99 jbd_unlock_bh_state(bh); 100 jbd_unlock_bh_state(bh);
@@ -160,21 +161,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
160 * buffers. Note that we take the buffers in the opposite ordering 161 * buffers. Note that we take the buffers in the opposite ordering
161 * from the one in which they were submitted for IO. 162 * from the one in which they were submitted for IO.
162 * 163 *
164 * Return 0 on success, and return <0 if some buffers have failed
165 * to be written out.
166 *
163 * Called with j_list_lock held. 167 * Called with j_list_lock held.
164 */ 168 */
165static void __wait_cp_io(journal_t *journal, transaction_t *transaction) 169static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
166{ 170{
167 struct journal_head *jh; 171 struct journal_head *jh;
168 struct buffer_head *bh; 172 struct buffer_head *bh;
169 tid_t this_tid; 173 tid_t this_tid;
170 int released = 0; 174 int released = 0;
175 int ret = 0;
171 176
172 this_tid = transaction->t_tid; 177 this_tid = transaction->t_tid;
173restart: 178restart:
174 /* Did somebody clean up the transaction in the meanwhile? */ 179 /* Did somebody clean up the transaction in the meanwhile? */
175 if (journal->j_checkpoint_transactions != transaction || 180 if (journal->j_checkpoint_transactions != transaction ||
176 transaction->t_tid != this_tid) 181 transaction->t_tid != this_tid)
177 return; 182 return ret;
178 while (!released && transaction->t_checkpoint_io_list) { 183 while (!released && transaction->t_checkpoint_io_list) {
179 jh = transaction->t_checkpoint_io_list; 184 jh = transaction->t_checkpoint_io_list;
180 bh = jh2bh(jh); 185 bh = jh2bh(jh);
@@ -194,6 +199,9 @@ restart:
194 spin_lock(&journal->j_list_lock); 199 spin_lock(&journal->j_list_lock);
195 goto restart; 200 goto restart;
196 } 201 }
202 if (unlikely(!buffer_uptodate(bh)))
203 ret = -EIO;
204
197 /* 205 /*
198 * Now in whatever state the buffer currently is, we know that 206 * Now in whatever state the buffer currently is, we know that
199 * it has been written out and so we can drop it from the list 207 * it has been written out and so we can drop it from the list
@@ -203,6 +211,8 @@ restart:
203 journal_remove_journal_head(bh); 211 journal_remove_journal_head(bh);
204 __brelse(bh); 212 __brelse(bh);
205 } 213 }
214
215 return ret;
206} 216}
207 217
208#define NR_BATCH 64 218#define NR_BATCH 64
@@ -226,7 +236,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
226 * Try to flush one buffer from the checkpoint list to disk. 236 * Try to flush one buffer from the checkpoint list to disk.
227 * 237 *
228 * Return 1 if something happened which requires us to abort the current 238 * Return 1 if something happened which requires us to abort the current
229 * scan of the checkpoint list. 239 * scan of the checkpoint list. Return <0 if the buffer has failed to
240 * be written out.
230 * 241 *
231 * Called with j_list_lock held and drops it if 1 is returned 242 * Called with j_list_lock held and drops it if 1 is returned
232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 243 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +267,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
256 log_wait_commit(journal, tid); 267 log_wait_commit(journal, tid);
257 ret = 1; 268 ret = 1;
258 } else if (!buffer_dirty(bh)) { 269 } else if (!buffer_dirty(bh)) {
270 ret = 1;
271 if (unlikely(!buffer_uptodate(bh)))
272 ret = -EIO;
259 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 273 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
260 BUFFER_TRACE(bh, "remove from checkpoint"); 274 BUFFER_TRACE(bh, "remove from checkpoint");
261 __journal_remove_checkpoint(jh); 275 __journal_remove_checkpoint(jh);
@@ -263,7 +277,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
263 jbd_unlock_bh_state(bh); 277 jbd_unlock_bh_state(bh);
264 journal_remove_journal_head(bh); 278 journal_remove_journal_head(bh);
265 __brelse(bh); 279 __brelse(bh);
266 ret = 1;
267 } else { 280 } else {
268 /* 281 /*
269 * Important: we are about to write the buffer, and 282 * Important: we are about to write the buffer, and
@@ -295,6 +308,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
295 * to disk. We submit larger chunks of data at once. 308 * to disk. We submit larger chunks of data at once.
296 * 309 *
297 * The journal should be locked before calling this function. 310 * The journal should be locked before calling this function.
311 * Called with j_checkpoint_mutex held.
298 */ 312 */
299int log_do_checkpoint(journal_t *journal) 313int log_do_checkpoint(journal_t *journal)
300{ 314{
@@ -318,6 +332,7 @@ int log_do_checkpoint(journal_t *journal)
318 * OK, we need to start writing disk blocks. Take one transaction 332 * OK, we need to start writing disk blocks. Take one transaction
319 * and write it. 333 * and write it.
320 */ 334 */
335 result = 0;
321 spin_lock(&journal->j_list_lock); 336 spin_lock(&journal->j_list_lock);
322 if (!journal->j_checkpoint_transactions) 337 if (!journal->j_checkpoint_transactions)
323 goto out; 338 goto out;
@@ -334,7 +349,7 @@ restart:
334 int batch_count = 0; 349 int batch_count = 0;
335 struct buffer_head *bhs[NR_BATCH]; 350 struct buffer_head *bhs[NR_BATCH];
336 struct journal_head *jh; 351 struct journal_head *jh;
337 int retry = 0; 352 int retry = 0, err;
338 353
339 while (!retry && transaction->t_checkpoint_list) { 354 while (!retry && transaction->t_checkpoint_list) {
340 struct buffer_head *bh; 355 struct buffer_head *bh;
@@ -347,6 +362,8 @@ restart:
347 break; 362 break;
348 } 363 }
349 retry = __process_buffer(journal, jh, bhs,&batch_count); 364 retry = __process_buffer(journal, jh, bhs,&batch_count);
365 if (retry < 0 && !result)
366 result = retry;
350 if (!retry && (need_resched() || 367 if (!retry && (need_resched() ||
351 spin_needbreak(&journal->j_list_lock))) { 368 spin_needbreak(&journal->j_list_lock))) {
352 spin_unlock(&journal->j_list_lock); 369 spin_unlock(&journal->j_list_lock);
@@ -371,14 +388,18 @@ restart:
371 * Now we have cleaned up the first transaction's checkpoint 388 * Now we have cleaned up the first transaction's checkpoint
372 * list. Let's clean up the second one 389 * list. Let's clean up the second one
373 */ 390 */
374 __wait_cp_io(journal, transaction); 391 err = __wait_cp_io(journal, transaction);
392 if (!result)
393 result = err;
375 } 394 }
376out: 395out:
377 spin_unlock(&journal->j_list_lock); 396 spin_unlock(&journal->j_list_lock);
378 result = cleanup_journal_tail(journal);
379 if (result < 0) 397 if (result < 0)
380 return result; 398 journal_abort(journal, result);
381 return 0; 399 else
400 result = cleanup_journal_tail(journal);
401
402 return (result < 0) ? result : 0;
382} 403}
383 404
384/* 405/*
@@ -394,8 +415,9 @@ out:
394 * This is the only part of the journaling code which really needs to be 415 * This is the only part of the journaling code which really needs to be
395 * aware of transaction aborts. Checkpointing involves writing to the 416 * aware of transaction aborts. Checkpointing involves writing to the
396 * main filesystem area rather than to the journal, so it can proceed 417 * main filesystem area rather than to the journal, so it can proceed
397 * even in abort state, but we must not update the journal superblock if 418 * even in abort state, but we must not update the super block if
398 * we have an abort error outstanding. 419 * checkpointing may have failed. Otherwise, we would lose some metadata
420 * buffers which should be written-back to the filesystem.
399 */ 421 */
400 422
401int cleanup_journal_tail(journal_t *journal) 423int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +426,9 @@ int cleanup_journal_tail(journal_t *journal)
404 tid_t first_tid; 426 tid_t first_tid;
405 unsigned long blocknr, freed; 427 unsigned long blocknr, freed;
406 428
429 if (is_journal_aborted(journal))
430 return 1;
431
407 /* OK, work out the oldest transaction remaining in the log, and 432 /* OK, work out the oldest transaction remaining in the log, and
408 * the log block it starts at. 433 * the log block it starts at.
409 * 434 *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index aa7143a8349b..9e4fa52d7dc8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1121,9 +1121,12 @@ recovery_error:
1121 * 1121 *
1122 * Release a journal_t structure once it is no longer in use by the 1122 * Release a journal_t structure once it is no longer in use by the
1123 * journaled object. 1123 * journaled object.
1124 * Return <0 if we couldn't clean up the journal.
1124 */ 1125 */
1125void journal_destroy(journal_t *journal) 1126int journal_destroy(journal_t *journal)
1126{ 1127{
1128 int err = 0;
1129
1127 /* Wait for the commit thread to wake up and die. */ 1130 /* Wait for the commit thread to wake up and die. */
1128 journal_kill_thread(journal); 1131 journal_kill_thread(journal);
1129 1132
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal)
1146 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1149 J_ASSERT(journal->j_checkpoint_transactions == NULL);
1147 spin_unlock(&journal->j_list_lock); 1150 spin_unlock(&journal->j_list_lock);
1148 1151
1149 /* We can now mark the journal as empty. */
1150 journal->j_tail = 0;
1151 journal->j_tail_sequence = ++journal->j_transaction_sequence;
1152 if (journal->j_sb_buffer) { 1152 if (journal->j_sb_buffer) {
1153 journal_update_superblock(journal, 1); 1153 if (!is_journal_aborted(journal)) {
1154 /* We can now mark the journal as empty. */
1155 journal->j_tail = 0;
1156 journal->j_tail_sequence =
1157 ++journal->j_transaction_sequence;
1158 journal_update_superblock(journal, 1);
1159 } else {
1160 err = -EIO;
1161 }
1154 brelse(journal->j_sb_buffer); 1162 brelse(journal->j_sb_buffer);
1155 } 1163 }
1156 1164
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal)
1160 journal_destroy_revoke(journal); 1168 journal_destroy_revoke(journal);
1161 kfree(journal->j_wbuf); 1169 kfree(journal->j_wbuf);
1162 kfree(journal); 1170 kfree(journal);
1171
1172 return err;
1163} 1173}
1164 1174
1165 1175
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal)
1359 spin_lock(&journal->j_list_lock); 1369 spin_lock(&journal->j_list_lock);
1360 while (!err && journal->j_checkpoint_transactions != NULL) { 1370 while (!err && journal->j_checkpoint_transactions != NULL) {
1361 spin_unlock(&journal->j_list_lock); 1371 spin_unlock(&journal->j_list_lock);
1372 mutex_lock(&journal->j_checkpoint_mutex);
1362 err = log_do_checkpoint(journal); 1373 err = log_do_checkpoint(journal);
1374 mutex_unlock(&journal->j_checkpoint_mutex);
1363 spin_lock(&journal->j_list_lock); 1375 spin_lock(&journal->j_list_lock);
1364 } 1376 }
1365 spin_unlock(&journal->j_list_lock); 1377 spin_unlock(&journal->j_list_lock);
1378
1379 if (is_journal_aborted(journal))
1380 return -EIO;
1381
1366 cleanup_journal_tail(journal); 1382 cleanup_journal_tail(journal);
1367 1383
1368 /* Finally, mark the journal as really needing no recovery. 1384 /* Finally, mark the journal as really needing no recovery.
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal)
1384 J_ASSERT(journal->j_head == journal->j_tail); 1400 J_ASSERT(journal->j_head == journal->j_tail);
1385 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1401 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1386 spin_unlock(&journal->j_state_lock); 1402 spin_unlock(&journal->j_state_lock);
1387 return err; 1403 return 0;
1388} 1404}
1389 1405
1390/** 1406/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed064..db5e982c5ddf 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do { \
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
226 int err; 226 int err, err2;
227 journal_superblock_t * sb; 227 journal_superblock_t * sb;
228 228
229 struct recovery_info info; 229 struct recovery_info info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
261 journal->j_transaction_sequence = ++info.end_transaction; 261 journal->j_transaction_sequence = ++info.end_transaction;
262 262
263 journal_clear_revoke(journal); 263 journal_clear_revoke(journal);
264 sync_blockdev(journal->j_fs_dev); 264 err2 = sync_blockdev(journal->j_fs_dev);
265 if (!err)
266 err = err2;
267
265 return err; 268 return err;
266} 269}
267 270
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 35d4f6342fac..346e2b80be7d 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -911,7 +911,7 @@ extern int journal_set_features
911 (journal_t *, unsigned long, unsigned long, unsigned long); 911 (journal_t *, unsigned long, unsigned long, unsigned long);
912extern int journal_create (journal_t *); 912extern int journal_create (journal_t *);
913extern int journal_load (journal_t *journal); 913extern int journal_load (journal_t *journal);
914extern void journal_destroy (journal_t *); 914extern int journal_destroy (journal_t *);
915extern int journal_recover (journal_t *journal); 915extern int journal_recover (journal_t *journal);
916extern int journal_wipe (journal_t *, int); 916extern int journal_wipe (journal_t *, int);
917extern int journal_skip_recovery (journal_t *); 917extern int journal_skip_recovery (journal_t *);