aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd')
-rw-r--r--fs/jbd/checkpoint.c68
-rw-r--r--fs/jbd/journal.c28
-rw-r--r--fs/jbd/recovery.c7
-rw-r--r--fs/jbd/transaction.c1
4 files changed, 81 insertions, 23 deletions
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb8..1bd8d4acc6f 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
93 int ret = 0; 93 int ret = 0;
94 struct buffer_head *bh = jh2bh(jh); 94 struct buffer_head *bh = jh2bh(jh);
95 95
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
97 JBUFFER_TRACE(jh, "remove from checkpoint list"); 98 JBUFFER_TRACE(jh, "remove from checkpoint list");
98 ret = __journal_remove_checkpoint(jh) + 1; 99 ret = __journal_remove_checkpoint(jh) + 1;
99 jbd_unlock_bh_state(bh); 100 jbd_unlock_bh_state(bh);
@@ -126,14 +127,29 @@ void __log_wait_for_space(journal_t *journal)
126 127
127 /* 128 /*
128 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
129 * were waiting for the checkpoint lock 130 * were waiting for the checkpoint lock. If there are no
131 * outstanding transactions there is nothing to checkpoint and
132 * we can't make progress. Abort the journal in this case.
130 */ 133 */
131 spin_lock(&journal->j_state_lock); 134 spin_lock(&journal->j_state_lock);
135 spin_lock(&journal->j_list_lock);
132 nblocks = jbd_space_needed(journal); 136 nblocks = jbd_space_needed(journal);
133 if (__log_space_left(journal) < nblocks) { 137 if (__log_space_left(journal) < nblocks) {
138 int chkpt = journal->j_checkpoint_transactions != NULL;
139
140 spin_unlock(&journal->j_list_lock);
134 spin_unlock(&journal->j_state_lock); 141 spin_unlock(&journal->j_state_lock);
135 log_do_checkpoint(journal); 142 if (chkpt) {
143 log_do_checkpoint(journal);
144 } else {
145 printk(KERN_ERR "%s: no transactions\n",
146 __func__);
147 journal_abort(journal, 0);
148 }
149
136 spin_lock(&journal->j_state_lock); 150 spin_lock(&journal->j_state_lock);
151 } else {
152 spin_unlock(&journal->j_list_lock);
137 } 153 }
138 mutex_unlock(&journal->j_checkpoint_mutex); 154 mutex_unlock(&journal->j_checkpoint_mutex);
139 } 155 }
@@ -160,21 +176,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
160 * buffers. Note that we take the buffers in the opposite ordering 176 * buffers. Note that we take the buffers in the opposite ordering
161 * from the one in which they were submitted for IO. 177 * from the one in which they were submitted for IO.
162 * 178 *
179 * Return 0 on success, and return <0 if some buffers have failed
180 * to be written out.
181 *
163 * Called with j_list_lock held. 182 * Called with j_list_lock held.
164 */ 183 */
165static void __wait_cp_io(journal_t *journal, transaction_t *transaction) 184static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
166{ 185{
167 struct journal_head *jh; 186 struct journal_head *jh;
168 struct buffer_head *bh; 187 struct buffer_head *bh;
169 tid_t this_tid; 188 tid_t this_tid;
170 int released = 0; 189 int released = 0;
190 int ret = 0;
171 191
172 this_tid = transaction->t_tid; 192 this_tid = transaction->t_tid;
173restart: 193restart:
174 /* Did somebody clean up the transaction in the meanwhile? */ 194 /* Did somebody clean up the transaction in the meanwhile? */
175 if (journal->j_checkpoint_transactions != transaction || 195 if (journal->j_checkpoint_transactions != transaction ||
176 transaction->t_tid != this_tid) 196 transaction->t_tid != this_tid)
177 return; 197 return ret;
178 while (!released && transaction->t_checkpoint_io_list) { 198 while (!released && transaction->t_checkpoint_io_list) {
179 jh = transaction->t_checkpoint_io_list; 199 jh = transaction->t_checkpoint_io_list;
180 bh = jh2bh(jh); 200 bh = jh2bh(jh);
@@ -194,6 +214,9 @@ restart:
194 spin_lock(&journal->j_list_lock); 214 spin_lock(&journal->j_list_lock);
195 goto restart; 215 goto restart;
196 } 216 }
217 if (unlikely(buffer_write_io_error(bh)))
218 ret = -EIO;
219
197 /* 220 /*
198 * Now in whatever state the buffer currently is, we know that 221 * Now in whatever state the buffer currently is, we know that
199 * it has been written out and so we can drop it from the list 222 * it has been written out and so we can drop it from the list
@@ -203,6 +226,8 @@ restart:
203 journal_remove_journal_head(bh); 226 journal_remove_journal_head(bh);
204 __brelse(bh); 227 __brelse(bh);
205 } 228 }
229
230 return ret;
206} 231}
207 232
208#define NR_BATCH 64 233#define NR_BATCH 64
@@ -226,7 +251,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
226 * Try to flush one buffer from the checkpoint list to disk. 251 * Try to flush one buffer from the checkpoint list to disk.
227 * 252 *
228 * Return 1 if something happened which requires us to abort the current 253 * Return 1 if something happened which requires us to abort the current
229 * scan of the checkpoint list. 254 * scan of the checkpoint list. Return <0 if the buffer has failed to
255 * be written out.
230 * 256 *
231 * Called with j_list_lock held and drops it if 1 is returned 257 * Called with j_list_lock held and drops it if 1 is returned
232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 258 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +282,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
256 log_wait_commit(journal, tid); 282 log_wait_commit(journal, tid);
257 ret = 1; 283 ret = 1;
258 } else if (!buffer_dirty(bh)) { 284 } else if (!buffer_dirty(bh)) {
285 ret = 1;
286 if (unlikely(buffer_write_io_error(bh)))
287 ret = -EIO;
259 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 288 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
260 BUFFER_TRACE(bh, "remove from checkpoint"); 289 BUFFER_TRACE(bh, "remove from checkpoint");
261 __journal_remove_checkpoint(jh); 290 __journal_remove_checkpoint(jh);
@@ -263,7 +292,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
263 jbd_unlock_bh_state(bh); 292 jbd_unlock_bh_state(bh);
264 journal_remove_journal_head(bh); 293 journal_remove_journal_head(bh);
265 __brelse(bh); 294 __brelse(bh);
266 ret = 1;
267 } else { 295 } else {
268 /* 296 /*
269 * Important: we are about to write the buffer, and 297 * Important: we are about to write the buffer, and
@@ -295,6 +323,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
295 * to disk. We submit larger chunks of data at once. 323 * to disk. We submit larger chunks of data at once.
296 * 324 *
297 * The journal should be locked before calling this function. 325 * The journal should be locked before calling this function.
326 * Called with j_checkpoint_mutex held.
298 */ 327 */
299int log_do_checkpoint(journal_t *journal) 328int log_do_checkpoint(journal_t *journal)
300{ 329{
@@ -318,6 +347,7 @@ int log_do_checkpoint(journal_t *journal)
318 * OK, we need to start writing disk blocks. Take one transaction 347 * OK, we need to start writing disk blocks. Take one transaction
319 * and write it. 348 * and write it.
320 */ 349 */
350 result = 0;
321 spin_lock(&journal->j_list_lock); 351 spin_lock(&journal->j_list_lock);
322 if (!journal->j_checkpoint_transactions) 352 if (!journal->j_checkpoint_transactions)
323 goto out; 353 goto out;
@@ -334,7 +364,7 @@ restart:
334 int batch_count = 0; 364 int batch_count = 0;
335 struct buffer_head *bhs[NR_BATCH]; 365 struct buffer_head *bhs[NR_BATCH];
336 struct journal_head *jh; 366 struct journal_head *jh;
337 int retry = 0; 367 int retry = 0, err;
338 368
339 while (!retry && transaction->t_checkpoint_list) { 369 while (!retry && transaction->t_checkpoint_list) {
340 struct buffer_head *bh; 370 struct buffer_head *bh;
@@ -347,6 +377,8 @@ restart:
347 break; 377 break;
348 } 378 }
349 retry = __process_buffer(journal, jh, bhs,&batch_count); 379 retry = __process_buffer(journal, jh, bhs,&batch_count);
380 if (retry < 0 && !result)
381 result = retry;
350 if (!retry && (need_resched() || 382 if (!retry && (need_resched() ||
351 spin_needbreak(&journal->j_list_lock))) { 383 spin_needbreak(&journal->j_list_lock))) {
352 spin_unlock(&journal->j_list_lock); 384 spin_unlock(&journal->j_list_lock);
@@ -371,14 +403,18 @@ restart:
371 * Now we have cleaned up the first transaction's checkpoint 403 * Now we have cleaned up the first transaction's checkpoint
372 * list. Let's clean up the second one 404 * list. Let's clean up the second one
373 */ 405 */
374 __wait_cp_io(journal, transaction); 406 err = __wait_cp_io(journal, transaction);
407 if (!result)
408 result = err;
375 } 409 }
376out: 410out:
377 spin_unlock(&journal->j_list_lock); 411 spin_unlock(&journal->j_list_lock);
378 result = cleanup_journal_tail(journal);
379 if (result < 0) 412 if (result < 0)
380 return result; 413 journal_abort(journal, result);
381 return 0; 414 else
415 result = cleanup_journal_tail(journal);
416
417 return (result < 0) ? result : 0;
382} 418}
383 419
384/* 420/*
@@ -394,8 +430,9 @@ out:
394 * This is the only part of the journaling code which really needs to be 430 * This is the only part of the journaling code which really needs to be
395 * aware of transaction aborts. Checkpointing involves writing to the 431 * aware of transaction aborts. Checkpointing involves writing to the
396 * main filesystem area rather than to the journal, so it can proceed 432 * main filesystem area rather than to the journal, so it can proceed
397 * even in abort state, but we must not update the journal superblock if 433 * even in abort state, but we must not update the super block if
398 * we have an abort error outstanding. 434 * checkpointing may have failed. Otherwise, we would lose some metadata
435 * buffers which should be written-back to the filesystem.
399 */ 436 */
400 437
401int cleanup_journal_tail(journal_t *journal) 438int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +441,9 @@ int cleanup_journal_tail(journal_t *journal)
404 tid_t first_tid; 441 tid_t first_tid;
405 unsigned long blocknr, freed; 442 unsigned long blocknr, freed;
406 443
444 if (is_journal_aborted(journal))
445 return 1;
446
407 /* OK, work out the oldest transaction remaining in the log, and 447 /* OK, work out the oldest transaction remaining in the log, and
408 * the log block it starts at. 448 * the log block it starts at.
409 * 449 *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index aa7143a8349..9e4fa52d7dc 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1121,9 +1121,12 @@ recovery_error:
1121 * 1121 *
1122 * Release a journal_t structure once it is no longer in use by the 1122 * Release a journal_t structure once it is no longer in use by the
1123 * journaled object. 1123 * journaled object.
1124 * Return <0 if we couldn't clean up the journal.
1124 */ 1125 */
1125void journal_destroy(journal_t *journal) 1126int journal_destroy(journal_t *journal)
1126{ 1127{
1128 int err = 0;
1129
1127 /* Wait for the commit thread to wake up and die. */ 1130 /* Wait for the commit thread to wake up and die. */
1128 journal_kill_thread(journal); 1131 journal_kill_thread(journal);
1129 1132
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal)
1146 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1149 J_ASSERT(journal->j_checkpoint_transactions == NULL);
1147 spin_unlock(&journal->j_list_lock); 1150 spin_unlock(&journal->j_list_lock);
1148 1151
1149 /* We can now mark the journal as empty. */
1150 journal->j_tail = 0;
1151 journal->j_tail_sequence = ++journal->j_transaction_sequence;
1152 if (journal->j_sb_buffer) { 1152 if (journal->j_sb_buffer) {
1153 journal_update_superblock(journal, 1); 1153 if (!is_journal_aborted(journal)) {
1154 /* We can now mark the journal as empty. */
1155 journal->j_tail = 0;
1156 journal->j_tail_sequence =
1157 ++journal->j_transaction_sequence;
1158 journal_update_superblock(journal, 1);
1159 } else {
1160 err = -EIO;
1161 }
1154 brelse(journal->j_sb_buffer); 1162 brelse(journal->j_sb_buffer);
1155 } 1163 }
1156 1164
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal)
1160 journal_destroy_revoke(journal); 1168 journal_destroy_revoke(journal);
1161 kfree(journal->j_wbuf); 1169 kfree(journal->j_wbuf);
1162 kfree(journal); 1170 kfree(journal);
1171
1172 return err;
1163} 1173}
1164 1174
1165 1175
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal)
1359 spin_lock(&journal->j_list_lock); 1369 spin_lock(&journal->j_list_lock);
1360 while (!err && journal->j_checkpoint_transactions != NULL) { 1370 while (!err && journal->j_checkpoint_transactions != NULL) {
1361 spin_unlock(&journal->j_list_lock); 1371 spin_unlock(&journal->j_list_lock);
1372 mutex_lock(&journal->j_checkpoint_mutex);
1362 err = log_do_checkpoint(journal); 1373 err = log_do_checkpoint(journal);
1374 mutex_unlock(&journal->j_checkpoint_mutex);
1363 spin_lock(&journal->j_list_lock); 1375 spin_lock(&journal->j_list_lock);
1364 } 1376 }
1365 spin_unlock(&journal->j_list_lock); 1377 spin_unlock(&journal->j_list_lock);
1378
1379 if (is_journal_aborted(journal))
1380 return -EIO;
1381
1366 cleanup_journal_tail(journal); 1382 cleanup_journal_tail(journal);
1367 1383
1368 /* Finally, mark the journal as really needing no recovery. 1384 /* Finally, mark the journal as really needing no recovery.
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal)
1384 J_ASSERT(journal->j_head == journal->j_tail); 1400 J_ASSERT(journal->j_head == journal->j_tail);
1385 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1401 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1386 spin_unlock(&journal->j_state_lock); 1402 spin_unlock(&journal->j_state_lock);
1387 return err; 1403 return 0;
1388} 1404}
1389 1405
1390/** 1406/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed06..db5e982c5dd 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do { \
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
226 int err; 226 int err, err2;
227 journal_superblock_t * sb; 227 journal_superblock_t * sb;
228 228
229 struct recovery_info info; 229 struct recovery_info info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
261 journal->j_transaction_sequence = ++info.end_transaction; 261 journal->j_transaction_sequence = ++info.end_transaction;
262 262
263 journal_clear_revoke(journal); 263 journal_clear_revoke(journal);
264 sync_blockdev(journal->j_fs_dev); 264 err2 = sync_blockdev(journal->j_fs_dev);
265 if (!err)
266 err = err2;
267
265 return err; 268 return err;
266} 269}
267 270
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index d15cd6e7251..60d4c32c880 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -860,7 +860,6 @@ out:
860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences 860 * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences
861 * @handle: transaction 861 * @handle: transaction
862 * @bh: buffer to undo 862 * @bh: buffer to undo
863 * @credits: store the number of taken credits here (if not NULL)
864 * 863 *
865 * Sometimes there is a need to distinguish between metadata which has 864 * Sometimes there is a need to distinguish between metadata which has
866 * been committed to disk and that which has not. The ext3fs code uses 865 * been committed to disk and that which has not. The ext3fs code uses