author		Ingo Molnar <mingo@elte.hu>	2008-10-28 11:26:12 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-10-28 11:26:12 -0400
commit		7a9787e1eba95a166265e6a260cf30af04ef0a99 (patch)
tree		e730a4565e0318140d2fbd2f0415d18a339d7336 /fs/jbd
parent		41b9eb264c8407655db57b60b4457fe1b2ec9977 (diff)
parent		0173a3265b228da319ceb9c1ec6a5682fd1b2d92 (diff)
Merge commit 'v2.6.28-rc2' into x86/pci-ioapic-boot-irq-quirks
Diffstat (limited to 'fs/jbd')
-rw-r--r--	fs/jbd/Kconfig		30
-rw-r--r--	fs/jbd/checkpoint.c	68
-rw-r--r--	fs/jbd/commit.c		78
-rw-r--r--	fs/jbd/journal.c	36
-rw-r--r--	fs/jbd/recovery.c	7
-rw-r--r--	fs/jbd/revoke.c		163
-rw-r--r--	fs/jbd/transaction.c	77
7 files changed, 318 insertions(+), 141 deletions(-)
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
new file mode 100644
index 000000000000..4e28beeed157
--- /dev/null
+++ b/fs/jbd/Kconfig
@@ -0,0 +1,30 @@
+config JBD
+	tristate
+	help
+	  This is a generic journalling layer for block devices.  It is
+	  currently used by the ext3 file system, but it could also be
+	  used to add journal support to other file systems or block
+	  devices such as RAID or LVM.
+
+	  If you are using the ext3 file system, you need to say Y here.
+	  If you are not using ext3 then you will probably want to say N.
+
+	  To compile this device as a module, choose M here: the module will be
+	  called jbd.  If you are compiling ext3 into the kernel, you
+	  cannot compile this code as a module.
+
+config JBD_DEBUG
+	bool "JBD (ext3) debugging support"
+	depends on JBD && DEBUG_FS
+	help
+	  If you are using the ext3 journaled file system (or potentially any
+	  other file system/device using JBD), this option allows you to
+	  enable debugging output while the system is running, in order to
+	  help track down any problems you are having.  By default the
+	  debugging output will be turned off.
+
+	  If you select Y here, then you will be able to turn on debugging
+	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
+	  number between 1 and 5, the higher the number, the more debugging
+	  output is generated.  To turn debugging off again, do
+	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb88..1bd8d4acc6f2 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 	int ret = 0;
 	struct buffer_head *bh = jh2bh(jh);
 
-	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
+	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
@@ -126,14 +127,29 @@ void __log_wait_for_space(journal_t *journal)
 
 		/*
 		 * Test again, another process may have checkpointed while we
-		 * were waiting for the checkpoint lock
+		 * were waiting for the checkpoint lock. If there are no
+		 * outstanding transactions there is nothing to checkpoint and
+		 * we can't make progress. Abort the journal in this case.
 		 */
 		spin_lock(&journal->j_state_lock);
+		spin_lock(&journal->j_list_lock);
 		nblocks = jbd_space_needed(journal);
 		if (__log_space_left(journal) < nblocks) {
+			int chkpt = journal->j_checkpoint_transactions != NULL;
+
+			spin_unlock(&journal->j_list_lock);
 			spin_unlock(&journal->j_state_lock);
-			log_do_checkpoint(journal);
+			if (chkpt) {
+				log_do_checkpoint(journal);
+			} else {
+				printk(KERN_ERR "%s: no transactions\n",
+				       __func__);
+				journal_abort(journal, 0);
+			}
+
 			spin_lock(&journal->j_state_lock);
+		} else {
+			spin_unlock(&journal->j_list_lock);
 		}
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	}
@@ -160,21 +176,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
  * buffers. Note that we take the buffers in the opposite ordering
  * from the one in which they were submitted for IO.
  *
+ * Return 0 on success, and return <0 if some buffers have failed
+ * to be written out.
+ *
  * Called with j_list_lock held.
  */
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
 {
 	struct journal_head *jh;
 	struct buffer_head *bh;
 	tid_t this_tid;
 	int released = 0;
+	int ret = 0;
 
 	this_tid = transaction->t_tid;
 restart:
 	/* Did somebody clean up the transaction in the meanwhile? */
 	if (journal->j_checkpoint_transactions != transaction ||
 	    transaction->t_tid != this_tid)
-		return;
+		return ret;
 	while (!released && transaction->t_checkpoint_io_list) {
 		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
@@ -194,6 +214,9 @@ restart:
 			spin_lock(&journal->j_list_lock);
 			goto restart;
 		}
+		if (unlikely(buffer_write_io_error(bh)))
+			ret = -EIO;
+
 		/*
 		 * Now in whatever state the buffer currently is, we know that
 		 * it has been written out and so we can drop it from the list
@@ -203,6 +226,8 @@ restart:
 		journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
+
+	return ret;
 }
 
 #define NR_BATCH	64
@@ -226,7 +251,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Try to flush one buffer from the checkpoint list to disk.
  *
  * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list.  Return <0 if the buffer has failed to
+ * be written out.
  *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +282,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		log_wait_commit(journal, tid);
 		ret = 1;
 	} else if (!buffer_dirty(bh)) {
+		ret = 1;
+		if (unlikely(buffer_write_io_error(bh)))
+			ret = -EIO;
 		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 		BUFFER_TRACE(bh, "remove from checkpoint");
 		__journal_remove_checkpoint(jh);
@@ -263,7 +292,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		__brelse(bh);
-		ret = 1;
 	} else {
 		/*
 		 * Important: we are about to write the buffer, and
@@ -295,6 +323,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
+ * Called with j_checkpoint_mutex held.
 */
 int log_do_checkpoint(journal_t *journal)
 {
@@ -318,6 +347,7 @@ int log_do_checkpoint(journal_t *journal)
 	 * OK, we need to start writing disk blocks.  Take one transaction
 	 * and write it.
 	 */
+	result = 0;
 	spin_lock(&journal->j_list_lock);
 	if (!journal->j_checkpoint_transactions)
 		goto out;
@@ -334,7 +364,7 @@ restart:
 		int batch_count = 0;
 		struct buffer_head *bhs[NR_BATCH];
 		struct journal_head *jh;
-		int retry = 0;
+		int retry = 0, err;
 
 		while (!retry && transaction->t_checkpoint_list) {
 			struct buffer_head *bh;
@@ -347,6 +377,8 @@ restart:
 				break;
 			}
 			retry = __process_buffer(journal, jh, bhs,&batch_count);
+			if (retry < 0 && !result)
+				result = retry;
 			if (!retry && (need_resched() ||
 				spin_needbreak(&journal->j_list_lock))) {
 				spin_unlock(&journal->j_list_lock);
@@ -371,14 +403,18 @@ restart:
 		 * Now we have cleaned up the first transaction's checkpoint
 		 * list. Let's clean up the second one
 		 */
-		__wait_cp_io(journal, transaction);
+		err = __wait_cp_io(journal, transaction);
+		if (!result)
+			result = err;
 	}
 out:
 	spin_unlock(&journal->j_list_lock);
-	result = cleanup_journal_tail(journal);
 	if (result < 0)
-		return result;
-	return 0;
+		journal_abort(journal, result);
+	else
+		result = cleanup_journal_tail(journal);
+
+	return (result < 0) ? result : 0;
 }
 
 /*
@@ -394,8 +430,9 @@ out:
  * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
- * even in abort state, but we must not update the journal superblock if
- * we have an abort error outstanding.
+ * even in abort state, but we must not update the super block if
+ * checkpointing may have failed. Otherwise, we would lose some metadata
+ * buffers which should be written-back to the filesystem.
 */
 
 int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +441,9 @@ int cleanup_journal_tail(journal_t *journal)
 	tid_t first_tid;
 	unsigned long blocknr, freed;
 
+	if (is_journal_aborted(journal))
+		return 1;
+
 	/* OK, work out the oldest transaction remaining in the log, and
 	 * the log block it starts at.
 	 *
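Taken together, the checkpoint.c changes establish a simple error contract: __process_buffer() and __wait_cp_io() return <0 when a buffer failed to write back, log_do_checkpoint() latches the first such error and aborts the journal, and cleanup_journal_tail() refuses to advance the on-disk tail once the journal is aborted. A sketch of the caller-side pattern this implies (journal_flush() in the journal.c hunks below follows exactly this shape; names follow fs/jbd):

	mutex_lock(&journal->j_checkpoint_mutex);
	err = log_do_checkpoint(journal);	/* <0: latched from a failed buffer */
	mutex_unlock(&journal->j_checkpoint_mutex);
	if (is_journal_aborted(journal))
		return -EIO;	/* tail was not advanced; recovery can still replay */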
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..25719d902c51 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 
 /*
  * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
 * ->mapping, and with attached buffers.  These pages are trivially reclaimable
 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  * So here, we have a buffer which has just come off the forget list.  Look to
 * see if we can strip all buffers from the backing page.
 *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
 */
 static void release_buffer_page(struct buffer_head *bh)
 {
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
@@ -78,6 +78,19 @@ nope:
 }
 
 /*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+	if (buffer_freed(bh)) {
+		clear_buffer_freed(bh);
+		release_buffer_page(bh);
+	} else
+		put_bh(bh);
+}
+
+/*
  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
 * held.  For ranking reasons we must trylock.  If we lose, schedule away and
 * return 0.  j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
 /*
  * Submit all the data buffers to disk
  */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
 				transaction_t *commit_transaction)
 {
 	struct journal_head *jh;
@@ -180,6 +193,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 	int locked;
 	int bufs = 0;
 	struct buffer_head **wbuf = journal->j_wbuf;
+	int err = 0;
 
 	/*
 	 * Whenever we unlock the journal and sleep, things can get added
@@ -207,7 +221,7 @@ write_out_data:
 		 * blocking lock_buffer().
 		 */
 		if (buffer_dirty(bh)) {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				BUFFER_TRACE(bh, "needs blocking lock");
 				spin_unlock(&journal->j_list_lock);
 				/* Write out all data to prevent deadlocks */
@@ -231,7 +245,7 @@ write_out_data:
 			if (locked)
 				unlock_buffer(bh);
 			BUFFER_TRACE(bh, "already cleaned up");
-			put_bh(bh);
+			release_data_buffer(bh);
 			continue;
 		}
 		if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
 			put_bh(bh);
 		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			if (locked)
 				unlock_buffer(bh);
 			journal_remove_journal_head(bh);
-			/* Once for our safety reference, once for
+			/* One for our safety reference, other for
 			 * journal_remove_journal_head() */
 			put_bh(bh);
-			put_bh(bh);
+			release_data_buffer(bh);
 		}
 
 		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
 	}
 	spin_unlock(&journal->j_list_lock);
 	journal_do_submit_data(wbuf, bufs);
+
+	return err;
 }
 
 /*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
-	err = 0;
-	journal_submit_data_buffers(journal, commit_transaction);
+	err = journal_submit_data_buffers(journal, commit_transaction);
 
 	/*
 	 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
-			if (unlikely(!buffer_uptodate(bh)))
-				err = -EIO;
 			spin_lock(&journal->j_list_lock);
 		}
+		if (unlikely(!buffer_uptodate(bh))) {
+			if (!trylock_page(bh->b_page)) {
+				spin_unlock(&journal->j_list_lock);
+				lock_page(bh->b_page);
+				spin_lock(&journal->j_list_lock);
+			}
+			if (bh->b_page->mapping)
+				set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+			unlock_page(bh->b_page);
+			SetPageError(bh->b_page);
+			err = -EIO;
+		}
 		if (!inverted_lock(journal, bh)) {
 			put_bh(bh);
 			spin_lock(&journal->j_list_lock);
@@ -443,17 +471,23 @@ void journal_commit_transaction(journal_t *journal)
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
-		put_bh(bh);
+		release_data_buffer(bh);
 		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
 
-	if (err)
-		journal_abort(journal, err);
+	if (err) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_WARNING
+			"JBD: Detected IO errors while flushing file data "
+			"on %s\n", bdevname(journal->j_fs_dev, b));
+		if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR)
+			journal_abort(journal, err);
+		err = 0;
+	}
 
 	journal_write_revoke_records(journal, commit_transaction);
 
-	jbd_debug(3, "JBD: commit phase 2\n");
-
 	/*
 	 * If we found any dirty or locked buffers, then we should have
@@ -486,9 +520,10 @@ void journal_commit_transaction(journal_t *journal)
 		jh = commit_transaction->t_buffers;
 
 		/* If we're in abort mode, we just un-journal the buffer and
-		   release it for background writing. */
+		   release it. */
 
 		if (is_journal_aborted(journal)) {
+			clear_buffer_jbddirty(jh2bh(jh));
 			JBUFFER_TRACE(jh, "journal is aborting: refile");
 			journal_refile_buffer(journal, jh);
 			/* If that was the last one, we need to clean up
@@ -730,6 +765,9 @@ wait_for_iobuf:
 		/* AKPM: bforget here */
 	}
 
+	if (err)
+		journal_abort(journal, err);
+
 	jbd_debug(3, "JBD: commit phase 6\n");
 
 	if (journal_write_commit_record(journal, commit_transaction))
@@ -820,6 +858,8 @@ restart_loop:
 		if (buffer_jbddirty(bh)) {
 			JBUFFER_TRACE(jh, "add to new checkpointing trans");
 			__journal_insert_checkpoint(jh, commit_transaction);
+			if (is_journal_aborted(journal))
+				clear_buffer_jbddirty(bh);
 			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
 			__journal_refile_buffer(jh);
 			jbd_unlock_bh_state(bh);
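The commit-path change above stops aborting the whole journal on an ordered-data write error by default; instead it latches AS_EIO on the page's mapping, so the failure surfaces where it belongs, in a later fsync()/msync() on that file. A rough sketch of the consumer side (mm/filemap.c-style logic; the helper name is hypothetical and this is not part of this diff):

	/* hypothetical helper, for illustration only */
	static int mapping_pick_up_error(struct address_space *mapping)
	{
		int err = 0;

		if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
			err = -ENOSPC;
		if (test_and_clear_bit(AS_EIO, &mapping->flags))
			err = -EIO;	/* the bit set by journal_commit_transaction() above */
		return err;
	}

Filesystems that want the old hard-failure semantics can set JFS_ABORT_ON_SYNCDATA_ERR in journal->j_flags, in which case the commit still calls journal_abort().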
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..9e4fa52d7dc8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
 EXPORT_SYMBOL(journal_create);
 EXPORT_SYMBOL(journal_load);
 EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
 EXPORT_SYMBOL(journal_abort);
 EXPORT_SYMBOL(journal_errno);
 EXPORT_SYMBOL(journal_ack_err);
@@ -1122,9 +1121,12 @@ recovery_error:
  *
  * Release a journal_t structure once it is no longer in use by the
 * journaled object.
+ * Return <0 if we couldn't clean up the journal.
 */
-void journal_destroy(journal_t *journal)
+int journal_destroy(journal_t *journal)
 {
+	int err = 0;
+
 	/* Wait for the commit thread to wake up and die. */
 	journal_kill_thread(journal);
 
@@ -1147,11 +1149,16 @@ void journal_destroy(journal_t *journal)
 	J_ASSERT(journal->j_checkpoint_transactions == NULL);
 	spin_unlock(&journal->j_list_lock);
 
-	/* We can now mark the journal as empty. */
-	journal->j_tail = 0;
-	journal->j_tail_sequence = ++journal->j_transaction_sequence;
 	if (journal->j_sb_buffer) {
-		journal_update_superblock(journal, 1);
+		if (!is_journal_aborted(journal)) {
+			/* We can now mark the journal as empty. */
+			journal->j_tail = 0;
+			journal->j_tail_sequence =
+				++journal->j_transaction_sequence;
+			journal_update_superblock(journal, 1);
+		} else {
+			err = -EIO;
+		}
 		brelse(journal->j_sb_buffer);
 	}
 
@@ -1161,6 +1168,8 @@ void journal_destroy(journal_t *journal)
 	journal_destroy_revoke(journal);
 	kfree(journal->j_wbuf);
 	kfree(journal);
+
+	return err;
 }
 
 
@@ -1360,10 +1369,16 @@ int journal_flush(journal_t *journal)
 	spin_lock(&journal->j_list_lock);
 	while (!err && journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
+		mutex_lock(&journal->j_checkpoint_mutex);
 		err = log_do_checkpoint(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 		spin_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
+
+	if (is_journal_aborted(journal))
+		return -EIO;
+
 	cleanup_journal_tail(journal);
 
 	/* Finally, mark the journal as really needing no recovery.
@@ -1385,7 +1400,7 @@ int journal_flush(journal_t *journal)
 	J_ASSERT(journal->j_head == journal->j_tail);
 	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
 	spin_unlock(&journal->j_state_lock);
-	return err;
+	return 0;
 }
 
 /**
@@ -1636,9 +1651,10 @@ static int journal_init_journal_head_cache(void)
 
 static void journal_destroy_journal_head_cache(void)
 {
-	J_ASSERT(journal_head_cache != NULL);
-	kmem_cache_destroy(journal_head_cache);
-	journal_head_cache = NULL;
+	if (journal_head_cache) {
+		kmem_cache_destroy(journal_head_cache);
+		journal_head_cache = NULL;
+	}
 }
 
 /*
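With journal_destroy() now returning an error, a filesystem can surface a failed cleanup at unmount instead of silently ignoring it. A hedged sketch of an ext3-style caller (the ext3 side is not part of this fs/jbd diff):

	/* sketch of an ext3_put_super()-style caller; not in this diff */
	err = journal_destroy(sbi->s_journal);
	sbi->s_journal = NULL;
	if (err < 0)
		ext3_abort(sb, __func__, "Couldn't clean up the journal");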
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed064..db5e982c5ddf 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do {									\
  */
 int journal_recover(journal_t *journal)
 {
-	int			err;
+	int			err, err2;
 	journal_superblock_t *	sb;
 
 	struct recovery_info	info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
 	journal->j_transaction_sequence = ++info.end_transaction;
 
 	journal_clear_revoke(journal);
-	sync_blockdev(journal->j_fs_dev);
+	err2 = sync_blockdev(journal->j_fs_dev);
+	if (!err)
+		err = err2;
+
 	return err;
 }
 
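The err/err2 dance keeps the first failure: a replay error from the recovery pass wins over a later sync_blockdev() failure, but a device that silently dropped the replayed blocks now fails recovery too. A simplified sketch of how this result is consumed (journal_load()-style logic; compare the recovery_error: label visible in the journal.c hunk above):

	/* sketch, journal_load()-style logic; not part of this diff */
	if (journal_recover(journal))
		goto recovery_error;	/* fail the mount rather than run on a
					 * device that lost replayed writes */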
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
 	return NULL;
 }
 
+void journal_destroy_revoke_caches(void)
+{
+	if (revoke_record_cache) {
+		kmem_cache_destroy(revoke_record_cache);
+		revoke_record_cache = NULL;
+	}
+	if (revoke_table_cache) {
+		kmem_cache_destroy(revoke_table_cache);
+		revoke_table_cache = NULL;
+	}
+}
+
 int __init journal_init_revoke_caches(void)
 {
+	J_ASSERT(!revoke_record_cache);
+	J_ASSERT(!revoke_table_cache);
+
 	revoke_record_cache = kmem_cache_create("revoke_record",
 					   sizeof(struct jbd_revoke_record_s),
 					   0,
 					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
 					   NULL);
 	if (!revoke_record_cache)
-		return -ENOMEM;
+		goto record_cache_failure;
 
 	revoke_table_cache = kmem_cache_create("revoke_table",
 					   sizeof(struct jbd_revoke_table_s),
 					   0, SLAB_TEMPORARY, NULL);
-	if (!revoke_table_cache) {
-		kmem_cache_destroy(revoke_record_cache);
-		revoke_record_cache = NULL;
-		return -ENOMEM;
-	}
+	if (!revoke_table_cache)
+		goto table_cache_failure;
+
 	return 0;
-}
 
-void journal_destroy_revoke_caches(void)
-{
-	kmem_cache_destroy(revoke_record_cache);
-	revoke_record_cache = NULL;
-	kmem_cache_destroy(revoke_table_cache);
-	revoke_table_cache = NULL;
+table_cache_failure:
+	journal_destroy_revoke_caches();
+record_cache_failure:
+	return -ENOMEM;
 }
 
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 {
-	int shift, tmp;
+	int shift = 0;
+	int tmp = hash_size;
+	struct jbd_revoke_table_s *table;
 
-	J_ASSERT (journal->j_revoke_table[0] == NULL);
+	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!table)
+		goto out;
 
-	shift = 0;
-	tmp = hash_size;
 	while((tmp >>= 1UL) != 0UL)
 		shift++;
 
-	journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[0])
-		return -ENOMEM;
-	journal->j_revoke = journal->j_revoke_table[0];
-
-	/* Check that the hash_size is a power of two */
-	J_ASSERT(is_power_of_2(hash_size));
-
-	journal->j_revoke->hash_size = hash_size;
-
-	journal->j_revoke->hash_shift = shift;
-
-	journal->j_revoke->hash_table =
+	table->hash_size = hash_size;
+	table->hash_shift = shift;
+	table->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
+	if (!table->hash_table) {
+		kmem_cache_free(revoke_table_cache, table);
+		table = NULL;
+		goto out;
 	}
 
 	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+		INIT_LIST_HEAD(&table->hash_table[tmp]);
 
-	journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[1]) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		return -ENOMEM;
+out:
+	return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+	int i;
+	struct list_head *hash_list;
+
+	for (i = 0; i < table->hash_size; i++) {
+		hash_list = &table->hash_table[i];
+		J_ASSERT(list_empty(hash_list));
 	}
 
-	journal->j_revoke = journal->j_revoke_table[1];
+	kfree(table->hash_table);
+	kmem_cache_free(revoke_table_cache, table);
+}
 
-	/* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+	J_ASSERT(journal->j_revoke_table[0] == NULL);
 	J_ASSERT(is_power_of_2(hash_size));
 
-	journal->j_revoke->hash_size = hash_size;
+	journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[0])
+		goto fail0;
 
-	journal->j_revoke->hash_shift = shift;
+	journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[1])
+		goto fail1;
 
-	journal->j_revoke->hash_table =
-		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
-	}
-
-	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+	journal->j_revoke = journal->j_revoke_table[1];
 
 	spin_lock_init(&journal->j_revoke_lock);
 
 	return 0;
-}
 
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+	journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+	return -ENOMEM;
+}
 
+/* Destroy a journal's revoke table.  The table must already be empty! */
 void journal_destroy_revoke(journal_t *journal)
 {
-	struct jbd_revoke_table_s *table;
-	struct list_head *hash_list;
-	int i;
-
-	table = journal->j_revoke_table[0];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
-	journal->j_revoke = NULL;
-
-	table = journal->j_revoke_table[1];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
 	journal->j_revoke = NULL;
+	if (journal->j_revoke_table[0])
+		journal_destroy_revoke_table(journal->j_revoke_table[0]);
+	if (journal->j_revoke_table[1])
+		journal_destroy_revoke_table(journal->j_revoke_table[1]);
 }
 
 
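A side note on the new journal_init_revoke_table() helper: with hash_size asserted to be a power of two, the shift loop is just an integer log2, used later to index the hash. Worked example (values are illustrative):

	int shift = 0;
	int tmp = 256;			/* hash_size; must be a power of two */

	while ((tmp >>= 1UL) != 0UL)
		shift++;
	/* shift == 8 here, i.e. log2(256); ilog2() would compute the same */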
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..d15cd6e7251e 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		goto out;
 	}
 
-	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_map_acquire(&handle->h_lockdep_map);
 
 out:
 	return handle;
@@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
 	struct journal_head *jh;
+	int ret = 0;
 
 	if (is_handle_aborted(handle))
-		return 0;
+		return ret;
 
 	jh = journal_add_journal_head(bh);
 	JBUFFER_TRACE(jh, "entry");
@@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
 				   time if it is redirtied */
 		}
 
-		/* journal_clean_data_list() may have got there first */
+		/*
+		 * We cannot remove the buffer with io error from the
+		 * committing transaction, because otherwise it would
+		 * miss the error and the commit would not abort.
+		 */
+		if (unlikely(!buffer_uptodate(bh))) {
+			ret = -EIO;
+			goto no_journal;
+		}
+
 		if (jh->b_transaction != NULL) {
 			JBUFFER_TRACE(jh, "unfile from commit");
 			__journal_temp_unlink_buffer(jh);
@@ -1108,7 +1118,7 @@ no_journal:
 	}
 	JBUFFER_TRACE(jh, "exit");
 	journal_put_journal_head(jh);
-	return 0;
+	return ret;
 }
 
 /**
@@ -1448,7 +1458,7 @@ int journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
-	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+	lock_map_release(&handle->h_lockdep_map);
 
 	jbd_free_handle(handle);
 	return err;
@@ -1648,12 +1658,42 @@ out:
 	return;
 }
 
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction()
+ * The latter might still hold the a count on buffers when inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * tryinf to free that buffer.
+ *
+ * Called with journal->j_state_lock held.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+	transaction_t *transaction = NULL;
+	tid_t tid;
+
+	spin_lock(&journal->j_state_lock);
+	transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return;
+	}
+
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+}
 
 /**
  * int journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard should we try to release
+ * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
+ * release the buffers.
 *
 *
 * For all the buffers on this page,
@@ -1682,9 +1722,11 @@ out:
  * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
 */
 int journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, gfp_t unused_gfp_mask)
+				struct page *page, gfp_t gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
@@ -1713,7 +1755,28 @@ int journal_try_to_free_buffers(journal_t *journal,
 		if (buffer_jbd(bh))
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);
+
 	ret = try_to_free_buffers(page);
+
+	/*
+	 * There are a number of places where journal_try_to_free_buffers()
+	 * could race with journal_commit_transaction(), the later still
+	 * holds the reference to the buffers to free while processing them.
+	 * try_to_free_buffers() failed to free those buffers. Some of the
+	 * caller of releasepage() request page buffers to be dropped, otherwise
+	 * treat the fail-to-free as errors (such as generic_file_direct_IO())
+	 *
+	 * So, if the caller of try_to_release_page() wants the synchronous
+	 * behaviour(i.e make sure buffers are dropped upon return),
+	 * let's wait for the current transaction to finish flush of
+	 * dirty data buffers, then try to free those buffers again,
+	 * with the journal locked.
+	 */
+	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+		journal_wait_for_transaction_sync_data(journal);
+		ret = try_to_free_buffers(page);
+	}
+
 busy:
 	return ret;
 }
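The gfp_mask test is what keeps this change safe for memory reclaim: only callers that may both sleep and re-enter the filesystem get the new wait-and-retry behaviour. A simplified sketch of the two classes of try_to_release_page()-style caller (illustrative, not part of this diff):

	/* Direct I/O-style caller: GFP_KERNEL includes __GFP_WAIT|__GFP_FS,
	 * so a buffer pinned by the committing transaction now waits for
	 * the commit and is retried instead of failing outright. */
	if (try_to_release_page(page, GFP_KERNEL) == 0)
		return -EBUSY;		/* still pinned even after the wait */

	/* Reclaim under fs locks: GFP_NOFS lacks __GFP_FS, so it keeps the
	 * old best-effort behaviour and simply skips a busy page. */
	if (try_to_release_page(page, GFP_NOFS) == 0)
		continue;		/* no waiting on the journal here */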