aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c1
-rw-r--r--fs/jbd2/commit.c295
-rw-r--r--fs/jbd2/journal.c53
-rw-r--r--fs/jbd2/recovery.c12
-rw-r--r--fs/jbd2/transaction.c365
5 files changed, 313 insertions, 413 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6914598022ce..91389c8aee8a 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -688,7 +688,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
688 688
689 J_ASSERT(transaction->t_state == T_FINISHED); 689 J_ASSERT(transaction->t_state == T_FINISHED);
690 J_ASSERT(transaction->t_buffers == NULL); 690 J_ASSERT(transaction->t_buffers == NULL);
691 J_ASSERT(transaction->t_sync_datalist == NULL);
692 J_ASSERT(transaction->t_forget == NULL); 691 J_ASSERT(transaction->t_forget == NULL);
693 J_ASSERT(transaction->t_iobuf_list == NULL); 692 J_ASSERT(transaction->t_iobuf_list == NULL);
694 J_ASSERT(transaction->t_shadow_list == NULL); 693 J_ASSERT(transaction->t_shadow_list == NULL);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 4d99685fdce4..f8b3be873226 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -22,6 +22,8 @@
22#include <linux/pagemap.h> 22#include <linux/pagemap.h>
23#include <linux/jiffies.h> 23#include <linux/jiffies.h>
24#include <linux/crc32.h> 24#include <linux/crc32.h>
25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
25 27
26/* 28/*
27 * Default IO end handler for temporary BJ_IO buffer_heads. 29 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -37,8 +39,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
37} 39}
38 40
39/* 41/*
40 * When an ext3-ordered file is truncated, it is possible that many pages are 42 * When an ext4 file is truncated, it is possible that some pages are not
41 * not sucessfully freed, because they are attached to a committing transaction. 43 * successfully freed, because they are attached to a committing transaction.
42 * After the transaction commits, these pages are left on the LRU, with no 44 * After the transaction commits, these pages are left on the LRU, with no
43 * ->mapping, and with attached buffers. These pages are trivially reclaimable 45 * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 * by the VM, but their apparent absence upsets the VM accounting, and it makes 46 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -80,21 +82,6 @@ nope:
80} 82}
81 83
82/* 84/*
83 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
84 * held. For ranking reasons we must trylock. If we lose, schedule away and
85 * return 0. j_list_lock is dropped in this case.
86 */
87static int inverted_lock(journal_t *journal, struct buffer_head *bh)
88{
89 if (!jbd_trylock_bh_state(bh)) {
90 spin_unlock(&journal->j_list_lock);
91 schedule();
92 return 0;
93 }
94 return 1;
95}
96
97/*
98 * Done it all: now submit the commit record. We should have 85 * Done it all: now submit the commit record. We should have
99 * cleaned up our previous buffers by now, so if we are in abort 86 * cleaned up our previous buffers by now, so if we are in abort
100 * mode we can now just skip the rest of the journal write 87 * mode we can now just skip the rest of the journal write
@@ -112,6 +99,7 @@ static int journal_submit_commit_record(journal_t *journal,
112 struct buffer_head *bh; 99 struct buffer_head *bh;
113 int ret; 100 int ret;
114 int barrier_done = 0; 101 int barrier_done = 0;
102 struct timespec now = current_kernel_time();
115 103
116 if (is_journal_aborted(journal)) 104 if (is_journal_aborted(journal))
117 return 0; 105 return 0;
@@ -126,6 +114,8 @@ static int journal_submit_commit_record(journal_t *journal,
126 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 114 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
127 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 115 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
128 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 116 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
117 tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
118 tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
129 119
130 if (JBD2_HAS_COMPAT_FEATURE(journal, 120 if (JBD2_HAS_COMPAT_FEATURE(journal,
131 JBD2_FEATURE_COMPAT_CHECKSUM)) { 121 JBD2_FEATURE_COMPAT_CHECKSUM)) {
@@ -168,6 +158,7 @@ static int journal_submit_commit_record(journal_t *journal,
168 spin_unlock(&journal->j_state_lock); 158 spin_unlock(&journal->j_state_lock);
169 159
170 /* And try again, without the barrier */ 160 /* And try again, without the barrier */
161 lock_buffer(bh);
171 set_buffer_uptodate(bh); 162 set_buffer_uptodate(bh);
172 set_buffer_dirty(bh); 163 set_buffer_dirty(bh);
173 ret = submit_bh(WRITE, bh); 164 ret = submit_bh(WRITE, bh);
@@ -196,159 +187,104 @@ static int journal_wait_on_commit_record(struct buffer_head *bh)
196} 187}
197 188
198/* 189/*
199 * Wait for all submitted IO to complete. 190 * write the filemap data using writepage() address_space_operations.
191 * We don't do block allocation here even for delalloc. We don't
192 * use writepages() because with dealyed allocation we may be doing
193 * block allocation in writepages().
200 */ 194 */
201static int journal_wait_on_locked_list(journal_t *journal, 195static int journal_submit_inode_data_buffers(struct address_space *mapping)
202 transaction_t *commit_transaction)
203{ 196{
204 int ret = 0; 197 int ret;
205 struct journal_head *jh; 198 struct writeback_control wbc = {
206 199 .sync_mode = WB_SYNC_ALL,
207 while (commit_transaction->t_locked_list) { 200 .nr_to_write = mapping->nrpages * 2,
208 struct buffer_head *bh; 201 .range_start = 0,
209 202 .range_end = i_size_read(mapping->host),
210 jh = commit_transaction->t_locked_list->b_tprev; 203 .for_writepages = 1,
211 bh = jh2bh(jh); 204 };
212 get_bh(bh); 205
213 if (buffer_locked(bh)) { 206 ret = generic_writepages(mapping, &wbc);
214 spin_unlock(&journal->j_list_lock);
215 wait_on_buffer(bh);
216 if (unlikely(!buffer_uptodate(bh)))
217 ret = -EIO;
218 spin_lock(&journal->j_list_lock);
219 }
220 if (!inverted_lock(journal, bh)) {
221 put_bh(bh);
222 spin_lock(&journal->j_list_lock);
223 continue;
224 }
225 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
226 __jbd2_journal_unfile_buffer(jh);
227 jbd_unlock_bh_state(bh);
228 jbd2_journal_remove_journal_head(bh);
229 put_bh(bh);
230 } else {
231 jbd_unlock_bh_state(bh);
232 }
233 put_bh(bh);
234 cond_resched_lock(&journal->j_list_lock);
235 }
236 return ret; 207 return ret;
237 } 208}
238 209
239static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 210/*
211 * Submit all the data buffers of inode associated with the transaction to
212 * disk.
213 *
214 * We are in a committing transaction. Therefore no new inode can be added to
215 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
216 * operate on from being released while we write out pages.
217 */
218static int journal_submit_data_buffers(journal_t *journal,
219 transaction_t *commit_transaction)
240{ 220{
241 int i; 221 struct jbd2_inode *jinode;
222 int err, ret = 0;
223 struct address_space *mapping;
242 224
243 for (i = 0; i < bufs; i++) { 225 spin_lock(&journal->j_list_lock);
244 wbuf[i]->b_end_io = end_buffer_write_sync; 226 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
245 /* We use-up our safety reference in submit_bh() */ 227 mapping = jinode->i_vfs_inode->i_mapping;
246 submit_bh(WRITE, wbuf[i]); 228 jinode->i_flags |= JI_COMMIT_RUNNING;
229 spin_unlock(&journal->j_list_lock);
230 /*
231 * submit the inode data buffers. We use writepage
232 * instead of writepages. Because writepages can do
233 * block allocation with delalloc. We need to write
234 * only allocated blocks here.
235 */
236 err = journal_submit_inode_data_buffers(mapping);
237 if (!ret)
238 ret = err;
239 spin_lock(&journal->j_list_lock);
240 J_ASSERT(jinode->i_transaction == commit_transaction);
241 jinode->i_flags &= ~JI_COMMIT_RUNNING;
242 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
247 } 243 }
244 spin_unlock(&journal->j_list_lock);
245 return ret;
248} 246}
249 247
250/* 248/*
251 * Submit all the data buffers to disk 249 * Wait for data submitted for writeout, refile inodes to proper
250 * transaction if needed.
251 *
252 */ 252 */
253static void journal_submit_data_buffers(journal_t *journal, 253static int journal_finish_inode_data_buffers(journal_t *journal,
254 transaction_t *commit_transaction) 254 transaction_t *commit_transaction)
255{ 255{
256 struct journal_head *jh; 256 struct jbd2_inode *jinode, *next_i;
257 struct buffer_head *bh; 257 int err, ret = 0;
258 int locked;
259 int bufs = 0;
260 struct buffer_head **wbuf = journal->j_wbuf;
261 258
262 /* 259 /* For locking, see the comment in journal_submit_data_buffers() */
263 * Whenever we unlock the journal and sleep, things can get added
264 * onto ->t_sync_datalist, so we have to keep looping back to
265 * write_out_data until we *know* that the list is empty.
266 *
267 * Cleanup any flushed data buffers from the data list. Even in
268 * abort mode, we want to flush this out as soon as possible.
269 */
270write_out_data:
271 cond_resched();
272 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret)
266 ret = err;
267 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
270 }
273 271
274 while (commit_transaction->t_sync_datalist) { 272 /* Now refile inode to proper lists */
275 jh = commit_transaction->t_sync_datalist; 273 list_for_each_entry_safe(jinode, next_i,
276 bh = jh2bh(jh); 274 &commit_transaction->t_inode_list, i_list) {
277 locked = 0; 275 list_del(&jinode->i_list);
278 276 if (jinode->i_next_transaction) {
279 /* Get reference just to make sure buffer does not disappear 277 jinode->i_transaction = jinode->i_next_transaction;
280 * when we are forced to drop various locks */ 278 jinode->i_next_transaction = NULL;
281 get_bh(bh); 279 list_add(&jinode->i_list,
282 /* If the buffer is dirty, we need to submit IO and hence 280 &jinode->i_transaction->t_inode_list);
283 * we need the buffer lock. We try to lock the buffer without
284 * blocking. If we fail, we need to drop j_list_lock and do
285 * blocking lock_buffer().
286 */
287 if (buffer_dirty(bh)) {
288 if (test_set_buffer_locked(bh)) {
289 BUFFER_TRACE(bh, "needs blocking lock");
290 spin_unlock(&journal->j_list_lock);
291 /* Write out all data to prevent deadlocks */
292 journal_do_submit_data(wbuf, bufs);
293 bufs = 0;
294 lock_buffer(bh);
295 spin_lock(&journal->j_list_lock);
296 }
297 locked = 1;
298 }
299 /* We have to get bh_state lock. Again out of order, sigh. */
300 if (!inverted_lock(journal, bh)) {
301 jbd_lock_bh_state(bh);
302 spin_lock(&journal->j_list_lock);
303 }
304 /* Someone already cleaned up the buffer? */
305 if (!buffer_jbd(bh)
306 || jh->b_transaction != commit_transaction
307 || jh->b_jlist != BJ_SyncData) {
308 jbd_unlock_bh_state(bh);
309 if (locked)
310 unlock_buffer(bh);
311 BUFFER_TRACE(bh, "already cleaned up");
312 put_bh(bh);
313 continue;
314 }
315 if (locked && test_clear_buffer_dirty(bh)) {
316 BUFFER_TRACE(bh, "needs writeout, adding to array");
317 wbuf[bufs++] = bh;
318 __jbd2_journal_file_buffer(jh, commit_transaction,
319 BJ_Locked);
320 jbd_unlock_bh_state(bh);
321 if (bufs == journal->j_wbufsize) {
322 spin_unlock(&journal->j_list_lock);
323 journal_do_submit_data(wbuf, bufs);
324 bufs = 0;
325 goto write_out_data;
326 }
327 } else if (!locked && buffer_locked(bh)) {
328 __jbd2_journal_file_buffer(jh, commit_transaction,
329 BJ_Locked);
330 jbd_unlock_bh_state(bh);
331 put_bh(bh);
332 } else { 281 } else {
333 BUFFER_TRACE(bh, "writeout complete: unfile"); 282 jinode->i_transaction = NULL;
334 __jbd2_journal_unfile_buffer(jh);
335 jbd_unlock_bh_state(bh);
336 if (locked)
337 unlock_buffer(bh);
338 jbd2_journal_remove_journal_head(bh);
339 /* Once for our safety reference, once for
340 * jbd2_journal_remove_journal_head() */
341 put_bh(bh);
342 put_bh(bh);
343 }
344
345 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
346 spin_unlock(&journal->j_list_lock);
347 goto write_out_data;
348 } 283 }
349 } 284 }
350 spin_unlock(&journal->j_list_lock); 285 spin_unlock(&journal->j_list_lock);
351 journal_do_submit_data(wbuf, bufs); 286
287 return ret;
352} 288}
353 289
354static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) 290static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
@@ -523,21 +459,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
523 * Now start flushing things to disk, in the order they appear 459 * Now start flushing things to disk, in the order they appear
524 * on the transaction lists. Data blocks go first. 460 * on the transaction lists. Data blocks go first.
525 */ 461 */
526 err = 0; 462 err = journal_submit_data_buffers(journal, commit_transaction);
527 journal_submit_data_buffers(journal, commit_transaction);
528
529 /*
530 * Wait for all previously submitted IO to complete if commit
531 * record is to be written synchronously.
532 */
533 spin_lock(&journal->j_list_lock);
534 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
535 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
536 err = journal_wait_on_locked_list(journal,
537 commit_transaction);
538
539 spin_unlock(&journal->j_list_lock);
540
541 if (err) 463 if (err)
542 jbd2_journal_abort(journal, err); 464 jbd2_journal_abort(journal, err);
543 465
@@ -546,16 +468,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
546 jbd_debug(3, "JBD: commit phase 2\n"); 468 jbd_debug(3, "JBD: commit phase 2\n");
547 469
548 /* 470 /*
549 * If we found any dirty or locked buffers, then we should have
550 * looped back up to the write_out_data label. If there weren't
551 * any then journal_clean_data_list should have wiped the list
552 * clean by now, so check that it is in fact empty.
553 */
554 J_ASSERT (commit_transaction->t_sync_datalist == NULL);
555
556 jbd_debug (3, "JBD: commit phase 3\n");
557
558 /*
559 * Way to go: we have now written out all of the data for a 471 * Way to go: we have now written out all of the data for a
560 * transaction! Now comes the tricky part: we need to write out 472 * transaction! Now comes the tricky part: we need to write out
561 * metadata. Loop over the transaction's entire buffer list: 473 * metadata. Loop over the transaction's entire buffer list:
@@ -573,6 +485,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
573 J_ASSERT(commit_transaction->t_nr_buffers <= 485 J_ASSERT(commit_transaction->t_nr_buffers <=
574 commit_transaction->t_outstanding_credits); 486 commit_transaction->t_outstanding_credits);
575 487
488 err = 0;
576 descriptor = NULL; 489 descriptor = NULL;
577 bufs = 0; 490 bufs = 0;
578 while (commit_transaction->t_buffers) { 491 while (commit_transaction->t_buffers) {
@@ -747,15 +660,19 @@ start_journal_io:
747 &cbh, crc32_sum); 660 &cbh, crc32_sum);
748 if (err) 661 if (err)
749 __jbd2_journal_abort_hard(journal); 662 __jbd2_journal_abort_hard(journal);
750
751 spin_lock(&journal->j_list_lock);
752 err = journal_wait_on_locked_list(journal,
753 commit_transaction);
754 spin_unlock(&journal->j_list_lock);
755 if (err)
756 __jbd2_journal_abort_hard(journal);
757 } 663 }
758 664
665 /*
666 * This is the right place to wait for data buffers both for ASYNC
667 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
668 * the commit block went to disk (which happens above). If commit is
669 * SYNC, we need to wait for data buffers before we start writing
670 * commit block, which happens below in such setting.
671 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err)
674 jbd2_journal_abort(journal, err);
675
759 /* Lo and behold: we have just managed to send a transaction to 676 /* Lo and behold: we have just managed to send a transaction to
760 the log. Before we can commit it, wait for the IO so far to 677 the log. Before we can commit it, wait for the IO so far to
761 complete. Control buffers being written are on the 678 complete. Control buffers being written are on the
@@ -767,7 +684,7 @@ start_journal_io:
767 so we incur less scheduling load. 684 so we incur less scheduling load.
768 */ 685 */
769 686
770 jbd_debug(3, "JBD: commit phase 4\n"); 687 jbd_debug(3, "JBD: commit phase 3\n");
771 688
772 /* 689 /*
773 * akpm: these are BJ_IO, and j_list_lock is not needed. 690 * akpm: these are BJ_IO, and j_list_lock is not needed.
@@ -826,7 +743,7 @@ wait_for_iobuf:
826 743
827 J_ASSERT (commit_transaction->t_shadow_list == NULL); 744 J_ASSERT (commit_transaction->t_shadow_list == NULL);
828 745
829 jbd_debug(3, "JBD: commit phase 5\n"); 746 jbd_debug(3, "JBD: commit phase 4\n");
830 747
831 /* Here we wait for the revoke record and descriptor record buffers */ 748 /* Here we wait for the revoke record and descriptor record buffers */
832 wait_for_ctlbuf: 749 wait_for_ctlbuf:
@@ -853,7 +770,7 @@ wait_for_iobuf:
853 /* AKPM: bforget here */ 770 /* AKPM: bforget here */
854 } 771 }
855 772
856 jbd_debug(3, "JBD: commit phase 6\n"); 773 jbd_debug(3, "JBD: commit phase 5\n");
857 774
858 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 775 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
859 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 776 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -873,9 +790,9 @@ wait_for_iobuf:
873 transaction can be removed from any checkpoint list it was on 790 transaction can be removed from any checkpoint list it was on
874 before. */ 791 before. */
875 792
876 jbd_debug(3, "JBD: commit phase 7\n"); 793 jbd_debug(3, "JBD: commit phase 6\n");
877 794
878 J_ASSERT(commit_transaction->t_sync_datalist == NULL); 795 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
879 J_ASSERT(commit_transaction->t_buffers == NULL); 796 J_ASSERT(commit_transaction->t_buffers == NULL);
880 J_ASSERT(commit_transaction->t_checkpoint_list == NULL); 797 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
881 J_ASSERT(commit_transaction->t_iobuf_list == NULL); 798 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
@@ -996,7 +913,7 @@ restart_loop:
996 913
997 /* Done with this transaction! */ 914 /* Done with this transaction! */
998 915
999 jbd_debug(3, "JBD: commit phase 8\n"); 916 jbd_debug(3, "JBD: commit phase 7\n");
1000 917
1001 J_ASSERT(commit_transaction->t_state == T_COMMIT); 918 J_ASSERT(commit_transaction->t_state == T_COMMIT);
1002 919
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 2e24567c4a79..b26c6d9fe6ae 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -50,7 +50,6 @@ EXPORT_SYMBOL(jbd2_journal_unlock_updates);
50EXPORT_SYMBOL(jbd2_journal_get_write_access); 50EXPORT_SYMBOL(jbd2_journal_get_write_access);
51EXPORT_SYMBOL(jbd2_journal_get_create_access); 51EXPORT_SYMBOL(jbd2_journal_get_create_access);
52EXPORT_SYMBOL(jbd2_journal_get_undo_access); 52EXPORT_SYMBOL(jbd2_journal_get_undo_access);
53EXPORT_SYMBOL(jbd2_journal_dirty_data);
54EXPORT_SYMBOL(jbd2_journal_dirty_metadata); 53EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
55EXPORT_SYMBOL(jbd2_journal_release_buffer); 54EXPORT_SYMBOL(jbd2_journal_release_buffer);
56EXPORT_SYMBOL(jbd2_journal_forget); 55EXPORT_SYMBOL(jbd2_journal_forget);
@@ -82,6 +81,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
82EXPORT_SYMBOL(jbd2_journal_invalidatepage); 81EXPORT_SYMBOL(jbd2_journal_invalidatepage);
83EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); 82EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
84EXPORT_SYMBOL(jbd2_journal_force_commit); 83EXPORT_SYMBOL(jbd2_journal_force_commit);
84EXPORT_SYMBOL(jbd2_journal_file_inode);
85EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
86EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
87EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
85 88
86static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); 89static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
87static void __journal_abort_soft (journal_t *journal, int errno); 90static void __journal_abort_soft (journal_t *journal, int errno);
@@ -2195,6 +2198,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
2195} 2198}
2196 2199
2197/* 2200/*
2201 * Initialize jbd inode head
2202 */
2203void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
2204{
2205 jinode->i_transaction = NULL;
2206 jinode->i_next_transaction = NULL;
2207 jinode->i_vfs_inode = inode;
2208 jinode->i_flags = 0;
2209 INIT_LIST_HEAD(&jinode->i_list);
2210}
2211
2212/*
2213 * Function to be called before we start removing inode from memory (i.e.,
2214 * clear_inode() is a fine place to be called from). It removes inode from
2215 * transaction's lists.
2216 */
2217void jbd2_journal_release_jbd_inode(journal_t *journal,
2218 struct jbd2_inode *jinode)
2219{
2220 int writeout = 0;
2221
2222 if (!journal)
2223 return;
2224restart:
2225 spin_lock(&journal->j_list_lock);
2226 /* Is commit writing out inode - we have to wait */
2227 if (jinode->i_flags & JI_COMMIT_RUNNING) {
2228 wait_queue_head_t *wq;
2229 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2230 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
2231 prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
2232 spin_unlock(&journal->j_list_lock);
2233 schedule();
2234 finish_wait(wq, &wait.wait);
2235 goto restart;
2236 }
2237
2238 /* Do we need to wait for data writeback? */
2239 if (journal->j_committing_transaction == jinode->i_transaction)
2240 writeout = 1;
2241 if (jinode->i_transaction) {
2242 list_del(&jinode->i_list);
2243 jinode->i_transaction = NULL;
2244 }
2245 spin_unlock(&journal->j_list_lock);
2246}
2247
2248/*
2198 * debugfs tunables 2249 * debugfs tunables
2199 */ 2250 */
2200#ifdef CONFIG_JBD2_DEBUG 2251#ifdef CONFIG_JBD2_DEBUG
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 5d0405a9e7ca..058f50f65b76 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -344,6 +344,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 344 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
345 obh->b_size); 345 obh->b_size);
346 } 346 }
347 put_bh(obh);
347 } 348 }
348 return 0; 349 return 0;
349} 350}
@@ -610,9 +611,8 @@ static int do_one_pass(journal_t *journal,
610 chksum_err = chksum_seen = 0; 611 chksum_err = chksum_seen = 0;
611 612
612 if (info->end_transaction) { 613 if (info->end_transaction) {
613 printk(KERN_ERR "JBD: Transaction %u " 614 journal->j_failed_commit =
614 "found to be corrupt.\n", 615 info->end_transaction;
615 next_commit_ID - 1);
616 brelse(bh); 616 brelse(bh);
617 break; 617 break;
618 } 618 }
@@ -643,10 +643,8 @@ static int do_one_pass(journal_t *journal,
643 643
644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal, 644 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ 645 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
646 printk(KERN_ERR 646 journal->j_failed_commit =
647 "JBD: Transaction %u " 647 next_commit_ID;
648 "found to be corrupt.\n",
649 next_commit_ID);
650 brelse(bh); 648 brelse(bh);
651 break; 649 break;
652 } 650 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d6e006e67804..4f7cadbb19fa 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -41,7 +41,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
41 * new transaction and we can't block without protecting against other 41 * new transaction and we can't block without protecting against other
42 * processes trying to touch the journal while it is in transition. 42 * processes trying to touch the journal while it is in transition.
43 * 43 *
44 * Called under j_state_lock
45 */ 44 */
46 45
47static transaction_t * 46static transaction_t *
@@ -52,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
52 transaction->t_tid = journal->j_transaction_sequence++; 51 transaction->t_tid = journal->j_transaction_sequence++;
53 transaction->t_expires = jiffies + journal->j_commit_interval; 52 transaction->t_expires = jiffies + journal->j_commit_interval;
54 spin_lock_init(&transaction->t_handle_lock); 53 spin_lock_init(&transaction->t_handle_lock);
54 INIT_LIST_HEAD(&transaction->t_inode_list);
55 55
56 /* Set up the commit timer for the new transaction. */ 56 /* Set up the commit timer for the new transaction. */
57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); 57 journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
@@ -943,183 +943,6 @@ out:
943} 943}
944 944
945/** 945/**
946 * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which
947 * needs to be flushed before we can commit the
948 * current transaction.
949 * @handle: transaction
950 * @bh: bufferhead to mark
951 *
952 * The buffer is placed on the transaction's data list and is marked as
953 * belonging to the transaction.
954 *
955 * Returns error number or 0 on success.
956 *
957 * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
958 * by kswapd.
959 */
960int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
961{
962 journal_t *journal = handle->h_transaction->t_journal;
963 int need_brelse = 0;
964 struct journal_head *jh;
965
966 if (is_handle_aborted(handle))
967 return 0;
968
969 jh = jbd2_journal_add_journal_head(bh);
970 JBUFFER_TRACE(jh, "entry");
971
972 /*
973 * The buffer could *already* be dirty. Writeout can start
974 * at any time.
975 */
976 jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);
977
978 /*
979 * What if the buffer is already part of a running transaction?
980 *
981 * There are two cases:
982 * 1) It is part of the current running transaction. Refile it,
983 * just in case we have allocated it as metadata, deallocated
984 * it, then reallocated it as data.
985 * 2) It is part of the previous, still-committing transaction.
986 * If all we want to do is to guarantee that the buffer will be
987 * written to disk before this new transaction commits, then
988 * being sure that the *previous* transaction has this same
989 * property is sufficient for us! Just leave it on its old
990 * transaction.
991 *
992 * In case (2), the buffer must not already exist as metadata
993 * --- that would violate write ordering (a transaction is free
994 * to write its data at any point, even before the previous
995 * committing transaction has committed). The caller must
996 * never, ever allow this to happen: there's nothing we can do
997 * about it in this layer.
998 */
999 jbd_lock_bh_state(bh);
1000 spin_lock(&journal->j_list_lock);
1001
1002 /* Now that we have bh_state locked, are we really still mapped? */
1003 if (!buffer_mapped(bh)) {
1004 JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
1005 goto no_journal;
1006 }
1007
1008 if (jh->b_transaction) {
1009 JBUFFER_TRACE(jh, "has transaction");
1010 if (jh->b_transaction != handle->h_transaction) {
1011 JBUFFER_TRACE(jh, "belongs to older transaction");
1012 J_ASSERT_JH(jh, jh->b_transaction ==
1013 journal->j_committing_transaction);
1014
1015 /* @@@ IS THIS TRUE ? */
1016 /*
1017 * Not any more. Scenario: someone does a write()
1018 * in data=journal mode. The buffer's transaction has
1019 * moved into commit. Then someone does another
1020 * write() to the file. We do the frozen data copyout
1021 * and set b_next_transaction to point to j_running_t.
1022 * And while we're in that state, someone does a
1023 * writepage() in an attempt to pageout the same area
1024 * of the file via a shared mapping. At present that
1025 * calls jbd2_journal_dirty_data(), and we get right here.
1026 * It may be too late to journal the data. Simply
1027 * falling through to the next test will suffice: the
1028 * data will be dirty and wil be checkpointed. The
1029 * ordering comments in the next comment block still
1030 * apply.
1031 */
1032 //J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
1033
1034 /*
1035 * If we're journalling data, and this buffer was
1036 * subject to a write(), it could be metadata, forget
1037 * or shadow against the committing transaction. Now,
1038 * someone has dirtied the same darn page via a mapping
1039 * and it is being writepage()'d.
1040 * We *could* just steal the page from commit, with some
1041 * fancy locking there. Instead, we just skip it -
1042 * don't tie the page's buffers to the new transaction
1043 * at all.
1044 * Implication: if we crash before the writepage() data
1045 * is written into the filesystem, recovery will replay
1046 * the write() data.
1047 */
1048 if (jh->b_jlist != BJ_None &&
1049 jh->b_jlist != BJ_SyncData &&
1050 jh->b_jlist != BJ_Locked) {
1051 JBUFFER_TRACE(jh, "Not stealing");
1052 goto no_journal;
1053 }
1054
1055 /*
1056 * This buffer may be undergoing writeout in commit. We
1057 * can't return from here and let the caller dirty it
1058 * again because that can cause the write-out loop in
1059 * commit to never terminate.
1060 */
1061 if (buffer_dirty(bh)) {
1062 get_bh(bh);
1063 spin_unlock(&journal->j_list_lock);
1064 jbd_unlock_bh_state(bh);
1065 need_brelse = 1;
1066 sync_dirty_buffer(bh);
1067 jbd_lock_bh_state(bh);
1068 spin_lock(&journal->j_list_lock);
1069 /* Since we dropped the lock... */
1070 if (!buffer_mapped(bh)) {
1071 JBUFFER_TRACE(jh, "buffer got unmapped");
1072 goto no_journal;
1073 }
1074 /* The buffer may become locked again at any
1075 time if it is redirtied */
1076 }
1077
1078 /* journal_clean_data_list() may have got there first */
1079 if (jh->b_transaction != NULL) {
1080 JBUFFER_TRACE(jh, "unfile from commit");
1081 __jbd2_journal_temp_unlink_buffer(jh);
1082 /* It still points to the committing
1083 * transaction; move it to this one so
1084 * that the refile assert checks are
1085 * happy. */
1086 jh->b_transaction = handle->h_transaction;
1087 }
1088 /* The buffer will be refiled below */
1089
1090 }
1091 /*
1092 * Special case --- the buffer might actually have been
1093 * allocated and then immediately deallocated in the previous,
1094 * committing transaction, so might still be left on that
1095 * transaction's metadata lists.
1096 */
1097 if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
1098 JBUFFER_TRACE(jh, "not on correct data list: unfile");
1099 J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
1100 __jbd2_journal_temp_unlink_buffer(jh);
1101 jh->b_transaction = handle->h_transaction;
1102 JBUFFER_TRACE(jh, "file as data");
1103 __jbd2_journal_file_buffer(jh, handle->h_transaction,
1104 BJ_SyncData);
1105 }
1106 } else {
1107 JBUFFER_TRACE(jh, "not on a transaction");
1108 __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
1109 }
1110no_journal:
1111 spin_unlock(&journal->j_list_lock);
1112 jbd_unlock_bh_state(bh);
1113 if (need_brelse) {
1114 BUFFER_TRACE(bh, "brelse");
1115 __brelse(bh);
1116 }
1117 JBUFFER_TRACE(jh, "exit");
1118 jbd2_journal_put_journal_head(jh);
1119 return 0;
1120}
1121
1122/**
1123 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata 946 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
1124 * @handle: transaction to add buffer to. 947 * @handle: transaction to add buffer to.
1125 * @bh: buffer to mark 948 * @bh: buffer to mark
@@ -1541,10 +1364,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
1541 * Remove a buffer from the appropriate transaction list. 1364 * Remove a buffer from the appropriate transaction list.
1542 * 1365 *
1543 * Note that this function can *change* the value of 1366 * Note that this function can *change* the value of
1544 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, 1367 * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
1545 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller 1368 * t_log_list or t_reserved_list. If the caller is holding onto a copy of one
1546 * is holding onto a copy of one of thee pointers, it could go bad. 1369 * of these pointers, it could go bad. Generally the caller needs to re-read
1547 * Generally the caller needs to re-read the pointer from the transaction_t. 1370 * the pointer from the transaction_t.
1548 * 1371 *
1549 * Called under j_list_lock. The journal may not be locked. 1372 * Called under j_list_lock. The journal may not be locked.
1550 */ 1373 */
@@ -1566,9 +1389,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1566 switch (jh->b_jlist) { 1389 switch (jh->b_jlist) {
1567 case BJ_None: 1390 case BJ_None:
1568 return; 1391 return;
1569 case BJ_SyncData:
1570 list = &transaction->t_sync_datalist;
1571 break;
1572 case BJ_Metadata: 1392 case BJ_Metadata:
1573 transaction->t_nr_buffers--; 1393 transaction->t_nr_buffers--;
1574 J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); 1394 J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
@@ -1589,9 +1409,6 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
1589 case BJ_Reserved: 1409 case BJ_Reserved:
1590 list = &transaction->t_reserved_list; 1410 list = &transaction->t_reserved_list;
1591 break; 1411 break;
1592 case BJ_Locked:
1593 list = &transaction->t_locked_list;
1594 break;
1595 } 1412 }
1596 1413
1597 __blist_del_buffer(list, jh); 1414 __blist_del_buffer(list, jh);
@@ -1634,15 +1451,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
1634 goto out; 1451 goto out;
1635 1452
1636 spin_lock(&journal->j_list_lock); 1453 spin_lock(&journal->j_list_lock);
1637 if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { 1454 if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1638 if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
1639 /* A written-back ordered data buffer */
1640 JBUFFER_TRACE(jh, "release data");
1641 __jbd2_journal_unfile_buffer(jh);
1642 jbd2_journal_remove_journal_head(bh);
1643 __brelse(bh);
1644 }
1645 } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
1646 /* written-back checkpointed metadata buffer */ 1455 /* written-back checkpointed metadata buffer */
1647 if (jh->b_jlist == BJ_None) { 1456 if (jh->b_jlist == BJ_None) {
1648 JBUFFER_TRACE(jh, "remove from checkpoint list"); 1457 JBUFFER_TRACE(jh, "remove from checkpoint list");
@@ -1656,12 +1465,43 @@ out:
1656 return; 1465 return;
1657} 1466}
1658 1467
1468/*
1469 * jbd2_journal_try_to_free_buffers() could race with
1470 * jbd2_journal_commit_transaction(). The later might still hold the
1471 * reference count to the buffers when inspecting them on
1472 * t_syncdata_list or t_locked_list.
1473 *
1474 * jbd2_journal_try_to_free_buffers() will call this function to
1475 * wait for the current transaction to finish syncing data buffers, before
1476 * try to free that buffer.
1477 *
1478 * Called with journal->j_state_lock hold.
1479 */
1480static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
1481{
1482 transaction_t *transaction;
1483 tid_t tid;
1484
1485 spin_lock(&journal->j_state_lock);
1486 transaction = journal->j_committing_transaction;
1487
1488 if (!transaction) {
1489 spin_unlock(&journal->j_state_lock);
1490 return;
1491 }
1492
1493 tid = transaction->t_tid;
1494 spin_unlock(&journal->j_state_lock);
1495 jbd2_log_wait_commit(journal, tid);
1496}
1659 1497
1660/** 1498/**
1661 * int jbd2_journal_try_to_free_buffers() - try to free page buffers. 1499 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
1662 * @journal: journal for operation 1500 * @journal: journal for operation
1663 * @page: to try and free 1501 * @page: to try and free
1664 * @unused_gfp_mask: unused 1502 * @gfp_mask: we use the mask to detect how hard should we try to release
1503 * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
1504 * release the buffers.
1665 * 1505 *
1666 * 1506 *
1667 * For all the buffers on this page, 1507 * For all the buffers on this page,
@@ -1690,9 +1530,11 @@ out:
1690 * journal_try_to_free_buffer() is changing its state. But that 1530 * journal_try_to_free_buffer() is changing its state. But that
1691 * cannot happen because we never reallocate freed data as metadata 1531 * cannot happen because we never reallocate freed data as metadata
1692 * while the data is part of a transaction. Yes? 1532 * while the data is part of a transaction. Yes?
1533 *
1534 * Return 0 on failure, 1 on success
1693 */ 1535 */
1694int jbd2_journal_try_to_free_buffers(journal_t *journal, 1536int jbd2_journal_try_to_free_buffers(journal_t *journal,
1695 struct page *page, gfp_t unused_gfp_mask) 1537 struct page *page, gfp_t gfp_mask)
1696{ 1538{
1697 struct buffer_head *head; 1539 struct buffer_head *head;
1698 struct buffer_head *bh; 1540 struct buffer_head *bh;
@@ -1708,7 +1550,8 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1708 /* 1550 /*
1709 * We take our own ref against the journal_head here to avoid 1551 * We take our own ref against the journal_head here to avoid
1710 * having to add tons of locking around each instance of 1552 * having to add tons of locking around each instance of
1711 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). 1553 * jbd2_journal_remove_journal_head() and
1554 * jbd2_journal_put_journal_head().
1712 */ 1555 */
1713 jh = jbd2_journal_grab_journal_head(bh); 1556 jh = jbd2_journal_grab_journal_head(bh);
1714 if (!jh) 1557 if (!jh)
@@ -1721,7 +1564,28 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
1721 if (buffer_jbd(bh)) 1564 if (buffer_jbd(bh))
1722 goto busy; 1565 goto busy;
1723 } while ((bh = bh->b_this_page) != head); 1566 } while ((bh = bh->b_this_page) != head);
1567
1724 ret = try_to_free_buffers(page); 1568 ret = try_to_free_buffers(page);
1569
1570 /*
1571 * There are a number of places where jbd2_journal_try_to_free_buffers()
1572 * could race with jbd2_journal_commit_transaction(), the later still
1573 * holds the reference to the buffers to free while processing them.
1574 * try_to_free_buffers() failed to free those buffers. Some of the
1575 * caller of releasepage() request page buffers to be dropped, otherwise
1576 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1577 *
1578 * So, if the caller of try_to_release_page() wants the synchronous
1579 * behaviour(i.e make sure buffers are dropped upon return),
1580 * let's wait for the current transaction to finish flush of
1581 * dirty data buffers, then try to free those buffers again,
1582 * with the journal locked.
1583 */
1584 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1585 jbd2_journal_wait_for_transaction_sync_data(journal);
1586 ret = try_to_free_buffers(page);
1587 }
1588
1725busy: 1589busy:
1726 return ret; 1590 return ret;
1727} 1591}
@@ -1823,6 +1687,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1823 if (!buffer_jbd(bh)) 1687 if (!buffer_jbd(bh))
1824 goto zap_buffer_unlocked; 1688 goto zap_buffer_unlocked;
1825 1689
1690 /* OK, we have data buffer in journaled mode */
1826 spin_lock(&journal->j_state_lock); 1691 spin_lock(&journal->j_state_lock);
1827 jbd_lock_bh_state(bh); 1692 jbd_lock_bh_state(bh);
1828 spin_lock(&journal->j_list_lock); 1693 spin_lock(&journal->j_list_lock);
@@ -1886,15 +1751,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1886 } 1751 }
1887 } else if (transaction == journal->j_committing_transaction) { 1752 } else if (transaction == journal->j_committing_transaction) {
1888 JBUFFER_TRACE(jh, "on committing transaction"); 1753 JBUFFER_TRACE(jh, "on committing transaction");
1889 if (jh->b_jlist == BJ_Locked) {
1890 /*
1891 * The buffer is on the committing transaction's locked
1892 * list. We have the buffer locked, so I/O has
1893 * completed. So we can nail the buffer now.
1894 */
1895 may_free = __dispose_buffer(jh, transaction);
1896 goto zap_buffer;
1897 }
1898 /* 1754 /*
1899 * If it is committing, we simply cannot touch it. We 1755 * If it is committing, we simply cannot touch it. We
1900 * can remove it's next_transaction pointer from the 1756 * can remove it's next_transaction pointer from the
@@ -2027,9 +1883,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
2027 J_ASSERT_JH(jh, !jh->b_committed_data); 1883 J_ASSERT_JH(jh, !jh->b_committed_data);
2028 J_ASSERT_JH(jh, !jh->b_frozen_data); 1884 J_ASSERT_JH(jh, !jh->b_frozen_data);
2029 return; 1885 return;
2030 case BJ_SyncData:
2031 list = &transaction->t_sync_datalist;
2032 break;
2033 case BJ_Metadata: 1886 case BJ_Metadata:
2034 transaction->t_nr_buffers++; 1887 transaction->t_nr_buffers++;
2035 list = &transaction->t_buffers; 1888 list = &transaction->t_buffers;
@@ -2049,9 +1902,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
2049 case BJ_Reserved: 1902 case BJ_Reserved:
2050 list = &transaction->t_reserved_list; 1903 list = &transaction->t_reserved_list;
2051 break; 1904 break;
2052 case BJ_Locked:
2053 list = &transaction->t_locked_list;
2054 break;
2055 } 1905 }
2056 1906
2057 __blist_add_buffer(list, jh); 1907 __blist_add_buffer(list, jh);
@@ -2141,3 +1991,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2141 spin_unlock(&journal->j_list_lock); 1991 spin_unlock(&journal->j_list_lock);
2142 __brelse(bh); 1992 __brelse(bh);
2143} 1993}
1994
1995/*
1996 * File inode in the inode list of the handle's transaction
1997 */
1998int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
1999{
2000 transaction_t *transaction = handle->h_transaction;
2001 journal_t *journal = transaction->t_journal;
2002
2003 if (is_handle_aborted(handle))
2004 return -EIO;
2005
2006 jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2007 transaction->t_tid);
2008
2009 /*
2010 * First check whether inode isn't already on the transaction's
2011 * lists without taking the lock. Note that this check is safe
2012 * without the lock as we cannot race with somebody removing inode
2013 * from the transaction. The reason is that we remove inode from the
2014 * transaction only in journal_release_jbd_inode() and when we commit
2015 * the transaction. We are guarded from the first case by holding
2016 * a reference to the inode. We are safe against the second case
2017 * because if jinode->i_transaction == transaction, commit code
2018 * cannot touch the transaction because we hold reference to it,
2019 * and if jinode->i_next_transaction == transaction, commit code
2020 * will only file the inode where we want it.
2021 */
2022 if (jinode->i_transaction == transaction ||
2023 jinode->i_next_transaction == transaction)
2024 return 0;
2025
2026 spin_lock(&journal->j_list_lock);
2027
2028 if (jinode->i_transaction == transaction ||
2029 jinode->i_next_transaction == transaction)
2030 goto done;
2031
2032 /* On some different transaction's list - should be
2033 * the committing one */
2034 if (jinode->i_transaction) {
2035 J_ASSERT(jinode->i_next_transaction == NULL);
2036 J_ASSERT(jinode->i_transaction ==
2037 journal->j_committing_transaction);
2038 jinode->i_next_transaction = transaction;
2039 goto done;
2040 }
2041 /* Not on any transaction list... */
2042 J_ASSERT(!jinode->i_next_transaction);
2043 jinode->i_transaction = transaction;
2044 list_add(&jinode->i_list, &transaction->t_inode_list);
2045done:
2046 spin_unlock(&journal->j_list_lock);
2047
2048 return 0;
2049}
2050
2051/*
2052 * This function must be called when inode is journaled in ordered mode
2053 * before truncation happens. It starts writeout of truncated part in
2054 * case it is in the committing transaction so that we stand to ordered
2055 * mode consistency guarantees.
2056 */
2057int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
2058 loff_t new_size)
2059{
2060 journal_t *journal;
2061 transaction_t *commit_trans;
2062 int ret = 0;
2063
2064 if (!inode->i_transaction && !inode->i_next_transaction)
2065 goto out;
2066 journal = inode->i_transaction->t_journal;
2067 spin_lock(&journal->j_state_lock);
2068 commit_trans = journal->j_committing_transaction;
2069 spin_unlock(&journal->j_state_lock);
2070 if (inode->i_transaction == commit_trans) {
2071 ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
2072 new_size, LLONG_MAX);
2073 if (ret)
2074 jbd2_journal_abort(journal, ret);
2075 }
2076out:
2077 return ret;
2078}