diff options
-rw-r--r-- | fs/jbd2/commit.c | 90 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 52 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 86 | ||||
-rw-r--r-- | include/linux/jbd2.h | 42 |
4 files changed, 270 insertions, 0 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 92b6ac3df8ab..3ca107b5c86b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -355,6 +355,81 @@ write_out_data: | |||
355 | journal_do_submit_data(wbuf, bufs); | 355 | journal_do_submit_data(wbuf, bufs); |
356 | } | 356 | } |
357 | 357 | ||
358 | /* | ||
359 | * Submit all the data buffers of inode associated with the transaction to | ||
360 | * disk. | ||
361 | * | ||
362 | * We are in a committing transaction. Therefore no new inode can be added to | ||
363 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently | ||
364 | * operate on from being released while we write out pages. | ||
365 | */ | ||
366 | static int journal_submit_inode_data_buffers(journal_t *journal, | ||
367 | transaction_t *commit_transaction) | ||
368 | { | ||
369 | struct jbd2_inode *jinode; | ||
370 | int err, ret = 0; | ||
371 | struct address_space *mapping; | ||
372 | |||
373 | spin_lock(&journal->j_list_lock); | ||
374 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | ||
375 | mapping = jinode->i_vfs_inode->i_mapping; | ||
376 | jinode->i_flags |= JI_COMMIT_RUNNING; | ||
377 | spin_unlock(&journal->j_list_lock); | ||
378 | err = filemap_fdatawrite_range(mapping, 0, | ||
379 | i_size_read(jinode->i_vfs_inode)); | ||
380 | if (!ret) | ||
381 | ret = err; | ||
382 | spin_lock(&journal->j_list_lock); | ||
383 | J_ASSERT(jinode->i_transaction == commit_transaction); | ||
384 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
385 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
386 | } | ||
387 | spin_unlock(&journal->j_list_lock); | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * Wait for data submitted for writeout, refile inodes to proper | ||
393 | * transaction if needed. | ||
394 | * | ||
395 | */ | ||
396 | static int journal_finish_inode_data_buffers(journal_t *journal, | ||
397 | transaction_t *commit_transaction) | ||
398 | { | ||
399 | struct jbd2_inode *jinode, *next_i; | ||
400 | int err, ret = 0; | ||
401 | |||
402 | /* For locking, see the comment in journal_submit_inode_data_buffers() */ | ||
403 | spin_lock(&journal->j_list_lock); | ||
404 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | ||
405 | jinode->i_flags |= JI_COMMIT_RUNNING; | ||
406 | spin_unlock(&journal->j_list_lock); | ||
407 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); | ||
408 | if (!ret) | ||
409 | ret = err; | ||
410 | spin_lock(&journal->j_list_lock); | ||
411 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
412 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
413 | } | ||
414 | |||
415 | /* Now refile inode to proper lists */ | ||
416 | list_for_each_entry_safe(jinode, next_i, | ||
417 | &commit_transaction->t_inode_list, i_list) { | ||
418 | list_del(&jinode->i_list); | ||
419 | if (jinode->i_next_transaction) { | ||
420 | jinode->i_transaction = jinode->i_next_transaction; | ||
421 | jinode->i_next_transaction = NULL; | ||
422 | list_add(&jinode->i_list, | ||
423 | &jinode->i_transaction->t_inode_list); | ||
424 | } else { | ||
425 | jinode->i_transaction = NULL; | ||
426 | } | ||
427 | } | ||
428 | spin_unlock(&journal->j_list_lock); | ||
429 | |||
430 | return ret; | ||
431 | } | ||
432 | |||
358 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) | 433 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
359 | { | 434 | { |
360 | struct page *page = bh->b_page; | 435 | struct page *page = bh->b_page; |
@@ -529,6 +604,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
529 | */ | 604 | */ |
530 | err = 0; | 605 | err = 0; |
531 | journal_submit_data_buffers(journal, commit_transaction); | 606 | journal_submit_data_buffers(journal, commit_transaction); |
607 | err = journal_submit_inode_data_buffers(journal, commit_transaction); | ||
608 | if (err) | ||
609 | jbd2_journal_abort(journal, err); | ||
532 | 610 | ||
533 | /* | 611 | /* |
534 | * Wait for all previously submitted IO to complete if commit | 612 | * Wait for all previously submitted IO to complete if commit |
@@ -760,6 +838,17 @@ start_journal_io: | |||
760 | __jbd2_journal_abort_hard(journal); | 838 | __jbd2_journal_abort_hard(journal); |
761 | } | 839 | } |
762 | 840 | ||
841 | /* | ||
842 | * This is the right place to wait for data buffers both for ASYNC | ||
843 | * and !ASYNC commit. If commit is ASYNC, we need to wait only after | ||
844 | * the commit block went to disk (which happens above). If commit is | ||
845 | * SYNC, we need to wait for data buffers before we start writing | ||
846 | * commit block, which happens below in such setting. | ||
847 | */ | ||
848 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
849 | if (err) | ||
850 | jbd2_journal_abort(journal, err); | ||
851 | |||
763 | /* Lo and behold: we have just managed to send a transaction to | 852 | /* Lo and behold: we have just managed to send a transaction to |
764 | the log. Before we can commit it, wait for the IO so far to | 853 | the log. Before we can commit it, wait for the IO so far to |
765 | complete. Control buffers being written are on the | 854 | complete. Control buffers being written are on the |
@@ -880,6 +969,7 @@ wait_for_iobuf: | |||
880 | jbd_debug(3, "JBD: commit phase 7\n"); | 969 | jbd_debug(3, "JBD: commit phase 7\n"); |
881 | 970 | ||
882 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | 971 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); |
972 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | ||
883 | J_ASSERT(commit_transaction->t_buffers == NULL); | 973 | J_ASSERT(commit_transaction->t_buffers == NULL); |
884 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 974 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
885 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | 975 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 2e24567c4a79..78cf7bd7f604 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -82,6 +82,10 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); | |||
82 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); | 82 | EXPORT_SYMBOL(jbd2_journal_invalidatepage); |
83 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); | 83 | EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); |
84 | EXPORT_SYMBOL(jbd2_journal_force_commit); | 84 | EXPORT_SYMBOL(jbd2_journal_force_commit); |
85 | EXPORT_SYMBOL(jbd2_journal_file_inode); | ||
86 | EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); | ||
87 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); | ||
88 | EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); | ||
85 | 89 | ||
86 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); | 90 | static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); |
87 | static void __journal_abort_soft (journal_t *journal, int errno); | 91 | static void __journal_abort_soft (journal_t *journal, int errno); |
@@ -2195,6 +2199,54 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) | |||
2195 | } | 2199 | } |
2196 | 2200 | ||
2197 | /* | 2201 | /* |
2202 | * Initialize jbd inode head | ||
2203 | */ | ||
2204 | void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) | ||
2205 | { | ||
2206 | jinode->i_transaction = NULL; | ||
2207 | jinode->i_next_transaction = NULL; | ||
2208 | jinode->i_vfs_inode = inode; | ||
2209 | jinode->i_flags = 0; | ||
2210 | INIT_LIST_HEAD(&jinode->i_list); | ||
2211 | } | ||
2212 | |||
2213 | /* | ||
2214 | * Function to be called before we start removing inode from memory (i.e., | ||
2215 | * clear_inode() is a fine place to be called from). It removes inode from | ||
2216 | * transaction's lists. | ||
2217 | */ | ||
2218 | void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
2219 | struct jbd2_inode *jinode) | ||
2220 | { | ||
2221 | int writeout = 0; | ||
2222 | |||
2223 | if (!journal) | ||
2224 | return; | ||
2225 | restart: | ||
2226 | spin_lock(&journal->j_list_lock); | ||
2227 | /* Is commit writing out inode - we have to wait */ | ||
2228 | if (jinode->i_flags & JI_COMMIT_RUNNING) { | ||
2229 | wait_queue_head_t *wq; | ||
2230 | DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); | ||
2231 | wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
2232 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | ||
2233 | spin_unlock(&journal->j_list_lock); | ||
2234 | schedule(); | ||
2235 | finish_wait(wq, &wait.wait); | ||
2236 | goto restart; | ||
2237 | } | ||
2238 | |||
2239 | /* Do we need to wait for data writeback? */ | ||
2240 | if (journal->j_committing_transaction == jinode->i_transaction) | ||
2241 | writeout = 1; | ||
2242 | if (jinode->i_transaction) { | ||
2243 | list_del(&jinode->i_list); | ||
2244 | jinode->i_transaction = NULL; | ||
2245 | } | ||
2246 | spin_unlock(&journal->j_list_lock); | ||
2247 | } | ||
2248 | |||
2249 | /* | ||
2198 | * debugfs tunables | 2250 | * debugfs tunables |
2199 | */ | 2251 | */ |
2200 | #ifdef CONFIG_JBD2_DEBUG | 2252 | #ifdef CONFIG_JBD2_DEBUG |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index ba620c4493d2..98b596d23705 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -51,6 +51,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
51 | transaction->t_tid = journal->j_transaction_sequence++; | 51 | transaction->t_tid = journal->j_transaction_sequence++; |
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | INIT_LIST_HEAD(&transaction->t_inode_list); | ||
54 | 55 | ||
55 | /* Set up the commit timer for the new transaction. */ | 56 | /* Set up the commit timer for the new transaction. */ |
56 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
@@ -2195,3 +2196,88 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
2195 | spin_unlock(&journal->j_list_lock); | 2196 | spin_unlock(&journal->j_list_lock); |
2196 | __brelse(bh); | 2197 | __brelse(bh); |
2197 | } | 2198 | } |
2199 | |||
2200 | /* | ||
2201 | * File inode in the inode list of the handle's transaction | ||
2202 | */ | ||
2203 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | ||
2204 | { | ||
2205 | transaction_t *transaction = handle->h_transaction; | ||
2206 | journal_t *journal = transaction->t_journal; | ||
2207 | |||
2208 | if (is_handle_aborted(handle)) | ||
2209 | return -EIO; | ||
2210 | |||
2211 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | ||
2212 | transaction->t_tid); | ||
2213 | |||
2214 | /* | ||
2215 | * First check whether inode isn't already on the transaction's | ||
2216 | * lists without taking the lock. Note that this check is safe | ||
2217 | * without the lock as we cannot race with somebody removing inode | ||
2218 | * from the transaction. The reason is that we remove inode from the | ||
2219 | * transaction only in journal_release_jbd_inode() and when we commit | ||
2220 | * the transaction. We are guarded from the first case by holding | ||
2221 | * a reference to the inode. We are safe against the second case | ||
2222 | * because if jinode->i_transaction == transaction, commit code | ||
2223 | * cannot touch the transaction because we hold reference to it, | ||
2224 | * and if jinode->i_next_transaction == transaction, commit code | ||
2225 | * will only file the inode where we want it. | ||
2226 | */ | ||
2227 | if (jinode->i_transaction == transaction || | ||
2228 | jinode->i_next_transaction == transaction) | ||
2229 | return 0; | ||
2230 | |||
2231 | spin_lock(&journal->j_list_lock); | ||
2232 | |||
2233 | if (jinode->i_transaction == transaction || | ||
2234 | jinode->i_next_transaction == transaction) | ||
2235 | goto done; | ||
2236 | |||
2237 | /* On some different transaction's list - should be | ||
2238 | * the committing one */ | ||
2239 | if (jinode->i_transaction) { | ||
2240 | J_ASSERT(jinode->i_next_transaction == NULL); | ||
2241 | J_ASSERT(jinode->i_transaction == | ||
2242 | journal->j_committing_transaction); | ||
2243 | jinode->i_next_transaction = transaction; | ||
2244 | goto done; | ||
2245 | } | ||
2246 | /* Not on any transaction list... */ | ||
2247 | J_ASSERT(!jinode->i_next_transaction); | ||
2248 | jinode->i_transaction = transaction; | ||
2249 | list_add(&jinode->i_list, &transaction->t_inode_list); | ||
2250 | done: | ||
2251 | spin_unlock(&journal->j_list_lock); | ||
2252 | |||
2253 | return 0; | ||
2254 | } | ||
2255 | |||
2256 | /* | ||
2257 | * This function must be called when inode is journaled in ordered mode | ||
2258 | * before truncation happens. It starts writeout of truncated part in | ||
2259 | * case it is in the committing transaction so that we stand to ordered | ||
2260 | * mode consistency guarantees. | ||
2261 | */ | ||
2262 | int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
2263 | loff_t new_size) | ||
2264 | { | ||
2265 | journal_t *journal; | ||
2266 | transaction_t *commit_trans; | ||
2267 | int ret = 0; | ||
2268 | |||
2269 | if (!inode->i_transaction && !inode->i_next_transaction) | ||
2270 | goto out; | ||
2271 | journal = inode->i_transaction->t_journal; | ||
2272 | spin_lock(&journal->j_state_lock); | ||
2273 | commit_trans = journal->j_committing_transaction; | ||
2274 | spin_unlock(&journal->j_state_lock); | ||
2275 | if (inode->i_transaction == commit_trans) { | ||
2276 | ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping, | ||
2277 | new_size, LLONG_MAX); | ||
2278 | if (ret) | ||
2279 | jbd2_journal_abort(journal, ret); | ||
2280 | } | ||
2281 | out: | ||
2282 | return ret; | ||
2283 | } | ||
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ec9cadf58227..622c3d8ca4ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -381,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |||
381 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | 381 | bit_spin_unlock(BH_JournalHead, &bh->b_state); |
382 | } | 382 | } |
383 | 383 | ||
384 | /* Flags in jbd_inode->i_flags */ | ||
385 | #define __JI_COMMIT_RUNNING 0 | ||
386 | /* Commit of the inode data in progress. We use this flag to protect us from | ||
387 | * concurrent deletion of inode. We cannot use reference to inode for this | ||
388 | * since we cannot afford doing last iput() on behalf of kjournald | ||
389 | */ | ||
390 | #define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) | ||
391 | |||
392 | /** | ||
393 | * struct jbd_inode is the structure linking inodes in ordered mode | ||
394 | * present in a transaction so that we can sync them during commit. | ||
395 | */ | ||
396 | struct jbd2_inode { | ||
397 | /* Which transaction does this inode belong to? Either the running | ||
398 | * transaction or the committing one. [j_list_lock] */ | ||
399 | transaction_t *i_transaction; | ||
400 | |||
401 | /* Pointer to the running transaction modifying inode's data in case | ||
402 | * there is already a committing transaction touching it. [j_list_lock] */ | ||
403 | transaction_t *i_next_transaction; | ||
404 | |||
405 | /* List of inodes in the i_transaction [j_list_lock] */ | ||
406 | struct list_head i_list; | ||
407 | |||
408 | /* VFS inode this inode belongs to [constant during the lifetime | ||
409 | * of the structure] */ | ||
410 | struct inode *i_vfs_inode; | ||
411 | |||
412 | /* Flags of inode [j_list_lock] */ | ||
413 | unsigned int i_flags; | ||
414 | }; | ||
415 | |||
384 | struct jbd2_revoke_table_s; | 416 | struct jbd2_revoke_table_s; |
385 | 417 | ||
386 | /** | 418 | /** |
@@ -567,6 +599,12 @@ struct transaction_s | |||
567 | struct journal_head *t_log_list; | 599 | struct journal_head *t_log_list; |
568 | 600 | ||
569 | /* | 601 | /* |
602 | * List of inodes whose data we've modified in data=ordered mode. | ||
603 | * [j_list_lock] | ||
604 | */ | ||
605 | struct list_head t_inode_list; | ||
606 | |||
607 | /* | ||
570 | * Protects info related to handles | 608 | * Protects info related to handles |
571 | */ | 609 | */ |
572 | spinlock_t t_handle_lock; | 610 | spinlock_t t_handle_lock; |
@@ -1046,6 +1084,10 @@ extern void jbd2_journal_ack_err (journal_t *); | |||
1046 | extern int jbd2_journal_clear_err (journal_t *); | 1084 | extern int jbd2_journal_clear_err (journal_t *); |
1047 | extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); | 1085 | extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); |
1048 | extern int jbd2_journal_force_commit(journal_t *); | 1086 | extern int jbd2_journal_force_commit(journal_t *); |
1087 | extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); | ||
1088 | extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); | ||
1089 | extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); | ||
1090 | extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); | ||
1049 | 1091 | ||
1050 | /* | 1092 | /* |
1051 | * journal_head management | 1093 | * journal_head management |