summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ross Zwisler <zwisler@chromium.org> 2019-06-20 17:24:56 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2019-06-20 17:24:56 -0400
commit: 6ba0e7dc64a5adcda2fbe65adc466891795d639e (patch)
tree: 0ed5070630e27f3fe48300661871fb2ebf8ddf80
parent: aa0bfcd939c30617385ffa28682c062d78050eba (diff)
jbd2: introduce jbd2_inode dirty range scoping
Currently both journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() operate on the entire address space of each of the inodes associated with a given journal entry. The consequence of this is that if we have an inode where we are constantly appending dirty pages we can end up waiting for an indefinite amount of time in journal_finish_inode_data_buffers() while we wait for all the pages under writeback to be written out. The easiest way to cause this type of workload is to just dd from /dev/zero to a file until it fills the entire filesystem. This can cause journal_finish_inode_data_buffers() to wait for the duration of the entire dd operation. We can improve this situation by scoping each of the inode dirty ranges associated with a given transaction. We do this via the jbd2_inode structure so that the scoping is contained within jbd2 and so that it follows the lifetime and locking rules for that structure. This allows us to limit the writeback & wait in journal_submit_inode_data_buffers() and journal_finish_inode_data_buffers() respectively to the dirty range for a given struct jbd2_inode, keeping us from waiting forever if the inode in question is still being appended to. Signed-off-by: Ross Zwisler <zwisler@google.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz> Cc: stable@vger.kernel.org
-rw-r--r-- fs/jbd2/commit.c 23
-rw-r--r-- fs/jbd2/journal.c 4
-rw-r--r-- fs/jbd2/transaction.c 49
-rw-r--r-- include/linux/jbd2.h 22
4 files changed, 71 insertions, 27 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c8c1d6cc6e5d..132fb92098c7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal,
187 * use writepages() because with delayed allocation we may be doing 187 * use writepages() because with delayed allocation we may be doing
188 * block allocation in writepages(). 188 * block allocation in writepages().
189 */ 189 */
190static int journal_submit_inode_data_buffers(struct address_space *mapping) 190static int journal_submit_inode_data_buffers(struct address_space *mapping,
191 loff_t dirty_start, loff_t dirty_end)
191{ 192{
192 int ret; 193 int ret;
193 struct writeback_control wbc = { 194 struct writeback_control wbc = {
194 .sync_mode = WB_SYNC_ALL, 195 .sync_mode = WB_SYNC_ALL,
195 .nr_to_write = mapping->nrpages * 2, 196 .nr_to_write = mapping->nrpages * 2,
196 .range_start = 0, 197 .range_start = dirty_start,
197 .range_end = i_size_read(mapping->host), 198 .range_end = dirty_end,
198 }; 199 };
199 200
200 ret = generic_writepages(mapping, &wbc); 201 ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
218 219
219 spin_lock(&journal->j_list_lock); 220 spin_lock(&journal->j_list_lock);
220 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 221 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
222 loff_t dirty_start = jinode->i_dirty_start;
223 loff_t dirty_end = jinode->i_dirty_end;
224
221 if (!(jinode->i_flags & JI_WRITE_DATA)) 225 if (!(jinode->i_flags & JI_WRITE_DATA))
222 continue; 226 continue;
223 mapping = jinode->i_vfs_inode->i_mapping; 227 mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
230 * only allocated blocks here. 234 * only allocated blocks here.
231 */ 235 */
232 trace_jbd2_submit_inode_data(jinode->i_vfs_inode); 236 trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
233 err = journal_submit_inode_data_buffers(mapping); 237 err = journal_submit_inode_data_buffers(mapping, dirty_start,
238 dirty_end);
234 if (!ret) 239 if (!ret)
235 ret = err; 240 ret = err;
236 spin_lock(&journal->j_list_lock); 241 spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
257 /* For locking, see the comment in journal_submit_data_buffers() */ 262 /* For locking, see the comment in journal_submit_data_buffers() */
258 spin_lock(&journal->j_list_lock); 263 spin_lock(&journal->j_list_lock);
259 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 264 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
265 loff_t dirty_start = jinode->i_dirty_start;
266 loff_t dirty_end = jinode->i_dirty_end;
267
260 if (!(jinode->i_flags & JI_WAIT_DATA)) 268 if (!(jinode->i_flags & JI_WAIT_DATA))
261 continue; 269 continue;
262 jinode->i_flags |= JI_COMMIT_RUNNING; 270 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock); 271 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait_keep_errors( 272 err = filemap_fdatawait_range_keep_errors(
265 jinode->i_vfs_inode->i_mapping); 273 jinode->i_vfs_inode->i_mapping, dirty_start,
274 dirty_end);
266 if (!ret) 275 if (!ret)
267 ret = err; 276 ret = err;
268 spin_lock(&journal->j_list_lock); 277 spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
282 &jinode->i_transaction->t_inode_list); 291 &jinode->i_transaction->t_inode_list);
283 } else { 292 } else {
284 jinode->i_transaction = NULL; 293 jinode->i_transaction = NULL;
294 jinode->i_dirty_start = 0;
295 jinode->i_dirty_end = 0;
285 } 296 }
286 } 297 }
287 spin_unlock(&journal->j_list_lock); 298 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 38b426c5ed03..17f679aeba7c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -94,6 +94,8 @@ EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
94EXPORT_SYMBOL(jbd2_journal_force_commit); 94EXPORT_SYMBOL(jbd2_journal_force_commit);
95EXPORT_SYMBOL(jbd2_journal_inode_add_write); 95EXPORT_SYMBOL(jbd2_journal_inode_add_write);
96EXPORT_SYMBOL(jbd2_journal_inode_add_wait); 96EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
97EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
98EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
97EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 99EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
98EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 100EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
99EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 101EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
@@ -2574,6 +2576,8 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
2574 jinode->i_next_transaction = NULL; 2576 jinode->i_next_transaction = NULL;
2575 jinode->i_vfs_inode = inode; 2577 jinode->i_vfs_inode = inode;
2576 jinode->i_flags = 0; 2578 jinode->i_flags = 0;
2579 jinode->i_dirty_start = 0;
2580 jinode->i_dirty_end = 0;
2577 INIT_LIST_HEAD(&jinode->i_list); 2581 INIT_LIST_HEAD(&jinode->i_list);
2578} 2582}
2579 2583
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 8ca4fddc705f..990e7b5062e7 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2565,7 +2565,7 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
2565 * File inode in the inode list of the handle's transaction 2565 * File inode in the inode list of the handle's transaction
2566 */ 2566 */
2567static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, 2567static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
2568 unsigned long flags) 2568 unsigned long flags, loff_t start_byte, loff_t end_byte)
2569{ 2569{
2570 transaction_t *transaction = handle->h_transaction; 2570 transaction_t *transaction = handle->h_transaction;
2571 journal_t *journal; 2571 journal_t *journal;
@@ -2577,26 +2577,17 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
2577 jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, 2577 jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
2578 transaction->t_tid); 2578 transaction->t_tid);
2579 2579
2580 /*
2581 * First check whether inode isn't already on the transaction's
2582 * lists without taking the lock. Note that this check is safe
2583 * without the lock as we cannot race with somebody removing inode
2584 * from the transaction. The reason is that we remove inode from the
2585 * transaction only in journal_release_jbd_inode() and when we commit
2586 * the transaction. We are guarded from the first case by holding
2587 * a reference to the inode. We are safe against the second case
2588 * because if jinode->i_transaction == transaction, commit code
2589 * cannot touch the transaction because we hold reference to it,
2590 * and if jinode->i_next_transaction == transaction, commit code
2591 * will only file the inode where we want it.
2592 */
2593 if ((jinode->i_transaction == transaction ||
2594 jinode->i_next_transaction == transaction) &&
2595 (jinode->i_flags & flags) == flags)
2596 return 0;
2597
2598 spin_lock(&journal->j_list_lock); 2580 spin_lock(&journal->j_list_lock);
2599 jinode->i_flags |= flags; 2581 jinode->i_flags |= flags;
2582
2583 if (jinode->i_dirty_end) {
2584 jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
2585 jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
2586 } else {
2587 jinode->i_dirty_start = start_byte;
2588 jinode->i_dirty_end = end_byte;
2589 }
2590
2600 /* Is inode already attached where we need it? */ 2591 /* Is inode already attached where we need it? */
2601 if (jinode->i_transaction == transaction || 2592 if (jinode->i_transaction == transaction ||
2602 jinode->i_next_transaction == transaction) 2593 jinode->i_next_transaction == transaction)
@@ -2631,12 +2622,28 @@ done:
2631int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) 2622int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
2632{ 2623{
2633 return jbd2_journal_file_inode(handle, jinode, 2624 return jbd2_journal_file_inode(handle, jinode,
2634 JI_WRITE_DATA | JI_WAIT_DATA); 2625 JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
2635} 2626}
2636 2627
2637int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) 2628int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
2638{ 2629{
2639 return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA); 2630 return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
2631 LLONG_MAX);
2632}
2633
2634int jbd2_journal_inode_ranged_write(handle_t *handle,
2635 struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
2636{
2637 return jbd2_journal_file_inode(handle, jinode,
2638 JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
2639 start_byte + length - 1);
2640}
2641
2642int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
2643 loff_t start_byte, loff_t length)
2644{
2645 return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
2646 start_byte, start_byte + length - 1);
2640} 2647}
2641 2648
2642/* 2649/*
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 5c04181b7c6d..0e0393e7f41a 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -451,6 +451,22 @@ struct jbd2_inode {
451 * @i_flags: Flags of inode [j_list_lock] 451 * @i_flags: Flags of inode [j_list_lock]
452 */ 452 */
453 unsigned long i_flags; 453 unsigned long i_flags;
454
455 /**
456 * @i_dirty_start:
457 *
458 * Offset in bytes where the dirty range for this inode starts.
459 * [j_list_lock]
460 */
461 loff_t i_dirty_start;
462
463 /**
464 * @i_dirty_end:
465 *
466 * Inclusive offset in bytes where the dirty range for this inode
467 * ends. [j_list_lock]
468 */
469 loff_t i_dirty_end;
454}; 470};
455 471
456struct jbd2_revoke_table_s; 472struct jbd2_revoke_table_s;
@@ -1397,6 +1413,12 @@ extern int jbd2_journal_force_commit(journal_t *);
1397extern int jbd2_journal_force_commit_nested(journal_t *); 1413extern int jbd2_journal_force_commit_nested(journal_t *);
1398extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); 1414extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
1399extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); 1415extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
1416extern int jbd2_journal_inode_ranged_write(handle_t *handle,
1417 struct jbd2_inode *inode, loff_t start_byte,
1418 loff_t length);
1419extern int jbd2_journal_inode_ranged_wait(handle_t *handle,
1420 struct jbd2_inode *inode, loff_t start_byte,
1421 loff_t length);
1400extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, 1422extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
1401 struct jbd2_inode *inode, loff_t new_size); 1423 struct jbd2_inode *inode, loff_t new_size);
1402extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); 1424extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);