path: root/fs/jbd2/commit.c
author		Ross Zwisler <zwisler@chromium.org>	2019-06-20 17:24:56 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2019-06-20 17:24:56 -0400
commit		6ba0e7dc64a5adcda2fbe65adc466891795d639e
tree		0ed5070630e27f3fe48300661871fb2ebf8ddf80 /fs/jbd2/commit.c
parent		aa0bfcd939c30617385ffa28682c062d78050eba
jbd2: introduce jbd2_inode dirty range scoping
Currently both journal_submit_inode_data_buffers() and
journal_finish_inode_data_buffers() operate on the entire address space
of each of the inodes associated with a given journal entry. The
consequence of this is that if we have an inode where we are constantly
appending dirty pages we can end up waiting for an indefinite amount of
time in journal_finish_inode_data_buffers() while we wait for all the
pages under writeback to be written out.

The easiest way to cause this type of workload is to just dd from
/dev/zero to a file until it fills the entire filesystem. This can
cause journal_finish_inode_data_buffers() to wait for the duration of
the entire dd operation.

We can improve this situation by scoping each of the inode dirty
ranges associated with a given transaction. We do this via the
jbd2_inode structure so that the scoping is contained within jbd2 and
so that it follows the lifetime and locking rules for that structure.

This allows us to limit the writeback & wait in
journal_submit_inode_data_buffers() and
journal_finish_inode_data_buffers() respectively to the dirty range for
a given struct jbd2_inode, keeping us from waiting forever if the inode
in question is still being appended to.

Signed-off-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: stable@vger.kernel.org
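To make the mechanism concrete, here is a minimal, self-contained sketch
of the bookkeeping behind dirty range scoping. This is illustrative
only, not code from this patch: the struct name, helper name, and the
standalone loff_t typedef are assumptions made so the sketch compiles
outside the kernel. It mirrors the jbd2_inode fields i_dirty_start and
i_dirty_end that the diff below consumes and resets.

	/*
	 * Illustrative sketch only -- not code from this patch. Each
	 * transaction tracks the smallest interval
	 * [i_dirty_start, i_dirty_end] covering every byte dirtied
	 * against the inode; the commit path can then bound its
	 * writeback and wait to exactly that interval.
	 */
	typedef long long loff_t;	/* stand-in for the kernel's loff_t */

	struct jbd2_inode_sketch {
		loff_t i_dirty_start;	/* lowest offset dirtied this transaction */
		loff_t i_dirty_end;	/* highest offset dirtied this transaction */
		int in_transaction;	/* has the range been seeded yet? */
	};

	/* Hypothetical helper: called each time [start, end] is dirtied. */
	static void sketch_update_dirty_range(struct jbd2_inode_sketch *ji,
					      loff_t start, loff_t end)
	{
		if (!ji->in_transaction) {
			/* First dirtying in this transaction seeds the range. */
			ji->i_dirty_start = start;
			ji->i_dirty_end = end;
			ji->in_transaction = 1;
			return;
		}
		/* Subsequent dirtying only ever widens the range. */
		if (start < ji->i_dirty_start)
			ji->i_dirty_start = start;
		if (end > ji->i_dirty_end)
			ji->i_dirty_end = end;
	}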
Diffstat (limited to 'fs/jbd2/commit.c')
-rw-r--r--	fs/jbd2/commit.c | 23
1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index c8c1d6cc6e5d..132fb92098c7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -187,14 +187,15 @@ static int journal_wait_on_commit_record(journal_t *journal,
  * use writepages() because with delayed allocation we may be doing
  * block allocation in writepages().
  */
-static int journal_submit_inode_data_buffers(struct address_space *mapping)
+static int journal_submit_inode_data_buffers(struct address_space *mapping,
+		loff_t dirty_start, loff_t dirty_end)
 {
 	int ret;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
 		.nr_to_write = mapping->nrpages * 2,
-		.range_start = 0,
-		.range_end = i_size_read(mapping->host),
+		.range_start = dirty_start,
+		.range_end = dirty_end,
 	};
 
 	ret = generic_writepages(mapping, &wbc);
@@ -218,6 +219,9 @@ static int journal_submit_data_buffers(journal_t *journal,
 
 	spin_lock(&journal->j_list_lock);
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+		loff_t dirty_start = jinode->i_dirty_start;
+		loff_t dirty_end = jinode->i_dirty_end;
+
 		if (!(jinode->i_flags & JI_WRITE_DATA))
 			continue;
 		mapping = jinode->i_vfs_inode->i_mapping;
@@ -230,7 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 		 * only allocated blocks here.
 		 */
 		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
-		err = journal_submit_inode_data_buffers(mapping);
+		err = journal_submit_inode_data_buffers(mapping, dirty_start,
+				dirty_end);
 		if (!ret)
 			ret = err;
 		spin_lock(&journal->j_list_lock);
@@ -257,12 +262,16 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 	/* For locking, see the comment in journal_submit_data_buffers() */
 	spin_lock(&journal->j_list_lock);
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
+		loff_t dirty_start = jinode->i_dirty_start;
+		loff_t dirty_end = jinode->i_dirty_end;
+
 		if (!(jinode->i_flags & JI_WAIT_DATA))
 			continue;
 		jinode->i_flags |= JI_COMMIT_RUNNING;
 		spin_unlock(&journal->j_list_lock);
-		err = filemap_fdatawait_keep_errors(
-				jinode->i_vfs_inode->i_mapping);
+		err = filemap_fdatawait_range_keep_errors(
+				jinode->i_vfs_inode->i_mapping, dirty_start,
+				dirty_end);
 		if (!ret)
 			ret = err;
 		spin_lock(&journal->j_list_lock);
@@ -282,6 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 				&jinode->i_transaction->t_inode_list);
 		} else {
 			jinode->i_transaction = NULL;
+			jinode->i_dirty_start = 0;
+			jinode->i_dirty_end = 0;
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
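As a quick usage check of the sketch given after the commit message
(again illustrative; it reuses the hypothetical jbd2_inode_sketch type
and helper defined there), an appending writer only ever widens the
tracked window, a "commit" snapshots and services just that window --
the same pattern the hunks above apply via
journal_submit_inode_data_buffers() and
filemap_fdatawait_range_keep_errors() -- and the range is then zeroed
exactly as the final hunk does:

	#include <stdio.h>

	int main(void)
	{
		struct jbd2_inode_sketch ji = { 0 };

		/* Appending writer: each call dirties the next 4096-byte page. */
		for (long long off = 0; off < 5 * 4096; off += 4096)
			sketch_update_dirty_range(&ji, off, off + 4095);

		/* "Commit": snapshot the range and work only inside it... */
		long long dirty_start = ji.i_dirty_start;
		long long dirty_end = ji.i_dirty_end;
		printf("writeback + wait limited to [%lld, %lld]\n",
		       dirty_start, dirty_end);

		/* ...then reset, mirroring the final hunk of the diff. */
		ji.i_dirty_start = 0;
		ji.i_dirty_end = 0;
		ji.in_transaction = 0;
		return 0;
	}

The point of the snapshot is the same as in the real commit path: pages
dirtied after the snapshot belong to the next transaction's range, so a
file that is still being appended to no longer extends the current
commit's wait indefinitely.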