diff options
author | Tejun Heo <tj@kernel.org> | 2017-12-12 11:38:30 -0500 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2019-01-22 16:39:38 -0500 |
commit | 7fc5854f8c6efae9e7624970ab49a1eac2faefb1 (patch) | |
tree | 7c9ac9ab43c0a47dbaefd155da89c8ba7f7923db /fs/fs-writeback.c | |
parent | 698cef173983b086977e633e46476e0f925ca01e (diff) |
writeback: synchronize sync(2) against cgroup writeback membership switches
sync_inodes_sb() can race against cgwb (cgroup writeback) membership
switches and fail to writeback some inodes. For example, if an inode
switches to another wb while sync_inodes_sb() is in progress, the new
wb might not be visible to bdi_split_work_to_wbs() at all or the inode
might jump from a wb which hasn't issued writebacks yet to one which
already has.
This patch adds backing_dev_info->wb_switch_rwsem to synchronize cgwb
switch path against sync_inodes_sb() so that sync_inodes_sb() is
guaranteed to see all the target wbs and inodes can't jump wbs to
escape syncing.
v2: Fixed misplaced rwsem init. Spotted by Jiufei.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Jiufei Xue <xuejiufei@gmail.com>
Link: http://lkml.kernel.org/r/dc694ae2-f07f-61e1-7097-7c8411cee12d@gmail.com
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b40168fcc94a..36855c1f8daf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -331,11 +331,22 @@ struct inode_switch_wbs_context { | |||
331 | struct work_struct work; | 331 | struct work_struct work; |
332 | }; | 332 | }; |
333 | 333 | ||
334 | static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) | ||
335 | { | ||
336 | down_write(&bdi->wb_switch_rwsem); | ||
337 | } | ||
338 | |||
339 | static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) | ||
340 | { | ||
341 | up_write(&bdi->wb_switch_rwsem); | ||
342 | } | ||
343 | |||
334 | static void inode_switch_wbs_work_fn(struct work_struct *work) | 344 | static void inode_switch_wbs_work_fn(struct work_struct *work) |
335 | { | 345 | { |
336 | struct inode_switch_wbs_context *isw = | 346 | struct inode_switch_wbs_context *isw = |
337 | container_of(work, struct inode_switch_wbs_context, work); | 347 | container_of(work, struct inode_switch_wbs_context, work); |
338 | struct inode *inode = isw->inode; | 348 | struct inode *inode = isw->inode; |
349 | struct backing_dev_info *bdi = inode_to_bdi(inode); | ||
339 | struct address_space *mapping = inode->i_mapping; | 350 | struct address_space *mapping = inode->i_mapping; |
340 | struct bdi_writeback *old_wb = inode->i_wb; | 351 | struct bdi_writeback *old_wb = inode->i_wb; |
341 | struct bdi_writeback *new_wb = isw->new_wb; | 352 | struct bdi_writeback *new_wb = isw->new_wb; |
@@ -344,6 +355,12 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) | |||
344 | bool switched = false; | 355 | bool switched = false; |
345 | 356 | ||
346 | /* | 357 | /* |
358 | * If @inode switches cgwb membership while sync_inodes_sb() is | ||
359 | * being issued, sync_inodes_sb() might miss it. Synchronize. | ||
360 | */ | ||
361 | down_read(&bdi->wb_switch_rwsem); | ||
362 | |||
363 | /* | ||
347 | * By the time control reaches here, RCU grace period has passed | 364 | * By the time control reaches here, RCU grace period has passed |
348 | * since I_WB_SWITCH assertion and all wb stat update transactions | 365 | * since I_WB_SWITCH assertion and all wb stat update transactions |
349 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be | 366 | * between unlocked_inode_to_wb_begin/end() are guaranteed to be |
@@ -428,6 +445,8 @@ skip_switch: | |||
428 | spin_unlock(&new_wb->list_lock); | 445 | spin_unlock(&new_wb->list_lock); |
429 | spin_unlock(&old_wb->list_lock); | 446 | spin_unlock(&old_wb->list_lock); |
430 | 447 | ||
448 | up_read(&bdi->wb_switch_rwsem); | ||
449 | |||
431 | if (switched) { | 450 | if (switched) { |
432 | wb_wakeup(new_wb); | 451 | wb_wakeup(new_wb); |
433 | wb_put(old_wb); | 452 | wb_put(old_wb); |
@@ -468,9 +487,18 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
468 | if (inode->i_state & I_WB_SWITCH) | 487 | if (inode->i_state & I_WB_SWITCH) |
469 | return; | 488 | return; |
470 | 489 | ||
490 | /* | ||
491 | * Avoid starting new switches while sync_inodes_sb() is in | ||
492 | * progress. Otherwise, if the down_write protected issue path | ||
493 | * blocks heavily, we might end up starting a large number of | ||
494 | * switches which will block on the rwsem. | ||
495 | */ | ||
496 | if (!down_read_trylock(&bdi->wb_switch_rwsem)) | ||
497 | return; | ||
498 | |||
471 | isw = kzalloc(sizeof(*isw), GFP_ATOMIC); | 499 | isw = kzalloc(sizeof(*isw), GFP_ATOMIC); |
472 | if (!isw) | 500 | if (!isw) |
473 | return; | 501 | goto out_unlock; |
474 | 502 | ||
475 | /* find and pin the new wb */ | 503 | /* find and pin the new wb */ |
476 | rcu_read_lock(); | 504 | rcu_read_lock(); |
@@ -504,12 +532,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) | |||
504 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. | 532 | * Let's continue after I_WB_SWITCH is guaranteed to be visible. |
505 | */ | 533 | */ |
506 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); | 534 | call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); |
507 | return; | 535 | goto out_unlock; |
508 | 536 | ||
509 | out_free: | 537 | out_free: |
510 | if (isw->new_wb) | 538 | if (isw->new_wb) |
511 | wb_put(isw->new_wb); | 539 | wb_put(isw->new_wb); |
512 | kfree(isw); | 540 | kfree(isw); |
541 | out_unlock: | ||
542 | up_read(&bdi->wb_switch_rwsem); | ||
513 | } | 543 | } |
514 | 544 | ||
515 | /** | 545 | /** |
@@ -887,6 +917,9 @@ fs_initcall(cgroup_writeback_init); | |||
887 | 917 | ||
888 | #else /* CONFIG_CGROUP_WRITEBACK */ | 918 | #else /* CONFIG_CGROUP_WRITEBACK */ |
889 | 919 | ||
920 | static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } | ||
921 | static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { } | ||
922 | |||
890 | static struct bdi_writeback * | 923 | static struct bdi_writeback * |
891 | locked_inode_to_wb_and_lock_list(struct inode *inode) | 924 | locked_inode_to_wb_and_lock_list(struct inode *inode) |
892 | __releases(&inode->i_lock) | 925 | __releases(&inode->i_lock) |
@@ -2413,8 +2446,11 @@ void sync_inodes_sb(struct super_block *sb) | |||
2413 | return; | 2446 | return; |
2414 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 2447 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
2415 | 2448 | ||
2449 | /* protect against inode wb switch, see inode_switch_wbs_work_fn() */ | ||
2450 | bdi_down_write_wb_switch_rwsem(bdi); | ||
2416 | bdi_split_work_to_wbs(bdi, &work, false); | 2451 | bdi_split_work_to_wbs(bdi, &work, false); |
2417 | wb_wait_for_completion(bdi, &done); | 2452 | wb_wait_for_completion(bdi, &done); |
2453 | bdi_up_write_wb_switch_rwsem(bdi); | ||
2418 | 2454 | ||
2419 | wait_sb_inodes(sb); | 2455 | wait_sb_inodes(sb); |
2420 | } | 2456 | } |