aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2016-02-29 18:28:53 -0500
committerJens Axboe <axboe@fb.com>2016-03-03 16:42:50 -0500
commita1a0e23e49037c23ea84bc8cc146a03584d13577 (patch)
treeb8bb7792ed9dd97358bdea0a6a2f36fdb617bbf0 /fs
parente9fc63d682dbbef17921aeb00d03fd52d6735ffd (diff)
writeback: flush inode cgroup wb switches instead of pinning super_block
If cgroup writeback is in use, inodes can be scheduled for asynchronous wb switching. Before 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches"), this could race with umount leading to super_block being destroyed while inodes are pinned for wb switching. 5ff8eaac1636 fixed it by bumping s_active while wb switches are in flight; however, this allowed in-flight wb switches to make umounts asynchronous when the userland expected synchronosity - e.g. fsck immediately following umount may fail because the device is still busy. This patch removes the problematic super_block pinning and instead makes generic_shutdown_super() flush in-flight wb switches. wb switches are now executed on a dedicated isw_wq so that they can be flushed and isw_nr_in_flight keeps track of the number of in-flight wb switches so that flushing can be avoided in most cases. v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE check in inode_switch_wbs() as Jan an Al suggested. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Tahsin Erdogan <tahsin@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Al Viro <viro@ZenIV.linux.org.uk> Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches") Cc: stable@vger.kernel.org #v4.5 Reviewed-by: Jan Kara <jack@suse.cz> Tested-by: Tahsin Erdogan <tahsin@google.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/fs-writeback.c54
-rw-r--r--fs/super.c1
2 files changed, 42 insertions, 13 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d8950a57..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) 223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
224 /* one round can affect upto 5 slots */ 224 /* one round can affect upto 5 slots */
225 225
226static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
227static struct workqueue_struct *isw_wq;
228
226void __inode_attach_wb(struct inode *inode, struct page *page) 229void __inode_attach_wb(struct inode *inode, struct page *page)
227{ 230{
228 struct backing_dev_info *bdi = inode_to_bdi(inode); 231 struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
317 struct inode_switch_wbs_context *isw = 320 struct inode_switch_wbs_context *isw =
318 container_of(work, struct inode_switch_wbs_context, work); 321 container_of(work, struct inode_switch_wbs_context, work);
319 struct inode *inode = isw->inode; 322 struct inode *inode = isw->inode;
320 struct super_block *sb = inode->i_sb;
321 struct address_space *mapping = inode->i_mapping; 323 struct address_space *mapping = inode->i_mapping;
322 struct bdi_writeback *old_wb = inode->i_wb; 324 struct bdi_writeback *old_wb = inode->i_wb;
323 struct bdi_writeback *new_wb = isw->new_wb; 325 struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@ skip_switch:
424 wb_put(new_wb); 426 wb_put(new_wb);
425 427
426 iput(inode); 428 iput(inode);
427 deactivate_super(sb);
428 kfree(isw); 429 kfree(isw);
430
431 atomic_dec(&isw_nr_in_flight);
429} 432}
430 433
431static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) 434static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
435 438
436 /* needs to grab bh-unsafe locks, bounce to work item */ 439 /* needs to grab bh-unsafe locks, bounce to work item */
437 INIT_WORK(&isw->work, inode_switch_wbs_work_fn); 440 INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
438 schedule_work(&isw->work); 441 queue_work(isw_wq, &isw->work);
439} 442}
440 443
441/** 444/**
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
471 474
472 /* while holding I_WB_SWITCH, no one else can update the association */ 475 /* while holding I_WB_SWITCH, no one else can update the association */
473 spin_lock(&inode->i_lock); 476 spin_lock(&inode->i_lock);
474 477 if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
475 if (inode->i_state & (I_WB_SWITCH | I_FREEING) || 478 inode->i_state & (I_WB_SWITCH | I_FREEING) ||
476 inode_to_wb(inode) == isw->new_wb) 479 inode_to_wb(inode) == isw->new_wb) {
477 goto out_unlock; 480 spin_unlock(&inode->i_lock);
478 481 goto out_free;
479 if (!atomic_inc_not_zero(&inode->i_sb->s_active)) 482 }
480 goto out_unlock;
481
482 inode->i_state |= I_WB_SWITCH; 483 inode->i_state |= I_WB_SWITCH;
483 spin_unlock(&inode->i_lock); 484 spin_unlock(&inode->i_lock);
484 485
485 ihold(inode); 486 ihold(inode);
486 isw->inode = inode; 487 isw->inode = inode;
487 488
489 atomic_inc(&isw_nr_in_flight);
490
488 /* 491 /*
489 * In addition to synchronizing among switchers, I_WB_SWITCH tells 492 * In addition to synchronizing among switchers, I_WB_SWITCH tells
490 * the RCU protected stat update paths to grab the mapping's 493 * the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
494 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); 497 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
495 return; 498 return;
496 499
497out_unlock:
498 spin_unlock(&inode->i_lock);
499out_free: 500out_free:
500 if (isw->new_wb) 501 if (isw->new_wb)
501 wb_put(isw->new_wb); 502 wb_put(isw->new_wb);
@@ -847,6 +848,33 @@ restart:
847 wb_put(last_wb); 848 wb_put(last_wb);
848} 849}
849 850
851/**
852 * cgroup_writeback_umount - flush inode wb switches for umount
853 *
854 * This function is called when a super_block is about to be destroyed and
855 * flushes in-flight inode wb switches. An inode wb switch goes through
856 * RCU and then workqueue, so the two need to be flushed in order to ensure
857 * that all previously scheduled switches are finished. As wb switches are
858 * rare occurrences and synchronize_rcu() can take a while, perform
859 * flushing iff wb switches are in flight.
860 */
861void cgroup_writeback_umount(void)
862{
863 if (atomic_read(&isw_nr_in_flight)) {
864 synchronize_rcu();
865 flush_workqueue(isw_wq);
866 }
867}
868
869static int __init cgroup_writeback_init(void)
870{
871 isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
872 if (!isw_wq)
873 return -ENOMEM;
874 return 0;
875}
876fs_initcall(cgroup_writeback_init);
877
850#else /* CONFIG_CGROUP_WRITEBACK */ 878#else /* CONFIG_CGROUP_WRITEBACK */
851 879
852static struct bdi_writeback * 880static struct bdi_writeback *
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
415 sb->s_flags &= ~MS_ACTIVE; 415 sb->s_flags &= ~MS_ACTIVE;
416 416
417 fsnotify_unmount_inodes(sb); 417 fsnotify_unmount_inodes(sb);
418 cgroup_writeback_umount();
418 419
419 evict_inodes(sb); 420 evict_inodes(sb);
420 421