about summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-04 21:17:17 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-04 21:17:17 -0500
commite5322c54068162846fdbac0f0891cd95f37d4c4e (patch)
tree21d22ea5f1226073e1e106053d16a055be95c387 /fs
parentbdf9d29799296fd787a9d2b99cb8feff77e2b9db (diff)
parent4d6af73d9e43f78651a43ee4c5ad221107ac8365 (diff)
Merge branch 'for-linus2' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Round 2 of this. I cut back to the bare necessities, the patch is
  still larger than it usually would be at this time, due to the number
  of NVMe fixes in there. This pull request contains:

   - The 4 core fixes from Ming, that fix both problems with exceeding
     the virtual boundary limit in case of merging, and the gap checking
     for cloned bio's.

   - NVMe fixes from Keith and Christoph:

        - Regression on larger user commands, causing problems with
          reading log pages (for instance). This touches both NVMe, and
          the block core since that is now generally utilized also for
          these types of commands.

        - Hot removal fixes.

        - User exploitable issue with passthrough IO commands, if
          !length is given, causing us to fault on writing to the zero
          page.

        - Fix for a hang under error conditions

   - And finally, the current series regression for umount with cgroup
     writeback, where the final flush would happen async and hence open
     up window after umount where the device wasn't consistent. fsck
     right after umount would show this. From Tejun"

* 'for-linus2' of git://git.kernel.dk/linux-block:
  block: support large requests in blk_rq_map_user_iov
  block: fix blk_rq_get_max_sectors for driver private requests
  nvme: fix max_segments integer truncation
  nvme: set queue limits for the admin queue
  writeback: flush inode cgroup wb switches instead of pinning super_block
  NVMe: Fix 0-length integrity payload
  NVMe: Don't allow unsupported flags
  NVMe: Move error handling to failed reset handler
  NVMe: Simplify device reset failure
  NVMe: Fix namespace removal deadlock
  NVMe: Use IDA for namespace disk naming
  NVMe: Don't unmap controller registers on reset
  block: merge: get the 1st and last bvec via helpers
  block: get the 1st and last bvec via helpers
  block: check virt boundary in bio_will_gap()
  block: bio: introduce helpers to get the 1st and last bvec
Diffstat (limited to 'fs')
-rw-r--r--fs/fs-writeback.c54
-rw-r--r--fs/super.c1
2 files changed, 42 insertions(+), 13 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d8950a57..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) 223#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
224 /* one round can affect upto 5 slots */ 224 /* one round can affect upto 5 slots */
225 225
226static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
227static struct workqueue_struct *isw_wq;
228
226void __inode_attach_wb(struct inode *inode, struct page *page) 229void __inode_attach_wb(struct inode *inode, struct page *page)
227{ 230{
228 struct backing_dev_info *bdi = inode_to_bdi(inode); 231 struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
317 struct inode_switch_wbs_context *isw = 320 struct inode_switch_wbs_context *isw =
318 container_of(work, struct inode_switch_wbs_context, work); 321 container_of(work, struct inode_switch_wbs_context, work);
319 struct inode *inode = isw->inode; 322 struct inode *inode = isw->inode;
320 struct super_block *sb = inode->i_sb;
321 struct address_space *mapping = inode->i_mapping; 323 struct address_space *mapping = inode->i_mapping;
322 struct bdi_writeback *old_wb = inode->i_wb; 324 struct bdi_writeback *old_wb = inode->i_wb;
323 struct bdi_writeback *new_wb = isw->new_wb; 325 struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@ skip_switch:
424 wb_put(new_wb); 426 wb_put(new_wb);
425 427
426 iput(inode); 428 iput(inode);
427 deactivate_super(sb);
428 kfree(isw); 429 kfree(isw);
430
431 atomic_dec(&isw_nr_in_flight);
429} 432}
430 433
431static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) 434static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
435 438
436 /* needs to grab bh-unsafe locks, bounce to work item */ 439 /* needs to grab bh-unsafe locks, bounce to work item */
437 INIT_WORK(&isw->work, inode_switch_wbs_work_fn); 440 INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
438 schedule_work(&isw->work); 441 queue_work(isw_wq, &isw->work);
439} 442}
440 443
441/** 444/**
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
471 474
472 /* while holding I_WB_SWITCH, no one else can update the association */ 475 /* while holding I_WB_SWITCH, no one else can update the association */
473 spin_lock(&inode->i_lock); 476 spin_lock(&inode->i_lock);
474 477 if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
475 if (inode->i_state & (I_WB_SWITCH | I_FREEING) || 478 inode->i_state & (I_WB_SWITCH | I_FREEING) ||
476 inode_to_wb(inode) == isw->new_wb) 479 inode_to_wb(inode) == isw->new_wb) {
477 goto out_unlock; 480 spin_unlock(&inode->i_lock);
478 481 goto out_free;
479 if (!atomic_inc_not_zero(&inode->i_sb->s_active)) 482 }
480 goto out_unlock;
481
482 inode->i_state |= I_WB_SWITCH; 483 inode->i_state |= I_WB_SWITCH;
483 spin_unlock(&inode->i_lock); 484 spin_unlock(&inode->i_lock);
484 485
485 ihold(inode); 486 ihold(inode);
486 isw->inode = inode; 487 isw->inode = inode;
487 488
489 atomic_inc(&isw_nr_in_flight);
490
488 /* 491 /*
489 * In addition to synchronizing among switchers, I_WB_SWITCH tells 492 * In addition to synchronizing among switchers, I_WB_SWITCH tells
490 * the RCU protected stat update paths to grab the mapping's 493 * the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
494 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); 497 call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
495 return; 498 return;
496 499
497out_unlock:
498 spin_unlock(&inode->i_lock);
499out_free: 500out_free:
500 if (isw->new_wb) 501 if (isw->new_wb)
501 wb_put(isw->new_wb); 502 wb_put(isw->new_wb);
@@ -847,6 +848,33 @@ restart:
847 wb_put(last_wb); 848 wb_put(last_wb);
848} 849}
849 850
851/**
852 * cgroup_writeback_umount - flush inode wb switches for umount
853 *
854 * This function is called when a super_block is about to be destroyed and
855 * flushes in-flight inode wb switches. An inode wb switch goes through
856 * RCU and then workqueue, so the two need to be flushed in order to ensure
857 * that all previously scheduled switches are finished. As wb switches are
858 * rare occurrences and synchronize_rcu() can take a while, perform
859 * flushing iff wb switches are in flight.
860 */
861void cgroup_writeback_umount(void)
862{
863 if (atomic_read(&isw_nr_in_flight)) {
864 synchronize_rcu();
865 flush_workqueue(isw_wq);
866 }
867}
868
869static int __init cgroup_writeback_init(void)
870{
871 isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
872 if (!isw_wq)
873 return -ENOMEM;
874 return 0;
875}
876fs_initcall(cgroup_writeback_init);
877
850#else /* CONFIG_CGROUP_WRITEBACK */ 878#else /* CONFIG_CGROUP_WRITEBACK */
851 879
852static struct bdi_writeback * 880static struct bdi_writeback *
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
415 sb->s_flags &= ~MS_ACTIVE; 415 sb->s_flags &= ~MS_ACTIVE;
416 416
417 fsnotify_unmount_inodes(sb); 417 fsnotify_unmount_inodes(sb);
418 cgroup_writeback_umount();
418 419
419 evict_inodes(sb); 420 evict_inodes(sb);
420 421