From 6eedc70150d55b5885800eb6664ea226dc2cb66f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:27 +0200 Subject: vfs: Move noop_backing_dev_info check from sync into writeback In principle, a filesystem may want to have ->sync_fs() called during sync(1) although it does not have a bdi (i.e. s_bdi is set to noop_backing_dev_info). Only writeback code really needs bdi set to something reasonable. So move the checks where they are more logical. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index 11e3d1c44901..b3d2a001293f 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,13 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - /* - * This should be safe, as we require bdi backing to actually - * write out data in the first place - */ - if (sb->s_bdi == &noop_backing_dev_info) - return 0; - if (sb->s_qcop && sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, -1, wait); -- cgit v1.2.2 From ceed17236a7491b44ee2be21f56a41ab997cbe7d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:28 +0200 Subject: quota: Split dquot_quota_sync() to writeback and cache flushing part Split off part of dquot_quota_sync() which writes dquots into a quota file to a separate function. In the next patch we will use the function from filesystems and we do not want to abuse ->quota_sync quotactl callback more than necessary. Acked-by: Steven Whitehouse Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index b3d2a001293f..cae145dd8018 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -30,7 +30,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) { if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1, wait); + sb->s_qcop->quota_sync(sb, -1); if (wait) sync_inodes_sb(sb); -- cgit v1.2.2 From a1177825719ccef3f76ef39bbfd5ebb6087d53c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:29 +0200 Subject: quota: Move quota syncing to ->sync_fs method Since the moment writes to quota files are using block device page cache and space for quota structures is reserved at the moment they are first accessed we have no reason to sync quota before inode writeback. In fact this order is now only harmful since quota information can easily change during inode writeback (either because conversion of delayed-allocated extents or simply because of allocation of new blocks for simple filesystems not using page_mkwrite). So move syncing of quota information after writeback of inodes into ->sync_fs method. This way we do not have to use ->quota_sync callback which is primarily intended for use by quotactl syscall anyway and we get rid of calling ->sync_fs() twice unnecessarily. We skip quota syncing for OCFS2 since it does proper quota journalling in all cases (unlike ext3, ext4, and reiserfs which also support legacy non-journalled quotas) and thus there are no dirty quota structures. CC: "Theodore Ts'o" CC: Joel Becker CC: reiserfs-devel@vger.kernel.org Acked-by: Steven Whitehouse Acked-by: Dave Kleikamp Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index cae145dd8018..66acd2ba91c4 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,9 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1); - if (wait) sync_inodes_sb(sb); else -- cgit v1.2.2 From b3de653105180b57af90ef2f5b8441f085f4ff56 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:30 +0200 Subject: vfs: Reorder operations during sys_sync Change the order of operations during sync from for_each_sb { writeback_inodes_sb(); sync_fs(nowait); __sync_blockdev(nowait); } for_each_sb { sync_inodes_sb(); sync_fs(wait); __sync_blockdev(wait); } to for_each_sb writeback_inodes_sb(); for_each_sb sync_fs(nowait); for_each_sb __sync_blockdev(nowait); for_each_sb sync_inodes_sb(); for_each_sb sync_fs(wait); for_each_sb __sync_blockdev(wait); This is a preparation for the following patches in this series. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 46 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index 66acd2ba91c4..490e90201135 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -67,18 +67,28 @@ int sync_filesystem(struct super_block *sb) } EXPORT_SYMBOL_GPL(sync_filesystem); -static void sync_one_sb(struct super_block *sb, void *arg) +static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - __sync_filesystem(sb, *(int *)arg); + sync_inodes_sb(sb); } -/* - * Sync all the data for all the filesystems (called by sys_sync() and - * emergency sync) - */ -static void sync_filesystems(int wait) + +static void writeback_inodes_one_sb(struct super_block *sb, void *arg) { - iterate_supers(sync_one_sb, &wait); + if (!(sb->s_flags & MS_RDONLY)) + writeback_inodes_sb(sb, WB_REASON_SYNC); +} + +static void sync_fs_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, *(int *)arg); +} + +static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY)) + __sync_blockdev(sb->s_bdev, *(int *)arg); } /* @@ -87,9 +97,15 @@ static void sync_filesystems(int wait) */ SYSCALL_DEFINE0(sync) { + int nowait = 0, wait = 1; + wakeup_flusher_threads(0, WB_REASON_SYNC); - sync_filesystems(0); - sync_filesystems(1); + iterate_supers(writeback_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &wait); + iterate_supers(sync_blkdev_one_sb, &wait); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -97,12 +113,18 @@ SYSCALL_DEFINE0(sync) static void do_sync_work(struct work_struct *work) { + int nowait = 0; + /* * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ - sync_filesystems(0); - sync_filesystems(0); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.2 From a8c7176b6ded413d5044a00f1d05477b95a6d7ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:32 +0200 Subject: vfs: Make sys_sync writeout also block device inodes In case block device does not have filesystem mounted on it, sys_sync will just ignore it and doesn't writeout its dirty pages. This is because writeback code avoids writing inodes from superblock without backing device and blockdev_superblock is such a superblock. Since it's unexpected that sync doesn't writeout dirty data for block devices be nice to users and change the behavior to do so. So now we iterate over all block devices on blockdev_super instead of iterating over all superblocks when syncing block devices. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index 490e90201135..0b166f26362d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,10 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +static void flush_one_bdev(struct block_device *bdev, void *arg) { - if (!(sb->s_flags & MS_RDONLY)) - __sync_blockdev(sb->s_bdev, *(int *)arg); + __sync_blockdev(bdev, 0); +} + +static void sync_one_bdev(struct block_device *bdev, void *arg) +{ + sync_blockdev(bdev); } /* @@ -102,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_supers(sync_blkdev_one_sb, &wait); + iterate_bdevs(sync_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -121,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.2 From d0e91b13eb34d449922124c34f8a05e498daa089 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:33 +0200 Subject: vfs: Remove unnecessary flushing of block devices It is not necessary to write block devices twice. The reason why we first did flush and then proper sync is that for_each_bdev() { write_bdev() wait_for_completion() } is much slower than for_each_bdev() write_bdev() for_each_bdev() wait_for_completion() when there is bigger amount of data. But as is seen in the above, there's no real need to scan pages and submit them twice. We just need to separate the submission and waiting part. This patch does that. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index 0b166f26362d..131ddae87a1d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,14 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void flush_one_bdev(struct block_device *bdev, void *arg) +static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) { - __sync_blockdev(bdev, 0); + filemap_fdatawrite(bdev->bd_inode->i_mapping); } -static void sync_one_bdev(struct block_device *bdev, void *arg) +static void fdatawait_one_bdev(struct block_device *bdev, void *arg) { - sync_blockdev(bdev); + filemap_fdatawait(bdev->bd_inode->i_mapping); } /* @@ -106,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_bdevs(sync_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); + iterate_bdevs(fdatawait_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -125,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v1.2.2 From 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:34 +0200 Subject: vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes wakeup_flusher_threads(0) will queue work doing complete writeback for each flusher thread. Thus there is not much point in submitting another work doing full inode WB_SYNC_NONE writeback by writeback_inodes_sb(). After this change it does not make sense to call nonblocking ->sync_fs and block device flush before calling sync_inodes_sb() because wakeup_flusher_threads() is completely asynchronous and thus these functions would be called in parallel with inode writeback running which will effectively void any work they do. So we move sync_inodes_sb() call before these two functions. Signed-off-by: Jan Kara Signed-off-by: Al Viro --- fs/sync.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/sync.c') diff --git a/fs/sync.c b/fs/sync.c index 131ddae87a1d..eb8722dc556f 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -73,12 +73,6 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg) sync_inodes_sb(sb); } -static void writeback_inodes_one_sb(struct super_block *sb, void *arg) -{ - if (!(sb->s_flags & MS_RDONLY)) - writeback_inodes_sb(sb, WB_REASON_SYNC); -} - static void sync_fs_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) @@ -96,17 +90,22 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) } /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. + * Sync everything. We start by waking flusher threads so that most of + * writeback runs on all devices in parallel. Then we sync all inodes reliably + * which effectively also waits for all flusher threads to finish doing + * writeback. At this point all data is on disk so metadata should be stable + * and we tell filesystems to sync their metadata via ->sync_fs() calls. + * Finally, we writeout all block devices because some filesystems (e.g. ext2) + * just write metadata (such as inodes or bitmaps) to block device page cache + * and do not sync it on their own in ->sync_fs(). */ SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(writeback_inodes_one_sb, NULL); - iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); -- cgit v1.2.2