aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-04-07 22:45:07 -0400
committerDave Chinner <david@fromorbit.com>2011-04-07 22:45:07 -0400
commitc6d09b666de11eb272326a6eb6cd3246da571014 (patch)
tree74951ec04191b47a1fb75f9e79de6c00837a7c17
parente828776a8abe6b9bae7ed9638710bff7642c568a (diff)
xfs: introduce a xfssyncd workqueue
All of the work xfssyncd does is background functionality. There is no need for a thread per filesystem to do this work - it can all be managed by a global workqueue now that workqueues manage concurrency effectively. Introduce a new global xfssyncd workqueue, and convert the periodic work to use this new functionality. To do this, use a delayed work construct to schedule the next running of the periodic sync work for the filesystem. When the sync work is complete, queue a new delayed work for the next running of the sync work. For laptop mode, we wait on completion for the sync works, so ensure that the sync work queuing interface can flush and wait for work to complete to enable the work queue infrastructure to replace the current sequence number and wakeup that is used. Because the sync work does non-trivial amounts of work, mark the new work queue as CPU intensive. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c30
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c88
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_mount.h4
4 files changed, 63 insertions, 61 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 1ba5c451da36..c71b6ed45e41 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1191,22 +1191,12 @@ xfs_fs_sync_fs(
1191 return -error; 1191 return -error;
1192 1192
1193 if (laptop_mode) { 1193 if (laptop_mode) {
1194 int prev_sync_seq = mp->m_sync_seq;
1195
1196 /* 1194 /*
1197 * The disk must be active because we're syncing. 1195 * The disk must be active because we're syncing.
1198 * We schedule xfssyncd now (now that the disk is 1196 * We schedule xfssyncd now (now that the disk is
1199 * active) instead of later (when it might not be). 1197 * active) instead of later (when it might not be).
1200 */ 1198 */
1201 wake_up_process(mp->m_sync_task); 1199 flush_delayed_work_sync(&mp->m_sync_work);
1202 /*
1203 * We have to wait for the sync iteration to complete.
1204 * If we don't, the disk activity caused by the sync
1205 * will come after the sync is completed, and that
1206 * triggers another sync from laptop mode.
1207 */
1208 wait_event(mp->m_wait_single_sync_task,
1209 mp->m_sync_seq != prev_sync_seq);
1210 } 1200 }
1211 1201
1212 return 0; 1202 return 0;
@@ -1492,7 +1482,6 @@ xfs_fs_fill_super(
1492 atomic_set(&mp->m_active_trans, 0); 1482 atomic_set(&mp->m_active_trans, 0);
1493 INIT_LIST_HEAD(&mp->m_sync_list); 1483 INIT_LIST_HEAD(&mp->m_sync_list);
1494 spin_lock_init(&mp->m_sync_lock); 1484 spin_lock_init(&mp->m_sync_lock);
1495 init_waitqueue_head(&mp->m_wait_single_sync_task);
1496 1485
1497 mp->m_super = sb; 1486 mp->m_super = sb;
1498 sb->s_fs_info = mp; 1487 sb->s_fs_info = mp;
@@ -1833,13 +1822,27 @@ init_xfs_fs(void)
1833 if (error) 1822 if (error)
1834 goto out_cleanup_procfs; 1823 goto out_cleanup_procfs;
1835 1824
1825 /*
1826 * max_active is set to 8 to give enough concurrency to allow
1827 * multiple work operations on each CPU to run. This allows multiple
1828 * filesystems to be running sync work concurrently, and scales with
1829 * the number of CPUs in the system.
1830 */
1831 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
1832 if (!xfs_syncd_wq) {
1833 error = -ENOMEM;
1834 goto out_sysctl_unregister;
1835 }
1836
1836 vfs_initquota(); 1837 vfs_initquota();
1837 1838
1838 error = register_filesystem(&xfs_fs_type); 1839 error = register_filesystem(&xfs_fs_type);
1839 if (error) 1840 if (error)
1840 goto out_sysctl_unregister; 1841 goto out_destroy_xfs_syncd;
1841 return 0; 1842 return 0;
1842 1843
1844 out_destroy_xfs_syncd:
1845 destroy_workqueue(xfs_syncd_wq);
1843 out_sysctl_unregister: 1846 out_sysctl_unregister:
1844 xfs_sysctl_unregister(); 1847 xfs_sysctl_unregister();
1845 out_cleanup_procfs: 1848 out_cleanup_procfs:
@@ -1861,6 +1864,7 @@ exit_xfs_fs(void)
1861{ 1864{
1862 vfs_exitquota(); 1865 vfs_exitquota();
1863 unregister_filesystem(&xfs_fs_type); 1866 unregister_filesystem(&xfs_fs_type);
1867 destroy_workqueue(xfs_syncd_wq);
1864 xfs_sysctl_unregister(); 1868 xfs_sysctl_unregister();
1865 xfs_cleanup_procfs(); 1869 xfs_cleanup_procfs();
1866 xfs_buf_terminate(); 1870 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 594cd822d84d..4a582d8100e4 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -39,6 +39,8 @@
39#include <linux/kthread.h> 39#include <linux/kthread.h>
40#include <linux/freezer.h> 40#include <linux/freezer.h>
41 41
42struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
43
42/* 44/*
43 * The inode lookup is done in batches to keep the amount of lock traffic and 45 * The inode lookup is done in batches to keep the amount of lock traffic and
44 * radix tree lookups to a minimum. The batch size is a trade off between 46 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -489,32 +491,6 @@ xfs_flush_inodes(
489 xfs_log_force(ip->i_mount, XFS_LOG_SYNC); 491 xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
490} 492}
491 493
492/*
493 * Every sync period we need to unpin all items, reclaim inodes and sync
494 * disk quotas. We might need to cover the log to indicate that the
495 * filesystem is idle and not frozen.
496 */
497STATIC void
498xfs_sync_worker(
499 struct xfs_mount *mp,
500 void *unused)
501{
502 int error;
503
504 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
505 /* dgc: errors ignored here */
506 if (mp->m_super->s_frozen == SB_UNFROZEN &&
507 xfs_log_need_covered(mp))
508 error = xfs_fs_log_dummy(mp);
509 else
510 xfs_log_force(mp, 0);
511 xfs_reclaim_inodes(mp, 0);
512 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
513 }
514 mp->m_sync_seq++;
515 wake_up(&mp->m_wait_single_sync_task);
516}
517
518STATIC int 494STATIC int
519xfssyncd( 495xfssyncd(
520 void *arg) 496 void *arg)
@@ -528,34 +504,19 @@ xfssyncd(
528 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); 504 timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
529 for (;;) { 505 for (;;) {
530 if (list_empty(&mp->m_sync_list)) 506 if (list_empty(&mp->m_sync_list))
531 timeleft = schedule_timeout_interruptible(timeleft); 507 schedule_timeout_interruptible(timeleft);
532 /* swsusp */ 508 /* swsusp */
533 try_to_freeze(); 509 try_to_freeze();
534 if (kthread_should_stop() && list_empty(&mp->m_sync_list)) 510 if (kthread_should_stop() && list_empty(&mp->m_sync_list))
535 break; 511 break;
536 512
537 spin_lock(&mp->m_sync_lock); 513 spin_lock(&mp->m_sync_lock);
538 /*
539 * We can get woken by laptop mode, to do a sync -
540 * that's the (only!) case where the list would be
541 * empty with time remaining.
542 */
543 if (!timeleft || list_empty(&mp->m_sync_list)) {
544 if (!timeleft)
545 timeleft = xfs_syncd_centisecs *
546 msecs_to_jiffies(10);
547 INIT_LIST_HEAD(&mp->m_sync_work.w_list);
548 list_add_tail(&mp->m_sync_work.w_list,
549 &mp->m_sync_list);
550 }
551 list_splice_init(&mp->m_sync_list, &tmp); 514 list_splice_init(&mp->m_sync_list, &tmp);
552 spin_unlock(&mp->m_sync_lock); 515 spin_unlock(&mp->m_sync_lock);
553 516
554 list_for_each_entry_safe(work, n, &tmp, w_list) { 517 list_for_each_entry_safe(work, n, &tmp, w_list) {
555 (*work->w_syncer)(mp, work->w_data); 518 (*work->w_syncer)(mp, work->w_data);
556 list_del(&work->w_list); 519 list_del(&work->w_list);
557 if (work == &mp->m_sync_work)
558 continue;
559 if (work->w_completion) 520 if (work->w_completion)
560 complete(work->w_completion); 521 complete(work->w_completion);
561 kmem_free(work); 522 kmem_free(work);
@@ -565,13 +526,49 @@ xfssyncd(
565 return 0; 526 return 0;
566} 527}
567 528
529static void
530xfs_syncd_queue_sync(
531 struct xfs_mount *mp)
532{
533 queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
534 msecs_to_jiffies(xfs_syncd_centisecs * 10));
535}
536
537/*
538 * Every sync period we need to unpin all items, reclaim inodes and sync
539 * disk quotas. We might need to cover the log to indicate that the
540 * filesystem is idle and not frozen.
541 */
542STATIC void
543xfs_sync_worker(
544 struct work_struct *work)
545{
546 struct xfs_mount *mp = container_of(to_delayed_work(work),
547 struct xfs_mount, m_sync_work);
548 int error;
549
550 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
551 /* dgc: errors ignored here */
552 if (mp->m_super->s_frozen == SB_UNFROZEN &&
553 xfs_log_need_covered(mp))
554 error = xfs_fs_log_dummy(mp);
555 else
556 xfs_log_force(mp, 0);
557 xfs_reclaim_inodes(mp, 0);
558 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
559 }
560
561 /* queue us up again */
562 xfs_syncd_queue_sync(mp);
563}
564
568int 565int
569xfs_syncd_init( 566xfs_syncd_init(
570 struct xfs_mount *mp) 567 struct xfs_mount *mp)
571{ 568{
572 mp->m_sync_work.w_syncer = xfs_sync_worker; 569 INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
573 mp->m_sync_work.w_mount = mp; 570 xfs_syncd_queue_sync(mp);
574 mp->m_sync_work.w_completion = NULL; 571
575 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); 572 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
576 if (IS_ERR(mp->m_sync_task)) 573 if (IS_ERR(mp->m_sync_task))
577 return -PTR_ERR(mp->m_sync_task); 574 return -PTR_ERR(mp->m_sync_task);
@@ -582,6 +579,7 @@ void
582xfs_syncd_stop( 579xfs_syncd_stop(
583 struct xfs_mount *mp) 580 struct xfs_mount *mp)
584{ 581{
582 cancel_delayed_work_sync(&mp->m_sync_work);
585 kthread_stop(mp->m_sync_task); 583 kthread_stop(mp->m_sync_task);
586} 584}
587 585
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 32ba6628290c..e3a6ad27415f 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work {
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 34
35extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
36
35int xfs_syncd_init(struct xfs_mount *mp); 37int xfs_syncd_init(struct xfs_mount *mp);
36void xfs_syncd_stop(struct xfs_mount *mp); 38void xfs_syncd_stop(struct xfs_mount *mp);
37 39
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a62e8971539d..2c11e62be888 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -203,12 +203,10 @@ typedef struct xfs_mount {
203 struct mutex m_icsb_mutex; /* balancer sync lock */ 203 struct mutex m_icsb_mutex; /* balancer sync lock */
204#endif 204#endif
205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 205 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
206 struct delayed_work m_sync_work; /* background sync work */
206 struct task_struct *m_sync_task; /* generalised sync thread */ 207 struct task_struct *m_sync_task; /* generalised sync thread */
207 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
208 struct list_head m_sync_list; /* sync thread work item list */ 208 struct list_head m_sync_list; /* sync thread work item list */
209 spinlock_t m_sync_lock; /* work item list lock */ 209 spinlock_t m_sync_lock; /* work item list lock */
210 int m_sync_seq; /* sync thread generation no. */
211 wait_queue_head_t m_wait_single_sync_task;
212 __int64_t m_update_flags; /* sb flags we need to update 210 __int64_t m_update_flags; /* sb flags we need to update
213 on the next remount,rw */ 211 on the next remount,rw */
214 struct shrinker m_inode_shrink; /* inode reclaim shrinker */ 212 struct shrinker m_inode_shrink; /* inode reclaim shrinker */