diff options
author | Dave Chinner <dchinner@redhat.com> | 2011-04-07 22:45:07 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2011-04-07 22:45:07 -0400 |
commit | c6d09b666de11eb272326a6eb6cd3246da571014 (patch) | |
tree | 74951ec04191b47a1fb75f9e79de6c00837a7c17 /fs | |
parent | e828776a8abe6b9bae7ed9638710bff7642c568a (diff) |
xfs: introduce a xfssyncd workqueue
All of the work xfssyncd does is background functionality. There is
no need for a thread per filesystem to do this work - it can all be
managed by a global workqueue now that workqueues manage concurrency
effectively.
Introduce a new global xfssyncd workqueue, and convert the periodic
work to use this new functionality. To do this, use a delayed work
construct to schedule the next running of the periodic sync work
for the filesystem. When the sync work is complete, queue a new
delayed work for the next running of the sync work.
For laptop mode, we wait on completion for the sync works, so ensure
that the sync work queuing interface can flush and wait for work to
complete to enable the work queue infrastructure to replace the
current sequence number and wakeup that is used.
Because the sync work does non-trivial amounts of work, mark the
new work queue as CPU intensive.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 30 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 88 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 4 |
4 files changed, 63 insertions, 61 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 1ba5c451da36..c71b6ed45e41 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -1191,22 +1191,12 @@ xfs_fs_sync_fs( | |||
1191 | return -error; | 1191 | return -error; |
1192 | 1192 | ||
1193 | if (laptop_mode) { | 1193 | if (laptop_mode) { |
1194 | int prev_sync_seq = mp->m_sync_seq; | ||
1195 | |||
1196 | /* | 1194 | /* |
1197 | * The disk must be active because we're syncing. | 1195 | * The disk must be active because we're syncing. |
1198 | * We schedule xfssyncd now (now that the disk is | 1196 | * We schedule xfssyncd now (now that the disk is |
1199 | * active) instead of later (when it might not be). | 1197 | * active) instead of later (when it might not be). |
1200 | */ | 1198 | */ |
1201 | wake_up_process(mp->m_sync_task); | 1199 | flush_delayed_work_sync(&mp->m_sync_work); |
1202 | /* | ||
1203 | * We have to wait for the sync iteration to complete. | ||
1204 | * If we don't, the disk activity caused by the sync | ||
1205 | * will come after the sync is completed, and that | ||
1206 | * triggers another sync from laptop mode. | ||
1207 | */ | ||
1208 | wait_event(mp->m_wait_single_sync_task, | ||
1209 | mp->m_sync_seq != prev_sync_seq); | ||
1210 | } | 1200 | } |
1211 | 1201 | ||
1212 | return 0; | 1202 | return 0; |
@@ -1492,7 +1482,6 @@ xfs_fs_fill_super( | |||
1492 | atomic_set(&mp->m_active_trans, 0); | 1482 | atomic_set(&mp->m_active_trans, 0); |
1493 | INIT_LIST_HEAD(&mp->m_sync_list); | 1483 | INIT_LIST_HEAD(&mp->m_sync_list); |
1494 | spin_lock_init(&mp->m_sync_lock); | 1484 | spin_lock_init(&mp->m_sync_lock); |
1495 | init_waitqueue_head(&mp->m_wait_single_sync_task); | ||
1496 | 1485 | ||
1497 | mp->m_super = sb; | 1486 | mp->m_super = sb; |
1498 | sb->s_fs_info = mp; | 1487 | sb->s_fs_info = mp; |
@@ -1833,13 +1822,27 @@ init_xfs_fs(void) | |||
1833 | if (error) | 1822 | if (error) |
1834 | goto out_cleanup_procfs; | 1823 | goto out_cleanup_procfs; |
1835 | 1824 | ||
1825 | /* | ||
1826 | * max_active is set to 8 to give enough concurrency to allow | ||
1827 | * multiple work operations on each CPU to run. This allows multiple | ||
1828 | * filesystems to be running sync work concurrently, and scales with | ||
1829 | * the number of CPUs in the system. | ||
1830 | */ | ||
1831 | xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); | ||
1832 | if (!xfs_syncd_wq) { | ||
1833 | error = -ENOMEM; | ||
1834 | goto out_sysctl_unregister; | ||
1835 | } | ||
1836 | |||
1836 | vfs_initquota(); | 1837 | vfs_initquota(); |
1837 | 1838 | ||
1838 | error = register_filesystem(&xfs_fs_type); | 1839 | error = register_filesystem(&xfs_fs_type); |
1839 | if (error) | 1840 | if (error) |
1840 | goto out_sysctl_unregister; | 1841 | goto out_destroy_xfs_syncd; |
1841 | return 0; | 1842 | return 0; |
1842 | 1843 | ||
1844 | out_destroy_xfs_syncd: | ||
1845 | destroy_workqueue(xfs_syncd_wq); | ||
1843 | out_sysctl_unregister: | 1846 | out_sysctl_unregister: |
1844 | xfs_sysctl_unregister(); | 1847 | xfs_sysctl_unregister(); |
1845 | out_cleanup_procfs: | 1848 | out_cleanup_procfs: |
@@ -1861,6 +1864,7 @@ exit_xfs_fs(void) | |||
1861 | { | 1864 | { |
1862 | vfs_exitquota(); | 1865 | vfs_exitquota(); |
1863 | unregister_filesystem(&xfs_fs_type); | 1866 | unregister_filesystem(&xfs_fs_type); |
1867 | destroy_workqueue(xfs_syncd_wq); | ||
1864 | xfs_sysctl_unregister(); | 1868 | xfs_sysctl_unregister(); |
1865 | xfs_cleanup_procfs(); | 1869 | xfs_cleanup_procfs(); |
1866 | xfs_buf_terminate(); | 1870 | xfs_buf_terminate(); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 594cd822d84d..4a582d8100e4 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -39,6 +39,8 @@ | |||
39 | #include <linux/kthread.h> | 39 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
41 | 41 | ||
42 | struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
43 | |||
42 | /* | 44 | /* |
43 | * The inode lookup is done in batches to keep the amount of lock traffic and | 45 | * The inode lookup is done in batches to keep the amount of lock traffic and |
44 | * radix tree lookups to a minimum. The batch size is a trade off between | 46 | * radix tree lookups to a minimum. The batch size is a trade off between |
@@ -489,32 +491,6 @@ xfs_flush_inodes( | |||
489 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); | 491 | xfs_log_force(ip->i_mount, XFS_LOG_SYNC); |
490 | } | 492 | } |
491 | 493 | ||
492 | /* | ||
493 | * Every sync period we need to unpin all items, reclaim inodes and sync | ||
494 | * disk quotas. We might need to cover the log to indicate that the | ||
495 | * filesystem is idle and not frozen. | ||
496 | */ | ||
497 | STATIC void | ||
498 | xfs_sync_worker( | ||
499 | struct xfs_mount *mp, | ||
500 | void *unused) | ||
501 | { | ||
502 | int error; | ||
503 | |||
504 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | ||
505 | /* dgc: errors ignored here */ | ||
506 | if (mp->m_super->s_frozen == SB_UNFROZEN && | ||
507 | xfs_log_need_covered(mp)) | ||
508 | error = xfs_fs_log_dummy(mp); | ||
509 | else | ||
510 | xfs_log_force(mp, 0); | ||
511 | xfs_reclaim_inodes(mp, 0); | ||
512 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
513 | } | ||
514 | mp->m_sync_seq++; | ||
515 | wake_up(&mp->m_wait_single_sync_task); | ||
516 | } | ||
517 | |||
518 | STATIC int | 494 | STATIC int |
519 | xfssyncd( | 495 | xfssyncd( |
520 | void *arg) | 496 | void *arg) |
@@ -528,34 +504,19 @@ xfssyncd( | |||
528 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); | 504 | timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); |
529 | for (;;) { | 505 | for (;;) { |
530 | if (list_empty(&mp->m_sync_list)) | 506 | if (list_empty(&mp->m_sync_list)) |
531 | timeleft = schedule_timeout_interruptible(timeleft); | 507 | schedule_timeout_interruptible(timeleft); |
532 | /* swsusp */ | 508 | /* swsusp */ |
533 | try_to_freeze(); | 509 | try_to_freeze(); |
534 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) | 510 | if (kthread_should_stop() && list_empty(&mp->m_sync_list)) |
535 | break; | 511 | break; |
536 | 512 | ||
537 | spin_lock(&mp->m_sync_lock); | 513 | spin_lock(&mp->m_sync_lock); |
538 | /* | ||
539 | * We can get woken by laptop mode, to do a sync - | ||
540 | * that's the (only!) case where the list would be | ||
541 | * empty with time remaining. | ||
542 | */ | ||
543 | if (!timeleft || list_empty(&mp->m_sync_list)) { | ||
544 | if (!timeleft) | ||
545 | timeleft = xfs_syncd_centisecs * | ||
546 | msecs_to_jiffies(10); | ||
547 | INIT_LIST_HEAD(&mp->m_sync_work.w_list); | ||
548 | list_add_tail(&mp->m_sync_work.w_list, | ||
549 | &mp->m_sync_list); | ||
550 | } | ||
551 | list_splice_init(&mp->m_sync_list, &tmp); | 514 | list_splice_init(&mp->m_sync_list, &tmp); |
552 | spin_unlock(&mp->m_sync_lock); | 515 | spin_unlock(&mp->m_sync_lock); |
553 | 516 | ||
554 | list_for_each_entry_safe(work, n, &tmp, w_list) { | 517 | list_for_each_entry_safe(work, n, &tmp, w_list) { |
555 | (*work->w_syncer)(mp, work->w_data); | 518 | (*work->w_syncer)(mp, work->w_data); |
556 | list_del(&work->w_list); | 519 | list_del(&work->w_list); |
557 | if (work == &mp->m_sync_work) | ||
558 | continue; | ||
559 | if (work->w_completion) | 520 | if (work->w_completion) |
560 | complete(work->w_completion); | 521 | complete(work->w_completion); |
561 | kmem_free(work); | 522 | kmem_free(work); |
@@ -565,13 +526,49 @@ xfssyncd( | |||
565 | return 0; | 526 | return 0; |
566 | } | 527 | } |
567 | 528 | ||
529 | static void | ||
530 | xfs_syncd_queue_sync( | ||
531 | struct xfs_mount *mp) | ||
532 | { | ||
533 | queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, | ||
534 | msecs_to_jiffies(xfs_syncd_centisecs * 10)); | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * Every sync period we need to unpin all items, reclaim inodes and sync | ||
539 | * disk quotas. We might need to cover the log to indicate that the | ||
540 | * filesystem is idle and not frozen. | ||
541 | */ | ||
542 | STATIC void | ||
543 | xfs_sync_worker( | ||
544 | struct work_struct *work) | ||
545 | { | ||
546 | struct xfs_mount *mp = container_of(to_delayed_work(work), | ||
547 | struct xfs_mount, m_sync_work); | ||
548 | int error; | ||
549 | |||
550 | if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { | ||
551 | /* dgc: errors ignored here */ | ||
552 | if (mp->m_super->s_frozen == SB_UNFROZEN && | ||
553 | xfs_log_need_covered(mp)) | ||
554 | error = xfs_fs_log_dummy(mp); | ||
555 | else | ||
556 | xfs_log_force(mp, 0); | ||
557 | xfs_reclaim_inodes(mp, 0); | ||
558 | error = xfs_qm_sync(mp, SYNC_TRYLOCK); | ||
559 | } | ||
560 | |||
561 | /* queue us up again */ | ||
562 | xfs_syncd_queue_sync(mp); | ||
563 | } | ||
564 | |||
568 | int | 565 | int |
569 | xfs_syncd_init( | 566 | xfs_syncd_init( |
570 | struct xfs_mount *mp) | 567 | struct xfs_mount *mp) |
571 | { | 568 | { |
572 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 569 | INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); |
573 | mp->m_sync_work.w_mount = mp; | 570 | xfs_syncd_queue_sync(mp); |
574 | mp->m_sync_work.w_completion = NULL; | 571 | |
575 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); | 572 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname); |
576 | if (IS_ERR(mp->m_sync_task)) | 573 | if (IS_ERR(mp->m_sync_task)) |
577 | return -PTR_ERR(mp->m_sync_task); | 574 | return -PTR_ERR(mp->m_sync_task); |
@@ -582,6 +579,7 @@ void | |||
582 | xfs_syncd_stop( | 579 | xfs_syncd_stop( |
583 | struct xfs_mount *mp) | 580 | struct xfs_mount *mp) |
584 | { | 581 | { |
582 | cancel_delayed_work_sync(&mp->m_sync_work); | ||
585 | kthread_stop(mp->m_sync_task); | 583 | kthread_stop(mp->m_sync_task); |
586 | } | 584 | } |
587 | 585 | ||
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 32ba6628290c..e3a6ad27415f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -32,6 +32,8 @@ typedef struct xfs_sync_work { | |||
32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ | 32 | #define SYNC_WAIT 0x0001 /* wait for i/o to complete */ |
33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ | 33 | #define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ |
34 | 34 | ||
35 | extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ | ||
36 | |||
35 | int xfs_syncd_init(struct xfs_mount *mp); | 37 | int xfs_syncd_init(struct xfs_mount *mp); |
36 | void xfs_syncd_stop(struct xfs_mount *mp); | 38 | void xfs_syncd_stop(struct xfs_mount *mp); |
37 | 39 | ||
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index a62e8971539d..2c11e62be888 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -203,12 +203,10 @@ typedef struct xfs_mount { | |||
203 | struct mutex m_icsb_mutex; /* balancer sync lock */ | 203 | struct mutex m_icsb_mutex; /* balancer sync lock */ |
204 | #endif | 204 | #endif |
205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 205 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
206 | struct delayed_work m_sync_work; /* background sync work */ | ||
206 | struct task_struct *m_sync_task; /* generalised sync thread */ | 207 | struct task_struct *m_sync_task; /* generalised sync thread */ |
207 | xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ | ||
208 | struct list_head m_sync_list; /* sync thread work item list */ | 208 | struct list_head m_sync_list; /* sync thread work item list */ |
209 | spinlock_t m_sync_lock; /* work item list lock */ | 209 | spinlock_t m_sync_lock; /* work item list lock */ |
210 | int m_sync_seq; /* sync thread generation no. */ | ||
211 | wait_queue_head_t m_wait_single_sync_task; | ||
212 | __int64_t m_update_flags; /* sb flags we need to update | 210 | __int64_t m_update_flags; /* sb flags we need to update |
213 | on the next remount,rw */ | 211 | on the next remount,rw */ |
214 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ | 212 | struct shrinker m_inode_shrink; /* inode reclaim shrinker */ |