author	Dave Chinner <dchinner@redhat.com>	2011-04-07 22:45:07 -0400
committer	Dave Chinner <david@fromorbit.com>	2011-04-07 22:45:07 -0400
commit	89e4cb550a492cfca038a555fcc1bdac58822ec3 (patch)
tree	ab688a1849d6361c92b9f60ae0586045908010da /fs
parent	c6d09b666de11eb272326a6eb6cd3246da571014 (diff)
xfs: convert ENOSPC inode flushing to use new syncd workqueue
One of the problems with the current inode flush at ENOSPC is that we queue a flush per ENOSPC event, regardless of how many are already queued. This can result in hundreds of queued flushes, most of which simply burn CPU scanning and do no real work. This just slows down allocation at ENOSPC.

We really only need one active flush at a time, and we can easily implement that via the new xfs_syncd_wq. All we need to do is queue a flush if one is not already active, then block waiting for the currently active flush to complete. The result is that we only ever have a single ENOSPC inode flush active at a time, and this greatly reduces the overhead of ENOSPC processing.

On my 2p test machine, this results in tests exercising ENOSPC conditions running significantly faster - 042 halves execution time, 083 drops from 60s to 5s, etc - while not introducing test regressions.

This allows us to remove the old xfssyncd threads and infrastructure as they are no longer used.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
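The mechanism the patch relies on is the standard Linux workqueue guarantee that a given work_struct can be pending at most once: queue_work() returns without doing anything if the item is already queued, and flush_work_sync() (the API at the time of this patch) blocks until the item has finished executing. The following is a rough illustration only, not code from the patch; the demo_* names and demo_wq are hypothetical stand-ins for the XFS structures and xfs_syncd_wq.

#include <linux/module.h>
#include <linux/workqueue.h>

struct demo_mount {
	struct work_struct	flush_work;	/* single ENOSPC flush item */
};

static struct workqueue_struct	*demo_wq;	/* stands in for xfs_syncd_wq */

static void
demo_flush_worker(
	struct work_struct	*work)
{
	struct demo_mount	*dm = container_of(work, struct demo_mount,
						   flush_work);

	/* Write back dirty inodes for dm here (xfs_sync_data in the patch). */
	pr_info("flushing inodes for mount %p\n", dm);
}

/*
 * Called on every ENOSPC event.  queue_work() is a no-op while the item
 * is still pending, so at most one flush ever runs no matter how many
 * tasks hit ENOSPC concurrently; every caller then blocks in
 * flush_work_sync() until that flush completes.
 */
static void
demo_flush_on_enospc(
	struct demo_mount	*dm)
{
	queue_work(demo_wq, &dm->flush_work);
	flush_work_sync(&dm->flush_work);
}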
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/linux-2.6/xfs_super.c	2
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c	132
-rw-r--r--	fs/xfs/xfs_mount.h	4
3 files changed, 36 insertions(+), 102 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index c71b6ed45e41..ee0e981aa9d1 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1480,8 +1480,6 @@ xfs_fs_fill_super(
 	spin_lock_init(&mp->m_sb_lock);
 	mutex_init(&mp->m_growlock);
 	atomic_set(&mp->m_active_trans, 0);
-	INIT_LIST_HEAD(&mp->m_sync_list);
-	spin_lock_init(&mp->m_sync_lock);
 
 	mp->m_super = sb;
 	sb->s_fs_info = mp;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 4a582d8100e4..af3275965c77 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -433,99 +433,6 @@ xfs_quiesce_attr(
 	xfs_unmountfs_writesb(mp);
 }
 
-/*
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
- * Doing this has two advantages:
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-	struct xfs_mount *mp,
-	void		*data,
-	void		(*syncer)(struct xfs_mount *, void *),
-	struct completion *completion)
-{
-	struct xfs_sync_work *work;
-
-	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-	INIT_LIST_HEAD(&work->w_list);
-	work->w_syncer = syncer;
-	work->w_data = data;
-	work->w_mount = mp;
-	work->w_completion = completion;
-	spin_lock(&mp->m_sync_lock);
-	list_add_tail(&work->w_list, &mp->m_sync_list);
-	spin_unlock(&mp->m_sync_lock);
-	wake_up_process(mp->m_sync_task);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-	struct xfs_mount *mp,
-	void		*arg)
-{
-	struct inode	*inode = arg;
-	xfs_sync_data(mp, SYNC_TRYLOCK);
-	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-	iput(inode);
-}
-
-void
-xfs_flush_inodes(
-	xfs_inode_t	*ip)
-{
-	struct inode	*inode = VFS_I(ip);
-	DECLARE_COMPLETION_ONSTACK(completion);
-
-	igrab(inode);
-	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-	wait_for_completion(&completion);
-	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
-}
-
-STATIC int
-xfssyncd(
-	void			*arg)
-{
-	struct xfs_mount	*mp = arg;
-	long			timeleft;
-	xfs_sync_work_t		*work, *n;
-	LIST_HEAD		(tmp);
-
-	set_freezable();
-	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-	for (;;) {
-		if (list_empty(&mp->m_sync_list))
-			schedule_timeout_interruptible(timeleft);
-		/* swsusp */
-		try_to_freeze();
-		if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-			break;
-
-		spin_lock(&mp->m_sync_lock);
-		list_splice_init(&mp->m_sync_list, &tmp);
-		spin_unlock(&mp->m_sync_lock);
-
-		list_for_each_entry_safe(work, n, &tmp, w_list) {
-			(*work->w_syncer)(mp, work->w_data);
-			list_del(&work->w_list);
-			if (work->w_completion)
-				complete(work->w_completion);
-			kmem_free(work);
-		}
-	}
-
-	return 0;
-}
-
 static void
 xfs_syncd_queue_sync(
 	struct xfs_mount *mp)
@@ -562,16 +469,47 @@ xfs_sync_worker(
 	xfs_syncd_queue_sync(mp);
 }
 
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations. At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	queue_work(xfs_syncd_wq, &mp->m_flush_work);
+	flush_work_sync(&mp->m_flush_work);
+}
+
+STATIC void
+xfs_flush_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(work,
+					struct xfs_mount, m_flush_work);
+
+	xfs_sync_data(mp, SYNC_TRYLOCK);
+	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
+}
+
 int
 xfs_syncd_init(
 	struct xfs_mount	*mp)
 {
+	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
 	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
 	xfs_syncd_queue_sync(mp);
 
-	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-	if (IS_ERR(mp->m_sync_task))
-		return -PTR_ERR(mp->m_sync_task);
 	return 0;
 }
 
@@ -580,7 +518,7 @@ xfs_syncd_stop(
 	struct xfs_mount	*mp)
 {
 	cancel_delayed_work_sync(&mp->m_sync_work);
-	kthread_stop(mp->m_sync_task);
+	cancel_work_sync(&mp->m_flush_work);
 }
 
 void
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 2c11e62be888..a0ad90e95299 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,9 +204,7 @@ typedef struct xfs_mount {
 #endif
 	struct xfs_mru_cache	*m_filestream;	/* per-mount filestream data */
 	struct delayed_work	m_sync_work;	/* background sync work */
-	struct task_struct	*m_sync_task;	/* generalised sync thread */
-	struct list_head	m_sync_list;	/* sync thread work item list */
-	spinlock_t		m_sync_lock;	/* work item list lock */
+	struct work_struct	m_flush_work;	/* background inode flush */
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
 	struct shrinker		m_inode_shrink;	/* inode reclaim shrinker */
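The xfs_syncd_init/xfs_syncd_stop hunks above follow the matching work-item lifecycle: INIT_WORK() before the item can ever be queued, and cancel_work_sync() at teardown, which cancels a pending flush and waits out a running one so the worker can never touch a freed mount. A minimal sketch, reusing the hypothetical demo_* names from the earlier example:

static int
demo_syncd_init(
	struct demo_mount	*dm)
{
	INIT_WORK(&dm->flush_work, demo_flush_worker);
	return 0;
}

static void
demo_syncd_stop(
	struct demo_mount	*dm)
{
	/*
	 * Cancel a pending flush and wait for a running one to finish,
	 * so the worker can no longer reference dm after this returns.
	 */
	cancel_work_sync(&dm->flush_work);
}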