1 files changed, 117 insertions, 113 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 594cd822d84d..e4f9c1b0836c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -39,6 +40,8 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
+struct workqueue_struct *xfs_syncd_wq;  /* sync workqueue */
 /*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
 /*
 * Second stage of a quiesce. The data is already synced, now we have to take
 * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceding.
+ * wait for any remaining transactions to drain out before proceeding.
 */
 void
 xfs_quiesce_attr(
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
        xfs_unmountfs_writesb(mp);
 }
-/*
+static void
- * Enqueue a work item to be picked up by the vfs xfssyncd thread.
+xfs_syncd_queue_sync(
- * Doing this has two advantages:
+        struct xfs_mount        *mp)
- * - It saves on stack space, which is tight in certain situations
- * - It can be used (with care) as a mechanism to avoid deadlocks.
- * Flushing while allocating in a full filesystem requires both.
- */
-STATIC void
-xfs_syncd_queue_work(
-        struct xfs_mount *mp,
-        void            *data,
-        void            (*syncer)(struct xfs_mount *, void *),
-        struct completion *completion)
-{
-        struct xfs_sync_work *work;
-        work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
-        INIT_LIST_HEAD(&work->w_list);
-        work->w_syncer = syncer;
-        work->w_data = data;
-        work->w_mount = mp;
-        work->w_completion = completion;
-        spin_lock(&mp->m_sync_lock);
-        list_add_tail(&work->w_list, &mp->m_sync_list);
-        spin_unlock(&mp->m_sync_lock);
-        wake_up_process(mp->m_sync_task);
-}
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations.  At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room...
- */
-STATIC void
-xfs_flush_inodes_work(
-        struct xfs_mount *mp,
-        void            *arg)
-{
-        struct inode    *inode = arg;
-        xfs_sync_data(mp, SYNC_TRYLOCK);
-        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-        iput(inode);
-}
-void
-xfs_flush_inodes(
-        xfs_inode_t     *ip)
 {
-        struct inode    *inode = VFS_I(ip);
+        queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
-        DECLARE_COMPLETION_ONSTACK(completion);
+                                msecs_to_jiffies(xfs_syncd_centisecs * 10));
-        igrab(inode);
-        xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
-        wait_for_completion(&completion);
-        xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
 }
 /*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
 */
 STATIC void
 xfs_sync_worker(
-        struct xfs_mount *mp,
+        struct work_struct *work)
-        void            *unused)
 {
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_sync_work);
        int             error;
        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
                        error = xfs_fs_log_dummy(mp);
                else
                        xfs_log_force(mp, 0);
-                xfs_reclaim_inodes(mp, 0);
                error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+                /* start pushing all the metadata that is currently dirty */
+                xfs_ail_push_all(mp->m_ail);
        }
-        mp->m_sync_seq++;
-        wake_up(&mp->m_wait_single_sync_task);
+        /* queue us up again */
+        xfs_syncd_queue_sync(mp);
 }
-STATIC int
+/*
-xfssyncd(
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
-        void                    *arg)
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+        struct xfs_mount        *mp)
 {
-        struct xfs_mount        *mp = arg;
-        long                    timeleft;
-        xfs_sync_work_t         *work, *n;
-        LIST_HEAD               (tmp);
-        set_freezable();
-        timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
-        for (;;) {
-                if (list_empty(&mp->m_sync_list))
-                        timeleft = schedule_timeout_interruptible(timeleft);
-                /* swsusp */
-                try_to_freeze();
-                if (kthread_should_stop() && list_empty(&mp->m_sync_list))
-                        break;
-                spin_lock(&mp->m_sync_lock);
+        /*
-                /*
+         * We can have inodes enter reclaim after we've shut down the syncd
-                 * We can get woken by laptop mode, to do a sync -
+         * workqueue during unmount, so don't allow reclaim work to be queued
-                 * that's the (only!) case where the list would be
+         * during unmount.
-                 * empty with time remaining.
+         */
-                 */
+        if (!(mp->m_super->s_flags & MS_ACTIVE))
-                if (!timeleft || list_empty(&mp->m_sync_list)) {
+                return;
-                        if (!timeleft)
-                                timeleft = xfs_syncd_centisecs *
-                                                        msecs_to_jiffies(10);
-                        INIT_LIST_HEAD(&mp->m_sync_work.w_list);
-                        list_add_tail(&mp->m_sync_work.w_list,
-                                        &mp->m_sync_list);
-                }
-                list_splice_init(&mp->m_sync_list, &tmp);
-                spin_unlock(&mp->m_sync_lock);
-                list_for_each_entry_safe(work, n, &tmp, w_list) {
+        rcu_read_lock();
-                        (*work->w_syncer)(mp, work->w_data);
+        if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-                        list_del(&work->w_list);
+                queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
-                        if (work == &mp->m_sync_work)
+                        msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-                                continue;
-                        if (work->w_completion)
-                                complete(work->w_completion);
-                        kmem_free(work);
-                }
        }
+        rcu_read_unlock();
+}
-        return 0;
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                        struct xfs_mount, m_reclaim_work);
+        xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+        xfs_syncd_queue_reclaim(mp);
+}
+/*
+ * Flush delayed allocate data, attempting to free up reserved space
+ * from existing allocations.  At this point a new allocation attempt
+ * has failed with ENOSPC and we are in the process of scratching our
+ * heads, looking about for more room.
+ *
+ * Queue a new data flush if there isn't one already in progress and
+ * wait for completion of the flush. This means that we only ever have one
+ * inode flush in progress no matter how many ENOSPC events are occurring and
+ * so will prevent the system from bogging down due to every concurrent
+ * ENOSPC event scanning all the active inodes in the system for writeback.
+ */
+void
+xfs_flush_inodes(
+        struct xfs_inode        *ip)
+{
+        struct xfs_mount        *mp = ip->i_mount;
+        queue_work(xfs_syncd_wq, &mp->m_flush_work);
+        flush_work_sync(&mp->m_flush_work);
+}
+STATIC void
+xfs_flush_worker(
+        struct work_struct *work)
+{
+        struct xfs_mount *mp = container_of(work,
+                                        struct xfs_mount, m_flush_work);
+        xfs_sync_data(mp, SYNC_TRYLOCK);
+        xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
 }
 int
 xfs_syncd_init(
        struct xfs_mount        *mp)
 {
-        mp->m_sync_work.w_syncer = xfs_sync_worker;
+        INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
-        mp->m_sync_work.w_mount = mp;
+        INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
-        mp->m_sync_work.w_completion = NULL;
+        INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-        mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
-        if (IS_ERR(mp->m_sync_task))
+        xfs_syncd_queue_sync(mp);
-                return -PTR_ERR(mp->m_sync_task);
+        xfs_syncd_queue_reclaim(mp);
        return 0;
 }
@@ -582,7 +569,9 @@ void
 xfs_syncd_stop(
        struct xfs_mount        *mp)
 {
-        kthread_stop(mp->m_sync_task);
+        cancel_delayed_work_sync(&mp->m_sync_work);
+        cancel_delayed_work_sync(&mp->m_reclaim_work);
+        cancel_work_sync(&mp->m_flush_work);
 }
 void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
                                XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
                                XFS_ICI_RECLAIM_TAG);
                spin_unlock(&ip->i_mount->m_perag_lock);
+                /* schedule periodic background inode reclaim */
+                xfs_syncd_queue_reclaim(ip->i_mount);
                trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
                                                        -1, _RET_IP_);
        }
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
 }
 /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
 */
 static int
 xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(
        mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
        if (nr_to_scan) {
+                /* kick background reclaimer and push the AIL */
+                xfs_syncd_queue_reclaim(mp);
+                xfs_ail_push_all(mp->m_ail);
                if (!(gfp_mask & __GFP_FS))
                        return -1;
-                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+                xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+                                        &nr_to_scan);
                /* terminate if we don't exhaust the scan */
                if (nr_to_scan > 0)
                        return -1;

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 594cd822d84d..e4f9c1b0836c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -22,6 +22,7 @@
22	#include "xfs_log.h"	22	#include "xfs_log.h"
23	#include "xfs_inum.h"	23	#include "xfs_inum.h"
24	#include "xfs_trans.h"	24	#include "xfs_trans.h"
		25	#include "xfs_trans_priv.h"
25	#include "xfs_sb.h"	26	#include "xfs_sb.h"
26	#include "xfs_ag.h"	27	#include "xfs_ag.h"
27	#include "xfs_mount.h"	28	#include "xfs_mount.h"
@@ -39,6 +40,8 @@
39	#include <linux/kthread.h>	40	#include <linux/kthread.h>
40	#include <linux/freezer.h>	41	#include <linux/freezer.h>
41		42
		43	struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
		44
42	/*	45	/*
43	* The inode lookup is done in batches to keep the amount of lock traffic and	46	* The inode lookup is done in batches to keep the amount of lock traffic and
44	* radix tree lookups to a minimum. The batch size is a trade off between	47	* radix tree lookups to a minimum. The batch size is a trade off between
@@ -401,7 +404,7 @@ xfs_quiesce_fs(
401	/*	404	/*
402	* Second stage of a quiesce. The data is already synced, now we have to take	405	* Second stage of a quiesce. The data is already synced, now we have to take
403	* care of the metadata. New transactions are already blocked, so we need to	406	* care of the metadata. New transactions are already blocked, so we need to
404	* wait for any remaining transactions to drain out before proceding.	407	* wait for any remaining transactions to drain out before proceeding.
405	*/	408	*/
406	void	409	void
407	xfs_quiesce_attr(	410	xfs_quiesce_attr(
@@ -431,62 +434,12 @@ xfs_quiesce_attr(
431	xfs_unmountfs_writesb(mp);	434	xfs_unmountfs_writesb(mp);
432	}	435	}
433		436
434	/*	437	static void
435	* Enqueue a work item to be picked up by the vfs xfssyncd thread.	438	xfs_syncd_queue_sync(
436	* Doing this has two advantages:	439	struct xfs_mount *mp)
437	* - It saves on stack space, which is tight in certain situations
438	* - It can be used (with care) as a mechanism to avoid deadlocks.
439	* Flushing while allocating in a full filesystem requires both.
440	*/
441	STATIC void
442	xfs_syncd_queue_work(
443	struct xfs_mount *mp,
444	void *data,
445	void (*syncer)(struct xfs_mount *, void *),
446	struct completion *completion)
447	{
448	struct xfs_sync_work *work;
449
450	work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
451	INIT_LIST_HEAD(&work->w_list);
452	work->w_syncer = syncer;
453	work->w_data = data;
454	work->w_mount = mp;
455	work->w_completion = completion;
456	spin_lock(&mp->m_sync_lock);
457	list_add_tail(&work->w_list, &mp->m_sync_list);
458	spin_unlock(&mp->m_sync_lock);
459	wake_up_process(mp->m_sync_task);
460	}
461
462	/*
463	* Flush delayed allocate data, attempting to free up reserved space
464	* from existing allocations. At this point a new allocation attempt
465	* has failed with ENOSPC and we are in the process of scratching our
466	* heads, looking about for more room...
467	*/
468	STATIC void
469	xfs_flush_inodes_work(
470	struct xfs_mount *mp,
471	void *arg)
472	{
473	struct inode *inode = arg;
474	xfs_sync_data(mp, SYNC_TRYLOCK);
475	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
476	iput(inode);
477	}
478
479	void
480	xfs_flush_inodes(
481	xfs_inode_t *ip)
482	{	440	{
483	struct inode *inode = VFS_I(ip);	441	queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
484	DECLARE_COMPLETION_ONSTACK(completion);	442	msecs_to_jiffies(xfs_syncd_centisecs * 10));
485
486	igrab(inode);
487	xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
488	wait_for_completion(&completion);
489	xfs_log_force(ip->i_mount, XFS_LOG_SYNC);
490	}	443	}
491		444
492	/*	445	/*
@@ -496,9 +449,10 @@ xfs_flush_inodes(
496	*/	449	*/
497	STATIC void	450	STATIC void
498	xfs_sync_worker(	451	xfs_sync_worker(
499	struct xfs_mount *mp,	452	struct work_struct *work)
500	void *unused)
501	{	453	{
		454	struct xfs_mount *mp = container_of(to_delayed_work(work),
		455	struct xfs_mount, m_sync_work);
502	int error;	456	int error;
503		457
504	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {	458	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
@@ -508,73 +462,106 @@ xfs_sync_worker(
508	error = xfs_fs_log_dummy(mp);	462	error = xfs_fs_log_dummy(mp);
509	else	463	else
510	xfs_log_force(mp, 0);	464	xfs_log_force(mp, 0);
511	xfs_reclaim_inodes(mp, 0);
512	error = xfs_qm_sync(mp, SYNC_TRYLOCK);	465	error = xfs_qm_sync(mp, SYNC_TRYLOCK);
		466
		467	/* start pushing all the metadata that is currently dirty */
		468	xfs_ail_push_all(mp->m_ail);
513	}	469	}
514	mp->m_sync_seq++;	470
515	wake_up(&mp->m_wait_single_sync_task);	471	/* queue us up again */
		472	xfs_syncd_queue_sync(mp);
516	}	473	}
517		474
518	STATIC int	475	/*
519	xfssyncd(	476	* Queue a new inode reclaim pass if there are reclaimable inodes and there
520	void *arg)	477	* isn't a reclaim pass already in progress. By default it runs every 5s based
		478	* on the xfs syncd work default of 30s. Perhaps this should have its own
		479	* tunable, but that can be done if this method proves to be ineffective or too
		480	* aggressive.
		481	*/
		482	static void
		483	xfs_syncd_queue_reclaim(
		484	struct xfs_mount *mp)
521	{	485	{
522	struct xfs_mount *mp = arg;
523	long timeleft;
524	xfs_sync_work_t *work, *n;
525	LIST_HEAD (tmp);
526
527	set_freezable();
528	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
529	for (;;) {
530	if (list_empty(&mp->m_sync_list))
531	timeleft = schedule_timeout_interruptible(timeleft);
532	/* swsusp */
533	try_to_freeze();
534	if (kthread_should_stop() && list_empty(&mp->m_sync_list))
535	break;
536		486
537	spin_lock(&mp->m_sync_lock);	487	/*
538	/*	488	* We can have inodes enter reclaim after we've shut down the syncd
539	* We can get woken by laptop mode, to do a sync -	489	* workqueue during unmount, so don't allow reclaim work to be queued
540	* that's the (only!) case where the list would be	490	* during unmount.
541	* empty with time remaining.	491	*/
542	*/	492	if (!(mp->m_super->s_flags & MS_ACTIVE))
543	if (!timeleft || list_empty(&mp->m_sync_list)) {	493	return;
544	if (!timeleft)
545	timeleft = xfs_syncd_centisecs *
546	msecs_to_jiffies(10);
547	INIT_LIST_HEAD(&mp->m_sync_work.w_list);
548	list_add_tail(&mp->m_sync_work.w_list,
549	&mp->m_sync_list);
550	}
551	list_splice_init(&mp->m_sync_list, &tmp);
552	spin_unlock(&mp->m_sync_lock);
553		494
554	list_for_each_entry_safe(work, n, &tmp, w_list) {	495	rcu_read_lock();
555	(*work->w_syncer)(mp, work->w_data);	496	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
556	list_del(&work->w_list);	497	queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
557	if (work == &mp->m_sync_work)	498	msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
558	continue;
559	if (work->w_completion)
560	complete(work->w_completion);
561	kmem_free(work);
562	}
563	}	499	}
		500	rcu_read_unlock();
		501	}
564		502
565	return 0;	503	/*
		504	* This is a fast pass over the inode cache to try to get reclaim moving on as
		505	* many inodes as possible in a short period of time. It kicks itself every few
		506	* seconds, as well as being kicked by the inode cache shrinker when memory
		507	* goes low. It scans as quickly as possible avoiding locked inodes or those
		508	* already being flushed, and once done schedules a future pass.
		509	*/
		510	STATIC void
		511	xfs_reclaim_worker(
		512	struct work_struct *work)
		513	{
		514	struct xfs_mount *mp = container_of(to_delayed_work(work),
		515	struct xfs_mount, m_reclaim_work);
		516
		517	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
		518	xfs_syncd_queue_reclaim(mp);
		519	}
		520
		521	/*
		522	* Flush delayed allocate data, attempting to free up reserved space
		523	* from existing allocations. At this point a new allocation attempt
		524	* has failed with ENOSPC and we are in the process of scratching our
		525	* heads, looking about for more room.
		526	*
		527	* Queue a new data flush if there isn't one already in progress and
		528	* wait for completion of the flush. This means that we only ever have one
		529	* inode flush in progress no matter how many ENOSPC events are occurring and
		530	* so will prevent the system from bogging down due to every concurrent
		531	* ENOSPC event scanning all the active inodes in the system for writeback.
		532	*/
		533	void
		534	xfs_flush_inodes(
		535	struct xfs_inode *ip)
		536	{
		537	struct xfs_mount *mp = ip->i_mount;
		538
		539	queue_work(xfs_syncd_wq, &mp->m_flush_work);
		540	flush_work_sync(&mp->m_flush_work);
		541	}
		542
		543	STATIC void
		544	xfs_flush_worker(
		545	struct work_struct *work)
		546	{
		547	struct xfs_mount *mp = container_of(work,
		548	struct xfs_mount, m_flush_work);
		549
		550	xfs_sync_data(mp, SYNC_TRYLOCK);
		551	xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
566	}	552	}
567		553
568	int	554	int
569	xfs_syncd_init(	555	xfs_syncd_init(
570	struct xfs_mount *mp)	556	struct xfs_mount *mp)
571	{	557	{
572	mp->m_sync_work.w_syncer = xfs_sync_worker;	558	INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
573	mp->m_sync_work.w_mount = mp;	559	INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
574	mp->m_sync_work.w_completion = NULL;	560	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
575	mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);	561
576	if (IS_ERR(mp->m_sync_task))	562	xfs_syncd_queue_sync(mp);
577	return -PTR_ERR(mp->m_sync_task);	563	xfs_syncd_queue_reclaim(mp);
		564
578	return 0;	565	return 0;
579	}	566	}
580		567
@@ -582,7 +569,9 @@ void
582	xfs_syncd_stop(	569	xfs_syncd_stop(
583	struct xfs_mount *mp)	570	struct xfs_mount *mp)
584	{	571	{
585	kthread_stop(mp->m_sync_task);	572	cancel_delayed_work_sync(&mp->m_sync_work);
		573	cancel_delayed_work_sync(&mp->m_reclaim_work);
		574	cancel_work_sync(&mp->m_flush_work);
586	}	575	}
587		576
588	void	577	void
@@ -601,6 +590,10 @@ __xfs_inode_set_reclaim_tag(
601	XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),	590	XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
602	XFS_ICI_RECLAIM_TAG);	591	XFS_ICI_RECLAIM_TAG);
603	spin_unlock(&ip->i_mount->m_perag_lock);	592	spin_unlock(&ip->i_mount->m_perag_lock);
		593
		594	/* schedule periodic background inode reclaim */
		595	xfs_syncd_queue_reclaim(ip->i_mount);
		596
604	trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,	597	trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
605	-1, _RET_IP_);	598	-1, _RET_IP_);
606	}	599	}
@@ -1017,7 +1010,13 @@ xfs_reclaim_inodes(
1017	}	1010	}
1018		1011
1019	/*	1012	/*
1020	* Shrinker infrastructure.	1013	* Inode cache shrinker.
		1014	*
		1015	* When called we make sure that there is a background (fast) inode reclaim in
		1016	* progress, while we will throttle the speed of reclaim via doing synchronous
		1017	* reclaim of inodes. That means if we come across dirty inodes, we wait for
		1018	* them to be cleaned, which we hope will not be very long due to the
		1019	* background walker having already kicked the IO off on those dirty inodes.
1021	*/	1020	*/
1022	static int	1021	static int
1023	xfs_reclaim_inode_shrink(	1022	xfs_reclaim_inode_shrink(
@@ -1032,10 +1031,15 @@ xfs_reclaim_inode_shrink(
1032		1031
1033	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);	1032	mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1034	if (nr_to_scan) {	1033	if (nr_to_scan) {
		1034	/* kick background reclaimer and push the AIL */
		1035	xfs_syncd_queue_reclaim(mp);
		1036	xfs_ail_push_all(mp->m_ail);
		1037
1035	if (!(gfp_mask & __GFP_FS))	1038	if (!(gfp_mask & __GFP_FS))
1036	return -1;	1039	return -1;
1037		1040
1038	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);	1041	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
		1042	&nr_to_scan);
1039	/* terminate if we don't exhaust the scan */	1043	/* terminate if we don't exhaust the scan */
1040	if (nr_to_scan > 0)	1044	if (nr_to_scan > 0)
1041	return -1;	1045	return -1;