xfs: serialise inode reclaim within an AG

Memory reclaim via shrinkers has a terrible habit of having N+M concurrent shrinker executions (N = num CPUs, M = num kswapds) all trying to shrink the same cache. When the cache they are all working on is protected by a single spinlock, massive contention and slowdowns occur. Wrap the per-ag inode caches with a reclaim mutex to serialise reclaim access to the AG. This will block concurrent reclaim in each AG but still allow reclaim to scan multiple AGs concurrently. Allow shrinkers to move on to the next AG if they can't get the lock, and if we can't get any AG, then start blocking on locks. To prevent reclaimers from continually scanning the same inodes in each AG, add a cursor that tracks where the last reclaim got up to and start from that point on the next reclaim. This should avoid only ever scanning a small number of inodes at the start of each AG and not making progress. If we have a non-shrinker based reclaim pass, ignore the cursor and reset it to zero once we are done. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Alex Elder <aelder@sgi.com>
author: Dave Chinner <dchinner@redhat.com> 2010-09-26 21:09:51 -0400
committer: Alex Elder <aelder@sgi.com> 2010-10-18 16:07:55 -0400
commit: 69b491c214d7fd4d4df972ae5377be99ca3753db (patch)
tree: b0d022080d8da893e525ee6502878424cffbd8c2 /fs/xfs/linux-2.6
parent: e3a20c0b02e1704ab115dfa9d012caf0fbc45ed0 (diff)
1 files changed, 30 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 754bc591a247..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -837,8 +837,12 @@ xfs_reclaim_inodes_ag(
        int                     error = 0;
        int                     last_error = 0;
        xfs_agnumber_t          ag;
+        int                     trylock = flags & SYNC_TRYLOCK;
+        int                     skipped;
+restart:
        ag = 0;
+        skipped = 0;
        while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
                unsigned long   first_index = 0;
                int             done = 0;
@@ -846,6 +850,15 @@ xfs_reclaim_inodes_ag(
                ag = pag->pag_agno + 1;
+                if (trylock) {
+                        if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                                skipped++;
+                                continue;
+                        }
+                        first_index = pag->pag_ici_reclaim_cursor;
+                } else
+                        mutex_lock(&pag->pag_ici_reclaim_lock);
                do {
                        struct xfs_inode *batch[XFS_LOOKUP_BATCH];
                        int     i;
@@ -898,8 +911,25 @@ xfs_reclaim_inodes_ag(
                } while (nr_found && !done && *nr_to_scan > 0);
+                if (trylock && !done)
+                        pag->pag_ici_reclaim_cursor = first_index;
+                else
+                        pag->pag_ici_reclaim_cursor = 0;
+                mutex_unlock(&pag->pag_ici_reclaim_lock);
                xfs_perag_put(pag);
        }
+        /*
+         * if we skipped any AG, and we still have scan count remaining, do
         * another pass this time using blocking reclaim semantics (i.e.
         * waiting on the reclaim locks and ignoring the reclaim cursors). This
         * ensures that when we get more reclaimers than AGs we block rather
+         * than spin trying to execute reclaim.
+         */
+        if (trylock && skipped && *nr_to_scan > 0) {
+                trylock = 0;
+                goto restart;
+        }
        return XFS_ERROR(last_error);
 }
author	Dave Chinner <dchinner@redhat.com>	2010-09-26 21:09:51 -0400
committer	Alex Elder <aelder@sgi.com>	2010-10-18 16:07:55 -0400
commit	69b491c214d7fd4d4df972ae5377be99ca3753db (patch)
tree	b0d022080d8da893e525ee6502878424cffbd8c2 /fs/xfs/linux-2.6
parent	e3a20c0b02e1704ab115dfa9d012caf0fbc45ed0 (diff)

diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 754bc591a247..37d33254981d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -837,8 +837,12 @@ xfs_reclaim_inodes_ag(
837	int error = 0;	837	int error = 0;
838	int last_error = 0;	838	int last_error = 0;
839	xfs_agnumber_t ag;	839	xfs_agnumber_t ag;
		840	int trylock = flags & SYNC_TRYLOCK;
		841	int skipped;
840		842
		843	restart:
841	ag = 0;	844	ag = 0;
		845	skipped = 0;
842	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {	846	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
843	unsigned long first_index = 0;	847	unsigned long first_index = 0;
844	int done = 0;	848	int done = 0;
@@ -846,6 +850,15 @@ xfs_reclaim_inodes_ag(
846		850
847	ag = pag->pag_agno + 1;	851	ag = pag->pag_agno + 1;
848		852
		853	if (trylock) {
		854	if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
		855	skipped++;
		856	continue;
		857	}
		858	first_index = pag->pag_ici_reclaim_cursor;
		859	} else
		860	mutex_lock(&pag->pag_ici_reclaim_lock);
		861
849	do {	862	do {
850	struct xfs_inode *batch[XFS_LOOKUP_BATCH];	863	struct xfs_inode *batch[XFS_LOOKUP_BATCH];
851	int i;	864	int i;
@@ -898,8 +911,25 @@ xfs_reclaim_inodes_ag(
898		911
899	} while (nr_found && !done && *nr_to_scan > 0);	912	} while (nr_found && !done && *nr_to_scan > 0);
900		913
		914	if (trylock && !done)
		915	pag->pag_ici_reclaim_cursor = first_index;
		916	else
		917	pag->pag_ici_reclaim_cursor = 0;
		918	mutex_unlock(&pag->pag_ici_reclaim_lock);
901	xfs_perag_put(pag);	919	xfs_perag_put(pag);
902	}	920	}
		921
		922	/*
		923	* if we skipped any AG, and we still have scan count remaining, do
		924	* another pass this time using blocking reclaim semantics (i.e.
		925	* waiting on the reclaim locks and ignoring the reclaim cursors). This
		926	* ensures that when we get more reclaimers than AGs we block rather
		927	* than spin trying to execute reclaim.
		928	*/
		929	if (trylock && skipped && *nr_to_scan > 0) {
		930	trylock = 0;
		931	goto restart;
		932	}
903	return XFS_ERROR(last_error);	933	return XFS_ERROR(last_error);
904	}	934	}
905		935