aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-09-26 21:09:51 -0400
committerAlex Elder <aelder@sgi.com>2010-10-18 16:07:55 -0400
commit69b491c214d7fd4d4df972ae5377be99ca3753db (patch)
treeb0d022080d8da893e525ee6502878424cffbd8c2 /fs/xfs
parente3a20c0b02e1704ab115dfa9d012caf0fbc45ed0 (diff)
xfs: serialise inode reclaim within an AG
Memory reclaim via shrinkers has a terrible habit of having N+M concurrent shrinker executions (N = num CPUs, M = num kswapds) all trying to shrink the same cache. When the cache they are all working on is protected by a single spinlock, massive contention and slowdowns occur. Wrap the per-ag inode caches with a reclaim mutex to serialise reclaim access to the AG. This will block concurrent reclaim in each AG but still allow reclaim to scan multiple AGs concurrently. Allow shrinkers to move on to the next AG if it can't get the lock, and if we can't get any AG, then start blocking on locks. To prevent reclaimers from continually scanning the same inodes in each AG, add a cursor that tracks where the last reclaim got up to and start from that point on the next reclaim. This should avoid only ever scanning a small number of inodes at the start of each AG and not making progress. If we have a non-shrinker based reclaim pass, ignore the cursor and reset it to zero once we are done. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c30
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_mount.c1
3 files changed, 33 insertions, 0 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 754bc591a247..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -837,8 +837,12 @@ xfs_reclaim_inodes_ag(
837 int error = 0; 837 int error = 0;
838 int last_error = 0; 838 int last_error = 0;
839 xfs_agnumber_t ag; 839 xfs_agnumber_t ag;
840 int trylock = flags & SYNC_TRYLOCK;
841 int skipped;
840 842
843restart:
841 ag = 0; 844 ag = 0;
845 skipped = 0;
842 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 846 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
843 unsigned long first_index = 0; 847 unsigned long first_index = 0;
844 int done = 0; 848 int done = 0;
@@ -846,6 +850,15 @@ xfs_reclaim_inodes_ag(
846 850
847 ag = pag->pag_agno + 1; 851 ag = pag->pag_agno + 1;
848 852
853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++;
856 continue;
857 }
858 first_index = pag->pag_ici_reclaim_cursor;
859 } else
860 mutex_lock(&pag->pag_ici_reclaim_lock);
861
849 do { 862 do {
850 struct xfs_inode *batch[XFS_LOOKUP_BATCH]; 863 struct xfs_inode *batch[XFS_LOOKUP_BATCH];
851 int i; 864 int i;
@@ -898,8 +911,25 @@ xfs_reclaim_inodes_ag(
898 911
899 } while (nr_found && !done && *nr_to_scan > 0); 912 } while (nr_found && !done && *nr_to_scan > 0);
900 913
914 if (trylock && !done)
915 pag->pag_ici_reclaim_cursor = first_index;
916 else
917 pag->pag_ici_reclaim_cursor = 0;
918 mutex_unlock(&pag->pag_ici_reclaim_lock);
901 xfs_perag_put(pag); 919 xfs_perag_put(pag);
902 } 920 }
921
922 /*
923 * if we skipped any AG, and we still have scan count remaining, do
924 * another pass this time using blocking reclaim semantics (i.e.
925 * waiting on the reclaim locks and ignoring the reclaim cursors). This
926 * ensures that when we get more reclaimers than AGs we block rather
927 * than spin trying to execute reclaim.
928 */
929 if (trylock && skipped && *nr_to_scan > 0) {
930 trylock = 0;
931 goto restart;
932 }
903 return XFS_ERROR(last_error); 933 return XFS_ERROR(last_error);
904} 934}
905 935
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 51c42c202bf1..baeec83d01f9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,8 @@ typedef struct xfs_perag {
230 rwlock_t pag_ici_lock; /* incore inode lock */ 230 rwlock_t pag_ici_lock; /* incore inode lock */
231 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 231 struct radix_tree_root pag_ici_root; /* incore inode cache root */
232 int pag_ici_reclaimable; /* reclaimable inodes */ 232 int pag_ici_reclaimable; /* reclaimable inodes */
233 struct mutex pag_ici_reclaim_lock; /* serialisation point */
234 unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */
233 235
234 /* for rcu-safe freeing */ 236 /* for rcu-safe freeing */
235 struct rcu_head rcu_head; 237 struct rcu_head rcu_head;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d66e87c7c3a6..59859c343e04 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -477,6 +477,7 @@ xfs_initialize_perag(
477 pag->pag_agno = index; 477 pag->pag_agno = index;
478 pag->pag_mount = mp; 478 pag->pag_mount = mp;
479 rwlock_init(&pag->pag_ici_lock); 479 rwlock_init(&pag->pag_ici_lock);
480 mutex_init(&pag->pag_ici_reclaim_lock);
480 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 481 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
481 482
482 if (radix_tree_preload(GFP_NOFS)) 483 if (radix_tree_preload(GFP_NOFS))