aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Lameter <clameter@engr.sgi.com>2006-02-01 06:05:35 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-02-01 11:53:16 -0500
commit2a16e3f4b0c408b9e50297d2ec27e295d490267a (patch)
tree71569cfdf06fb83497e88e4e6543fbe624488aa3
parent1b2ffb7896ad46067f5b9ebf7de1891d74a4cdef (diff)
[PATCH] Reclaim slab during zone reclaim
If large amounts of zone memory are used by empty slabs then zone_reclaim becomes ineffective. This patch shakes the slab a bit. The problem with this patch is that slab reclaim cannot be confined to a zone. Thus slab reclaim may affect the whole system and be extremely slow. This also means that we cannot determine how many pages were freed in this zone. Thus we need to go off node for at least one allocation. The functionality is disabled by default. We could modify the shrinkers to take a zone parameter but that would be quite invasive. Better ideas are welcome. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--Documentation/sysctl/vm.txt6
-rw-r--r--mm/vmscan.c14
2 files changed, 20 insertions, 0 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 4bca2a3d9174..a46c10fcddfc 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -137,6 +137,7 @@ This is value ORed together of
1371 = Zone reclaim on 1371 = Zone reclaim on
1382 = Zone reclaim writes dirty pages out 1382 = Zone reclaim writes dirty pages out
1394 = Zone reclaim swaps pages 1394 = Zone reclaim swaps pages
1408 = Also do a global slab reclaim pass
140 141
141zone_reclaim_mode is set during bootup to 1 if it is determined that pages 142zone_reclaim_mode is set during bootup to 1 if it is determined that pages
142from remote zones will cause a measurable performance reduction. The 143from remote zones will cause a measurable performance reduction. The
@@ -160,6 +161,11 @@ Allowing regular swap effectively restricts allocations to the local
160node unless explicitly overridden by memory policies or cpuset 161node unless explicitly overridden by memory policies or cpuset
161configurations. 162configurations.
162 163
164It may be advisable to allow slab reclaim if the system makes heavy
165use of files and builds up large slab caches. However, the slab
166shrink operation is global, may take a long time and free slabs
167in all nodes of the system.
168
163================================================================ 169================================================================
164 170
165zone_reclaim_interval: 171zone_reclaim_interval:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9e2ef3624d77..aa4b80dbe3ad 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1596,6 +1596,7 @@ int zone_reclaim_mode __read_mostly;
1596#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ 1596#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
1597#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ 1597#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
1598#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ 1598#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
1599#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
1599 1600
1600/* 1601/*
1601 * Minimum time between zone reclaim scans 1602 * Minimum time between zone reclaim scans
@@ -1666,6 +1667,19 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1666 1667
1667 } while (sc.nr_reclaimed < nr_pages && sc.priority > 0); 1668 } while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
1668 1669
1670 if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
1671 /*
1672 * shrink_slab does not currently allow us to determine
1673 * how many pages were freed in the zone. So we just
1674 * shake the slab and then go offnode for a single allocation.
1675 *
1676 * shrink_slab will free memory on all zones and may take
1677 * a long time.
1678 */
1679 shrink_slab(sc.nr_scanned, gfp_mask, order);
1680 sc.nr_reclaimed = 1; /* Avoid getting the off node timeout */
1681 }
1682
1669 p->reclaim_state = NULL; 1683 p->reclaim_state = NULL;
1670 current->flags &= ~PF_MEMALLOC; 1684 current->flags &= ~PF_MEMALLOC;
1671 1685