aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
author: Christoph Lameter <clameter@sgi.com> 2006-09-26 02:31:52 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-09-26 11:48:51 -0400
commit: 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 (patch)
tree: cb42d5d3cace3c8d12f0b304879039c503807981 /mm/vmscan.c
parent: 972d1a7b140569084439a81265a0f15b74e924e0 (diff)
[PATCH] zone_reclaim: dynamic slab reclaim
Currently one can enable slab reclaim by setting an explicit option in /proc/sys/vm/zone_reclaim_mode. Slab reclaim is then used as a final option if the freeing of unmapped file backed pages is not enough to free enough pages to allow a local allocation.

However, that means that the slab can grow excessively and that most memory of a node may be used by slabs. We have had a case where a machine with 46GB of memory was using 40-42GB for slab. Zone reclaim was effective in dealing with pagecache pages. However, slab reclaim was only done during global reclaim (which is a bit rare on NUMA systems).

This patch implements slab reclaim during zone reclaim. Zone reclaim occurs if there is a danger of an off node allocation. At that point we

1. Shrink the per node page cache if the number of pagecache pages is more than min_unmapped_ratio percent of pages in a zone.

2. Shrink the slab cache if the number of the node's reclaimable slab pages (patch depends on earlier one that implements that counter) is more than min_slab_ratio (a new /proc/sys/vm tunable).

The shrinking of the slab cache is a bit problematic since it is not node specific. So we simply calculate what point in the slab we want to reach (current per node slab use minus the number of pages that need to be allocated) and then repeatedly run the global reclaim until that is unsuccessful or we have reached the limit. I hope we will have zone based slab reclaim at some point which will make that easier.

The default for the min_slab_ratio is 5%.

Also remove the slab option from /proc/sys/vm/zone_reclaim_mode.

[akpm@osdl.org: cleanups]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c58
1 files changed, 37 insertions, 21 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 349797ba4bac..089e943c4d38 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1527,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
1527#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */ 1527#define RECLAIM_ZONE (1<<0) /* Run shrink_cache on the zone */
1528#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ 1528#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
1529#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ 1529#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
1530#define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */
1531 1530
1532/* 1531/*
1533 * Priority for ZONE_RECLAIM. This determines the fraction of pages 1532 * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1543,6 +1542,12 @@ int zone_reclaim_mode __read_mostly;
1543int sysctl_min_unmapped_ratio = 1; 1542int sysctl_min_unmapped_ratio = 1;
1544 1543
1545/* 1544/*
1545 * If the number of slab pages in a zone grows beyond this percentage then
1546 * slab reclaim needs to occur.
1547 */
1548int sysctl_min_slab_ratio = 5;
1549
1550/*
1546 * Try to free up some pages from this zone through reclaim. 1551 * Try to free up some pages from this zone through reclaim.
1547 */ 1552 */
1548static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) 1553static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1573,29 +1578,37 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1573 reclaim_state.reclaimed_slab = 0; 1578 reclaim_state.reclaimed_slab = 0;
1574 p->reclaim_state = &reclaim_state; 1579 p->reclaim_state = &reclaim_state;
1575 1580
1576 /* 1581 if (zone_page_state(zone, NR_FILE_PAGES) -
1577 * Free memory by calling shrink zone with increasing priorities 1582 zone_page_state(zone, NR_FILE_MAPPED) >
1578 * until we have enough memory freed. 1583 zone->min_unmapped_pages) {
1579 */ 1584 /*
1580 priority = ZONE_RECLAIM_PRIORITY; 1585 * Free memory by calling shrink zone with increasing
1581 do { 1586 * priorities until we have enough memory freed.
1582 nr_reclaimed += shrink_zone(priority, zone, &sc); 1587 */
1583 priority--; 1588 priority = ZONE_RECLAIM_PRIORITY;
1584 } while (priority >= 0 && nr_reclaimed < nr_pages); 1589 do {
1590 nr_reclaimed += shrink_zone(priority, zone, &sc);
1591 priority--;
1592 } while (priority >= 0 && nr_reclaimed < nr_pages);
1593 }
1585 1594
1586 if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) { 1595 if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) {
1587 /* 1596 /*
1588 * shrink_slab() does not currently allow us to determine how 1597 * shrink_slab() does not currently allow us to determine how
1589 * many pages were freed in this zone. So we just shake the slab 1598 * many pages were freed in this zone. So we take the current
1590 * a bit and then go off node for this particular allocation 1599 * number of slab pages and shake the slab until it is reduced
1591 * despite possibly having freed enough memory to allocate in 1600 * by the same nr_pages that we used for reclaiming unmapped
1592 * this zone. If we freed local memory then the next 1601 * pages.
1593 * allocations will be local again.
1594 * 1602 *
1595 * shrink_slab will free memory on all zones and may take 1603 * Note that shrink_slab will free memory on all zones and may
1596 * a long time. 1604 * take a long time.
1597 */ 1605 */
1598 shrink_slab(sc.nr_scanned, gfp_mask, order); 1606 unsigned long limit = zone_page_state(zone,
1607 NR_SLAB_RECLAIMABLE) - nr_pages;
1608
1609 while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
1610 zone_page_state(zone, NR_SLAB_RECLAIMABLE) > limit)
1611 ;
1599 } 1612 }
1600 1613
1601 p->reclaim_state = NULL; 1614 p->reclaim_state = NULL;
@@ -1609,7 +1622,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1609 int node_id; 1622 int node_id;
1610 1623
1611 /* 1624 /*
1612 * Zone reclaim reclaims unmapped file backed pages. 1625 * Zone reclaim reclaims unmapped file backed pages and
1626 * slab pages if we are over the defined limits.
1613 * 1627 *
1614 * A small portion of unmapped file backed pages is needed for 1628 * A small portion of unmapped file backed pages is needed for
1615 * file I/O otherwise pages read by file I/O will be immediately 1629 * file I/O otherwise pages read by file I/O will be immediately
@@ -1618,7 +1632,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
1618 * unmapped file backed pages. 1632 * unmapped file backed pages.
1619 */ 1633 */
1620 if (zone_page_state(zone, NR_FILE_PAGES) - 1634 if (zone_page_state(zone, NR_FILE_PAGES) -
1621 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages) 1635 zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
1636 && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
1637 <= zone->min_slab_pages)
1622 return 0; 1638 return 0;
1623 1639
1624 /* 1640 /*