 Documentation/sysctl/vm.txt | 27
 include/linux/mmzone.h      |  3
 include/linux/swap.h        |  1
 include/linux/sysctl.h      |  1
 kernel/sysctl.c             | 11
 mm/page_alloc.c             | 17
 mm/vmscan.c                 | 58
 7 files changed, 90 insertions(+), 28 deletions(-)
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 7cee90223d3a..20d0d797f539 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -29,6 +29,7 @@ Currently, these files are in /proc/sys/vm:
 - drop-caches
 - zone_reclaim_mode
 - min_unmapped_ratio
+- min_slab_ratio
 - panic_on_oom
 
 ==============================================================
@@ -138,7 +139,6 @@ This is value ORed together of
 1 = Zone reclaim on
 2 = Zone reclaim writes dirty pages out
 4 = Zone reclaim swaps pages
-8 = Also do a global slab reclaim pass
 
 zone_reclaim_mode is set during bootup to 1 if it is determined that pages
 from remote zones will cause a measurable performance reduction. The
@@ -162,18 +162,13 @@ Allowing regular swap effectively restricts allocations to the local
 node unless explicitly overridden by memory policies or cpuset
 configurations.
 
-It may be advisable to allow slab reclaim if the system makes heavy
-use of files and builds up large slab caches. However, the slab
-shrink operation is global, may take a long time and free slabs
-in all nodes of the system.
-
 =============================================================
 
 min_unmapped_ratio:
 
 This is available only on NUMA kernels.
 
-A percentage of the file backed pages in each zone. Zone reclaim will only
+A percentage of the total pages in each zone. Zone reclaim will only
 occur if more than this percentage of pages are file backed and unmapped.
 This is to insure that a minimal amount of local pages is still available for
 file I/O even if the node is overallocated.
@@ -182,6 +177,24 @@ The default is 1 percent.
 
 =============================================================
 
+min_slab_ratio:
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone. On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+=============================================================
+
 panic_on_oom
 
 This enables or disables panic on out-of-memory feature. If this is set to 1,
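On a NUMA kernel with this patch applied, the knob documented above surfaces as /proc/sys/vm/min_slab_ratio. A minimal userspace sketch of reading and raising it; the path and the 0..100 clamp come from the patch, the value 10 is just an example, and writing requires root:

/* Sketch only, not part of the patch: query and set vm.min_slab_ratio. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/vm/min_slab_ratio", "r+");
        int ratio;

        if (!f) {
                /* Absent on non-NUMA kernels or before this patch. */
                perror("min_slab_ratio");
                return EXIT_FAILURE;
        }
        if (fscanf(f, "%d", &ratio) == 1)
                printf("current min_slab_ratio: %d%%\n", ratio);

        /* Raise the threshold to 10%; proc_dointvec_minmax clamps to 0..100. */
        rewind(f);
        fprintf(f, "10\n");
        fclose(f);
        return 0;
}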
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 08c41b9f92e0..3693f1a52788 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -171,6 +171,7 @@ struct zone {
 	 * zone reclaim becomes active if more unmapped pages exist.
 	 */
 	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -448,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
 			void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 32db06c8ffe0..a2f5ad7c2d2e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -193,6 +193,7 @@ extern long vm_total_pages;
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f8..eca555781d05 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
 	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+	VM_MIN_SLAB=35,		/* Percent pages ignored by zone reclaim */
 };
 
 
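The VM_MIN_SLAB name added above also makes the knob reachable through the legacy binary sysctl(2) interface, not only procfs. A hedged sketch for kernels of this era where the _sysctl syscall is still wired up; it assumes the userspace headers export CTL_VM, struct __sysctl_args, and (with this patch) VM_MIN_SLAB:

/* Sketch only: read vm.min_slab_ratio via the binary sysctl interface. */
#include <stdio.h>
#include <unistd.h>
#include <linux/sysctl.h>
#include <sys/syscall.h>

int main(void)
{
        int name[] = { CTL_VM, VM_MIN_SLAB };
        int ratio = 0;
        size_t len = sizeof(ratio);
        struct __sysctl_args args = {
                .name = name,
                .nlen = 2,
                .oldval = &ratio,
                .oldlenp = &len,
        };

        if (syscall(SYS__sysctl, &args) == 0)
                printf("vm.min_slab_ratio = %d\n", ratio);
        else
                perror("_sysctl");
        return 0;
}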
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 362a0cc37138..fd43c3e6786b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -943,6 +943,17 @@ static ctl_table vm_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.ctl_name	= VM_MIN_SLAB,
+		.procname	= "min_slab_ratio",
+		.data		= &sysctl_min_slab_ratio,
+		.maxlen		= sizeof(sysctl_min_slab_ratio),
+		.mode		= 0644,
+		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one_hundred,
+	},
 #endif
 #ifdef CONFIG_X86_32
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 47e98423b30d..cf913bdd433e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2005,6 +2005,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 #ifdef CONFIG_NUMA
 		zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
 						/ 100;
+		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		spin_lock_init(&zone->lock);
@@ -2318,6 +2319,22 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
 				sysctl_min_unmapped_ratio) / 100;
 	return 0;
 }
+
+int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	for_each_zone(zone)
+		zone->min_slab_pages = (zone->present_pages *
+				sysctl_min_slab_ratio) / 100;
+	return 0;
+}
 #endif
 
 /*
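The sysctl handler above and the boot-time initialization in free_area_init_core() convert the percentage into a per-zone page count with the same integer expression. A worked example with a hypothetical zone size, showing what the default ratio of 5 yields:

/* Illustration only: the patch's ratio-to-pages arithmetic in isolation. */
#include <stdio.h>

int main(void)
{
        unsigned long present_pages = 1048576;  /* hypothetical 4 GB zone of 4 KB pages */
        int sysctl_min_slab_ratio = 5;          /* default from the patch */

        unsigned long min_slab_pages =
                (present_pages * sysctl_min_slab_ratio) / 100;

        /* 52428 pages, i.e. roughly 204 MB of reclaimable slab per zone. */
        printf("min_slab_pages = %lu\n", min_slab_pages);
        return 0;
}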
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 349797ba4bac..089e943c4d38 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1527,7 +1527,6 @@ int zone_reclaim_mode __read_mostly;
 #define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
 #define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
 #define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
-#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
 
 /*
  * Priority for ZONE_RECLAIM. This determines the fraction of pages
@@ -1543,6 +1542,12 @@ int zone_reclaim_mode __read_mostly;
 int sysctl_min_unmapped_ratio = 1;
 
 /*
+ * If the number of slab pages in a zone grows beyond this percentage then
+ * slab reclaim needs to occur.
+ */
+int sysctl_min_slab_ratio = 5;
+
+/*
  * Try to free up some pages from this zone through reclaim.
  */
 static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
@@ -1573,29 +1578,37 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	/*
-	 * Free memory by calling shrink zone with increasing priorities
-	 * until we have enough memory freed.
-	 */
-	priority = ZONE_RECLAIM_PRIORITY;
-	do {
-		nr_reclaimed += shrink_zone(priority, zone, &sc);
-		priority--;
-	} while (priority >= 0 && nr_reclaimed < nr_pages);
+	if (zone_page_state(zone, NR_FILE_PAGES) -
+	    zone_page_state(zone, NR_FILE_MAPPED) >
+	    zone->min_unmapped_pages) {
+		/*
+		 * Free memory by calling shrink zone with increasing
+		 * priorities until we have enough memory freed.
+		 */
+		priority = ZONE_RECLAIM_PRIORITY;
+		do {
+			nr_reclaimed += shrink_zone(priority, zone, &sc);
+			priority--;
+		} while (priority >= 0 && nr_reclaimed < nr_pages);
+	}
 
-	if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+	if (zone_page_state(zone, NR_SLAB_RECLAIMABLE) > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
-		 * many pages were freed in this zone. So we just shake the slab
-		 * a bit and then go off node for this particular allocation
-		 * despite possibly having freed enough memory to allocate in
-		 * this zone. If we freed local memory then the next
-		 * allocations will be local again.
+		 * many pages were freed in this zone. So we take the current
+		 * number of slab pages and shake the slab until it is reduced
+		 * by the same nr_pages that we used for reclaiming unmapped
+		 * pages.
 		 *
-		 * shrink_slab will free memory on all zones and may take
-		 * a long time.
+		 * Note that shrink_slab will free memory on all zones and may
+		 * take a long time.
 		 */
-		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		unsigned long limit = zone_page_state(zone,
+				NR_SLAB_RECLAIMABLE) - nr_pages;
+
+		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+			zone_page_state(zone, NR_SLAB_RECLAIMABLE) > limit)
+			;
 	}
 
 	p->reclaim_state = NULL;
@@ -1609,7 +1622,8 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	int node_id;
 
 	/*
-	 * Zone reclaim reclaims unmapped file backed pages.
+	 * Zone reclaim reclaims unmapped file backed pages and
+	 * slab pages if we are over the defined limits.
 	 *
 	 * A small portion of unmapped file backed pages is needed for
 	 * file I/O otherwise pages read by file I/O will be immediately
@@ -1618,7 +1632,9 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * unmapped file backed pages.
 	 */
 	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+		zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
+		&& zone_page_state(zone, NR_SLAB_RECLAIMABLE)
+			<= zone->min_slab_pages)
 		return 0;
 
 	/*
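Taken together, the vmscan.c hunks turn zone reclaim from one unconditional pagecache pass plus an optional RECLAIM_SLAB pass into two independently gated passes. A standalone restatement of that gating logic with simplified types and made-up numbers; this is not kernel code:

/* Sketch: zone reclaim proceeds if either threshold is exceeded. */
#include <stdbool.h>
#include <stdio.h>

struct zone_stats {
        unsigned long file_pages;       /* NR_FILE_PAGES */
        unsigned long file_mapped;      /* NR_FILE_MAPPED */
        unsigned long slab_reclaimable; /* NR_SLAB_RECLAIMABLE */
        unsigned long min_unmapped_pages;
        unsigned long min_slab_pages;
};

static bool should_zone_reclaim(const struct zone_stats *z)
{
        bool unmapped_over = z->file_pages - z->file_mapped >
                                z->min_unmapped_pages;
        bool slab_over = z->slab_reclaimable > z->min_slab_pages;

        /* The patch bails out only when *both* are under their limits. */
        return unmapped_over || slab_over;
}

int main(void)
{
        struct zone_stats z = {
                .file_pages = 30000, .file_mapped = 29000,
                .slab_reclaimable = 60000,
                .min_unmapped_pages = 10000, .min_slab_pages = 52428,
        };

        /*
         * Unmapped file pages (1000) are under their limit, but slab
         * (60000 > 52428) is over its limit, so reclaim still runs.
         */
        printf("reclaim: %s\n", should_zone_reclaim(&z) ? "yes" : "no");
        return 0;
}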