path: root/mm/vmscan.c
author	Balbir Singh <balbir@linux.vnet.ibm.com>	2009-09-23 18:56:39 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-24 10:20:59 -0400
commit	4e41695356fb4e0b153be1440ad027e46e0a7ea2 (patch)
tree	547dae77d1655a1acb260ea8b266c7b8a48f2d2c /mm/vmscan.c
parent	75822b4495b62e8721e9b88e3cf9e653a0c85b73 (diff)
memory controller: soft limit reclaim on contention
Implement reclaim from groups over their soft limit.

Permit reclaim from memory cgroups on contention (via the direct reclaim
path).

Memory cgroup soft limit reclaim finds the group that exceeds its soft
limit by the largest number of pages and reclaims pages from it, then
reinserts the cgroup into its correct place in the rbtree.

Add additional checks to mem_cgroup_hierarchical_reclaim() to detect long
loops in case all swap is turned off.  The code has been refactored and
the loop check (loop < 2) has been enhanced for soft limits.  For soft
limits, we try to do more targeted reclaim.  Instead of bailing out after
two loops, the routine now reclaims memory proportional to the size by
which the soft limit is exceeded.  The proportion has been empirically
determined.

[akpm@linux-foundation.org: build fix]
[kamezawa.hiroyu@jp.fujitsu.com: fix softlimit css refcnt handling]
[nishimura@mxp.nes.nec.co.jp: refcount of the "victim" should be decremented before exiting the loop]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
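The victim-selection cycle the log describes (find the group that is furthest over its soft limit, reclaim from it, then reinsert it by its new excess) can be modelled outside the kernel. The sketch below is a plain userspace C illustration only: struct cgroup_model, excess(), reclaim_chunk() and the page counts are invented for this example and do not appear in the kernel sources, which keep the groups sorted in a per-node rbtree instead of scanning an array.

/*
 * Userspace model of the soft-limit victim-selection loop described in
 * the commit message.  All names and numbers here are illustrative.
 */
#include <stdio.h>

struct cgroup_model {
	const char *name;
	long usage;		/* pages currently charged */
	long soft_limit;	/* pages allowed before soft reclaim kicks in */
};

/* How far a group is over its soft limit; <= 0 means "leave it alone". */
static long excess(const struct cgroup_model *cg)
{
	return cg->usage - cg->soft_limit;
}

/* Stand-in for per-zone reclaim: take back at most "want" pages. */
static long reclaim_chunk(struct cgroup_model *cg, long want)
{
	long got = want < cg->usage ? want : cg->usage;

	cg->usage -= got;
	return got;
}

int main(void)
{
	struct cgroup_model groups[] = {
		{ "A", 1200, 1000 },
		{ "B", 5000, 2000 },
		{ "C",  800, 1000 },
	};
	int rounds;

	for (rounds = 0; rounds < 4; rounds++) {
		struct cgroup_model *victim = NULL;
		int i;

		/* Pick the group exceeding its soft limit by the most pages
		 * (the kernel keeps them sorted in an rbtree instead). */
		for (i = 0; i < 3; i++)
			if (excess(&groups[i]) > 0 &&
			    (!victim || excess(&groups[i]) > excess(victim)))
				victim = &groups[i];

		if (!victim)
			break;		/* nobody is over its soft limit */

		/* Reclaim an amount proportional to the excess; the group is
		 * then effectively reinserted with its new, smaller excess. */
		printf("round %d: reclaim %ld pages from %s\n", rounds,
		       reclaim_chunk(victim, excess(victim) / 2 + 1),
		       victim->name);
	}
	return 0;
}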
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	45
1 file changed, 44 insertions(+), 1 deletion(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 613e89f471d9..2423782214ab 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1836,11 +1836,45 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+						gfp_t gfp_mask, bool noswap,
+						unsigned int swappiness,
+						struct zone *zone, int nid)
+{
+	struct scan_control sc = {
+		.may_writepage = !laptop_mode,
+		.may_unmap = 1,
+		.may_swap = !noswap,
+		.swap_cluster_max = SWAP_CLUSTER_MAX,
+		.swappiness = swappiness,
+		.order = 0,
+		.mem_cgroup = mem,
+		.isolate_pages = mem_cgroup_isolate_pages,
+	};
+	nodemask_t nm = nodemask_of_node(nid);
+
+	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+	sc.nodemask = &nm;
+	sc.nr_reclaimed = 0;
+	sc.nr_scanned = 0;
+	/*
+	 * NOTE: Although we can get the priority field, using it
+	 * here is not a good idea, since it limits the pages we can scan.
+	 * if we don't reclaim here, the shrink_zone from balance_pgdat
+	 * will pick up pages from other mem cgroup's as well. We hack
+	 * the priority and make it zero.
+	 */
+	shrink_zone(0, zone, &sc);
+	return sc.nr_reclaimed;
+}
+
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 					   gfp_t gfp_mask,
 					   bool noswap,
 					   unsigned int swappiness)
 {
+	struct zonelist *zonelist;
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
@@ -1852,7 +1886,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 		.isolate_pages = mem_cgroup_isolate_pages,
 		.nodemask = NULL, /* we don't care the placement */
 	};
-	struct zonelist *zonelist;
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -1974,6 +2007,7 @@ loop_again:
 	for (i = 0; i <= end_zone; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 		int nr_slab;
+		int nid, zid;
 
 		if (!populated_zone(zone))
 			continue;
@@ -1988,6 +2022,15 @@ loop_again:
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
 			note_zone_scanning_priority(zone, priority);
+
+			nid = pgdat->node_id;
+			zid = zone_idx(zone);
+			/*
+			 * Call soft limit reclaim before calling shrink_zone.
+			 * For now we ignore the return value
+			 */
+			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
+							nid, zid);
 			/*
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
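The enhanced loop check in mem_cgroup_hierarchical_reclaim() mentioned in the log lives in mm/memcontrol.c and is not part of this vmscan.c hunk. As a rough illustration of the intent only, the userspace C sketch below models "reclaim proportional to the excess instead of bailing after two loops"; keep_reclaiming(), the loop bounds and the quarter-of-the-excess target are assumptions for this example, not the exact kernel logic or constants.

/*
 * Illustrative model of the soft-limit loop check; not the kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_RECLAIM_LOOPS	2	/* old bail-out point for every caller */
#define MAX_SOFT_LIMIT_LOOPS	100	/* assumed cap so a swapless system cannot spin forever */

static bool keep_reclaiming(bool check_soft, int loop,
			    unsigned long reclaimed, unsigned long excess)
{
	if (loop < MAX_RECLAIM_LOOPS)
		return true;		/* always allow the first couple of passes */
	if (!check_soft)
		return false;		/* ordinary reclaim still stops after two loops */
	/*
	 * Soft limit reclaim keeps going until an amount proportional to the
	 * excess over the soft limit has been reclaimed, with a hard cap on
	 * iterations as a safety net.
	 */
	return reclaimed < excess / 4 && loop < MAX_SOFT_LIMIT_LOOPS;
}

int main(void)
{
	/* A group 4000 pages over its soft limit: keep looping until about a
	 * quarter of the excess has come back, not just two passes. */
	unsigned long reclaimed = 0;
	int loop = 0;

	while (keep_reclaiming(true, loop, reclaimed, 4000)) {
		reclaimed += 128;	/* pretend each pass frees a chunk */
		loop++;
	}
	printf("stopped after %d loops, %lu pages reclaimed\n", loop, reclaimed);
	return 0;
}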