path: root/mm
author     Johannes Weiner <jweiner@redhat.com>  2011-11-02 16:38:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-11-02 19:07:00 -0400
commit     9b272977e3b99a8699361d214b51f98c8a9e0e7b (patch)
tree       2113cee95a42ea893aa6eddb01b14e563153fabb /mm
parent     0a619e58703b86d53d07e938eade9a91a4a863c6 (diff)
memcg: skip scanning active lists based on individual size
Reclaim decides to skip scanning an active list when the corresponding
inactive list is above a certain size in comparison, in order to leave the
assumed working set alone while there are still enough reclaim candidates
around.

The memcg implementation of comparing those lists instead reports whether
the whole memcg is low on the requested type of inactive pages,
considering all nodes and zones.

This can lead to an oversized active list not being scanned because of
the state of the other lists in the memcg, as well as an active list
being scanned while its corresponding inactive list has enough pages.

Not only is this wrong, it's also a scalability hazard, because the
global memory state over all nodes and zones has to be gathered for each
memcg and zone scanned.

Make these calculations purely based on the size of the two LRU lists
that are actually affected by the outcome of the decision.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
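For readers outside the kernel tree, the per-zone decision the patch introduces can be sketched in plain userspace C. The sketch below mirrors the arithmetic visible in the new mem_cgroup_inactive_anon_is_low(): derive a ratio from the size of the zone's own anon LRU and report the inactive list as low only when it is smaller than active / ratio. The isqrt() helper, the hard-coded PAGE_SHIFT, and the example sizes are assumptions for illustration, not kernel code.

/*
 * Standalone sketch of the per-zone heuristic described above.
 * Not kernel code: isqrt() stands in for the kernel's int_sqrt(),
 * and PAGE_SHIFT is assumed to be 12 (4 KiB pages).
 */
#include <stdio.h>

#define PAGE_SHIFT 12

/* naive integer square root, adequate for the small ratios involved */
static unsigned long isqrt(unsigned long x)
{
	unsigned long r = 0;

	while ((r + 1) * (r + 1) <= x)
		r++;
	return r;
}

/*
 * Mirrors the calculation in mem_cgroup_inactive_anon_is_low():
 * nonzero means the zone's inactive anon list is low relative to its
 * active list, so reclaim should deactivate pages from the active list.
 */
static int inactive_anon_is_low(unsigned long inactive, unsigned long active)
{
	unsigned long gb = (inactive + active) >> (30 - PAGE_SHIFT);
	unsigned long inactive_ratio = gb ? isqrt(10 * gb) : 1;

	return inactive * inactive_ratio < active;
}

int main(void)
{
	unsigned long gib = 1UL << (30 - PAGE_SHIFT);	/* pages per GiB */

	/* 1 GiB inactive vs 3 GiB active: ratio 6, 1*6 >= 3 -> prints 0 */
	printf("%d\n", inactive_anon_is_low(1 * gib, 3 * gib));
	/* 0.25 GiB inactive vs 3.75 GiB active: 0.25*6 < 3.75 -> prints 1 */
	printf("%d\n", inactive_anon_is_low(gib / 4, 3 * gib + 3 * gib / 4));
	return 0;
}

With 4 KiB pages, gb counts whole gigabytes on the zone's anon LRU, so a 4 GiB anon LRU gives inactive_ratio = int_sqrt(40) = 6: the active list is only scanned once the inactive list falls below roughly one sixth of the active list, and only the two lists affected by the decision enter the calculation.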
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  51
-rw-r--r--  mm/vmscan.c       4
2 files changed, 19 insertions, 36 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6c4beb4db56..ce7b35d024e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1104,15 +1104,19 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
 	return ret;
 }
 
-static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages)
+int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
-	unsigned long active;
+	unsigned long inactive_ratio;
+	int nid = zone_to_nid(zone);
+	int zid = zone_idx(zone);
 	unsigned long inactive;
+	unsigned long active;
 	unsigned long gb;
-	unsigned long inactive_ratio;
 
-	inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
-	active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
+	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+						BIT(LRU_INACTIVE_ANON));
+	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+						BIT(LRU_ACTIVE_ANON));
 
 	gb = (inactive + active) >> (30 - PAGE_SHIFT);
 	if (gb)
@@ -1120,39 +1124,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
 	else
 		inactive_ratio = 1;
 
-	if (present_pages) {
-		present_pages[0] = inactive;
-		present_pages[1] = active;
-	}
-
-	return inactive_ratio;
-}
-
-int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg)
-{
-	unsigned long active;
-	unsigned long inactive;
-	unsigned long present_pages[2];
-	unsigned long inactive_ratio;
-
-	inactive_ratio = calc_inactive_ratio(memcg, present_pages);
-
-	inactive = present_pages[0];
-	active = present_pages[1];
-
-	if (inactive * inactive_ratio < active)
-		return 1;
-
-	return 0;
-}
-
-int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
+	return inactive * inactive_ratio < active;
+}
+
+int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
 {
 	unsigned long active;
 	unsigned long inactive;
+	int zid = zone_idx(zone);
+	int nid = zone_to_nid(zone);
 
-	inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
-	active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
+	inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+						BIT(LRU_INACTIVE_FILE));
+	active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
+						BIT(LRU_ACTIVE_FILE));
 
 	return (active > inactive);
 }
@@ -4192,8 +4177,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
 	}
 
 #ifdef CONFIG_DEBUG_VM
-	cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
-
 	{
 		int nid, zid;
 		struct mem_cgroup_per_zone *mz;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a90c603a8d02..132d1ddb2238 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	if (scanning_global_lru(sc))
 		low = inactive_anon_is_low_global(zone);
 	else
-		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
+		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
 	return low;
 }
 #else
@@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
 	if (scanning_global_lru(sc))
 		low = inactive_file_is_low_global(zone);
 	else
-		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
 	return low;
 }
 