aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
author: Johannes Weiner <jweiner@redhat.com> 2011-09-14 19:21:58 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-09-14 21:09:38 -0400
commit: 185efc0f9a1f2d6ad6d4782c5d9e529f3290567f (patch)
tree: 9330dac6b7f17fad7d99e444b3544210109e2d99 /mm/memcontrol.c
parent: a4d3e9e76337059406fcf3ead288c0df22a790e9 (diff)
memcg: Revert "memcg: add memory.vmscan_stat"
Revert the post-3.0 commit 82f9d486e59f5 ("memcg: add memory.vmscan_stat").

The implementation of per-memcg reclaim statistics violates how memcg hierarchies usually behave: hierarchically.

The reclaim statistics are accounted to child memcgs and the parent hitting the limit, but not to hierarchy levels in between. Usually, hierarchical statistics are perfectly recursive, with each level representing the sum of itself and all its children.

Since this exports statistics to userspace, this may lead to confusion and problems with changing things after the release, so revert it now, we can try again later.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Ying Han <yinghan@google.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 172
1 files changed, 6 insertions, 166 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ebd1e86bef1c..3508777837c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
204static void mem_cgroup_threshold(struct mem_cgroup *mem); 204static void mem_cgroup_threshold(struct mem_cgroup *mem);
205static void mem_cgroup_oom_notify(struct mem_cgroup *mem); 205static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
206 206
207enum {
208 SCAN_BY_LIMIT,
209 SCAN_BY_SYSTEM,
210 NR_SCAN_CONTEXT,
211 SCAN_BY_SHRINK, /* not recorded now */
212};
213
214enum {
215 SCAN,
216 SCAN_ANON,
217 SCAN_FILE,
218 ROTATE,
219 ROTATE_ANON,
220 ROTATE_FILE,
221 FREED,
222 FREED_ANON,
223 FREED_FILE,
224 ELAPSED,
225 NR_SCANSTATS,
226};
227
228struct scanstat {
229 spinlock_t lock;
230 unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
231 unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
232};
233
234const char *scanstat_string[NR_SCANSTATS] = {
235 "scanned_pages",
236 "scanned_anon_pages",
237 "scanned_file_pages",
238 "rotated_pages",
239 "rotated_anon_pages",
240 "rotated_file_pages",
241 "freed_pages",
242 "freed_anon_pages",
243 "freed_file_pages",
244 "elapsed_ns",
245};
246#define SCANSTAT_WORD_LIMIT "_by_limit"
247#define SCANSTAT_WORD_SYSTEM "_by_system"
248#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
249
250
251/* 207/*
252 * The memory controller data structure. The memory controller controls both 208 * The memory controller data structure. The memory controller controls both
253 * page cache and RSS per cgroup. We would eventually like to provide 209 * page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
313 269
314 /* For oom notifier event fd */ 270 /* For oom notifier event fd */
315 struct list_head oom_notify; 271 struct list_head oom_notify;
316 /* For recording LRU-scan statistics */ 272
317 struct scanstat scanstat;
318 /* 273 /*
319 * Should we move charges of a task when a task is moved into this 274 * Should we move charges of a task when a task is moved into this
320 * mem_cgroup ? And what type of charges should we move ? 275 * mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1678} 1633}
1679#endif 1634#endif
1680 1635
1681static void __mem_cgroup_record_scanstat(unsigned long *stats,
1682 struct memcg_scanrecord *rec)
1683{
1684
1685 stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
1686 stats[SCAN_ANON] += rec->nr_scanned[0];
1687 stats[SCAN_FILE] += rec->nr_scanned[1];
1688
1689 stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
1690 stats[ROTATE_ANON] += rec->nr_rotated[0];
1691 stats[ROTATE_FILE] += rec->nr_rotated[1];
1692
1693 stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
1694 stats[FREED_ANON] += rec->nr_freed[0];
1695 stats[FREED_FILE] += rec->nr_freed[1];
1696
1697 stats[ELAPSED] += rec->elapsed;
1698}
1699
1700static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
1701{
1702 struct mem_cgroup *mem;
1703 int context = rec->context;
1704
1705 if (context >= NR_SCAN_CONTEXT)
1706 return;
1707
1708 mem = rec->mem;
1709 spin_lock(&mem->scanstat.lock);
1710 __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
1711 spin_unlock(&mem->scanstat.lock);
1712
1713 mem = rec->root;
1714 spin_lock(&mem->scanstat.lock);
1715 __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
1716 spin_unlock(&mem->scanstat.lock);
1717}
1718
1719/* 1636/*
1720 * Scan the hierarchy if needed to reclaim memory. We remember the last child 1637 * Scan the hierarchy if needed to reclaim memory. We remember the last child
1721 * we reclaimed from, so that we don't end up penalizing one child extensively 1638 * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1740 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; 1657 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
1741 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; 1658 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1742 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1659 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1743 struct memcg_scanrecord rec;
1744 unsigned long excess; 1660 unsigned long excess;
1745 unsigned long scanned; 1661 unsigned long nr_scanned;
1746 1662
1747 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1663 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
1748 1664
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1750 if (!check_soft && !shrink && root_mem->memsw_is_minimum) 1666 if (!check_soft && !shrink && root_mem->memsw_is_minimum)
1751 noswap = true; 1667 noswap = true;
1752 1668
1753 if (shrink)
1754 rec.context = SCAN_BY_SHRINK;
1755 else if (check_soft)
1756 rec.context = SCAN_BY_SYSTEM;
1757 else
1758 rec.context = SCAN_BY_LIMIT;
1759
1760 rec.root = root_mem;
1761
1762 while (1) { 1669 while (1) {
1763 victim = mem_cgroup_select_victim(root_mem); 1670 victim = mem_cgroup_select_victim(root_mem);
1764 if (victim == root_mem) { 1671 if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1799 css_put(&victim->css); 1706 css_put(&victim->css);
1800 continue; 1707 continue;
1801 } 1708 }
1802 rec.mem = victim;
1803 rec.nr_scanned[0] = 0;
1804 rec.nr_scanned[1] = 0;
1805 rec.nr_rotated[0] = 0;
1806 rec.nr_rotated[1] = 0;
1807 rec.nr_freed[0] = 0;
1808 rec.nr_freed[1] = 0;
1809 rec.elapsed = 0;
1810 /* we use swappiness of local cgroup */ 1709 /* we use swappiness of local cgroup */
1811 if (check_soft) { 1710 if (check_soft) {
1812 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, 1711 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1813 noswap, zone, &rec, &scanned); 1712 noswap, zone, &nr_scanned);
1814 *total_scanned += scanned; 1713 *total_scanned += nr_scanned;
1815 } else 1714 } else
1816 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, 1715 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1817 noswap, &rec); 1716 noswap);
1818 mem_cgroup_record_scanstat(&rec);
1819 css_put(&victim->css); 1717 css_put(&victim->css);
1820 /* 1718 /*
1821 * At shrinking usage, we can't check we should stop here or 1719 * At shrinking usage, we can't check we should stop here or
@@ -3854,18 +3752,14 @@ try_to_free:
3854 /* try to free all pages in this cgroup */ 3752 /* try to free all pages in this cgroup */
3855 shrink = 1; 3753 shrink = 1;
3856 while (nr_retries && mem->res.usage > 0) { 3754 while (nr_retries && mem->res.usage > 0) {
3857 struct memcg_scanrecord rec;
3858 int progress; 3755 int progress;
3859 3756
3860 if (signal_pending(current)) { 3757 if (signal_pending(current)) {
3861 ret = -EINTR; 3758 ret = -EINTR;
3862 goto out; 3759 goto out;
3863 } 3760 }
3864 rec.context = SCAN_BY_SHRINK;
3865 rec.mem = mem;
3866 rec.root = mem;
3867 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, 3761 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
3868 false, &rec); 3762 false);
3869 if (!progress) { 3763 if (!progress) {
3870 nr_retries--; 3764 nr_retries--;
3871 /* maybe some writeback is necessary */ 3765 /* maybe some writeback is necessary */
@@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4709} 4603}
4710#endif /* CONFIG_NUMA */ 4604#endif /* CONFIG_NUMA */
4711 4605
4712static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
4713 struct cftype *cft,
4714 struct cgroup_map_cb *cb)
4715{
4716 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4717 char string[64];
4718 int i;
4719
4720 for (i = 0; i < NR_SCANSTATS; i++) {
4721 strcpy(string, scanstat_string[i]);
4722 strcat(string, SCANSTAT_WORD_LIMIT);
4723 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
4724 }
4725
4726 for (i = 0; i < NR_SCANSTATS; i++) {
4727 strcpy(string, scanstat_string[i]);
4728 strcat(string, SCANSTAT_WORD_SYSTEM);
4729 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
4730 }
4731
4732 for (i = 0; i < NR_SCANSTATS; i++) {
4733 strcpy(string, scanstat_string[i]);
4734 strcat(string, SCANSTAT_WORD_LIMIT);
4735 strcat(string, SCANSTAT_WORD_HIERARCHY);
4736 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
4737 }
4738 for (i = 0; i < NR_SCANSTATS; i++) {
4739 strcpy(string, scanstat_string[i]);
4740 strcat(string, SCANSTAT_WORD_SYSTEM);
4741 strcat(string, SCANSTAT_WORD_HIERARCHY);
4742 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
4743 }
4744 return 0;
4745}
4746
4747static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
4748 unsigned int event)
4749{
4750 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4751
4752 spin_lock(&mem->scanstat.lock);
4753 memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
4754 memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
4755 spin_unlock(&mem->scanstat.lock);
4756 return 0;
4757}
4758
4759
4760static struct cftype mem_cgroup_files[] = { 4606static struct cftype mem_cgroup_files[] = {
4761 { 4607 {
4762 .name = "usage_in_bytes", 4608 .name = "usage_in_bytes",
@@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
4827 .mode = S_IRUGO, 4673 .mode = S_IRUGO,
4828 }, 4674 },
4829#endif 4675#endif
4830 {
4831 .name = "vmscan_stat",
4832 .read_map = mem_cgroup_vmscan_stat_read,
4833 .trigger = mem_cgroup_reset_vmscan_stat,
4834 },
4835}; 4676};
4836 4677
4837#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4678#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5095 atomic_set(&mem->refcnt, 1); 4936 atomic_set(&mem->refcnt, 1);
5096 mem->move_charge_at_immigrate = 0; 4937 mem->move_charge_at_immigrate = 0;
5097 mutex_init(&mem->thresholds_lock); 4938 mutex_init(&mem->thresholds_lock);
5098 spin_lock_init(&mem->scanstat.lock);
5099 return &mem->css; 4939 return &mem->css;
5100free_out: 4940free_out:
5101 __mem_cgroup_free(mem); 4941 __mem_cgroup_free(mem);