aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 172
1 files changed, 166 insertions, 6 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dfeca594fd7a..04e505bfd7dd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -205,6 +205,50 @@ struct mem_cgroup_eventfd_list {
205static void mem_cgroup_threshold(struct mem_cgroup *mem); 205static void mem_cgroup_threshold(struct mem_cgroup *mem);
206static void mem_cgroup_oom_notify(struct mem_cgroup *mem); 206static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
207 207
/*
 * Reclaim contexts attached to a scan record.
 *
 * Values below NR_SCAN_CONTEXT are used as the first index of the
 * counter arrays in struct scanstat.  SCAN_BY_SHRINK is deliberately
 * placed after the sentinel: it can be stored in a record, but has no
 * counter row and is therefore never accounted.
 */
enum {
	SCAN_BY_LIMIT = 0,	/* neither shrink nor soft-limit reclaim */
	SCAN_BY_SYSTEM,		/* soft-limit (check_soft) reclaim */
	NR_SCAN_CONTEXT,	/* number of contexts that have counters */
	SCAN_BY_SHRINK,		/* not recorded now */
};
214
/*
 * Column indices of one row of scan statistics.  Each group holds a
 * total followed by its anon and file parts; the order must match the
 * scanstat_string[] name table.
 */
enum {
	/* pages scanned */
	SCAN = 0,
	SCAN_ANON,
	SCAN_FILE,

	/* pages rotated */
	ROTATE,
	ROTATE_ANON,
	ROTATE_FILE,

	/* pages freed */
	FREED,
	FREED_ANON,
	FREED_FILE,

	/* elapsed time, exported as "elapsed_ns" */
	ELAPSED,

	NR_SCANSTATS,		/* number of columns, keep last */
};
228
229struct scanstat {
230 spinlock_t lock;
231 unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
232 unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
233};
234
235const char *scanstat_string[NR_SCANSTATS] = {
236 "scanned_pages",
237 "scanned_anon_pages",
238 "scanned_file_pages",
239 "rotated_pages",
240 "rotated_anon_pages",
241 "rotated_file_pages",
242 "freed_pages",
243 "freed_anon_pages",
244 "freed_file_pages",
245 "elapsed_ns",
246};
247#define SCANSTAT_WORD_LIMIT "_by_limit"
248#define SCANSTAT_WORD_SYSTEM "_by_system"
249#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
250
251
208/* 252/*
209 * The memory controller data structure. The memory controller controls both 253 * The memory controller data structure. The memory controller controls both
210 * page cache and RSS per cgroup. We would eventually like to provide 254 * page cache and RSS per cgroup. We would eventually like to provide
@@ -270,7 +314,8 @@ struct mem_cgroup {
270 314
271 /* For oom notifier event fd */ 315 /* For oom notifier event fd */
272 struct list_head oom_notify; 316 struct list_head oom_notify;
273 317 /* For recording LRU-scan statistics */
318 struct scanstat scanstat;
274 /* 319 /*
275 * Should we move charges of a task when a task is moved into this 320 * Should we move charges of a task when a task is moved into this
276 * mem_cgroup ? And what type of charges should we move ? 321 * mem_cgroup ? And what type of charges should we move ?
@@ -1623,6 +1668,44 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1623} 1668}
1624#endif 1669#endif
1625 1670
1671static void __mem_cgroup_record_scanstat(unsigned long *stats,
1672 struct memcg_scanrecord *rec)
1673{
1674
1675 stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
1676 stats[SCAN_ANON] += rec->nr_scanned[0];
1677 stats[SCAN_FILE] += rec->nr_scanned[1];
1678
1679 stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
1680 stats[ROTATE_ANON] += rec->nr_rotated[0];
1681 stats[ROTATE_FILE] += rec->nr_rotated[1];
1682
1683 stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
1684 stats[FREED_ANON] += rec->nr_freed[0];
1685 stats[FREED_FILE] += rec->nr_freed[1];
1686
1687 stats[ELAPSED] += rec->elapsed;
1688}
1689
1690static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
1691{
1692 struct mem_cgroup *mem;
1693 int context = rec->context;
1694
1695 if (context >= NR_SCAN_CONTEXT)
1696 return;
1697
1698 mem = rec->mem;
1699 spin_lock(&mem->scanstat.lock);
1700 __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
1701 spin_unlock(&mem->scanstat.lock);
1702
1703 mem = rec->root;
1704 spin_lock(&mem->scanstat.lock);
1705 __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
1706 spin_unlock(&mem->scanstat.lock);
1707}
1708
1626/* 1709/*
1627 * Scan the hierarchy if needed to reclaim memory. We remember the last child 1710 * Scan the hierarchy if needed to reclaim memory. We remember the last child
1628 * we reclaimed from, so that we don't end up penalizing one child extensively 1711 * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1647,8 +1730,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1647 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; 1730 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
1648 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; 1731 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1649 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1732 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1733 struct memcg_scanrecord rec;
1650 unsigned long excess; 1734 unsigned long excess;
1651 unsigned long nr_scanned; 1735 unsigned long scanned;
1652 1736
1653 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1737 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
1654 1738
@@ -1656,6 +1740,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1656 if (!check_soft && !shrink && root_mem->memsw_is_minimum) 1740 if (!check_soft && !shrink && root_mem->memsw_is_minimum)
1657 noswap = true; 1741 noswap = true;
1658 1742
1743 if (shrink)
1744 rec.context = SCAN_BY_SHRINK;
1745 else if (check_soft)
1746 rec.context = SCAN_BY_SYSTEM;
1747 else
1748 rec.context = SCAN_BY_LIMIT;
1749
1750 rec.root = root_mem;
1751
1659 while (1) { 1752 while (1) {
1660 victim = mem_cgroup_select_victim(root_mem); 1753 victim = mem_cgroup_select_victim(root_mem);
1661 if (victim == root_mem) { 1754 if (victim == root_mem) {
@@ -1696,14 +1789,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1696 css_put(&victim->css); 1789 css_put(&victim->css);
1697 continue; 1790 continue;
1698 } 1791 }
1792 rec.mem = victim;
1793 rec.nr_scanned[0] = 0;
1794 rec.nr_scanned[1] = 0;
1795 rec.nr_rotated[0] = 0;
1796 rec.nr_rotated[1] = 0;
1797 rec.nr_freed[0] = 0;
1798 rec.nr_freed[1] = 0;
1799 rec.elapsed = 0;
1699 /* we use swappiness of local cgroup */ 1800 /* we use swappiness of local cgroup */
1700 if (check_soft) { 1801 if (check_soft) {
1701 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, 1802 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1702 noswap, zone, &nr_scanned); 1803 noswap, zone, &rec, &scanned);
1703 *total_scanned += nr_scanned; 1804 *total_scanned += scanned;
1704 } else 1805 } else
1705 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, 1806 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1706 noswap); 1807 noswap, &rec);
1808 mem_cgroup_record_scanstat(&rec);
1707 css_put(&victim->css); 1809 css_put(&victim->css);
1708 /* 1810 /*
1709 * At shrinking usage, we can't check we should stop here or 1811 * At shrinking usage, we can't check we should stop here or
@@ -3792,14 +3894,18 @@ try_to_free:
3792 /* try to free all pages in this cgroup */ 3894 /* try to free all pages in this cgroup */
3793 shrink = 1; 3895 shrink = 1;
3794 while (nr_retries && mem->res.usage > 0) { 3896 while (nr_retries && mem->res.usage > 0) {
3897 struct memcg_scanrecord rec;
3795 int progress; 3898 int progress;
3796 3899
3797 if (signal_pending(current)) { 3900 if (signal_pending(current)) {
3798 ret = -EINTR; 3901 ret = -EINTR;
3799 goto out; 3902 goto out;
3800 } 3903 }
3904 rec.context = SCAN_BY_SHRINK;
3905 rec.mem = mem;
3906 rec.root = mem;
3801 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, 3907 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
3802 false); 3908 false, &rec);
3803 if (!progress) { 3909 if (!progress) {
3804 nr_retries--; 3910 nr_retries--;
3805 /* maybe some writeback is necessary */ 3911 /* maybe some writeback is necessary */
@@ -4643,6 +4749,54 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4643} 4749}
4644#endif /* CONFIG_NUMA */ 4750#endif /* CONFIG_NUMA */
4645 4751
4752static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
4753 struct cftype *cft,
4754 struct cgroup_map_cb *cb)
4755{
4756 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4757 char string[64];
4758 int i;
4759
4760 for (i = 0; i < NR_SCANSTATS; i++) {
4761 strcpy(string, scanstat_string[i]);
4762 strcat(string, SCANSTAT_WORD_LIMIT);
4763 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
4764 }
4765
4766 for (i = 0; i < NR_SCANSTATS; i++) {
4767 strcpy(string, scanstat_string[i]);
4768 strcat(string, SCANSTAT_WORD_SYSTEM);
4769 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
4770 }
4771
4772 for (i = 0; i < NR_SCANSTATS; i++) {
4773 strcpy(string, scanstat_string[i]);
4774 strcat(string, SCANSTAT_WORD_LIMIT);
4775 strcat(string, SCANSTAT_WORD_HIERARCHY);
4776 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
4777 }
4778 for (i = 0; i < NR_SCANSTATS; i++) {
4779 strcpy(string, scanstat_string[i]);
4780 strcat(string, SCANSTAT_WORD_SYSTEM);
4781 strcat(string, SCANSTAT_WORD_HIERARCHY);
4782 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
4783 }
4784 return 0;
4785}
4786
4787static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
4788 unsigned int event)
4789{
4790 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4791
4792 spin_lock(&mem->scanstat.lock);
4793 memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
4794 memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
4795 spin_unlock(&mem->scanstat.lock);
4796 return 0;
4797}
4798
4799
4646static struct cftype mem_cgroup_files[] = { 4800static struct cftype mem_cgroup_files[] = {
4647 { 4801 {
4648 .name = "usage_in_bytes", 4802 .name = "usage_in_bytes",
@@ -4713,6 +4867,11 @@ static struct cftype mem_cgroup_files[] = {
4713 .mode = S_IRUGO, 4867 .mode = S_IRUGO,
4714 }, 4868 },
4715#endif 4869#endif
4870 {
4871 .name = "vmscan_stat",
4872 .read_map = mem_cgroup_vmscan_stat_read,
4873 .trigger = mem_cgroup_reset_vmscan_stat,
4874 },
4716}; 4875};
4717 4876
4718#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4877#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4976,6 +5135,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
4976 atomic_set(&mem->refcnt, 1); 5135 atomic_set(&mem->refcnt, 1);
4977 mem->move_charge_at_immigrate = 0; 5136 mem->move_charge_at_immigrate = 0;
4978 mutex_init(&mem->thresholds_lock); 5137 mutex_init(&mem->thresholds_lock);
5138 spin_lock_init(&mem->scanstat.lock);
4979 return &mem->css; 5139 return &mem->css;
4980free_out: 5140free_out:
4981 __mem_cgroup_free(mem); 5141 __mem_cgroup_free(mem);