Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  198
1 file changed, 13 insertions(+), 185 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 930de9437271..3508777837c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
 static void mem_cgroup_threshold(struct mem_cgroup *mem);
 static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
 
-enum {
-        SCAN_BY_LIMIT,
-        SCAN_BY_SYSTEM,
-        NR_SCAN_CONTEXT,
-        SCAN_BY_SHRINK,        /* not recorded now */
-};
-
-enum {
-        SCAN,
-        SCAN_ANON,
-        SCAN_FILE,
-        ROTATE,
-        ROTATE_ANON,
-        ROTATE_FILE,
-        FREED,
-        FREED_ANON,
-        FREED_FILE,
-        ELAPSED,
-        NR_SCANSTATS,
-};
-
-struct scanstat {
-        spinlock_t      lock;
-        unsigned long   stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-        unsigned long   rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-};
-
-const char *scanstat_string[NR_SCANSTATS] = {
-        "scanned_pages",
-        "scanned_anon_pages",
-        "scanned_file_pages",
-        "rotated_pages",
-        "rotated_anon_pages",
-        "rotated_file_pages",
-        "freed_pages",
-        "freed_anon_pages",
-        "freed_file_pages",
-        "elapsed_ns",
-};
-#define SCANSTAT_WORD_LIMIT        "_by_limit"
-#define SCANSTAT_WORD_SYSTEM        "_by_system"
-#define SCANSTAT_WORD_HIERARCHY        "_under_hierarchy"
-
-
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
 
         /* For oom notifier event fd */
         struct list_head oom_notify;
-        /* For recording LRU-scan statistics */
-        struct scanstat scanstat;
+
         /*
          * Should we move charges of a task when a task is moved into this
          * mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
 }
 #endif
 
-static void __mem_cgroup_record_scanstat(unsigned long *stats,
-                                         struct memcg_scanrecord *rec)
-{
-
-        stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
-        stats[SCAN_ANON] += rec->nr_scanned[0];
-        stats[SCAN_FILE] += rec->nr_scanned[1];
-
-        stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
-        stats[ROTATE_ANON] += rec->nr_rotated[0];
-        stats[ROTATE_FILE] += rec->nr_rotated[1];
-
-        stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
-        stats[FREED_ANON] += rec->nr_freed[0];
-        stats[FREED_FILE] += rec->nr_freed[1];
-
-        stats[ELAPSED] += rec->elapsed;
-}
-
-static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
-{
-        struct mem_cgroup *mem;
-        int context = rec->context;
-
-        if (context >= NR_SCAN_CONTEXT)
-                return;
-
-        mem = rec->mem;
-        spin_lock(&mem->scanstat.lock);
-        __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
-        spin_unlock(&mem->scanstat.lock);
-
-        mem = rec->root;
-        spin_lock(&mem->scanstat.lock);
-        __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
-        spin_unlock(&mem->scanstat.lock);
-}
-
 /*
  * Scan the hierarchy if needed to reclaim memory. We remember the last child
  * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
         bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
         bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
-        struct memcg_scanrecord rec;
         unsigned long excess;
-        unsigned long scanned;
+        unsigned long nr_scanned;
 
         excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
 
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         if (!check_soft && !shrink && root_mem->memsw_is_minimum)
                 noswap = true;
 
-        if (shrink)
-                rec.context = SCAN_BY_SHRINK;
-        else if (check_soft)
-                rec.context = SCAN_BY_SYSTEM;
-        else
-                rec.context = SCAN_BY_LIMIT;
-
-        rec.root = root_mem;
-
         while (1) {
                 victim = mem_cgroup_select_victim(root_mem);
                 if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                         css_put(&victim->css);
                         continue;
                 }
-                rec.mem = victim;
-                rec.nr_scanned[0] = 0;
-                rec.nr_scanned[1] = 0;
-                rec.nr_rotated[0] = 0;
-                rec.nr_rotated[1] = 0;
-                rec.nr_freed[0] = 0;
-                rec.nr_freed[1] = 0;
-                rec.elapsed = 0;
                 /* we use swappiness of local cgroup */
                 if (check_soft) {
                         ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-                                                noswap, zone, &rec, &scanned);
-                        *total_scanned += scanned;
+                                                noswap, zone, &nr_scanned);
+                        *total_scanned += nr_scanned;
                 } else
                         ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-                                                noswap, &rec);
-                mem_cgroup_record_scanstat(&rec);
+                                                noswap);
                 css_put(&victim->css);
                 /*
                  * At shrinking usage, we can't check we should stop here or
@@ -1841,29 +1739,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-        int lock_count = -1;
         struct mem_cgroup *iter, *failed = NULL;
         bool cond = true;
 
         for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-                bool locked = iter->oom_lock;
-
-                iter->oom_lock = true;
-                if (lock_count == -1)
-                        lock_count = iter->oom_lock;
-                else if (lock_count != locked) {
+                if (iter->oom_lock) {
                         /*
                          * this subtree of our hierarchy is already locked
                          * so we cannot give a lock.
                          */
-                        lock_count = 0;
                         failed = iter;
                         cond = false;
-                }
+                } else
+                        iter->oom_lock = true;
         }
 
         if (!failed)
-                goto done;
+                return true;
 
         /*
          * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1769,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
                 }
                 iter->oom_lock = false;
         }
-done:
-        return lock_count;
+        return false;
 }
 
 /*
@@ -2169,13 +2060,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 
         /* Notify other cpus that system-wide "drain" is running */
         get_online_cpus();
-        /*
-         * Get a hint for avoiding draining charges on the current cpu,
-         * which must be exhausted by our charging. It is not required that
-         * this be a precise check, so we use raw_smp_processor_id() instead of
-         * getcpu()/putcpu().
-         */
-        curcpu = raw_smp_processor_id();
+        curcpu = get_cpu();
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *mem;
@@ -2192,6 +2077,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
                         schedule_work_on(cpu, &stock->work);
                 }
         }
+        put_cpu();
 
         if (!sync)
                 goto out;
@@ -3866,18 +3752,14 @@ try_to_free:
         /* try to free all pages in this cgroup */
         shrink = 1;
         while (nr_retries && mem->res.usage > 0) {
-                struct memcg_scanrecord rec;
                 int progress;
 
                 if (signal_pending(current)) {
                         ret = -EINTR;
                         goto out;
                 }
-                rec.context = SCAN_BY_SHRINK;
-                rec.mem = mem;
-                rec.root = mem;
                 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
-                                                false, &rec);
+                                                false);
                 if (!progress) {
                         nr_retries--;
                         /* maybe some writeback is necessary */
@@ -4721,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
 }
 #endif /* CONFIG_NUMA */
 
-static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
-                                struct cftype *cft,
-                                struct cgroup_map_cb *cb)
-{
-        struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-        char string[64];
-        int i;
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_LIMIT);
-                cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
-        }
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_SYSTEM);
-                cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
-        }
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_LIMIT);
-                strcat(string, SCANSTAT_WORD_HIERARCHY);
-                cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
-        }
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_SYSTEM);
-                strcat(string, SCANSTAT_WORD_HIERARCHY);
-                cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
-        }
-        return 0;
-}
-
-static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
-                                unsigned int event)
-{
-        struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-
-        spin_lock(&mem->scanstat.lock);
-        memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
-        memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
-        spin_unlock(&mem->scanstat.lock);
-        return 0;
-}
-
-
 static struct cftype mem_cgroup_files[] = {
         {
                 .name = "usage_in_bytes",
@@ -4839,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
                 .mode = S_IRUGO,
         },
 #endif
-        {
-                .name = "vmscan_stat",
-                .read_map = mem_cgroup_vmscan_stat_read,
-                .trigger = mem_cgroup_reset_vmscan_stat,
-        },
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5107,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
         atomic_set(&mem->refcnt, 1);
         mem->move_charge_at_immigrate = 0;
         mutex_init(&mem->thresholds_lock);
-        spin_lock_init(&mem->scanstat.lock);
         return &mem->css;
 free_out:
         __mem_cgroup_free(mem);
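
Below is a minimal userspace sketch of the locking pattern the mem_cgroup_oom_lock() hunks above adopt: oom_lock becomes a plain boolean, the hierarchy walk stops at the first node that is already locked, and a failed walk rolls back only the locks it managed to take. The array, the tree_oom_lock() helper, and main() are hypothetical stand-ins for the cgroup tree walk, not kernel API.

#include <stdbool.h>
#include <stdio.h>

struct node {
        bool oom_lock;                /* mirrors mem_cgroup->oom_lock as a plain flag */
};

/*
 * Try to take oom_lock on every node; the first conflict aborts the
 * walk, and the locks taken so far are rolled back, much like the
 * kernel's second for_each_mem_cgroup_tree_cond() cleanup pass.
 */
static bool tree_oom_lock(struct node *nodes, int n)
{
        int i, failed = -1;

        for (i = 0; i < n; i++) {
                if (nodes[i].oom_lock) {
                        failed = i;        /* this subtree is already locked */
                        break;
                }
                nodes[i].oom_lock = true;
        }

        if (failed < 0)
                return true;        /* whole subtree locked */

        for (i = 0; i < failed; i++)
                nodes[i].oom_lock = false;        /* undo partial locking */
        return false;
}

int main(void)
{
        struct node hierarchy[3] = { { false }, { true }, { false } };

        /* the second node is pre-locked, so the walk must fail and roll back */
        printf("lock attempt: %s\n",
               tree_oom_lock(hierarchy, 3) ? "succeeded" : "failed");
        printf("first node rolled back: %s\n",
               hierarchy[0].oom_lock ? "no" : "yes");
        return 0;
}

The true/false return here matches the bool that the rewritten kernel function returns in place of the old lock_count bookkeeping; the kernel version differs mainly in walking the real memcg hierarchy under its callers' serialization.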