Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 198
1 file changed, 13 insertions(+), 185 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 930de9437271..3508777837c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
 static void mem_cgroup_threshold(struct mem_cgroup *mem);
 static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
 
-enum {
-        SCAN_BY_LIMIT,
-        SCAN_BY_SYSTEM,
-        NR_SCAN_CONTEXT,
-        SCAN_BY_SHRINK, /* not recorded now */
-};
-
-enum {
-        SCAN,
-        SCAN_ANON,
-        SCAN_FILE,
-        ROTATE,
-        ROTATE_ANON,
-        ROTATE_FILE,
-        FREED,
-        FREED_ANON,
-        FREED_FILE,
-        ELAPSED,
-        NR_SCANSTATS,
-};
-
-struct scanstat {
-        spinlock_t lock;
-        unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-        unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
-};
-
-const char *scanstat_string[NR_SCANSTATS] = {
-        "scanned_pages",
-        "scanned_anon_pages",
-        "scanned_file_pages",
-        "rotated_pages",
-        "rotated_anon_pages",
-        "rotated_file_pages",
-        "freed_pages",
-        "freed_anon_pages",
-        "freed_file_pages",
-        "elapsed_ns",
-};
-#define SCANSTAT_WORD_LIMIT     "_by_limit"
-#define SCANSTAT_WORD_SYSTEM    "_by_system"
-#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
-
-
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
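
For reference, the removed memory.vmscan_stat file exposed counters whose names were built from the base strings and suffixes deleted above. Below is a minimal userspace sketch of that naming scheme, with snprintf standing in for the deleted strcpy/strcat pairs and only the "_by_limit" family shown; the "_by_system" names are formed the same way.

#include <stdio.h>

/* Base names copied from the deleted scanstat_string[] above. */
static const char *scanstat_string[] = {
        "scanned_pages", "scanned_anon_pages", "scanned_file_pages",
        "rotated_pages", "rotated_anon_pages", "rotated_file_pages",
        "freed_pages", "freed_anon_pages", "freed_file_pages",
        "elapsed_ns",
};

int main(void)
{
        char name[64];
        int i;

        for (i = 0; i < 10; i++) {
                /* per-memcg counter, e.g. "scanned_pages_by_limit" */
                snprintf(name, sizeof(name), "%s_by_limit",
                         scanstat_string[i]);
                puts(name);
                /* hierarchical counter, e.g.
                 * "scanned_pages_by_limit_under_hierarchy" */
                snprintf(name, sizeof(name), "%s_by_limit_under_hierarchy",
                         scanstat_string[i]);
                puts(name);
        }
        return 0;
}
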
@@ -313,8 +269,7 @@ struct mem_cgroup {
 
         /* For oom notifier event fd */
         struct list_head oom_notify;
-        /* For recording LRU-scan statistics */
-        struct scanstat scanstat;
+
         /*
          * Should we move charges of a task when a task is moved into this
          * mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
 }
 #endif
 
-static void __mem_cgroup_record_scanstat(unsigned long *stats,
-                                         struct memcg_scanrecord *rec)
-{
-
-        stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
-        stats[SCAN_ANON] += rec->nr_scanned[0];
-        stats[SCAN_FILE] += rec->nr_scanned[1];
-
-        stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
-        stats[ROTATE_ANON] += rec->nr_rotated[0];
-        stats[ROTATE_FILE] += rec->nr_rotated[1];
-
-        stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
-        stats[FREED_ANON] += rec->nr_freed[0];
-        stats[FREED_FILE] += rec->nr_freed[1];
-
-        stats[ELAPSED] += rec->elapsed;
-}
-
-static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
-{
-        struct mem_cgroup *mem;
-        int context = rec->context;
-
-        if (context >= NR_SCAN_CONTEXT)
-                return;
-
-        mem = rec->mem;
-        spin_lock(&mem->scanstat.lock);
-        __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
-        spin_unlock(&mem->scanstat.lock);
-
-        mem = rec->root;
-        spin_lock(&mem->scanstat.lock);
-        __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
-        spin_unlock(&mem->scanstat.lock);
-}
-
 /*
  * Scan the hierarchy if needed to reclaim memory. We remember the last child
  * we reclaimed from, so that we don't end up penalizing one child extensively
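
The deleted recording path wrote each scan record twice: into the victim cgroup's own stats table and into the hierarchy root's rootstats table, with index 0 meaning anon pages and index 1 meaning file pages. A self-contained sketch of that double-entry bookkeeping, trimmed to the scan counters and with the spinlocks omitted:

#include <stdio.h>

enum { SCAN, SCAN_ANON, SCAN_FILE, NR_STATS }; /* trimmed stat set */

struct record {
        unsigned long nr_scanned[2]; /* [0] = anon, [1] = file */
};

/* Fold one record into a stats row, mirroring the deleted helper. */
static void fold(unsigned long *stats, const struct record *rec)
{
        stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
        stats[SCAN_ANON] += rec->nr_scanned[0];
        stats[SCAN_FILE] += rec->nr_scanned[1];
}

int main(void)
{
        unsigned long victim_stats[NR_STATS] = {0};
        unsigned long root_stats[NR_STATS] = {0};
        struct record rec = { .nr_scanned = { 3, 7 } };

        fold(victim_stats, &rec); /* the victim's own counters */
        fold(root_stats, &rec);   /* and the hierarchy root's */
        printf("victim scanned=%lu root scanned=%lu\n",
               victim_stats[SCAN], root_stats[SCAN]);
        return 0;
}
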
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
         bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
         bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
-        struct memcg_scanrecord rec;
         unsigned long excess;
-        unsigned long scanned;
+        unsigned long nr_scanned;
 
         excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
 
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
         if (!check_soft && !shrink && root_mem->memsw_is_minimum)
                 noswap = true;
 
-        if (shrink)
-                rec.context = SCAN_BY_SHRINK;
-        else if (check_soft)
-                rec.context = SCAN_BY_SYSTEM;
-        else
-                rec.context = SCAN_BY_LIMIT;
-
-        rec.root = root_mem;
-
         while (1) {
                 victim = mem_cgroup_select_victim(root_mem);
                 if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                         css_put(&victim->css);
                         continue;
                 }
-                rec.mem = victim;
-                rec.nr_scanned[0] = 0;
-                rec.nr_scanned[1] = 0;
-                rec.nr_rotated[0] = 0;
-                rec.nr_rotated[1] = 0;
-                rec.nr_freed[0] = 0;
-                rec.nr_freed[1] = 0;
-                rec.elapsed = 0;
                 /* we use swappiness of local cgroup */
                 if (check_soft) {
                         ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-                                noswap, zone, &rec, &scanned);
-                        *total_scanned += scanned;
+                                noswap, zone, &nr_scanned);
+                        *total_scanned += nr_scanned;
                 } else
                         ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-                                                noswap, &rec);
-                mem_cgroup_record_scanstat(&rec);
+                                                noswap);
                 css_put(&victim->css);
                 /*
                  * At shrinking usage, we can't check we should stop here or
@@ -1841,29 +1739,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
  */
 static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
 {
-        int lock_count = -1;
         struct mem_cgroup *iter, *failed = NULL;
         bool cond = true;
 
         for_each_mem_cgroup_tree_cond(iter, mem, cond) {
-                bool locked = iter->oom_lock;
-
-                iter->oom_lock = true;
-                if (lock_count == -1)
-                        lock_count = iter->oom_lock;
-                else if (lock_count != locked) {
+                if (iter->oom_lock) {
                         /*
                          * this subtree of our hierarchy is already locked
                          * so we cannot give a lock.
                          */
-                        lock_count = 0;
                         failed = iter;
                         cond = false;
-                }
+                } else
+                        iter->oom_lock = true;
         }
 
         if (!failed)
-                goto done;
+                return true;
 
         /*
          * OK, we failed to lock the whole subtree so we have to clean up
@@ -1877,8 +1769,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
                 }
                 iter->oom_lock = false;
         }
-done:
-        return lock_count;
+        return false;
 }
 
 /*
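
The rewritten mem_cgroup_oom_lock() above is an all-or-nothing try-lock over the hierarchy: the walk stops at the first memcg that already holds oom_lock, and every flag set before that point is rolled back, replacing the old and fragile lock_count arithmetic. A standalone model of the same idea over a flat array; the kernel walks the cgroup tree with for_each_mem_cgroup_tree_cond instead:

#include <stdbool.h>
#include <stdio.h>

#define NODES 4

static bool oom_lock[NODES];

/* Try to lock nodes 0..NODES-1; on conflict, undo and report failure. */
static bool try_lock_all(void)
{
        int i, failed = -1;

        for (i = 0; i < NODES; i++) {
                if (oom_lock[i]) { /* subtree already locked */
                        failed = i;
                        break;
                }
                oom_lock[i] = true;
        }
        if (failed < 0)
                return true;

        /* Roll back only the flags we set before hitting the conflict. */
        for (i = 0; i < failed; i++)
                oom_lock[i] = false;
        return false;
}

int main(void)
{
        oom_lock[2] = true; /* someone else holds part of the hierarchy */
        printf("lock acquired: %s\n", try_lock_all() ? "yes" : "no");
        printf("node 0 rolled back: %s\n", oom_lock[0] ? "no" : "yes");
        return 0;
}
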
@@ -2169,13 +2060,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
 
         /* Notify other cpus that system-wide "drain" is running */
         get_online_cpus();
-        /*
-         * Get a hint for avoiding draining charges on the current cpu,
-         * which must be exhausted by our charging. It is not required that
-         * this be a precise check, so we use raw_smp_processor_id() instead of
-         * getcpu()/putcpu().
-         */
-        curcpu = raw_smp_processor_id();
+        curcpu = get_cpu();
         for_each_online_cpu(cpu) {
                 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
                 struct mem_cgroup *mem;
@@ -2192,6 +2077,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
                         schedule_work_on(cpu, &stock->work);
                 }
         }
+        put_cpu();
 
         if (!sync)
                 goto out;
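
The point of switching from raw_smp_processor_id() to the get_cpu()/put_cpu() pair above is that get_cpu() disables preemption, so the task cannot migrate and curcpu cannot go stale while the loop decides which per-cpu stocks to drain locally versus via scheduled work. A schematic userspace model with stubbed primitives; get_cpu/put_cpu here are hypothetical stand-ins for the kernel functions, which cannot run outside the kernel:

#include <stdio.h>

#define NR_CPUS 4

static int preempt_count; /* stand-in for the kernel's preempt counter */

/* Stubs: in the kernel, get_cpu() disables preemption and returns the
 * current CPU id; put_cpu() re-enables preemption. */
static int get_cpu(void)  { preempt_count++; return 0; /* pretend CPU 0 */ }
static void put_cpu(void) { preempt_count--; }

int main(void)
{
        int cpu, curcpu;

        curcpu = get_cpu(); /* cannot migrate until put_cpu() */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (cpu == curcpu)
                        printf("cpu %d: drain locally (preempt_count=%d)\n",
                               cpu, preempt_count);
                else
                        printf("cpu %d: schedule drain work\n", cpu);
        }
        put_cpu(); /* re-enable preemption */
        return 0;
}
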
@@ -3866,18 +3752,14 @@ try_to_free:
         /* try to free all pages in this cgroup */
         shrink = 1;
         while (nr_retries && mem->res.usage > 0) {
-                struct memcg_scanrecord rec;
                 int progress;
 
                 if (signal_pending(current)) {
                         ret = -EINTR;
                         goto out;
                 }
-                rec.context = SCAN_BY_SHRINK;
-                rec.mem = mem;
-                rec.root = mem;
                 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
-                                                false, &rec);
+                                                false);
                 if (!progress) {
                         nr_retries--;
                         /* maybe some writeback is necessary */
@@ -4721,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
 }
 #endif /* CONFIG_NUMA */
 
-static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
-                                struct cftype *cft,
-                                struct cgroup_map_cb *cb)
-{
-        struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-        char string[64];
-        int i;
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_LIMIT);
-                cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
-        }
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_SYSTEM);
-                cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
-        }
-
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_LIMIT);
-                strcat(string, SCANSTAT_WORD_HIERARCHY);
-                cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
-        }
-        for (i = 0; i < NR_SCANSTATS; i++) {
-                strcpy(string, scanstat_string[i]);
-                strcat(string, SCANSTAT_WORD_SYSTEM);
-                strcat(string, SCANSTAT_WORD_HIERARCHY);
-                cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
-        }
-        return 0;
-}
-
-static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
-                                unsigned int event)
-{
-        struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
-
-        spin_lock(&mem->scanstat.lock);
-        memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
-        memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
-        spin_unlock(&mem->scanstat.lock);
-        return 0;
-}
-
-
 static struct cftype mem_cgroup_files[] = {
         {
                 .name = "usage_in_bytes",
@@ -4839,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
                 .mode = S_IRUGO,
         },
 #endif
-        {
-                .name = "vmscan_stat",
-                .read_map = mem_cgroup_vmscan_stat_read,
-                .trigger = mem_cgroup_reset_vmscan_stat,
-        },
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5107,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
         atomic_set(&mem->refcnt, 1);
         mem->move_charge_at_immigrate = 0;
         mutex_init(&mem->thresholds_lock);
-        spin_lock_init(&mem->scanstat.lock);
         return &mem->css;
 free_out:
         __mem_cgroup_free(mem);