diff options
Diffstat (limited to 'mm/memcontrol.c')
| -rw-r--r-- | mm/memcontrol.c | 198 |
1 files changed, 13 insertions, 185 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 930de9437271..3508777837c7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
| @@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list { | |||
| 204 | static void mem_cgroup_threshold(struct mem_cgroup *mem); | 204 | static void mem_cgroup_threshold(struct mem_cgroup *mem); |
| 205 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); | 205 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); |
| 206 | 206 | ||
| 207 | enum { | ||
| 208 | SCAN_BY_LIMIT, | ||
| 209 | SCAN_BY_SYSTEM, | ||
| 210 | NR_SCAN_CONTEXT, | ||
| 211 | SCAN_BY_SHRINK, /* not recorded now */ | ||
| 212 | }; | ||
| 213 | |||
| 214 | enum { | ||
| 215 | SCAN, | ||
| 216 | SCAN_ANON, | ||
| 217 | SCAN_FILE, | ||
| 218 | ROTATE, | ||
| 219 | ROTATE_ANON, | ||
| 220 | ROTATE_FILE, | ||
| 221 | FREED, | ||
| 222 | FREED_ANON, | ||
| 223 | FREED_FILE, | ||
| 224 | ELAPSED, | ||
| 225 | NR_SCANSTATS, | ||
| 226 | }; | ||
| 227 | |||
| 228 | struct scanstat { | ||
| 229 | spinlock_t lock; | ||
| 230 | unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
| 231 | unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
| 232 | }; | ||
| 233 | |||
| 234 | const char *scanstat_string[NR_SCANSTATS] = { | ||
| 235 | "scanned_pages", | ||
| 236 | "scanned_anon_pages", | ||
| 237 | "scanned_file_pages", | ||
| 238 | "rotated_pages", | ||
| 239 | "rotated_anon_pages", | ||
| 240 | "rotated_file_pages", | ||
| 241 | "freed_pages", | ||
| 242 | "freed_anon_pages", | ||
| 243 | "freed_file_pages", | ||
| 244 | "elapsed_ns", | ||
| 245 | }; | ||
| 246 | #define SCANSTAT_WORD_LIMIT "_by_limit" | ||
| 247 | #define SCANSTAT_WORD_SYSTEM "_by_system" | ||
| 248 | #define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" | ||
| 249 | |||
| 250 | |||
| 251 | /* | 207 | /* |
| 252 | * The memory controller data structure. The memory controller controls both | 208 | * The memory controller data structure. The memory controller controls both |
| 253 | * page cache and RSS per cgroup. We would eventually like to provide | 209 | * page cache and RSS per cgroup. We would eventually like to provide |
| @@ -313,8 +269,7 @@ struct mem_cgroup { | |||
| 313 | 269 | ||
| 314 | /* For oom notifier event fd */ | 270 | /* For oom notifier event fd */ |
| 315 | struct list_head oom_notify; | 271 | struct list_head oom_notify; |
| 316 | /* For recording LRU-scan statistics */ | 272 | |
| 317 | struct scanstat scanstat; | ||
| 318 | /* | 273 | /* |
| 319 | * Should we move charges of a task when a task is moved into this | 274 | * Should we move charges of a task when a task is moved into this |
| 320 | * mem_cgroup ? And what type of charges should we move ? | 275 | * mem_cgroup ? And what type of charges should we move ? |
| @@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
| 1678 | } | 1633 | } |
| 1679 | #endif | 1634 | #endif |
| 1680 | 1635 | ||
| 1681 | static void __mem_cgroup_record_scanstat(unsigned long *stats, | ||
| 1682 | struct memcg_scanrecord *rec) | ||
| 1683 | { | ||
| 1684 | |||
| 1685 | stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; | ||
| 1686 | stats[SCAN_ANON] += rec->nr_scanned[0]; | ||
| 1687 | stats[SCAN_FILE] += rec->nr_scanned[1]; | ||
| 1688 | |||
| 1689 | stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; | ||
| 1690 | stats[ROTATE_ANON] += rec->nr_rotated[0]; | ||
| 1691 | stats[ROTATE_FILE] += rec->nr_rotated[1]; | ||
| 1692 | |||
| 1693 | stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; | ||
| 1694 | stats[FREED_ANON] += rec->nr_freed[0]; | ||
| 1695 | stats[FREED_FILE] += rec->nr_freed[1]; | ||
| 1696 | |||
| 1697 | stats[ELAPSED] += rec->elapsed; | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) | ||
| 1701 | { | ||
| 1702 | struct mem_cgroup *mem; | ||
| 1703 | int context = rec->context; | ||
| 1704 | |||
| 1705 | if (context >= NR_SCAN_CONTEXT) | ||
| 1706 | return; | ||
| 1707 | |||
| 1708 | mem = rec->mem; | ||
| 1709 | spin_lock(&mem->scanstat.lock); | ||
| 1710 | __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); | ||
| 1711 | spin_unlock(&mem->scanstat.lock); | ||
| 1712 | |||
| 1713 | mem = rec->root; | ||
| 1714 | spin_lock(&mem->scanstat.lock); | ||
| 1715 | __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); | ||
| 1716 | spin_unlock(&mem->scanstat.lock); | ||
| 1717 | } | ||
| 1718 | |||
| 1719 | /* | 1636 | /* |
| 1720 | * Scan the hierarchy if needed to reclaim memory. We remember the last child | 1637 | * Scan the hierarchy if needed to reclaim memory. We remember the last child |
| 1721 | * we reclaimed from, so that we don't end up penalizing one child extensively | 1638 | * we reclaimed from, so that we don't end up penalizing one child extensively |
| @@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1740 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; | 1657 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; |
| 1741 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1658 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
| 1742 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1659 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
| 1743 | struct memcg_scanrecord rec; | ||
| 1744 | unsigned long excess; | 1660 | unsigned long excess; |
| 1745 | unsigned long scanned; | 1661 | unsigned long nr_scanned; |
| 1746 | 1662 | ||
| 1747 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1663 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
| 1748 | 1664 | ||
| @@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1750 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) | 1666 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) |
| 1751 | noswap = true; | 1667 | noswap = true; |
| 1752 | 1668 | ||
| 1753 | if (shrink) | ||
| 1754 | rec.context = SCAN_BY_SHRINK; | ||
| 1755 | else if (check_soft) | ||
| 1756 | rec.context = SCAN_BY_SYSTEM; | ||
| 1757 | else | ||
| 1758 | rec.context = SCAN_BY_LIMIT; | ||
| 1759 | |||
| 1760 | rec.root = root_mem; | ||
| 1761 | |||
| 1762 | while (1) { | 1669 | while (1) { |
| 1763 | victim = mem_cgroup_select_victim(root_mem); | 1670 | victim = mem_cgroup_select_victim(root_mem); |
| 1764 | if (victim == root_mem) { | 1671 | if (victim == root_mem) { |
| @@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1799 | css_put(&victim->css); | 1706 | css_put(&victim->css); |
| 1800 | continue; | 1707 | continue; |
| 1801 | } | 1708 | } |
| 1802 | rec.mem = victim; | ||
| 1803 | rec.nr_scanned[0] = 0; | ||
| 1804 | rec.nr_scanned[1] = 0; | ||
| 1805 | rec.nr_rotated[0] = 0; | ||
| 1806 | rec.nr_rotated[1] = 0; | ||
| 1807 | rec.nr_freed[0] = 0; | ||
| 1808 | rec.nr_freed[1] = 0; | ||
| 1809 | rec.elapsed = 0; | ||
| 1810 | /* we use swappiness of local cgroup */ | 1709 | /* we use swappiness of local cgroup */ |
| 1811 | if (check_soft) { | 1710 | if (check_soft) { |
| 1812 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, | 1711 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
| 1813 | noswap, zone, &rec, &scanned); | 1712 | noswap, zone, &nr_scanned); |
| 1814 | *total_scanned += scanned; | 1713 | *total_scanned += nr_scanned; |
| 1815 | } else | 1714 | } else |
| 1816 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | 1715 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
| 1817 | noswap, &rec); | 1716 | noswap); |
| 1818 | mem_cgroup_record_scanstat(&rec); | ||
| 1819 | css_put(&victim->css); | 1717 | css_put(&victim->css); |
| 1820 | /* | 1718 | /* |
| 1821 | * At shrinking usage, we can't check we should stop here or | 1719 | * At shrinking usage, we can't check we should stop here or |
| @@ -1841,29 +1739,23 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
| 1841 | */ | 1739 | */ |
| 1842 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | 1740 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) |
| 1843 | { | 1741 | { |
| 1844 | int lock_count = -1; | ||
| 1845 | struct mem_cgroup *iter, *failed = NULL; | 1742 | struct mem_cgroup *iter, *failed = NULL; |
| 1846 | bool cond = true; | 1743 | bool cond = true; |
| 1847 | 1744 | ||
| 1848 | for_each_mem_cgroup_tree_cond(iter, mem, cond) { | 1745 | for_each_mem_cgroup_tree_cond(iter, mem, cond) { |
| 1849 | bool locked = iter->oom_lock; | 1746 | if (iter->oom_lock) { |
| 1850 | |||
| 1851 | iter->oom_lock = true; | ||
| 1852 | if (lock_count == -1) | ||
| 1853 | lock_count = iter->oom_lock; | ||
| 1854 | else if (lock_count != locked) { | ||
| 1855 | /* | 1747 | /* |
| 1856 | * this subtree of our hierarchy is already locked | 1748 | * this subtree of our hierarchy is already locked |
| 1857 | * so we cannot give a lock. | 1749 | * so we cannot give a lock. |
| 1858 | */ | 1750 | */ |
| 1859 | lock_count = 0; | ||
| 1860 | failed = iter; | 1751 | failed = iter; |
| 1861 | cond = false; | 1752 | cond = false; |
| 1862 | } | 1753 | } else |
| 1754 | iter->oom_lock = true; | ||
| 1863 | } | 1755 | } |
| 1864 | 1756 | ||
| 1865 | if (!failed) | 1757 | if (!failed) |
| 1866 | goto done; | 1758 | return true; |
| 1867 | 1759 | ||
| 1868 | /* | 1760 | /* |
| 1869 | * OK, we failed to lock the whole subtree so we have to clean up | 1761 | * OK, we failed to lock the whole subtree so we have to clean up |
| @@ -1877,8 +1769,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | |||
| 1877 | } | 1769 | } |
| 1878 | iter->oom_lock = false; | 1770 | iter->oom_lock = false; |
| 1879 | } | 1771 | } |
| 1880 | done: | 1772 | return false; |
| 1881 | return lock_count; | ||
| 1882 | } | 1773 | } |
| 1883 | 1774 | ||
| 1884 | /* | 1775 | /* |
| @@ -2169,13 +2060,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) | |||
| 2169 | 2060 | ||
| 2170 | /* Notify other cpus that system-wide "drain" is running */ | 2061 | /* Notify other cpus that system-wide "drain" is running */ |
| 2171 | get_online_cpus(); | 2062 | get_online_cpus(); |
| 2172 | /* | 2063 | curcpu = get_cpu(); |
| 2173 | * Get a hint for avoiding draining charges on the current cpu, | ||
| 2174 | * which must be exhausted by our charging. It is not required that | ||
| 2175 | * this be a precise check, so we use raw_smp_processor_id() instead of | ||
| 2176 | * getcpu()/putcpu(). | ||
| 2177 | */ | ||
| 2178 | curcpu = raw_smp_processor_id(); | ||
| 2179 | for_each_online_cpu(cpu) { | 2064 | for_each_online_cpu(cpu) { |
| 2180 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | 2065 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); |
| 2181 | struct mem_cgroup *mem; | 2066 | struct mem_cgroup *mem; |
| @@ -2192,6 +2077,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) | |||
| 2192 | schedule_work_on(cpu, &stock->work); | 2077 | schedule_work_on(cpu, &stock->work); |
| 2193 | } | 2078 | } |
| 2194 | } | 2079 | } |
| 2080 | put_cpu(); | ||
| 2195 | 2081 | ||
| 2196 | if (!sync) | 2082 | if (!sync) |
| 2197 | goto out; | 2083 | goto out; |
| @@ -3866,18 +3752,14 @@ try_to_free: | |||
| 3866 | /* try to free all pages in this cgroup */ | 3752 | /* try to free all pages in this cgroup */ |
| 3867 | shrink = 1; | 3753 | shrink = 1; |
| 3868 | while (nr_retries && mem->res.usage > 0) { | 3754 | while (nr_retries && mem->res.usage > 0) { |
| 3869 | struct memcg_scanrecord rec; | ||
| 3870 | int progress; | 3755 | int progress; |
| 3871 | 3756 | ||
| 3872 | if (signal_pending(current)) { | 3757 | if (signal_pending(current)) { |
| 3873 | ret = -EINTR; | 3758 | ret = -EINTR; |
| 3874 | goto out; | 3759 | goto out; |
| 3875 | } | 3760 | } |
| 3876 | rec.context = SCAN_BY_SHRINK; | ||
| 3877 | rec.mem = mem; | ||
| 3878 | rec.root = mem; | ||
| 3879 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, | 3761 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
| 3880 | false, &rec); | 3762 | false); |
| 3881 | if (!progress) { | 3763 | if (!progress) { |
| 3882 | nr_retries--; | 3764 | nr_retries--; |
| 3883 | /* maybe some writeback is necessary */ | 3765 | /* maybe some writeback is necessary */ |
| @@ -4721,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) | |||
| 4721 | } | 4603 | } |
| 4722 | #endif /* CONFIG_NUMA */ | 4604 | #endif /* CONFIG_NUMA */ |
| 4723 | 4605 | ||
| 4724 | static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, | ||
| 4725 | struct cftype *cft, | ||
| 4726 | struct cgroup_map_cb *cb) | ||
| 4727 | { | ||
| 4728 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
| 4729 | char string[64]; | ||
| 4730 | int i; | ||
| 4731 | |||
| 4732 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4733 | strcpy(string, scanstat_string[i]); | ||
| 4734 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
| 4735 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); | ||
| 4736 | } | ||
| 4737 | |||
| 4738 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4739 | strcpy(string, scanstat_string[i]); | ||
| 4740 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
| 4741 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); | ||
| 4742 | } | ||
| 4743 | |||
| 4744 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4745 | strcpy(string, scanstat_string[i]); | ||
| 4746 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
| 4747 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
| 4748 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); | ||
| 4749 | } | ||
| 4750 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
| 4751 | strcpy(string, scanstat_string[i]); | ||
| 4752 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
| 4753 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
| 4754 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); | ||
| 4755 | } | ||
| 4756 | return 0; | ||
| 4757 | } | ||
| 4758 | |||
| 4759 | static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, | ||
| 4760 | unsigned int event) | ||
| 4761 | { | ||
| 4762 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
| 4763 | |||
| 4764 | spin_lock(&mem->scanstat.lock); | ||
| 4765 | memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); | ||
| 4766 | memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); | ||
| 4767 | spin_unlock(&mem->scanstat.lock); | ||
| 4768 | return 0; | ||
| 4769 | } | ||
| 4770 | |||
| 4771 | |||
| 4772 | static struct cftype mem_cgroup_files[] = { | 4606 | static struct cftype mem_cgroup_files[] = { |
| 4773 | { | 4607 | { |
| 4774 | .name = "usage_in_bytes", | 4608 | .name = "usage_in_bytes", |
| @@ -4839,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = { | |||
| 4839 | .mode = S_IRUGO, | 4673 | .mode = S_IRUGO, |
| 4840 | }, | 4674 | }, |
| 4841 | #endif | 4675 | #endif |
| 4842 | { | ||
| 4843 | .name = "vmscan_stat", | ||
| 4844 | .read_map = mem_cgroup_vmscan_stat_read, | ||
| 4845 | .trigger = mem_cgroup_reset_vmscan_stat, | ||
| 4846 | }, | ||
| 4847 | }; | 4676 | }; |
| 4848 | 4677 | ||
| 4849 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4678 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
| @@ -5107,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
| 5107 | atomic_set(&mem->refcnt, 1); | 4936 | atomic_set(&mem->refcnt, 1); |
| 5108 | mem->move_charge_at_immigrate = 0; | 4937 | mem->move_charge_at_immigrate = 0; |
| 5109 | mutex_init(&mem->thresholds_lock); | 4938 | mutex_init(&mem->thresholds_lock); |
| 5110 | spin_lock_init(&mem->scanstat.lock); | ||
| 5111 | return &mem->css; | 4939 | return &mem->css; |
| 5112 | free_out: | 4940 | free_out: |
| 5113 | __mem_cgroup_free(mem); | 4941 | __mem_cgroup_free(mem); |
