diff options
-rw-r--r-- | Documentation/cgroups/memory.txt | 85 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 19 | ||||
-rw-r--r-- | include/linux/swap.h | 6 | ||||
-rw-r--r-- | mm/memcontrol.c | 172 | ||||
-rw-r--r-- | mm/vmscan.c | 39 |
5 files changed, 18 insertions, 303 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 6f3c598971f..06eb6d957c8 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -380,7 +380,7 @@ will be charged as a new owner of it. | |||
380 | 380 | ||
381 | 5.2 stat file | 381 | 5.2 stat file |
382 | 382 | ||
383 | 5.2.1 memory.stat file includes following statistics | 383 | memory.stat file includes following statistics |
384 | 384 | ||
385 | # per-memory cgroup local status | 385 | # per-memory cgroup local status |
386 | cache - # of bytes of page cache memory. | 386 | cache - # of bytes of page cache memory. |
@@ -438,89 +438,6 @@ Note: | |||
438 | file_mapped is accounted only when the memory cgroup is owner of page | 438 | file_mapped is accounted only when the memory cgroup is owner of page |
439 | cache.) | 439 | cache.) |
440 | 440 | ||
441 | 5.2.2 memory.vmscan_stat | ||
442 | |||
443 | memory.vmscan_stat includes statistics information for memory scanning and | ||
444 | freeing, reclaiming. The statistics shows memory scanning information since | ||
445 | memory cgroup creation and can be reset to 0 by writing 0 as | ||
446 | |||
447 | #echo 0 > ../memory.vmscan_stat | ||
448 | |||
449 | This file contains following statistics. | ||
450 | |||
451 | [param]_[file_or_anon]_pages_by_[reason]_[under_heararchy] | ||
452 | [param]_elapsed_ns_by_[reason]_[under_hierarchy] | ||
453 | |||
454 | For example, | ||
455 | |||
456 | scanned_file_pages_by_limit indicates the number of scanned | ||
457 | file pages at vmscan. | ||
458 | |||
459 | Now, 3 parameters are supported | ||
460 | |||
461 | scanned - the number of pages scanned by vmscan | ||
462 | rotated - the number of pages activated at vmscan | ||
463 | freed - the number of pages freed by vmscan | ||
464 | |||
465 | If "rotated" is high against scanned/freed, the memcg seems busy. | ||
466 | |||
467 | Now, 2 reason are supported | ||
468 | |||
469 | limit - the memory cgroup's limit | ||
470 | system - global memory pressure + softlimit | ||
471 | (global memory pressure not under softlimit is not handled now) | ||
472 | |||
473 | When under_hierarchy is added in the tail, the number indicates the | ||
474 | total memcg scan of its children and itself. | ||
475 | |||
476 | elapsed_ns is a elapsed time in nanosecond. This may include sleep time | ||
477 | and not indicates CPU usage. So, please take this as just showing | ||
478 | latency. | ||
479 | |||
480 | Here is an example. | ||
481 | |||
482 | # cat /cgroup/memory/A/memory.vmscan_stat | ||
483 | scanned_pages_by_limit 9471864 | ||
484 | scanned_anon_pages_by_limit 6640629 | ||
485 | scanned_file_pages_by_limit 2831235 | ||
486 | rotated_pages_by_limit 4243974 | ||
487 | rotated_anon_pages_by_limit 3971968 | ||
488 | rotated_file_pages_by_limit 272006 | ||
489 | freed_pages_by_limit 2318492 | ||
490 | freed_anon_pages_by_limit 962052 | ||
491 | freed_file_pages_by_limit 1356440 | ||
492 | elapsed_ns_by_limit 351386416101 | ||
493 | scanned_pages_by_system 0 | ||
494 | scanned_anon_pages_by_system 0 | ||
495 | scanned_file_pages_by_system 0 | ||
496 | rotated_pages_by_system 0 | ||
497 | rotated_anon_pages_by_system 0 | ||
498 | rotated_file_pages_by_system 0 | ||
499 | freed_pages_by_system 0 | ||
500 | freed_anon_pages_by_system 0 | ||
501 | freed_file_pages_by_system 0 | ||
502 | elapsed_ns_by_system 0 | ||
503 | scanned_pages_by_limit_under_hierarchy 9471864 | ||
504 | scanned_anon_pages_by_limit_under_hierarchy 6640629 | ||
505 | scanned_file_pages_by_limit_under_hierarchy 2831235 | ||
506 | rotated_pages_by_limit_under_hierarchy 4243974 | ||
507 | rotated_anon_pages_by_limit_under_hierarchy 3971968 | ||
508 | rotated_file_pages_by_limit_under_hierarchy 272006 | ||
509 | freed_pages_by_limit_under_hierarchy 2318492 | ||
510 | freed_anon_pages_by_limit_under_hierarchy 962052 | ||
511 | freed_file_pages_by_limit_under_hierarchy 1356440 | ||
512 | elapsed_ns_by_limit_under_hierarchy 351386416101 | ||
513 | scanned_pages_by_system_under_hierarchy 0 | ||
514 | scanned_anon_pages_by_system_under_hierarchy 0 | ||
515 | scanned_file_pages_by_system_under_hierarchy 0 | ||
516 | rotated_pages_by_system_under_hierarchy 0 | ||
517 | rotated_anon_pages_by_system_under_hierarchy 0 | ||
518 | rotated_file_pages_by_system_under_hierarchy 0 | ||
519 | freed_pages_by_system_under_hierarchy 0 | ||
520 | freed_anon_pages_by_system_under_hierarchy 0 | ||
521 | freed_file_pages_by_system_under_hierarchy 0 | ||
522 | elapsed_ns_by_system_under_hierarchy 0 | ||
523 | |||
524 | 5.3 swappiness | 441 | 5.3 swappiness |
525 | 442 | ||
526 | Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. | 443 | Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3b535db00a9..343bd7661f2 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
39 | struct mem_cgroup *mem_cont, | 39 | struct mem_cgroup *mem_cont, |
40 | int active, int file); | 40 | int active, int file); |
41 | 41 | ||
42 | struct memcg_scanrecord { | ||
43 | struct mem_cgroup *mem; /* scanend memory cgroup */ | ||
44 | struct mem_cgroup *root; /* scan target hierarchy root */ | ||
45 | int context; /* scanning context (see memcontrol.c) */ | ||
46 | unsigned long nr_scanned[2]; /* the number of scanned pages */ | ||
47 | unsigned long nr_rotated[2]; /* the number of rotated pages */ | ||
48 | unsigned long nr_freed[2]; /* the number of freed pages */ | ||
49 | unsigned long elapsed; /* nsec of time elapsed while scanning */ | ||
50 | }; | ||
51 | |||
52 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 42 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
53 | /* | 43 | /* |
54 | * All "charge" functions with gfp_mask should use GFP_KERNEL or | 44 | * All "charge" functions with gfp_mask should use GFP_KERNEL or |
@@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page); | |||
127 | extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, | 117 | extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, |
128 | struct task_struct *p); | 118 | struct task_struct *p); |
129 | 119 | ||
130 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | ||
131 | gfp_t gfp_mask, bool noswap, | ||
132 | struct memcg_scanrecord *rec); | ||
133 | extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | ||
134 | gfp_t gfp_mask, bool noswap, | ||
135 | struct zone *zone, | ||
136 | struct memcg_scanrecord *rec, | ||
137 | unsigned long *nr_scanned); | ||
138 | |||
139 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 120 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
140 | extern int do_swap_account; | 121 | extern int do_swap_account; |
141 | #endif | 122 | #endif |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 14d62490922..c71f84bb62e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page) | |||
252 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 252 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
253 | gfp_t gfp_mask, nodemask_t *mask); | 253 | gfp_t gfp_mask, nodemask_t *mask); |
254 | extern int __isolate_lru_page(struct page *page, int mode, int file); | 254 | extern int __isolate_lru_page(struct page *page, int mode, int file); |
255 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, | ||
256 | gfp_t gfp_mask, bool noswap); | ||
257 | extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | ||
258 | gfp_t gfp_mask, bool noswap, | ||
259 | struct zone *zone, | ||
260 | unsigned long *nr_scanned); | ||
255 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | 261 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
256 | extern int vm_swappiness; | 262 | extern int vm_swappiness; |
257 | extern int remove_mapping(struct address_space *mapping, struct page *page); | 263 | extern int remove_mapping(struct address_space *mapping, struct page *page); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ebd1e86bef1..3508777837c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list { | |||
204 | static void mem_cgroup_threshold(struct mem_cgroup *mem); | 204 | static void mem_cgroup_threshold(struct mem_cgroup *mem); |
205 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); | 205 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem); |
206 | 206 | ||
207 | enum { | ||
208 | SCAN_BY_LIMIT, | ||
209 | SCAN_BY_SYSTEM, | ||
210 | NR_SCAN_CONTEXT, | ||
211 | SCAN_BY_SHRINK, /* not recorded now */ | ||
212 | }; | ||
213 | |||
214 | enum { | ||
215 | SCAN, | ||
216 | SCAN_ANON, | ||
217 | SCAN_FILE, | ||
218 | ROTATE, | ||
219 | ROTATE_ANON, | ||
220 | ROTATE_FILE, | ||
221 | FREED, | ||
222 | FREED_ANON, | ||
223 | FREED_FILE, | ||
224 | ELAPSED, | ||
225 | NR_SCANSTATS, | ||
226 | }; | ||
227 | |||
228 | struct scanstat { | ||
229 | spinlock_t lock; | ||
230 | unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
231 | unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS]; | ||
232 | }; | ||
233 | |||
234 | const char *scanstat_string[NR_SCANSTATS] = { | ||
235 | "scanned_pages", | ||
236 | "scanned_anon_pages", | ||
237 | "scanned_file_pages", | ||
238 | "rotated_pages", | ||
239 | "rotated_anon_pages", | ||
240 | "rotated_file_pages", | ||
241 | "freed_pages", | ||
242 | "freed_anon_pages", | ||
243 | "freed_file_pages", | ||
244 | "elapsed_ns", | ||
245 | }; | ||
246 | #define SCANSTAT_WORD_LIMIT "_by_limit" | ||
247 | #define SCANSTAT_WORD_SYSTEM "_by_system" | ||
248 | #define SCANSTAT_WORD_HIERARCHY "_under_hierarchy" | ||
249 | |||
250 | |||
251 | /* | 207 | /* |
252 | * The memory controller data structure. The memory controller controls both | 208 | * The memory controller data structure. The memory controller controls both |
253 | * page cache and RSS per cgroup. We would eventually like to provide | 209 | * page cache and RSS per cgroup. We would eventually like to provide |
@@ -313,8 +269,7 @@ struct mem_cgroup { | |||
313 | 269 | ||
314 | /* For oom notifier event fd */ | 270 | /* For oom notifier event fd */ |
315 | struct list_head oom_notify; | 271 | struct list_head oom_notify; |
316 | /* For recording LRU-scan statistics */ | 272 | |
317 | struct scanstat scanstat; | ||
318 | /* | 273 | /* |
319 | * Should we move charges of a task when a task is moved into this | 274 | * Should we move charges of a task when a task is moved into this |
320 | * mem_cgroup ? And what type of charges should we move ? | 275 | * mem_cgroup ? And what type of charges should we move ? |
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
1678 | } | 1633 | } |
1679 | #endif | 1634 | #endif |
1680 | 1635 | ||
1681 | static void __mem_cgroup_record_scanstat(unsigned long *stats, | ||
1682 | struct memcg_scanrecord *rec) | ||
1683 | { | ||
1684 | |||
1685 | stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1]; | ||
1686 | stats[SCAN_ANON] += rec->nr_scanned[0]; | ||
1687 | stats[SCAN_FILE] += rec->nr_scanned[1]; | ||
1688 | |||
1689 | stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1]; | ||
1690 | stats[ROTATE_ANON] += rec->nr_rotated[0]; | ||
1691 | stats[ROTATE_FILE] += rec->nr_rotated[1]; | ||
1692 | |||
1693 | stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1]; | ||
1694 | stats[FREED_ANON] += rec->nr_freed[0]; | ||
1695 | stats[FREED_FILE] += rec->nr_freed[1]; | ||
1696 | |||
1697 | stats[ELAPSED] += rec->elapsed; | ||
1698 | } | ||
1699 | |||
1700 | static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec) | ||
1701 | { | ||
1702 | struct mem_cgroup *mem; | ||
1703 | int context = rec->context; | ||
1704 | |||
1705 | if (context >= NR_SCAN_CONTEXT) | ||
1706 | return; | ||
1707 | |||
1708 | mem = rec->mem; | ||
1709 | spin_lock(&mem->scanstat.lock); | ||
1710 | __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec); | ||
1711 | spin_unlock(&mem->scanstat.lock); | ||
1712 | |||
1713 | mem = rec->root; | ||
1714 | spin_lock(&mem->scanstat.lock); | ||
1715 | __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec); | ||
1716 | spin_unlock(&mem->scanstat.lock); | ||
1717 | } | ||
1718 | |||
1719 | /* | 1636 | /* |
1720 | * Scan the hierarchy if needed to reclaim memory. We remember the last child | 1637 | * Scan the hierarchy if needed to reclaim memory. We remember the last child |
1721 | * we reclaimed from, so that we don't end up penalizing one child extensively | 1638 | * we reclaimed from, so that we don't end up penalizing one child extensively |
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1740 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; | 1657 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; |
1741 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1658 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
1742 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1659 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
1743 | struct memcg_scanrecord rec; | ||
1744 | unsigned long excess; | 1660 | unsigned long excess; |
1745 | unsigned long scanned; | 1661 | unsigned long nr_scanned; |
1746 | 1662 | ||
1747 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1663 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; |
1748 | 1664 | ||
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1750 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) | 1666 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) |
1751 | noswap = true; | 1667 | noswap = true; |
1752 | 1668 | ||
1753 | if (shrink) | ||
1754 | rec.context = SCAN_BY_SHRINK; | ||
1755 | else if (check_soft) | ||
1756 | rec.context = SCAN_BY_SYSTEM; | ||
1757 | else | ||
1758 | rec.context = SCAN_BY_LIMIT; | ||
1759 | |||
1760 | rec.root = root_mem; | ||
1761 | |||
1762 | while (1) { | 1669 | while (1) { |
1763 | victim = mem_cgroup_select_victim(root_mem); | 1670 | victim = mem_cgroup_select_victim(root_mem); |
1764 | if (victim == root_mem) { | 1671 | if (victim == root_mem) { |
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1799 | css_put(&victim->css); | 1706 | css_put(&victim->css); |
1800 | continue; | 1707 | continue; |
1801 | } | 1708 | } |
1802 | rec.mem = victim; | ||
1803 | rec.nr_scanned[0] = 0; | ||
1804 | rec.nr_scanned[1] = 0; | ||
1805 | rec.nr_rotated[0] = 0; | ||
1806 | rec.nr_rotated[1] = 0; | ||
1807 | rec.nr_freed[0] = 0; | ||
1808 | rec.nr_freed[1] = 0; | ||
1809 | rec.elapsed = 0; | ||
1810 | /* we use swappiness of local cgroup */ | 1709 | /* we use swappiness of local cgroup */ |
1811 | if (check_soft) { | 1710 | if (check_soft) { |
1812 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, | 1711 | ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, |
1813 | noswap, zone, &rec, &scanned); | 1712 | noswap, zone, &nr_scanned); |
1814 | *total_scanned += scanned; | 1713 | *total_scanned += nr_scanned; |
1815 | } else | 1714 | } else |
1816 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, | 1715 | ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, |
1817 | noswap, &rec); | 1716 | noswap); |
1818 | mem_cgroup_record_scanstat(&rec); | ||
1819 | css_put(&victim->css); | 1717 | css_put(&victim->css); |
1820 | /* | 1718 | /* |
1821 | * At shrinking usage, we can't check we should stop here or | 1719 | * At shrinking usage, we can't check we should stop here or |
@@ -3854,18 +3752,14 @@ try_to_free: | |||
3854 | /* try to free all pages in this cgroup */ | 3752 | /* try to free all pages in this cgroup */ |
3855 | shrink = 1; | 3753 | shrink = 1; |
3856 | while (nr_retries && mem->res.usage > 0) { | 3754 | while (nr_retries && mem->res.usage > 0) { |
3857 | struct memcg_scanrecord rec; | ||
3858 | int progress; | 3755 | int progress; |
3859 | 3756 | ||
3860 | if (signal_pending(current)) { | 3757 | if (signal_pending(current)) { |
3861 | ret = -EINTR; | 3758 | ret = -EINTR; |
3862 | goto out; | 3759 | goto out; |
3863 | } | 3760 | } |
3864 | rec.context = SCAN_BY_SHRINK; | ||
3865 | rec.mem = mem; | ||
3866 | rec.root = mem; | ||
3867 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, | 3761 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, |
3868 | false, &rec); | 3762 | false); |
3869 | if (!progress) { | 3763 | if (!progress) { |
3870 | nr_retries--; | 3764 | nr_retries--; |
3871 | /* maybe some writeback is necessary */ | 3765 | /* maybe some writeback is necessary */ |
@@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file) | |||
4709 | } | 4603 | } |
4710 | #endif /* CONFIG_NUMA */ | 4604 | #endif /* CONFIG_NUMA */ |
4711 | 4605 | ||
4712 | static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp, | ||
4713 | struct cftype *cft, | ||
4714 | struct cgroup_map_cb *cb) | ||
4715 | { | ||
4716 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
4717 | char string[64]; | ||
4718 | int i; | ||
4719 | |||
4720 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
4721 | strcpy(string, scanstat_string[i]); | ||
4722 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
4723 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]); | ||
4724 | } | ||
4725 | |||
4726 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
4727 | strcpy(string, scanstat_string[i]); | ||
4728 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
4729 | cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]); | ||
4730 | } | ||
4731 | |||
4732 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
4733 | strcpy(string, scanstat_string[i]); | ||
4734 | strcat(string, SCANSTAT_WORD_LIMIT); | ||
4735 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
4736 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]); | ||
4737 | } | ||
4738 | for (i = 0; i < NR_SCANSTATS; i++) { | ||
4739 | strcpy(string, scanstat_string[i]); | ||
4740 | strcat(string, SCANSTAT_WORD_SYSTEM); | ||
4741 | strcat(string, SCANSTAT_WORD_HIERARCHY); | ||
4742 | cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]); | ||
4743 | } | ||
4744 | return 0; | ||
4745 | } | ||
4746 | |||
4747 | static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp, | ||
4748 | unsigned int event) | ||
4749 | { | ||
4750 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | ||
4751 | |||
4752 | spin_lock(&mem->scanstat.lock); | ||
4753 | memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats)); | ||
4754 | memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats)); | ||
4755 | spin_unlock(&mem->scanstat.lock); | ||
4756 | return 0; | ||
4757 | } | ||
4758 | |||
4759 | |||
4760 | static struct cftype mem_cgroup_files[] = { | 4606 | static struct cftype mem_cgroup_files[] = { |
4761 | { | 4607 | { |
4762 | .name = "usage_in_bytes", | 4608 | .name = "usage_in_bytes", |
@@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = { | |||
4827 | .mode = S_IRUGO, | 4673 | .mode = S_IRUGO, |
4828 | }, | 4674 | }, |
4829 | #endif | 4675 | #endif |
4830 | { | ||
4831 | .name = "vmscan_stat", | ||
4832 | .read_map = mem_cgroup_vmscan_stat_read, | ||
4833 | .trigger = mem_cgroup_reset_vmscan_stat, | ||
4834 | }, | ||
4835 | }; | 4676 | }; |
4836 | 4677 | ||
4837 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4678 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
@@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
5095 | atomic_set(&mem->refcnt, 1); | 4936 | atomic_set(&mem->refcnt, 1); |
5096 | mem->move_charge_at_immigrate = 0; | 4937 | mem->move_charge_at_immigrate = 0; |
5097 | mutex_init(&mem->thresholds_lock); | 4938 | mutex_init(&mem->thresholds_lock); |
5098 | spin_lock_init(&mem->scanstat.lock); | ||
5099 | return &mem->css; | 4939 | return &mem->css; |
5100 | free_out: | 4940 | free_out: |
5101 | __mem_cgroup_free(mem); | 4941 | __mem_cgroup_free(mem); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index e49bcb6d494..b55699cd906 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -105,7 +105,6 @@ struct scan_control { | |||
105 | 105 | ||
106 | /* Which cgroup do we reclaim from */ | 106 | /* Which cgroup do we reclaim from */ |
107 | struct mem_cgroup *mem_cgroup; | 107 | struct mem_cgroup *mem_cgroup; |
108 | struct memcg_scanrecord *memcg_record; | ||
109 | 108 | ||
110 | /* | 109 | /* |
111 | * Nodemask of nodes allowed by the caller. If NULL, all nodes | 110 | * Nodemask of nodes allowed by the caller. If NULL, all nodes |
@@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc, | |||
1349 | int file = is_file_lru(lru); | 1348 | int file = is_file_lru(lru); |
1350 | int numpages = hpage_nr_pages(page); | 1349 | int numpages = hpage_nr_pages(page); |
1351 | reclaim_stat->recent_rotated[file] += numpages; | 1350 | reclaim_stat->recent_rotated[file] += numpages; |
1352 | if (!scanning_global_lru(sc)) | ||
1353 | sc->memcg_record->nr_rotated[file] += numpages; | ||
1354 | } | 1351 | } |
1355 | if (!pagevec_add(&pvec, page)) { | 1352 | if (!pagevec_add(&pvec, page)) { |
1356 | spin_unlock_irq(&zone->lru_lock); | 1353 | spin_unlock_irq(&zone->lru_lock); |
@@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone, | |||
1394 | 1391 | ||
1395 | reclaim_stat->recent_scanned[0] += *nr_anon; | 1392 | reclaim_stat->recent_scanned[0] += *nr_anon; |
1396 | reclaim_stat->recent_scanned[1] += *nr_file; | 1393 | reclaim_stat->recent_scanned[1] += *nr_file; |
1397 | if (!scanning_global_lru(sc)) { | ||
1398 | sc->memcg_record->nr_scanned[0] += *nr_anon; | ||
1399 | sc->memcg_record->nr_scanned[1] += *nr_file; | ||
1400 | } | ||
1401 | } | 1394 | } |
1402 | 1395 | ||
1403 | /* | 1396 | /* |
@@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone, | |||
1511 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); | 1504 | nr_reclaimed += shrink_page_list(&page_list, zone, sc); |
1512 | } | 1505 | } |
1513 | 1506 | ||
1514 | if (!scanning_global_lru(sc)) | ||
1515 | sc->memcg_record->nr_freed[file] += nr_reclaimed; | ||
1516 | |||
1517 | local_irq_disable(); | 1507 | local_irq_disable(); |
1518 | if (current_is_kswapd()) | 1508 | if (current_is_kswapd()) |
1519 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); | 1509 | __count_vm_events(KSWAPD_STEAL, nr_reclaimed); |
@@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1613 | } | 1603 | } |
1614 | 1604 | ||
1615 | reclaim_stat->recent_scanned[file] += nr_taken; | 1605 | reclaim_stat->recent_scanned[file] += nr_taken; |
1616 | if (!scanning_global_lru(sc)) | ||
1617 | sc->memcg_record->nr_scanned[file] += nr_taken; | ||
1618 | 1606 | ||
1619 | __count_zone_vm_events(PGREFILL, zone, pgscanned); | 1607 | __count_zone_vm_events(PGREFILL, zone, pgscanned); |
1620 | if (file) | 1608 | if (file) |
@@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1666 | * get_scan_ratio. | 1654 | * get_scan_ratio. |
1667 | */ | 1655 | */ |
1668 | reclaim_stat->recent_rotated[file] += nr_rotated; | 1656 | reclaim_stat->recent_rotated[file] += nr_rotated; |
1669 | if (!scanning_global_lru(sc)) | ||
1670 | sc->memcg_record->nr_rotated[file] += nr_rotated; | ||
1671 | 1657 | ||
1672 | move_active_pages_to_lru(zone, &l_active, | 1658 | move_active_pages_to_lru(zone, &l_active, |
1673 | LRU_ACTIVE + file * LRU_FILE); | 1659 | LRU_ACTIVE + file * LRU_FILE); |
@@ -2265,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2265 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 2251 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
2266 | 2252 | ||
2267 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2253 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, |
2268 | gfp_t gfp_mask, bool noswap, | 2254 | gfp_t gfp_mask, bool noswap, |
2269 | struct zone *zone, | 2255 | struct zone *zone, |
2270 | struct memcg_scanrecord *rec, | 2256 | unsigned long *nr_scanned) |
2271 | unsigned long *scanned) | ||
2272 | { | 2257 | { |
2273 | struct scan_control sc = { | 2258 | struct scan_control sc = { |
2274 | .nr_scanned = 0, | 2259 | .nr_scanned = 0, |
@@ -2278,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2278 | .may_swap = !noswap, | 2263 | .may_swap = !noswap, |
2279 | .order = 0, | 2264 | .order = 0, |
2280 | .mem_cgroup = mem, | 2265 | .mem_cgroup = mem, |
2281 | .memcg_record = rec, | ||
2282 | }; | 2266 | }; |
2283 | ktime_t start, end; | ||
2284 | 2267 | ||
2285 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2268 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2286 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2269 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
@@ -2289,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2289 | sc.may_writepage, | 2272 | sc.may_writepage, |
2290 | sc.gfp_mask); | 2273 | sc.gfp_mask); |
2291 | 2274 | ||
2292 | start = ktime_get(); | ||
2293 | /* | 2275 | /* |
2294 | * NOTE: Although we can get the priority field, using it | 2276 | * NOTE: Although we can get the priority field, using it |
2295 | * here is not a good idea, since it limits the pages we can scan. | 2277 | * here is not a good idea, since it limits the pages we can scan. |
@@ -2298,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2298 | * the priority and make it zero. | 2280 | * the priority and make it zero. |
2299 | */ | 2281 | */ |
2300 | shrink_zone(0, zone, &sc); | 2282 | shrink_zone(0, zone, &sc); |
2301 | end = ktime_get(); | ||
2302 | |||
2303 | if (rec) | ||
2304 | rec->elapsed += ktime_to_ns(ktime_sub(end, start)); | ||
2305 | *scanned = sc.nr_scanned; | ||
2306 | 2283 | ||
2307 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2284 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2308 | 2285 | ||
2286 | *nr_scanned = sc.nr_scanned; | ||
2309 | return sc.nr_reclaimed; | 2287 | return sc.nr_reclaimed; |
2310 | } | 2288 | } |
2311 | 2289 | ||
2312 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 2290 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
2313 | gfp_t gfp_mask, | 2291 | gfp_t gfp_mask, |
2314 | bool noswap, | 2292 | bool noswap) |
2315 | struct memcg_scanrecord *rec) | ||
2316 | { | 2293 | { |
2317 | struct zonelist *zonelist; | 2294 | struct zonelist *zonelist; |
2318 | unsigned long nr_reclaimed; | 2295 | unsigned long nr_reclaimed; |
2319 | ktime_t start, end; | ||
2320 | int nid; | 2296 | int nid; |
2321 | struct scan_control sc = { | 2297 | struct scan_control sc = { |
2322 | .may_writepage = !laptop_mode, | 2298 | .may_writepage = !laptop_mode, |
@@ -2325,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2325 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2301 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2326 | .order = 0, | 2302 | .order = 0, |
2327 | .mem_cgroup = mem_cont, | 2303 | .mem_cgroup = mem_cont, |
2328 | .memcg_record = rec, | ||
2329 | .nodemask = NULL, /* we don't care the placement */ | 2304 | .nodemask = NULL, /* we don't care the placement */ |
2330 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2305 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2331 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), | 2306 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), |
@@ -2334,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2334 | .gfp_mask = sc.gfp_mask, | 2309 | .gfp_mask = sc.gfp_mask, |
2335 | }; | 2310 | }; |
2336 | 2311 | ||
2337 | start = ktime_get(); | ||
2338 | /* | 2312 | /* |
2339 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't | 2313 | * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't |
2340 | * take care of from where we get pages. So the node where we start the | 2314 | * take care of from where we get pages. So the node where we start the |
@@ -2349,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2349 | sc.gfp_mask); | 2323 | sc.gfp_mask); |
2350 | 2324 | ||
2351 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); | 2325 | nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); |
2352 | end = ktime_get(); | ||
2353 | if (rec) | ||
2354 | rec->elapsed += ktime_to_ns(ktime_sub(end, start)); | ||
2355 | 2326 | ||
2356 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); | 2327 | trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); |
2357 | 2328 | ||