aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/cgroups/memory.txt85
-rw-r--r--include/linux/memcontrol.h19
-rw-r--r--include/linux/swap.h6
-rw-r--r--mm/memcontrol.c172
-rw-r--r--mm/vmscan.c39
5 files changed, 18 insertions, 303 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 6f3c598971f..06eb6d957c8 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -380,7 +380,7 @@ will be charged as a new owner of it.
380 380
3815.2 stat file 3815.2 stat file
382 382
3835.2.1 memory.stat file includes following statistics 383memory.stat file includes following statistics
384 384
385# per-memory cgroup local status 385# per-memory cgroup local status
386cache - # of bytes of page cache memory. 386cache - # of bytes of page cache memory.
@@ -438,89 +438,6 @@ Note:
438 file_mapped is accounted only when the memory cgroup is owner of page 438 file_mapped is accounted only when the memory cgroup is owner of page
439 cache.) 439 cache.)
440 440
4415.2.2 memory.vmscan_stat
442
443memory.vmscan_stat includes statistics information for memory scanning and
444freeing, reclaiming. The statistics show memory scanning information since
445memory cgroup creation and can be reset to 0 by writing 0 as
446
447 #echo 0 > ../memory.vmscan_stat
448
449This file contains following statistics.
450
451[param]_[file_or_anon]_pages_by_[reason]_[under_hierarchy]
452[param]_elapsed_ns_by_[reason]_[under_hierarchy]
453
454For example,
455
456 scanned_file_pages_by_limit indicates the number of scanned
457 file pages at vmscan.
458
459Now, 3 parameters are supported
460
461 scanned - the number of pages scanned by vmscan
462 rotated - the number of pages activated at vmscan
463 freed - the number of pages freed by vmscan
464
465If "rotated" is high against scanned/freed, the memcg seems busy.
466
467Now, 2 reasons are supported
468
469 limit - the memory cgroup's limit
470 system - global memory pressure + softlimit
471 (global memory pressure not under softlimit is not handled now)
472
473When under_hierarchy is added in the tail, the number indicates the
474total memcg scan of its children and itself.
475
476elapsed_ns is the elapsed time in nanoseconds. This may include sleep time
477and does not indicate CPU usage. So, please take this as just showing
478latency.
479
480Here is an example.
481
482# cat /cgroup/memory/A/memory.vmscan_stat
483scanned_pages_by_limit 9471864
484scanned_anon_pages_by_limit 6640629
485scanned_file_pages_by_limit 2831235
486rotated_pages_by_limit 4243974
487rotated_anon_pages_by_limit 3971968
488rotated_file_pages_by_limit 272006
489freed_pages_by_limit 2318492
490freed_anon_pages_by_limit 962052
491freed_file_pages_by_limit 1356440
492elapsed_ns_by_limit 351386416101
493scanned_pages_by_system 0
494scanned_anon_pages_by_system 0
495scanned_file_pages_by_system 0
496rotated_pages_by_system 0
497rotated_anon_pages_by_system 0
498rotated_file_pages_by_system 0
499freed_pages_by_system 0
500freed_anon_pages_by_system 0
501freed_file_pages_by_system 0
502elapsed_ns_by_system 0
503scanned_pages_by_limit_under_hierarchy 9471864
504scanned_anon_pages_by_limit_under_hierarchy 6640629
505scanned_file_pages_by_limit_under_hierarchy 2831235
506rotated_pages_by_limit_under_hierarchy 4243974
507rotated_anon_pages_by_limit_under_hierarchy 3971968
508rotated_file_pages_by_limit_under_hierarchy 272006
509freed_pages_by_limit_under_hierarchy 2318492
510freed_anon_pages_by_limit_under_hierarchy 962052
511freed_file_pages_by_limit_under_hierarchy 1356440
512elapsed_ns_by_limit_under_hierarchy 351386416101
513scanned_pages_by_system_under_hierarchy 0
514scanned_anon_pages_by_system_under_hierarchy 0
515scanned_file_pages_by_system_under_hierarchy 0
516rotated_pages_by_system_under_hierarchy 0
517rotated_anon_pages_by_system_under_hierarchy 0
518rotated_file_pages_by_system_under_hierarchy 0
519freed_pages_by_system_under_hierarchy 0
520freed_anon_pages_by_system_under_hierarchy 0
521freed_file_pages_by_system_under_hierarchy 0
522elapsed_ns_by_system_under_hierarchy 0
523
5245.3 swappiness 4415.3 swappiness
525 442
526Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. 443Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3b535db00a9..343bd7661f2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -39,16 +39,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
39 struct mem_cgroup *mem_cont, 39 struct mem_cgroup *mem_cont,
40 int active, int file); 40 int active, int file);
41 41
42struct memcg_scanrecord {
43 struct mem_cgroup *mem; /* scanned memory cgroup */
44 struct mem_cgroup *root; /* scan target hierarchy root */
45 int context; /* scanning context (see memcontrol.c) */
46 unsigned long nr_scanned[2]; /* the number of scanned pages */
47 unsigned long nr_rotated[2]; /* the number of rotated pages */
48 unsigned long nr_freed[2]; /* the number of freed pages */
49 unsigned long elapsed; /* nsec of time elapsed while scanning */
50};
51
52#ifdef CONFIG_CGROUP_MEM_RES_CTLR 42#ifdef CONFIG_CGROUP_MEM_RES_CTLR
53/* 43/*
54 * All "charge" functions with gfp_mask should use GFP_KERNEL or 44 * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -127,15 +117,6 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page);
127extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, 117extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
128 struct task_struct *p); 118 struct task_struct *p);
129 119
130extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
131 gfp_t gfp_mask, bool noswap,
132 struct memcg_scanrecord *rec);
133extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
134 gfp_t gfp_mask, bool noswap,
135 struct zone *zone,
136 struct memcg_scanrecord *rec,
137 unsigned long *nr_scanned);
138
139#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 120#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
140extern int do_swap_account; 121extern int do_swap_account;
141#endif 122#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 14d62490922..c71f84bb62e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -252,6 +252,12 @@ static inline void lru_cache_add_file(struct page *page)
252extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 252extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
253 gfp_t gfp_mask, nodemask_t *mask); 253 gfp_t gfp_mask, nodemask_t *mask);
254extern int __isolate_lru_page(struct page *page, int mode, int file); 254extern int __isolate_lru_page(struct page *page, int mode, int file);
255extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
256 gfp_t gfp_mask, bool noswap);
257extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
258 gfp_t gfp_mask, bool noswap,
259 struct zone *zone,
260 unsigned long *nr_scanned);
255extern unsigned long shrink_all_memory(unsigned long nr_pages); 261extern unsigned long shrink_all_memory(unsigned long nr_pages);
256extern int vm_swappiness; 262extern int vm_swappiness;
257extern int remove_mapping(struct address_space *mapping, struct page *page); 263extern int remove_mapping(struct address_space *mapping, struct page *page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ebd1e86bef1..3508777837c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,50 +204,6 @@ struct mem_cgroup_eventfd_list {
204static void mem_cgroup_threshold(struct mem_cgroup *mem); 204static void mem_cgroup_threshold(struct mem_cgroup *mem);
205static void mem_cgroup_oom_notify(struct mem_cgroup *mem); 205static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
206 206
207enum {
208 SCAN_BY_LIMIT,
209 SCAN_BY_SYSTEM,
210 NR_SCAN_CONTEXT,
211 SCAN_BY_SHRINK, /* not recorded now */
212};
213
214enum {
215 SCAN,
216 SCAN_ANON,
217 SCAN_FILE,
218 ROTATE,
219 ROTATE_ANON,
220 ROTATE_FILE,
221 FREED,
222 FREED_ANON,
223 FREED_FILE,
224 ELAPSED,
225 NR_SCANSTATS,
226};
227
228struct scanstat {
229 spinlock_t lock;
230 unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
231 unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
232};
233
234const char *scanstat_string[NR_SCANSTATS] = {
235 "scanned_pages",
236 "scanned_anon_pages",
237 "scanned_file_pages",
238 "rotated_pages",
239 "rotated_anon_pages",
240 "rotated_file_pages",
241 "freed_pages",
242 "freed_anon_pages",
243 "freed_file_pages",
244 "elapsed_ns",
245};
246#define SCANSTAT_WORD_LIMIT "_by_limit"
247#define SCANSTAT_WORD_SYSTEM "_by_system"
248#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
249
250
251/* 207/*
252 * The memory controller data structure. The memory controller controls both 208 * The memory controller data structure. The memory controller controls both
253 * page cache and RSS per cgroup. We would eventually like to provide 209 * page cache and RSS per cgroup. We would eventually like to provide
@@ -313,8 +269,7 @@ struct mem_cgroup {
313 269
314 /* For oom notifier event fd */ 270 /* For oom notifier event fd */
315 struct list_head oom_notify; 271 struct list_head oom_notify;
316 /* For recording LRU-scan statistics */ 272
317 struct scanstat scanstat;
318 /* 273 /*
319 * Should we move charges of a task when a task is moved into this 274 * Should we move charges of a task when a task is moved into this
320 * mem_cgroup ? And what type of charges should we move ? 275 * mem_cgroup ? And what type of charges should we move ?
@@ -1678,44 +1633,6 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1678} 1633}
1679#endif 1634#endif
1680 1635
1681static void __mem_cgroup_record_scanstat(unsigned long *stats,
1682 struct memcg_scanrecord *rec)
1683{
1684
1685 stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
1686 stats[SCAN_ANON] += rec->nr_scanned[0];
1687 stats[SCAN_FILE] += rec->nr_scanned[1];
1688
1689 stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
1690 stats[ROTATE_ANON] += rec->nr_rotated[0];
1691 stats[ROTATE_FILE] += rec->nr_rotated[1];
1692
1693 stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
1694 stats[FREED_ANON] += rec->nr_freed[0];
1695 stats[FREED_FILE] += rec->nr_freed[1];
1696
1697 stats[ELAPSED] += rec->elapsed;
1698}
1699
1700static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
1701{
1702 struct mem_cgroup *mem;
1703 int context = rec->context;
1704
1705 if (context >= NR_SCAN_CONTEXT)
1706 return;
1707
1708 mem = rec->mem;
1709 spin_lock(&mem->scanstat.lock);
1710 __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
1711 spin_unlock(&mem->scanstat.lock);
1712
1713 mem = rec->root;
1714 spin_lock(&mem->scanstat.lock);
1715 __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
1716 spin_unlock(&mem->scanstat.lock);
1717}
1718
1719/* 1636/*
1720 * Scan the hierarchy if needed to reclaim memory. We remember the last child 1637 * Scan the hierarchy if needed to reclaim memory. We remember the last child
1721 * we reclaimed from, so that we don't end up penalizing one child extensively 1638 * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -1740,9 +1657,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1740 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; 1657 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
1741 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; 1658 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1742 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1659 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1743 struct memcg_scanrecord rec;
1744 unsigned long excess; 1660 unsigned long excess;
1745 unsigned long scanned; 1661 unsigned long nr_scanned;
1746 1662
1747 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1663 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT;
1748 1664
@@ -1750,15 +1666,6 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1750 if (!check_soft && !shrink && root_mem->memsw_is_minimum) 1666 if (!check_soft && !shrink && root_mem->memsw_is_minimum)
1751 noswap = true; 1667 noswap = true;
1752 1668
1753 if (shrink)
1754 rec.context = SCAN_BY_SHRINK;
1755 else if (check_soft)
1756 rec.context = SCAN_BY_SYSTEM;
1757 else
1758 rec.context = SCAN_BY_LIMIT;
1759
1760 rec.root = root_mem;
1761
1762 while (1) { 1669 while (1) {
1763 victim = mem_cgroup_select_victim(root_mem); 1670 victim = mem_cgroup_select_victim(root_mem);
1764 if (victim == root_mem) { 1671 if (victim == root_mem) {
@@ -1799,23 +1706,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1799 css_put(&victim->css); 1706 css_put(&victim->css);
1800 continue; 1707 continue;
1801 } 1708 }
1802 rec.mem = victim;
1803 rec.nr_scanned[0] = 0;
1804 rec.nr_scanned[1] = 0;
1805 rec.nr_rotated[0] = 0;
1806 rec.nr_rotated[1] = 0;
1807 rec.nr_freed[0] = 0;
1808 rec.nr_freed[1] = 0;
1809 rec.elapsed = 0;
1810 /* we use swappiness of local cgroup */ 1709 /* we use swappiness of local cgroup */
1811 if (check_soft) { 1710 if (check_soft) {
1812 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, 1711 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1813 noswap, zone, &rec, &scanned); 1712 noswap, zone, &nr_scanned);
1814 *total_scanned += scanned; 1713 *total_scanned += nr_scanned;
1815 } else 1714 } else
1816 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, 1715 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1817 noswap, &rec); 1716 noswap);
1818 mem_cgroup_record_scanstat(&rec);
1819 css_put(&victim->css); 1717 css_put(&victim->css);
1820 /* 1718 /*
1821 * At shrinking usage, we can't check we should stop here or 1719 * At shrinking usage, we can't check we should stop here or
@@ -3854,18 +3752,14 @@ try_to_free:
3854 /* try to free all pages in this cgroup */ 3752 /* try to free all pages in this cgroup */
3855 shrink = 1; 3753 shrink = 1;
3856 while (nr_retries && mem->res.usage > 0) { 3754 while (nr_retries && mem->res.usage > 0) {
3857 struct memcg_scanrecord rec;
3858 int progress; 3755 int progress;
3859 3756
3860 if (signal_pending(current)) { 3757 if (signal_pending(current)) {
3861 ret = -EINTR; 3758 ret = -EINTR;
3862 goto out; 3759 goto out;
3863 } 3760 }
3864 rec.context = SCAN_BY_SHRINK;
3865 rec.mem = mem;
3866 rec.root = mem;
3867 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, 3761 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
3868 false, &rec); 3762 false);
3869 if (!progress) { 3763 if (!progress) {
3870 nr_retries--; 3764 nr_retries--;
3871 /* maybe some writeback is necessary */ 3765 /* maybe some writeback is necessary */
@@ -4709,54 +4603,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4709} 4603}
4710#endif /* CONFIG_NUMA */ 4604#endif /* CONFIG_NUMA */
4711 4605
4712static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
4713 struct cftype *cft,
4714 struct cgroup_map_cb *cb)
4715{
4716 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4717 char string[64];
4718 int i;
4719
4720 for (i = 0; i < NR_SCANSTATS; i++) {
4721 strcpy(string, scanstat_string[i]);
4722 strcat(string, SCANSTAT_WORD_LIMIT);
4723 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
4724 }
4725
4726 for (i = 0; i < NR_SCANSTATS; i++) {
4727 strcpy(string, scanstat_string[i]);
4728 strcat(string, SCANSTAT_WORD_SYSTEM);
4729 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
4730 }
4731
4732 for (i = 0; i < NR_SCANSTATS; i++) {
4733 strcpy(string, scanstat_string[i]);
4734 strcat(string, SCANSTAT_WORD_LIMIT);
4735 strcat(string, SCANSTAT_WORD_HIERARCHY);
4736 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
4737 }
4738 for (i = 0; i < NR_SCANSTATS; i++) {
4739 strcpy(string, scanstat_string[i]);
4740 strcat(string, SCANSTAT_WORD_SYSTEM);
4741 strcat(string, SCANSTAT_WORD_HIERARCHY);
4742 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
4743 }
4744 return 0;
4745}
4746
4747static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
4748 unsigned int event)
4749{
4750 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4751
4752 spin_lock(&mem->scanstat.lock);
4753 memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
4754 memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
4755 spin_unlock(&mem->scanstat.lock);
4756 return 0;
4757}
4758
4759
4760static struct cftype mem_cgroup_files[] = { 4606static struct cftype mem_cgroup_files[] = {
4761 { 4607 {
4762 .name = "usage_in_bytes", 4608 .name = "usage_in_bytes",
@@ -4827,11 +4673,6 @@ static struct cftype mem_cgroup_files[] = {
4827 .mode = S_IRUGO, 4673 .mode = S_IRUGO,
4828 }, 4674 },
4829#endif 4675#endif
4830 {
4831 .name = "vmscan_stat",
4832 .read_map = mem_cgroup_vmscan_stat_read,
4833 .trigger = mem_cgroup_reset_vmscan_stat,
4834 },
4835}; 4676};
4836 4677
4837#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4678#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -5095,7 +4936,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5095 atomic_set(&mem->refcnt, 1); 4936 atomic_set(&mem->refcnt, 1);
5096 mem->move_charge_at_immigrate = 0; 4937 mem->move_charge_at_immigrate = 0;
5097 mutex_init(&mem->thresholds_lock); 4938 mutex_init(&mem->thresholds_lock);
5098 spin_lock_init(&mem->scanstat.lock);
5099 return &mem->css; 4939 return &mem->css;
5100free_out: 4940free_out:
5101 __mem_cgroup_free(mem); 4941 __mem_cgroup_free(mem);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e49bcb6d494..b55699cd906 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -105,7 +105,6 @@ struct scan_control {
105 105
106 /* Which cgroup do we reclaim from */ 106 /* Which cgroup do we reclaim from */
107 struct mem_cgroup *mem_cgroup; 107 struct mem_cgroup *mem_cgroup;
108 struct memcg_scanrecord *memcg_record;
109 108
110 /* 109 /*
111 * Nodemask of nodes allowed by the caller. If NULL, all nodes 110 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -1349,8 +1348,6 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
1349 int file = is_file_lru(lru); 1348 int file = is_file_lru(lru);
1350 int numpages = hpage_nr_pages(page); 1349 int numpages = hpage_nr_pages(page);
1351 reclaim_stat->recent_rotated[file] += numpages; 1350 reclaim_stat->recent_rotated[file] += numpages;
1352 if (!scanning_global_lru(sc))
1353 sc->memcg_record->nr_rotated[file] += numpages;
1354 } 1351 }
1355 if (!pagevec_add(&pvec, page)) { 1352 if (!pagevec_add(&pvec, page)) {
1356 spin_unlock_irq(&zone->lru_lock); 1353 spin_unlock_irq(&zone->lru_lock);
@@ -1394,10 +1391,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
1394 1391
1395 reclaim_stat->recent_scanned[0] += *nr_anon; 1392 reclaim_stat->recent_scanned[0] += *nr_anon;
1396 reclaim_stat->recent_scanned[1] += *nr_file; 1393 reclaim_stat->recent_scanned[1] += *nr_file;
1397 if (!scanning_global_lru(sc)) {
1398 sc->memcg_record->nr_scanned[0] += *nr_anon;
1399 sc->memcg_record->nr_scanned[1] += *nr_file;
1400 }
1401} 1394}
1402 1395
1403/* 1396/*
@@ -1511,9 +1504,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
1511 nr_reclaimed += shrink_page_list(&page_list, zone, sc); 1504 nr_reclaimed += shrink_page_list(&page_list, zone, sc);
1512 } 1505 }
1513 1506
1514 if (!scanning_global_lru(sc))
1515 sc->memcg_record->nr_freed[file] += nr_reclaimed;
1516
1517 local_irq_disable(); 1507 local_irq_disable();
1518 if (current_is_kswapd()) 1508 if (current_is_kswapd())
1519 __count_vm_events(KSWAPD_STEAL, nr_reclaimed); 1509 __count_vm_events(KSWAPD_STEAL, nr_reclaimed);
@@ -1613,8 +1603,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1613 } 1603 }
1614 1604
1615 reclaim_stat->recent_scanned[file] += nr_taken; 1605 reclaim_stat->recent_scanned[file] += nr_taken;
1616 if (!scanning_global_lru(sc))
1617 sc->memcg_record->nr_scanned[file] += nr_taken;
1618 1606
1619 __count_zone_vm_events(PGREFILL, zone, pgscanned); 1607 __count_zone_vm_events(PGREFILL, zone, pgscanned);
1620 if (file) 1608 if (file)
@@ -1666,8 +1654,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
1666 * get_scan_ratio. 1654 * get_scan_ratio.
1667 */ 1655 */
1668 reclaim_stat->recent_rotated[file] += nr_rotated; 1656 reclaim_stat->recent_rotated[file] += nr_rotated;
1669 if (!scanning_global_lru(sc))
1670 sc->memcg_record->nr_rotated[file] += nr_rotated;
1671 1657
1672 move_active_pages_to_lru(zone, &l_active, 1658 move_active_pages_to_lru(zone, &l_active,
1673 LRU_ACTIVE + file * LRU_FILE); 1659 LRU_ACTIVE + file * LRU_FILE);
@@ -2265,10 +2251,9 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
2265#ifdef CONFIG_CGROUP_MEM_RES_CTLR 2251#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2266 2252
2267unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, 2253unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2268 gfp_t gfp_mask, bool noswap, 2254 gfp_t gfp_mask, bool noswap,
2269 struct zone *zone, 2255 struct zone *zone,
2270 struct memcg_scanrecord *rec, 2256 unsigned long *nr_scanned)
2271 unsigned long *scanned)
2272{ 2257{
2273 struct scan_control sc = { 2258 struct scan_control sc = {
2274 .nr_scanned = 0, 2259 .nr_scanned = 0,
@@ -2278,9 +2263,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2278 .may_swap = !noswap, 2263 .may_swap = !noswap,
2279 .order = 0, 2264 .order = 0,
2280 .mem_cgroup = mem, 2265 .mem_cgroup = mem,
2281 .memcg_record = rec,
2282 }; 2266 };
2283 ktime_t start, end;
2284 2267
2285 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2268 sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2286 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); 2269 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2289,7 +2272,6 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2289 sc.may_writepage, 2272 sc.may_writepage,
2290 sc.gfp_mask); 2273 sc.gfp_mask);
2291 2274
2292 start = ktime_get();
2293 /* 2275 /*
2294 * NOTE: Although we can get the priority field, using it 2276 * NOTE: Although we can get the priority field, using it
2295 * here is not a good idea, since it limits the pages we can scan. 2277 * here is not a good idea, since it limits the pages we can scan.
@@ -2298,25 +2280,19 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
2298 * the priority and make it zero. 2280 * the priority and make it zero.
2299 */ 2281 */
2300 shrink_zone(0, zone, &sc); 2282 shrink_zone(0, zone, &sc);
2301 end = ktime_get();
2302
2303 if (rec)
2304 rec->elapsed += ktime_to_ns(ktime_sub(end, start));
2305 *scanned = sc.nr_scanned;
2306 2283
2307 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); 2284 trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
2308 2285
2286 *nr_scanned = sc.nr_scanned;
2309 return sc.nr_reclaimed; 2287 return sc.nr_reclaimed;
2310} 2288}
2311 2289
2312unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, 2290unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2313 gfp_t gfp_mask, 2291 gfp_t gfp_mask,
2314 bool noswap, 2292 bool noswap)
2315 struct memcg_scanrecord *rec)
2316{ 2293{
2317 struct zonelist *zonelist; 2294 struct zonelist *zonelist;
2318 unsigned long nr_reclaimed; 2295 unsigned long nr_reclaimed;
2319 ktime_t start, end;
2320 int nid; 2296 int nid;
2321 struct scan_control sc = { 2297 struct scan_control sc = {
2322 .may_writepage = !laptop_mode, 2298 .may_writepage = !laptop_mode,
@@ -2325,7 +2301,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2325 .nr_to_reclaim = SWAP_CLUSTER_MAX, 2301 .nr_to_reclaim = SWAP_CLUSTER_MAX,
2326 .order = 0, 2302 .order = 0,
2327 .mem_cgroup = mem_cont, 2303 .mem_cgroup = mem_cont,
2328 .memcg_record = rec,
2329 .nodemask = NULL, /* we don't care the placement */ 2304 .nodemask = NULL, /* we don't care the placement */
2330 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | 2305 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2331 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), 2306 (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2334,7 +2309,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2334 .gfp_mask = sc.gfp_mask, 2309 .gfp_mask = sc.gfp_mask,
2335 }; 2310 };
2336 2311
2337 start = ktime_get();
2338 /* 2312 /*
2339 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't 2313 * Unlike direct reclaim via alloc_pages(), memcg's reclaim doesn't
2340 * take care of from where we get pages. So the node where we start the 2314 * take care of from where we get pages. So the node where we start the
@@ -2349,9 +2323,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
2349 sc.gfp_mask); 2323 sc.gfp_mask);
2350 2324
2351 nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink); 2325 nr_reclaimed = do_try_to_free_pages(zonelist, &sc, &shrink);
2352 end = ktime_get();
2353 if (rec)
2354 rec->elapsed += ktime_to_ns(ktime_sub(end, start));
2355 2326
2356 trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); 2327 trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
2357 2328