Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 761
1 file changed, 328 insertions, 433 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7685d4a0b3ce..f72b5e52451a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,7 +59,7 @@ | |||
59 | 59 | ||
60 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; | 60 | struct cgroup_subsys mem_cgroup_subsys __read_mostly; |
61 | #define MEM_CGROUP_RECLAIM_RETRIES 5 | 61 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
62 | struct mem_cgroup *root_mem_cgroup __read_mostly; | 62 | static struct mem_cgroup *root_mem_cgroup __read_mostly; |
63 | 63 | ||
64 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 64 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
65 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 65 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
@@ -73,7 +73,7 @@ static int really_do_swap_account __initdata = 0; | |||
73 | #endif | 73 | #endif |
74 | 74 | ||
75 | #else | 75 | #else |
76 | #define do_swap_account (0) | 76 | #define do_swap_account 0 |
77 | #endif | 77 | #endif |
78 | 78 | ||
79 | 79 | ||
@@ -88,18 +88,31 @@ enum mem_cgroup_stat_index { | |||
88 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ | 88 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ |
89 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ | 89 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ |
90 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ | 90 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ |
91 | MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ | ||
92 | MEM_CGROUP_STAT_NSTATS, | 91 | MEM_CGROUP_STAT_NSTATS, |
93 | }; | 92 | }; |
94 | 93 | ||
94 | static const char * const mem_cgroup_stat_names[] = { | ||
95 | "cache", | ||
96 | "rss", | ||
97 | "mapped_file", | ||
98 | "swap", | ||
99 | }; | ||
100 | |||
95 | enum mem_cgroup_events_index { | 101 | enum mem_cgroup_events_index { |
96 | MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ | 102 | MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ |
97 | MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ | 103 | MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ |
98 | MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */ | ||
99 | MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ | 104 | MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ |
100 | MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ | 105 | MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ |
101 | MEM_CGROUP_EVENTS_NSTATS, | 106 | MEM_CGROUP_EVENTS_NSTATS, |
102 | }; | 107 | }; |
108 | |||
109 | static const char * const mem_cgroup_events_names[] = { | ||
110 | "pgpgin", | ||
111 | "pgpgout", | ||
112 | "pgfault", | ||
113 | "pgmajfault", | ||
114 | }; | ||
115 | |||
103 | /* | 116 | /* |
104 | * Per memcg event counter is incremented at every pagein/pageout. With THP, | 117 | * Per memcg event counter is incremented at every pagein/pageout. With THP, |
105 | * it will be incremented by the number of pages. This counter is used for | 118 | * it will be incremented by the number of pages. This counter is used for |
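The mem_cgroup_stat_names[] and mem_cgroup_events_names[] arrays added above are indexed directly by the enums they follow, so their order must track the enum order (the patch adds the same kind of BUILD_BUG_ON guard for the LRU names further down). A minimal sketch of the pairing; dump_stat_names() is a hypothetical helper, not part of this patch:

	/* sketch: the string table must stay in step with the enum it describes */
	static void dump_stat_names(void)
	{
		int i;

		BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_stat_names) !=
			     MEM_CGROUP_STAT_NSTATS);
		for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++)
			printk(KERN_DEBUG "%d -> %s\n", i, mem_cgroup_stat_names[i]);
	}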
@@ -112,13 +125,14 @@ enum mem_cgroup_events_target { | |||
112 | MEM_CGROUP_TARGET_NUMAINFO, | 125 | MEM_CGROUP_TARGET_NUMAINFO, |
113 | MEM_CGROUP_NTARGETS, | 126 | MEM_CGROUP_NTARGETS, |
114 | }; | 127 | }; |
115 | #define THRESHOLDS_EVENTS_TARGET (128) | 128 | #define THRESHOLDS_EVENTS_TARGET 128 |
116 | #define SOFTLIMIT_EVENTS_TARGET (1024) | 129 | #define SOFTLIMIT_EVENTS_TARGET 1024 |
117 | #define NUMAINFO_EVENTS_TARGET (1024) | 130 | #define NUMAINFO_EVENTS_TARGET 1024 |
118 | 131 | ||
119 | struct mem_cgroup_stat_cpu { | 132 | struct mem_cgroup_stat_cpu { |
120 | long count[MEM_CGROUP_STAT_NSTATS]; | 133 | long count[MEM_CGROUP_STAT_NSTATS]; |
121 | unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; | 134 | unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; |
135 | unsigned long nr_page_events; | ||
122 | unsigned long targets[MEM_CGROUP_NTARGETS]; | 136 | unsigned long targets[MEM_CGROUP_NTARGETS]; |
123 | }; | 137 | }; |
124 | 138 | ||
@@ -138,7 +152,6 @@ struct mem_cgroup_per_zone { | |||
138 | 152 | ||
139 | struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; | 153 | struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; |
140 | 154 | ||
141 | struct zone_reclaim_stat reclaim_stat; | ||
142 | struct rb_node tree_node; /* RB tree node */ | 155 | struct rb_node tree_node; /* RB tree node */ |
143 | unsigned long long usage_in_excess;/* Set to the value by which */ | 156 | unsigned long long usage_in_excess;/* Set to the value by which */ |
144 | /* the soft limit is exceeded*/ | 157 | /* the soft limit is exceeded*/ |
@@ -182,7 +195,7 @@ struct mem_cgroup_threshold { | |||
182 | 195 | ||
183 | /* For threshold */ | 196 | /* For threshold */ |
184 | struct mem_cgroup_threshold_ary { | 197 | struct mem_cgroup_threshold_ary { |
185 | /* An array index points to threshold just below usage. */ | 198 | /* An array index points to threshold just below or equal to usage. */ |
186 | int current_threshold; | 199 | int current_threshold; |
187 | /* Size of entries[] */ | 200 | /* Size of entries[] */ |
188 | unsigned int size; | 201 | unsigned int size; |
@@ -245,8 +258,8 @@ struct mem_cgroup { | |||
245 | */ | 258 | */ |
246 | struct rcu_head rcu_freeing; | 259 | struct rcu_head rcu_freeing; |
247 | /* | 260 | /* |
248 | * But when using vfree(), that cannot be done at | 261 | * We also need some space for a worker in deferred freeing. |
249 | * interrupt time, so we must then queue the work. | 262 | * By the time we call it, rcu_freeing is no longer in use. |
250 | */ | 263 | */ |
251 | struct work_struct work_freeing; | 264 | struct work_struct work_freeing; |
252 | }; | 265 | }; |
@@ -305,7 +318,7 @@ struct mem_cgroup { | |||
305 | /* | 318 | /* |
306 | * percpu counter. | 319 | * percpu counter. |
307 | */ | 320 | */ |
308 | struct mem_cgroup_stat_cpu *stat; | 321 | struct mem_cgroup_stat_cpu __percpu *stat; |
309 | /* | 322 | /* |
310 | * used when a cpu is offlined or other synchronizations | 323 | * used when a cpu is offlined or other synchronizations |
311 | * See mem_cgroup_read_stat(). | 324 | * See mem_cgroup_read_stat(). |
@@ -360,8 +373,8 @@ static bool move_file(void) | |||
360 | * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft | 373 | * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft |
361 | * limit reclaim to prevent infinite loops, if they ever occur. | 374 | * limit reclaim to prevent infinite loops, if they ever occur. |
362 | */ | 375 | */ |
363 | #define MEM_CGROUP_MAX_RECLAIM_LOOPS (100) | 376 | #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 |
364 | #define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS (2) | 377 | #define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 |
365 | 378 | ||
366 | enum charge_type { | 379 | enum charge_type { |
367 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 380 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
@@ -377,8 +390,8 @@ enum charge_type { | |||
377 | #define _MEM (0) | 390 | #define _MEM (0) |
378 | #define _MEMSWAP (1) | 391 | #define _MEMSWAP (1) |
379 | #define _OOM_TYPE (2) | 392 | #define _OOM_TYPE (2) |
380 | #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) | 393 | #define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val)) |
381 | #define MEMFILE_TYPE(val) (((val) >> 16) & 0xffff) | 394 | #define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff) |
382 | #define MEMFILE_ATTR(val) ((val) & 0xffff) | 395 | #define MEMFILE_ATTR(val) ((val) & 0xffff) |
383 | /* Used for OOM notifier */ | 396 | /* Used for OOM notifier */ |
384 | #define OOM_CONTROL (0) | 397 | #define OOM_CONTROL (0) |
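The MEMFILE_* macros pack a resource-counter type and an attribute into the single private int a cftype carries; this hunk only drops redundant parentheses, the encoding itself is unchanged. A quick round-trip illustration (the concrete attribute value 2 is taken as RES_LIMIT from res_counter.h of this era and is illustrative):

	int priv = MEMFILE_PRIVATE(_MEMSWAP, 2);	/* type 1, attribute 2 */

	MEMFILE_TYPE(priv);	/* == _MEMSWAP: selects memcg->memsw          */
	MEMFILE_ATTR(priv);	/* == 2: which res_counter member to act on   */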
@@ -404,6 +417,7 @@ void sock_update_memcg(struct sock *sk) | |||
404 | { | 417 | { |
405 | if (mem_cgroup_sockets_enabled) { | 418 | if (mem_cgroup_sockets_enabled) { |
406 | struct mem_cgroup *memcg; | 419 | struct mem_cgroup *memcg; |
420 | struct cg_proto *cg_proto; | ||
407 | 421 | ||
408 | BUG_ON(!sk->sk_prot->proto_cgroup); | 422 | BUG_ON(!sk->sk_prot->proto_cgroup); |
409 | 423 | ||
@@ -423,9 +437,10 @@ void sock_update_memcg(struct sock *sk) | |||
423 | 437 | ||
424 | rcu_read_lock(); | 438 | rcu_read_lock(); |
425 | memcg = mem_cgroup_from_task(current); | 439 | memcg = mem_cgroup_from_task(current); |
426 | if (!mem_cgroup_is_root(memcg)) { | 440 | cg_proto = sk->sk_prot->proto_cgroup(memcg); |
441 | if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) { | ||
427 | mem_cgroup_get(memcg); | 442 | mem_cgroup_get(memcg); |
428 | sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg); | 443 | sk->sk_cgrp = cg_proto; |
429 | } | 444 | } |
430 | rcu_read_unlock(); | 445 | rcu_read_unlock(); |
431 | } | 446 | } |
@@ -454,6 +469,19 @@ EXPORT_SYMBOL(tcp_proto_cgroup); | |||
454 | #endif /* CONFIG_INET */ | 469 | #endif /* CONFIG_INET */ |
455 | #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ | 470 | #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ |
456 | 471 | ||
472 | #if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) | ||
473 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
474 | { | ||
475 | if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) | ||
476 | return; | ||
477 | static_key_slow_dec(&memcg_socket_limit_enabled); | ||
478 | } | ||
479 | #else | ||
480 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
481 | { | ||
482 | } | ||
483 | #endif | ||
484 | |||
457 | static void drain_all_stock_async(struct mem_cgroup *memcg); | 485 | static void drain_all_stock_async(struct mem_cgroup *memcg); |
458 | 486 | ||
459 | static struct mem_cgroup_per_zone * | 487 | static struct mem_cgroup_per_zone * |
@@ -718,12 +746,21 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, | |||
718 | nr_pages = -nr_pages; /* for event */ | 746 | nr_pages = -nr_pages; /* for event */ |
719 | } | 747 | } |
720 | 748 | ||
721 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); | 749 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); |
722 | 750 | ||
723 | preempt_enable(); | 751 | preempt_enable(); |
724 | } | 752 | } |
725 | 753 | ||
726 | unsigned long | 754 | unsigned long |
755 | mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) | ||
756 | { | ||
757 | struct mem_cgroup_per_zone *mz; | ||
758 | |||
759 | mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); | ||
760 | return mz->lru_size[lru]; | ||
761 | } | ||
762 | |||
763 | static unsigned long | ||
727 | mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, | 764 | mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid, |
728 | unsigned int lru_mask) | 765 | unsigned int lru_mask) |
729 | { | 766 | { |
@@ -770,7 +807,7 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, | |||
770 | { | 807 | { |
771 | unsigned long val, next; | 808 | unsigned long val, next; |
772 | 809 | ||
773 | val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]); | 810 | val = __this_cpu_read(memcg->stat->nr_page_events); |
774 | next = __this_cpu_read(memcg->stat->targets[target]); | 811 | next = __this_cpu_read(memcg->stat->targets[target]); |
775 | /* from time_after() in jiffies.h */ | 812 | /* from time_after() in jiffies.h */ |
776 | if ((long)next - (long)val < 0) { | 813 | if ((long)next - (long)val < 0) { |
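The comparison borrowed from time_after() keeps the ratelimit check correct even after nr_page_events wraps around: it looks at the signed difference rather than at the raw unsigned values. A standalone user-space illustration (plain C, not kernel code):

	#include <stdio.h>

	int main(void)
	{
		unsigned long val  = (unsigned long)-5;	/* counter just before wraparound  */
		unsigned long next = val + 128;		/* target 128 events ahead, wraps  */

		/* prints 0: target not reached yet, even though next < val numerically */
		printf("%d\n", (long)next - (long)val < 0);
		return 0;
	}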
@@ -1013,7 +1050,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event); | |||
1013 | /** | 1050 | /** |
1014 | * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg | 1051 | * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg |
1015 | * @zone: zone of the wanted lruvec | 1052 | * @zone: zone of the wanted lruvec |
1016 | * @mem: memcg of the wanted lruvec | 1053 | * @memcg: memcg of the wanted lruvec |
1017 | * | 1054 | * |
1018 | * Returns the lru list vector holding pages for the given @zone and | 1055 | * Returns the lru list vector holding pages for the given @zone and |
1019 | * @mem. This can be the global zone lruvec, if the memory controller | 1056 | * @mem. This can be the global zone lruvec, if the memory controller |
@@ -1046,19 +1083,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, | |||
1046 | */ | 1083 | */ |
1047 | 1084 | ||
1048 | /** | 1085 | /** |
1049 | * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec | 1086 | * mem_cgroup_page_lruvec - return lruvec for adding an lru page |
1050 | * @zone: zone of the page | ||
1051 | * @page: the page | 1087 | * @page: the page |
1052 | * @lru: current lru | 1088 | * @zone: zone of the page |
1053 | * | ||
1054 | * This function accounts for @page being added to @lru, and returns | ||
1055 | * the lruvec for the given @zone and the memcg @page is charged to. | ||
1056 | * | ||
1057 | * The callsite is then responsible for physically linking the page to | ||
1058 | * the returned lruvec->lists[@lru]. | ||
1059 | */ | 1089 | */ |
1060 | struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, | 1090 | struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) |
1061 | enum lru_list lru) | ||
1062 | { | 1091 | { |
1063 | struct mem_cgroup_per_zone *mz; | 1092 | struct mem_cgroup_per_zone *mz; |
1064 | struct mem_cgroup *memcg; | 1093 | struct mem_cgroup *memcg; |
@@ -1071,7 +1100,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, | |||
1071 | memcg = pc->mem_cgroup; | 1100 | memcg = pc->mem_cgroup; |
1072 | 1101 | ||
1073 | /* | 1102 | /* |
1074 | * Surreptitiously switch any uncharged page to root: | 1103 | * Surreptitiously switch any uncharged offlist page to root: |
1075 | * an uncharged page off lru does nothing to secure | 1104 | * an uncharged page off lru does nothing to secure |
1076 | * its former mem_cgroup from sudden removal. | 1105 | * its former mem_cgroup from sudden removal. |
1077 | * | 1106 | * |
@@ -1079,85 +1108,60 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page, | |||
1079 | * under page_cgroup lock: between them, they make all uses | 1108 | * under page_cgroup lock: between them, they make all uses |
1080 | * of pc->mem_cgroup safe. | 1109 | * of pc->mem_cgroup safe. |
1081 | */ | 1110 | */ |
1082 | if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup) | 1111 | if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup) |
1083 | pc->mem_cgroup = memcg = root_mem_cgroup; | 1112 | pc->mem_cgroup = memcg = root_mem_cgroup; |
1084 | 1113 | ||
1085 | mz = page_cgroup_zoneinfo(memcg, page); | 1114 | mz = page_cgroup_zoneinfo(memcg, page); |
1086 | /* compound_order() is stabilized through lru_lock */ | ||
1087 | mz->lru_size[lru] += 1 << compound_order(page); | ||
1088 | return &mz->lruvec; | 1115 | return &mz->lruvec; |
1089 | } | 1116 | } |
1090 | 1117 | ||
1091 | /** | 1118 | /** |
1092 | * mem_cgroup_lru_del_list - account for removing an lru page | 1119 | * mem_cgroup_update_lru_size - account for adding or removing an lru page |
1093 | * @page: the page | 1120 | * @lruvec: mem_cgroup per zone lru vector |
1094 | * @lru: target lru | 1121 | * @lru: index of lru list the page is sitting on |
1095 | * | 1122 | * @nr_pages: positive when adding or negative when removing |
1096 | * This function accounts for @page being removed from @lru. | ||
1097 | * | 1123 | * |
1098 | * The callsite is then responsible for physically unlinking | 1124 | * This function must be called when a page is added to or removed from an |
1099 | * @page->lru. | 1125 | * lru list. |
1100 | */ | 1126 | */ |
1101 | void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru) | 1127 | void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, |
1128 | int nr_pages) | ||
1102 | { | 1129 | { |
1103 | struct mem_cgroup_per_zone *mz; | 1130 | struct mem_cgroup_per_zone *mz; |
1104 | struct mem_cgroup *memcg; | 1131 | unsigned long *lru_size; |
1105 | struct page_cgroup *pc; | ||
1106 | 1132 | ||
1107 | if (mem_cgroup_disabled()) | 1133 | if (mem_cgroup_disabled()) |
1108 | return; | 1134 | return; |
1109 | 1135 | ||
1110 | pc = lookup_page_cgroup(page); | 1136 | mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); |
1111 | memcg = pc->mem_cgroup; | 1137 | lru_size = mz->lru_size + lru; |
1112 | VM_BUG_ON(!memcg); | 1138 | *lru_size += nr_pages; |
1113 | mz = page_cgroup_zoneinfo(memcg, page); | 1139 | VM_BUG_ON((long)(*lru_size) < 0); |
1114 | /* huge page split is done under lru_lock. so, we have no races. */ | ||
1115 | VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page))); | ||
1116 | mz->lru_size[lru] -= 1 << compound_order(page); | ||
1117 | } | ||
1118 | |||
1119 | void mem_cgroup_lru_del(struct page *page) | ||
1120 | { | ||
1121 | mem_cgroup_lru_del_list(page, page_lru(page)); | ||
1122 | } | ||
1123 | |||
1124 | /** | ||
1125 | * mem_cgroup_lru_move_lists - account for moving a page between lrus | ||
1126 | * @zone: zone of the page | ||
1127 | * @page: the page | ||
1128 | * @from: current lru | ||
1129 | * @to: target lru | ||
1130 | * | ||
1131 | * This function accounts for @page being moved between the lrus @from | ||
1132 | * and @to, and returns the lruvec for the given @zone and the memcg | ||
1133 | * @page is charged to. | ||
1134 | * | ||
1135 | * The callsite is then responsible for physically relinking | ||
1136 | * @page->lru to the returned lruvec->lists[@to]. | ||
1137 | */ | ||
1138 | struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, | ||
1139 | struct page *page, | ||
1140 | enum lru_list from, | ||
1141 | enum lru_list to) | ||
1142 | { | ||
1143 | /* XXX: Optimize this, especially for @from == @to */ | ||
1144 | mem_cgroup_lru_del_list(page, from); | ||
1145 | return mem_cgroup_lru_add_list(zone, page, to); | ||
1146 | } | 1140 | } |
1147 | 1141 | ||
1148 | /* | 1142 | /* |
1149 | * Checks whether given mem is same or in the root_mem_cgroup's | 1143 | * Checks whether given mem is same or in the root_mem_cgroup's |
1150 | * hierarchy subtree | 1144 | * hierarchy subtree |
1151 | */ | 1145 | */ |
1146 | bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, | ||
1147 | struct mem_cgroup *memcg) | ||
1148 | { | ||
1149 | if (root_memcg == memcg) | ||
1150 | return true; | ||
1151 | if (!root_memcg->use_hierarchy || !memcg) | ||
1152 | return false; | ||
1153 | return css_is_ancestor(&memcg->css, &root_memcg->css); | ||
1154 | } | ||
1155 | |||
1152 | static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, | 1156 | static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, |
1153 | struct mem_cgroup *memcg) | 1157 | struct mem_cgroup *memcg) |
1154 | { | 1158 | { |
1155 | if (root_memcg != memcg) { | 1159 | bool ret; |
1156 | return (root_memcg->use_hierarchy && | ||
1157 | css_is_ancestor(&memcg->css, &root_memcg->css)); | ||
1158 | } | ||
1159 | 1160 | ||
1160 | return true; | 1161 | rcu_read_lock(); |
1162 | ret = __mem_cgroup_same_or_subtree(root_memcg, memcg); | ||
1163 | rcu_read_unlock(); | ||
1164 | return ret; | ||
1161 | } | 1165 | } |
1162 | 1166 | ||
1163 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) | 1167 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) |
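After this hunk the memcg LRU interface is reduced to two primitives: mem_cgroup_page_lruvec() looks up the lruvec a page belongs to, and mem_cgroup_update_lru_size() keeps the per-lru page counts in sync. A sketch of how a caller combines them when linking a page; lru_add_example() is an illustrative stand-in for the real mm/ callers such as add_page_to_lru_list(), not code from this patch:

	static void lru_add_example(struct zone *zone, struct page *page,
				    enum lru_list lru)
	{
		struct lruvec *lruvec = mem_cgroup_page_lruvec(page, zone);
		int nr_pages = hpage_nr_pages(page);	/* THP counts all tail pages */

		mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
		list_add(&page->lru, &lruvec->lists[lru]);
		SetPageLRU(page);
	}

Removal is the mirror image with a negative nr_pages, which is exactly the underflow the VM_BUG_ON on a negative *lru_size is meant to catch.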
@@ -1195,19 +1199,15 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) | |||
1195 | return ret; | 1199 | return ret; |
1196 | } | 1200 | } |
1197 | 1201 | ||
1198 | int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | 1202 | int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) |
1199 | { | 1203 | { |
1200 | unsigned long inactive_ratio; | 1204 | unsigned long inactive_ratio; |
1201 | int nid = zone_to_nid(zone); | ||
1202 | int zid = zone_idx(zone); | ||
1203 | unsigned long inactive; | 1205 | unsigned long inactive; |
1204 | unsigned long active; | 1206 | unsigned long active; |
1205 | unsigned long gb; | 1207 | unsigned long gb; |
1206 | 1208 | ||
1207 | inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | 1209 | inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON); |
1208 | BIT(LRU_INACTIVE_ANON)); | 1210 | active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON); |
1209 | active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | ||
1210 | BIT(LRU_ACTIVE_ANON)); | ||
1211 | 1211 | ||
1212 | gb = (inactive + active) >> (30 - PAGE_SHIFT); | 1212 | gb = (inactive + active) >> (30 - PAGE_SHIFT); |
1213 | if (gb) | 1213 | if (gb) |
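The gb calculation converts LRU page counts into whole gigabytes: with 4 KiB pages (PAGE_SHIFT == 12) the shift is 30 - 12 = 18, i.e. a division by 262,144 pages per GiB. A worked example under that assumption:

	/* assuming PAGE_SHIFT == 12 (4 KiB pages) */
	unsigned long inactive = 200000, active = 150000;	/* pages */
	unsigned long gb = (inactive + active) >> (30 - PAGE_SHIFT);
	/* 350,000 >> 18 == 1: roughly 1.3 GiB of anon, so the non-zero gb branch is taken */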
@@ -1218,55 +1218,23 @@ int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) | |||
1218 | return inactive * inactive_ratio < active; | 1218 | return inactive * inactive_ratio < active; |
1219 | } | 1219 | } |
1220 | 1220 | ||
1221 | int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) | 1221 | int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec) |
1222 | { | 1222 | { |
1223 | unsigned long active; | 1223 | unsigned long active; |
1224 | unsigned long inactive; | 1224 | unsigned long inactive; |
1225 | int zid = zone_idx(zone); | ||
1226 | int nid = zone_to_nid(zone); | ||
1227 | 1225 | ||
1228 | inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | 1226 | inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE); |
1229 | BIT(LRU_INACTIVE_FILE)); | 1227 | active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE); |
1230 | active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | ||
1231 | BIT(LRU_ACTIVE_FILE)); | ||
1232 | 1228 | ||
1233 | return (active > inactive); | 1229 | return (active > inactive); |
1234 | } | 1230 | } |
1235 | 1231 | ||
1236 | struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, | ||
1237 | struct zone *zone) | ||
1238 | { | ||
1239 | int nid = zone_to_nid(zone); | ||
1240 | int zid = zone_idx(zone); | ||
1241 | struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(memcg, nid, zid); | ||
1242 | |||
1243 | return &mz->reclaim_stat; | ||
1244 | } | ||
1245 | |||
1246 | struct zone_reclaim_stat * | ||
1247 | mem_cgroup_get_reclaim_stat_from_page(struct page *page) | ||
1248 | { | ||
1249 | struct page_cgroup *pc; | ||
1250 | struct mem_cgroup_per_zone *mz; | ||
1251 | |||
1252 | if (mem_cgroup_disabled()) | ||
1253 | return NULL; | ||
1254 | |||
1255 | pc = lookup_page_cgroup(page); | ||
1256 | if (!PageCgroupUsed(pc)) | ||
1257 | return NULL; | ||
1258 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ | ||
1259 | smp_rmb(); | ||
1260 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); | ||
1261 | return &mz->reclaim_stat; | ||
1262 | } | ||
1263 | |||
1264 | #define mem_cgroup_from_res_counter(counter, member) \ | 1232 | #define mem_cgroup_from_res_counter(counter, member) \ |
1265 | container_of(counter, struct mem_cgroup, member) | 1233 | container_of(counter, struct mem_cgroup, member) |
1266 | 1234 | ||
1267 | /** | 1235 | /** |
1268 | * mem_cgroup_margin - calculate chargeable space of a memory cgroup | 1236 | * mem_cgroup_margin - calculate chargeable space of a memory cgroup |
1269 | * @mem: the memory cgroup | 1237 | * @memcg: the memory cgroup |
1270 | * | 1238 | * |
1271 | * Returns the maximum amount of memory @mem can be charged with, in | 1239 | * Returns the maximum amount of memory @mem can be charged with, in |
1272 | * pages. | 1240 | * pages. |
@@ -1540,7 +1508,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, | |||
1540 | 1508 | ||
1541 | /** | 1509 | /** |
1542 | * test_mem_cgroup_node_reclaimable | 1510 | * test_mem_cgroup_node_reclaimable |
1543 | * @mem: the target memcg | 1511 | * @memcg: the target memcg |
1544 | * @nid: the node ID to be checked. | 1512 | * @nid: the node ID to be checked. |
1545 | * @noswap : specify true here if the user wants file only information. | 1513 | * @noswap : specify true here if the user wants file only information. |
1546 | * | 1514 | * |
@@ -1634,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) | |||
1634 | * unused nodes. But scan_nodes is lazily updated and may not contain | 1602 | * unused nodes. But scan_nodes is lazily updated and may not contain |
1635 | * enough new information. We need to do double check. | 1603 | * enough new information. We need to do double check. |
1636 | */ | 1604 | */ |
1637 | bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) | 1605 | static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) |
1638 | { | 1606 | { |
1639 | int nid; | 1607 | int nid; |
1640 | 1608 | ||
@@ -1669,7 +1637,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) | |||
1669 | return 0; | 1637 | return 0; |
1670 | } | 1638 | } |
1671 | 1639 | ||
1672 | bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) | 1640 | static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) |
1673 | { | 1641 | { |
1674 | return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); | 1642 | return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); |
1675 | } | 1643 | } |
@@ -1843,7 +1811,8 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) | |||
1843 | /* | 1811 | /* |
1844 | * try to call OOM killer. returns false if we should exit memory-reclaim loop. | 1812 | * try to call OOM killer. returns false if we should exit memory-reclaim loop. |
1845 | */ | 1813 | */ |
1846 | bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order) | 1814 | static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, |
1815 | int order) | ||
1847 | { | 1816 | { |
1848 | struct oom_wait_info owait; | 1817 | struct oom_wait_info owait; |
1849 | bool locked, need_to_kill; | 1818 | bool locked, need_to_kill; |
@@ -1992,7 +1961,7 @@ struct memcg_stock_pcp { | |||
1992 | unsigned int nr_pages; | 1961 | unsigned int nr_pages; |
1993 | struct work_struct work; | 1962 | struct work_struct work; |
1994 | unsigned long flags; | 1963 | unsigned long flags; |
1995 | #define FLUSHING_CACHED_CHARGE (0) | 1964 | #define FLUSHING_CACHED_CHARGE 0 |
1996 | }; | 1965 | }; |
1997 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | 1966 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); |
1998 | static DEFINE_MUTEX(percpu_charge_mutex); | 1967 | static DEFINE_MUTEX(percpu_charge_mutex); |
@@ -2139,7 +2108,7 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu) | |||
2139 | int i; | 2108 | int i; |
2140 | 2109 | ||
2141 | spin_lock(&memcg->pcp_counter_lock); | 2110 | spin_lock(&memcg->pcp_counter_lock); |
2142 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { | 2111 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
2143 | long x = per_cpu(memcg->stat->count[i], cpu); | 2112 | long x = per_cpu(memcg->stat->count[i], cpu); |
2144 | 2113 | ||
2145 | per_cpu(memcg->stat->count[i], cpu) = 0; | 2114 | per_cpu(memcg->stat->count[i], cpu) = 0; |
@@ -2427,6 +2396,24 @@ static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, | |||
2427 | } | 2396 | } |
2428 | 2397 | ||
2429 | /* | 2398 | /* |
2399 | * Cancel charges in this cgroup....doesn't propagate to parent cgroup. | ||
2400 | * This is useful when moving usage to parent cgroup. | ||
2401 | */ | ||
2402 | static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, | ||
2403 | unsigned int nr_pages) | ||
2404 | { | ||
2405 | unsigned long bytes = nr_pages * PAGE_SIZE; | ||
2406 | |||
2407 | if (mem_cgroup_is_root(memcg)) | ||
2408 | return; | ||
2409 | |||
2410 | res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); | ||
2411 | if (do_swap_account) | ||
2412 | res_counter_uncharge_until(&memcg->memsw, | ||
2413 | memcg->memsw.parent, bytes); | ||
2414 | } | ||
2415 | |||
2416 | /* | ||
2430 | * A helper function to get mem_cgroup from ID. must be called under | 2417 | * A helper function to get mem_cgroup from ID. must be called under |
2431 | * rcu_read_lock(). The caller must check css_is_removed() or some if | 2418 | * rcu_read_lock(). The caller must check css_is_removed() or some if |
2432 | * it's concern. (dropping refcnt from swap can be called against removed | 2419 | * it's concern. (dropping refcnt from swap can be called against removed |
@@ -2481,6 +2468,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2481 | { | 2468 | { |
2482 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2469 | struct page_cgroup *pc = lookup_page_cgroup(page); |
2483 | struct zone *uninitialized_var(zone); | 2470 | struct zone *uninitialized_var(zone); |
2471 | struct lruvec *lruvec; | ||
2484 | bool was_on_lru = false; | 2472 | bool was_on_lru = false; |
2485 | bool anon; | 2473 | bool anon; |
2486 | 2474 | ||
@@ -2503,8 +2491,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2503 | zone = page_zone(page); | 2491 | zone = page_zone(page); |
2504 | spin_lock_irq(&zone->lru_lock); | 2492 | spin_lock_irq(&zone->lru_lock); |
2505 | if (PageLRU(page)) { | 2493 | if (PageLRU(page)) { |
2494 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); | ||
2506 | ClearPageLRU(page); | 2495 | ClearPageLRU(page); |
2507 | del_page_from_lru_list(zone, page, page_lru(page)); | 2496 | del_page_from_lru_list(page, lruvec, page_lru(page)); |
2508 | was_on_lru = true; | 2497 | was_on_lru = true; |
2509 | } | 2498 | } |
2510 | } | 2499 | } |
@@ -2522,9 +2511,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2522 | 2511 | ||
2523 | if (lrucare) { | 2512 | if (lrucare) { |
2524 | if (was_on_lru) { | 2513 | if (was_on_lru) { |
2514 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); | ||
2525 | VM_BUG_ON(PageLRU(page)); | 2515 | VM_BUG_ON(PageLRU(page)); |
2526 | SetPageLRU(page); | 2516 | SetPageLRU(page); |
2527 | add_page_to_lru_list(zone, page, page_lru(page)); | 2517 | add_page_to_lru_list(page, lruvec, page_lru(page)); |
2528 | } | 2518 | } |
2529 | spin_unlock_irq(&zone->lru_lock); | 2519 | spin_unlock_irq(&zone->lru_lock); |
2530 | } | 2520 | } |
@@ -2547,7 +2537,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2547 | 2537 | ||
2548 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2538 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
2549 | 2539 | ||
2550 | #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION)) | 2540 | #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) |
2551 | /* | 2541 | /* |
2552 | * Because tail pages are not marked as "used", set it. We're under | 2542 | * Because tail pages are not marked as "used", set it. We're under |
2553 | * zone->lru_lock, 'splitting on pmd' and compound_lock. | 2543 | * zone->lru_lock, 'splitting on pmd' and compound_lock. |
@@ -2578,23 +2568,19 @@ void mem_cgroup_split_huge_fixup(struct page *head) | |||
2578 | * @pc: page_cgroup of the page. | 2568 | * @pc: page_cgroup of the page. |
2579 | * @from: mem_cgroup which the page is moved from. | 2569 | * @from: mem_cgroup which the page is moved from. |
2580 | * @to: mem_cgroup which the page is moved to. @from != @to. | 2570 | * @to: mem_cgroup which the page is moved to. @from != @to. |
2581 | * @uncharge: whether we should call uncharge and css_put against @from. | ||
2582 | * | 2571 | * |
2583 | * The caller must confirm following. | 2572 | * The caller must confirm following. |
2584 | * - page is not on LRU (isolate_page() is useful.) | 2573 | * - page is not on LRU (isolate_page() is useful.) |
2585 | * - compound_lock is held when nr_pages > 1 | 2574 | * - compound_lock is held when nr_pages > 1 |
2586 | * | 2575 | * |
2587 | * This function doesn't do "charge" nor css_get to new cgroup. It should be | 2576 | * This function doesn't do "charge" to new cgroup and doesn't do "uncharge" |
2588 | * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is | 2577 | * from old cgroup. |
2589 | * true, this function does "uncharge" from old cgroup, but it doesn't if | ||
2590 | * @uncharge is false, so a caller should do "uncharge". | ||
2591 | */ | 2578 | */ |
2592 | static int mem_cgroup_move_account(struct page *page, | 2579 | static int mem_cgroup_move_account(struct page *page, |
2593 | unsigned int nr_pages, | 2580 | unsigned int nr_pages, |
2594 | struct page_cgroup *pc, | 2581 | struct page_cgroup *pc, |
2595 | struct mem_cgroup *from, | 2582 | struct mem_cgroup *from, |
2596 | struct mem_cgroup *to, | 2583 | struct mem_cgroup *to) |
2597 | bool uncharge) | ||
2598 | { | 2584 | { |
2599 | unsigned long flags; | 2585 | unsigned long flags; |
2600 | int ret; | 2586 | int ret; |
@@ -2628,9 +2614,6 @@ static int mem_cgroup_move_account(struct page *page, | |||
2628 | preempt_enable(); | 2614 | preempt_enable(); |
2629 | } | 2615 | } |
2630 | mem_cgroup_charge_statistics(from, anon, -nr_pages); | 2616 | mem_cgroup_charge_statistics(from, anon, -nr_pages); |
2631 | if (uncharge) | ||
2632 | /* This is not "cancel", but cancel_charge does all we need. */ | ||
2633 | __mem_cgroup_cancel_charge(from, nr_pages); | ||
2634 | 2617 | ||
2635 | /* caller should have done css_get */ | 2618 | /* caller should have done css_get */ |
2636 | pc->mem_cgroup = to; | 2619 | pc->mem_cgroup = to; |
@@ -2664,15 +2647,13 @@ static int mem_cgroup_move_parent(struct page *page, | |||
2664 | struct mem_cgroup *child, | 2647 | struct mem_cgroup *child, |
2665 | gfp_t gfp_mask) | 2648 | gfp_t gfp_mask) |
2666 | { | 2649 | { |
2667 | struct cgroup *cg = child->css.cgroup; | ||
2668 | struct cgroup *pcg = cg->parent; | ||
2669 | struct mem_cgroup *parent; | 2650 | struct mem_cgroup *parent; |
2670 | unsigned int nr_pages; | 2651 | unsigned int nr_pages; |
2671 | unsigned long uninitialized_var(flags); | 2652 | unsigned long uninitialized_var(flags); |
2672 | int ret; | 2653 | int ret; |
2673 | 2654 | ||
2674 | /* Is ROOT ? */ | 2655 | /* Is ROOT ? */ |
2675 | if (!pcg) | 2656 | if (mem_cgroup_is_root(child)) |
2676 | return -EINVAL; | 2657 | return -EINVAL; |
2677 | 2658 | ||
2678 | ret = -EBUSY; | 2659 | ret = -EBUSY; |
@@ -2683,21 +2664,23 @@ static int mem_cgroup_move_parent(struct page *page, | |||
2683 | 2664 | ||
2684 | nr_pages = hpage_nr_pages(page); | 2665 | nr_pages = hpage_nr_pages(page); |
2685 | 2666 | ||
2686 | parent = mem_cgroup_from_cont(pcg); | 2667 | parent = parent_mem_cgroup(child); |
2687 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); | 2668 | /* |
2688 | if (ret) | 2669 | * If no parent, move charges to root cgroup. |
2689 | goto put_back; | 2670 | */ |
2671 | if (!parent) | ||
2672 | parent = root_mem_cgroup; | ||
2690 | 2673 | ||
2691 | if (nr_pages > 1) | 2674 | if (nr_pages > 1) |
2692 | flags = compound_lock_irqsave(page); | 2675 | flags = compound_lock_irqsave(page); |
2693 | 2676 | ||
2694 | ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true); | 2677 | ret = mem_cgroup_move_account(page, nr_pages, |
2695 | if (ret) | 2678 | pc, child, parent); |
2696 | __mem_cgroup_cancel_charge(parent, nr_pages); | 2679 | if (!ret) |
2680 | __mem_cgroup_cancel_local_charge(child, nr_pages); | ||
2697 | 2681 | ||
2698 | if (nr_pages > 1) | 2682 | if (nr_pages > 1) |
2699 | compound_unlock_irqrestore(page, flags); | 2683 | compound_unlock_irqrestore(page, flags); |
2700 | put_back: | ||
2701 | putback_lru_page(page); | 2684 | putback_lru_page(page); |
2702 | put: | 2685 | put: |
2703 | put_page(page); | 2686 | put_page(page); |
@@ -2845,24 +2828,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, | |||
2845 | */ | 2828 | */ |
2846 | if (do_swap_account && PageSwapCache(page)) { | 2829 | if (do_swap_account && PageSwapCache(page)) { |
2847 | swp_entry_t ent = {.val = page_private(page)}; | 2830 | swp_entry_t ent = {.val = page_private(page)}; |
2848 | struct mem_cgroup *swap_memcg; | 2831 | mem_cgroup_uncharge_swap(ent); |
2849 | unsigned short id; | ||
2850 | |||
2851 | id = swap_cgroup_record(ent, 0); | ||
2852 | rcu_read_lock(); | ||
2853 | swap_memcg = mem_cgroup_lookup(id); | ||
2854 | if (swap_memcg) { | ||
2855 | /* | ||
2856 | * This recorded memcg can be obsolete one. So, avoid | ||
2857 | * calling css_tryget | ||
2858 | */ | ||
2859 | if (!mem_cgroup_is_root(swap_memcg)) | ||
2860 | res_counter_uncharge(&swap_memcg->memsw, | ||
2861 | PAGE_SIZE); | ||
2862 | mem_cgroup_swap_statistics(swap_memcg, false); | ||
2863 | mem_cgroup_put(swap_memcg); | ||
2864 | } | ||
2865 | rcu_read_unlock(); | ||
2866 | } | 2832 | } |
2867 | /* | 2833 | /* |
2868 | * At swapin, we may charge account against cgroup which has no tasks. | 2834 | * At swapin, we may charge account against cgroup which has no tasks. |
@@ -3155,7 +3121,6 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
3155 | * @entry: swap entry to be moved | 3121 | * @entry: swap entry to be moved |
3156 | * @from: mem_cgroup which the entry is moved from | 3122 | * @from: mem_cgroup which the entry is moved from |
3157 | * @to: mem_cgroup which the entry is moved to | 3123 | * @to: mem_cgroup which the entry is moved to |
3158 | * @need_fixup: whether we should fixup res_counters and refcounts. | ||
3159 | * | 3124 | * |
3160 | * It succeeds only when the swap_cgroup's record for this entry is the same | 3125 | * It succeeds only when the swap_cgroup's record for this entry is the same |
3161 | * as the mem_cgroup's id of @from. | 3126 | * as the mem_cgroup's id of @from. |
@@ -3166,7 +3131,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
3166 | * both res and memsw, and called css_get(). | 3131 | * both res and memsw, and called css_get(). |
3167 | */ | 3132 | */ |
3168 | static int mem_cgroup_move_swap_account(swp_entry_t entry, | 3133 | static int mem_cgroup_move_swap_account(swp_entry_t entry, |
3169 | struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) | 3134 | struct mem_cgroup *from, struct mem_cgroup *to) |
3170 | { | 3135 | { |
3171 | unsigned short old_id, new_id; | 3136 | unsigned short old_id, new_id; |
3172 | 3137 | ||
@@ -3185,24 +3150,13 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry, | |||
3185 | * swap-in, the refcount of @to might be decreased to 0. | 3150 | * swap-in, the refcount of @to might be decreased to 0. |
3186 | */ | 3151 | */ |
3187 | mem_cgroup_get(to); | 3152 | mem_cgroup_get(to); |
3188 | if (need_fixup) { | ||
3189 | if (!mem_cgroup_is_root(from)) | ||
3190 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | ||
3191 | mem_cgroup_put(from); | ||
3192 | /* | ||
3193 | * we charged both to->res and to->memsw, so we should | ||
3194 | * uncharge to->res. | ||
3195 | */ | ||
3196 | if (!mem_cgroup_is_root(to)) | ||
3197 | res_counter_uncharge(&to->res, PAGE_SIZE); | ||
3198 | } | ||
3199 | return 0; | 3153 | return 0; |
3200 | } | 3154 | } |
3201 | return -EINVAL; | 3155 | return -EINVAL; |
3202 | } | 3156 | } |
3203 | #else | 3157 | #else |
3204 | static inline int mem_cgroup_move_swap_account(swp_entry_t entry, | 3158 | static inline int mem_cgroup_move_swap_account(swp_entry_t entry, |
3205 | struct mem_cgroup *from, struct mem_cgroup *to, bool need_fixup) | 3159 | struct mem_cgroup *from, struct mem_cgroup *to) |
3206 | { | 3160 | { |
3207 | return -EINVAL; | 3161 | return -EINVAL; |
3208 | } | 3162 | } |
@@ -3363,7 +3317,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg, | |||
3363 | void mem_cgroup_replace_page_cache(struct page *oldpage, | 3317 | void mem_cgroup_replace_page_cache(struct page *oldpage, |
3364 | struct page *newpage) | 3318 | struct page *newpage) |
3365 | { | 3319 | { |
3366 | struct mem_cgroup *memcg; | 3320 | struct mem_cgroup *memcg = NULL; |
3367 | struct page_cgroup *pc; | 3321 | struct page_cgroup *pc; |
3368 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; | 3322 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; |
3369 | 3323 | ||
@@ -3373,11 +3327,20 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
3373 | pc = lookup_page_cgroup(oldpage); | 3327 | pc = lookup_page_cgroup(oldpage); |
3374 | /* fix accounting on old pages */ | 3328 | /* fix accounting on old pages */ |
3375 | lock_page_cgroup(pc); | 3329 | lock_page_cgroup(pc); |
3376 | memcg = pc->mem_cgroup; | 3330 | if (PageCgroupUsed(pc)) { |
3377 | mem_cgroup_charge_statistics(memcg, false, -1); | 3331 | memcg = pc->mem_cgroup; |
3378 | ClearPageCgroupUsed(pc); | 3332 | mem_cgroup_charge_statistics(memcg, false, -1); |
3333 | ClearPageCgroupUsed(pc); | ||
3334 | } | ||
3379 | unlock_page_cgroup(pc); | 3335 | unlock_page_cgroup(pc); |
3380 | 3336 | ||
3337 | /* | ||
3338 | * When called from shmem_replace_page(), in some cases the | ||
3339 | * oldpage has already been charged, and in some cases not. | ||
3340 | */ | ||
3341 | if (!memcg) | ||
3342 | return; | ||
3343 | |||
3381 | if (PageSwapBacked(oldpage)) | 3344 | if (PageSwapBacked(oldpage)) |
3382 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; | 3345 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
3383 | 3346 | ||
@@ -3793,7 +3756,7 @@ try_to_free: | |||
3793 | goto move_account; | 3756 | goto move_account; |
3794 | } | 3757 | } |
3795 | 3758 | ||
3796 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 3759 | static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) |
3797 | { | 3760 | { |
3798 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); | 3761 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); |
3799 | } | 3762 | } |
@@ -3873,14 +3836,21 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | |||
3873 | return val << PAGE_SHIFT; | 3836 | return val << PAGE_SHIFT; |
3874 | } | 3837 | } |
3875 | 3838 | ||
3876 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 3839 | static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, |
3840 | struct file *file, char __user *buf, | ||
3841 | size_t nbytes, loff_t *ppos) | ||
3877 | { | 3842 | { |
3878 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 3843 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
3844 | char str[64]; | ||
3879 | u64 val; | 3845 | u64 val; |
3880 | int type, name; | 3846 | int type, name, len; |
3881 | 3847 | ||
3882 | type = MEMFILE_TYPE(cft->private); | 3848 | type = MEMFILE_TYPE(cft->private); |
3883 | name = MEMFILE_ATTR(cft->private); | 3849 | name = MEMFILE_ATTR(cft->private); |
3850 | |||
3851 | if (!do_swap_account && type == _MEMSWAP) | ||
3852 | return -EOPNOTSUPP; | ||
3853 | |||
3884 | switch (type) { | 3854 | switch (type) { |
3885 | case _MEM: | 3855 | case _MEM: |
3886 | if (name == RES_USAGE) | 3856 | if (name == RES_USAGE) |
@@ -3897,7 +3867,9 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | |||
3897 | default: | 3867 | default: |
3898 | BUG(); | 3868 | BUG(); |
3899 | } | 3869 | } |
3900 | return val; | 3870 | |
3871 | len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val); | ||
3872 | return simple_read_from_buffer(buf, nbytes, ppos, str, len); | ||
3901 | } | 3873 | } |
3902 | /* | 3874 | /* |
3903 | * The user of this function is... | 3875 | * The user of this function is... |
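mem_cgroup_read() now formats the counter value itself and lets simple_read_from_buffer() copy the resulting string to user space, honouring the caller's buffer length and file offset. A stripped-down handler of the same shape; example_u64_read() and its fixed value are illustrative only:

	static ssize_t example_u64_read(struct cgroup *cont, struct cftype *cft,
					struct file *file, char __user *buf,
					size_t nbytes, loff_t *ppos)
	{
		char str[64];
		u64 val = 42;	/* whatever counter this file exposes */
		int len;

		len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
		/* copies min(nbytes, len - *ppos) bytes and advances *ppos */
		return simple_read_from_buffer(buf, nbytes, ppos, str, len);
	}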
@@ -3913,6 +3885,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | |||
3913 | 3885 | ||
3914 | type = MEMFILE_TYPE(cft->private); | 3886 | type = MEMFILE_TYPE(cft->private); |
3915 | name = MEMFILE_ATTR(cft->private); | 3887 | name = MEMFILE_ATTR(cft->private); |
3888 | |||
3889 | if (!do_swap_account && type == _MEMSWAP) | ||
3890 | return -EOPNOTSUPP; | ||
3891 | |||
3916 | switch (name) { | 3892 | switch (name) { |
3917 | case RES_LIMIT: | 3893 | case RES_LIMIT: |
3918 | if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ | 3894 | if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ |
@@ -3978,12 +3954,15 @@ out: | |||
3978 | 3954 | ||
3979 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | 3955 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
3980 | { | 3956 | { |
3981 | struct mem_cgroup *memcg; | 3957 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
3982 | int type, name; | 3958 | int type, name; |
3983 | 3959 | ||
3984 | memcg = mem_cgroup_from_cont(cont); | ||
3985 | type = MEMFILE_TYPE(event); | 3960 | type = MEMFILE_TYPE(event); |
3986 | name = MEMFILE_ATTR(event); | 3961 | name = MEMFILE_ATTR(event); |
3962 | |||
3963 | if (!do_swap_account && type == _MEMSWAP) | ||
3964 | return -EOPNOTSUPP; | ||
3965 | |||
3987 | switch (name) { | 3966 | switch (name) { |
3988 | case RES_MAX_USAGE: | 3967 | case RES_MAX_USAGE: |
3989 | if (type == _MEM) | 3968 | if (type == _MEM) |
@@ -4035,103 +4014,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | |||
4035 | } | 4014 | } |
4036 | #endif | 4015 | #endif |
4037 | 4016 | ||
4038 | |||
4039 | /* For read statistics */ | ||
4040 | enum { | ||
4041 | MCS_CACHE, | ||
4042 | MCS_RSS, | ||
4043 | MCS_FILE_MAPPED, | ||
4044 | MCS_PGPGIN, | ||
4045 | MCS_PGPGOUT, | ||
4046 | MCS_SWAP, | ||
4047 | MCS_PGFAULT, | ||
4048 | MCS_PGMAJFAULT, | ||
4049 | MCS_INACTIVE_ANON, | ||
4050 | MCS_ACTIVE_ANON, | ||
4051 | MCS_INACTIVE_FILE, | ||
4052 | MCS_ACTIVE_FILE, | ||
4053 | MCS_UNEVICTABLE, | ||
4054 | NR_MCS_STAT, | ||
4055 | }; | ||
4056 | |||
4057 | struct mcs_total_stat { | ||
4058 | s64 stat[NR_MCS_STAT]; | ||
4059 | }; | ||
4060 | |||
4061 | struct { | ||
4062 | char *local_name; | ||
4063 | char *total_name; | ||
4064 | } memcg_stat_strings[NR_MCS_STAT] = { | ||
4065 | {"cache", "total_cache"}, | ||
4066 | {"rss", "total_rss"}, | ||
4067 | {"mapped_file", "total_mapped_file"}, | ||
4068 | {"pgpgin", "total_pgpgin"}, | ||
4069 | {"pgpgout", "total_pgpgout"}, | ||
4070 | {"swap", "total_swap"}, | ||
4071 | {"pgfault", "total_pgfault"}, | ||
4072 | {"pgmajfault", "total_pgmajfault"}, | ||
4073 | {"inactive_anon", "total_inactive_anon"}, | ||
4074 | {"active_anon", "total_active_anon"}, | ||
4075 | {"inactive_file", "total_inactive_file"}, | ||
4076 | {"active_file", "total_active_file"}, | ||
4077 | {"unevictable", "total_unevictable"} | ||
4078 | }; | ||
4079 | |||
4080 | |||
4081 | static void | ||
4082 | mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) | ||
4083 | { | ||
4084 | s64 val; | ||
4085 | |||
4086 | /* per cpu stat */ | ||
4087 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE); | ||
4088 | s->stat[MCS_CACHE] += val * PAGE_SIZE; | ||
4089 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS); | ||
4090 | s->stat[MCS_RSS] += val * PAGE_SIZE; | ||
4091 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); | ||
4092 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; | ||
4093 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN); | ||
4094 | s->stat[MCS_PGPGIN] += val; | ||
4095 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT); | ||
4096 | s->stat[MCS_PGPGOUT] += val; | ||
4097 | if (do_swap_account) { | ||
4098 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); | ||
4099 | s->stat[MCS_SWAP] += val * PAGE_SIZE; | ||
4100 | } | ||
4101 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT); | ||
4102 | s->stat[MCS_PGFAULT] += val; | ||
4103 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT); | ||
4104 | s->stat[MCS_PGMAJFAULT] += val; | ||
4105 | |||
4106 | /* per zone stat */ | ||
4107 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); | ||
4108 | s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; | ||
4109 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); | ||
4110 | s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; | ||
4111 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); | ||
4112 | s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; | ||
4113 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); | ||
4114 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; | ||
4115 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE)); | ||
4116 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; | ||
4117 | } | ||
4118 | |||
4119 | static void | ||
4120 | mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) | ||
4121 | { | ||
4122 | struct mem_cgroup *iter; | ||
4123 | |||
4124 | for_each_mem_cgroup_tree(iter, memcg) | ||
4125 | mem_cgroup_get_local_stat(iter, s); | ||
4126 | } | ||
4127 | |||
4128 | #ifdef CONFIG_NUMA | 4017 | #ifdef CONFIG_NUMA |
4129 | static int mem_control_numa_stat_show(struct seq_file *m, void *arg) | 4018 | static int mem_control_numa_stat_show(struct cgroup *cont, struct cftype *cft, |
4019 | struct seq_file *m) | ||
4130 | { | 4020 | { |
4131 | int nid; | 4021 | int nid; |
4132 | unsigned long total_nr, file_nr, anon_nr, unevictable_nr; | 4022 | unsigned long total_nr, file_nr, anon_nr, unevictable_nr; |
4133 | unsigned long node_nr; | 4023 | unsigned long node_nr; |
4134 | struct cgroup *cont = m->private; | ||
4135 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 4024 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
4136 | 4025 | ||
4137 | total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); | 4026 | total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL); |
@@ -4172,64 +4061,100 @@ static int mem_control_numa_stat_show(struct seq_file *m, void *arg) | |||
4172 | } | 4061 | } |
4173 | #endif /* CONFIG_NUMA */ | 4062 | #endif /* CONFIG_NUMA */ |
4174 | 4063 | ||
4064 | static const char * const mem_cgroup_lru_names[] = { | ||
4065 | "inactive_anon", | ||
4066 | "active_anon", | ||
4067 | "inactive_file", | ||
4068 | "active_file", | ||
4069 | "unevictable", | ||
4070 | }; | ||
4071 | |||
4072 | static inline void mem_cgroup_lru_names_not_uptodate(void) | ||
4073 | { | ||
4074 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | ||
4075 | } | ||
4076 | |||
4175 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | 4077 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, |
4176 | struct cgroup_map_cb *cb) | 4078 | struct seq_file *m) |
4177 | { | 4079 | { |
4178 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 4080 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
4179 | struct mcs_total_stat mystat; | 4081 | struct mem_cgroup *mi; |
4180 | int i; | 4082 | unsigned int i; |
4181 | |||
4182 | memset(&mystat, 0, sizeof(mystat)); | ||
4183 | mem_cgroup_get_local_stat(memcg, &mystat); | ||
4184 | 4083 | ||
4185 | 4084 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { | |
4186 | for (i = 0; i < NR_MCS_STAT; i++) { | 4085 | if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) |
4187 | if (i == MCS_SWAP && !do_swap_account) | ||
4188 | continue; | 4086 | continue; |
4189 | cb->fill(cb, memcg_stat_strings[i].local_name, mystat.stat[i]); | 4087 | seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], |
4088 | mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); | ||
4190 | } | 4089 | } |
4191 | 4090 | ||
4091 | for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) | ||
4092 | seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i], | ||
4093 | mem_cgroup_read_events(memcg, i)); | ||
4094 | |||
4095 | for (i = 0; i < NR_LRU_LISTS; i++) | ||
4096 | seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i], | ||
4097 | mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE); | ||
4098 | |||
4192 | /* Hierarchical information */ | 4099 | /* Hierarchical information */ |
4193 | { | 4100 | { |
4194 | unsigned long long limit, memsw_limit; | 4101 | unsigned long long limit, memsw_limit; |
4195 | memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit); | 4102 | memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit); |
4196 | cb->fill(cb, "hierarchical_memory_limit", limit); | 4103 | seq_printf(m, "hierarchical_memory_limit %llu\n", limit); |
4197 | if (do_swap_account) | 4104 | if (do_swap_account) |
4198 | cb->fill(cb, "hierarchical_memsw_limit", memsw_limit); | 4105 | seq_printf(m, "hierarchical_memsw_limit %llu\n", |
4106 | memsw_limit); | ||
4199 | } | 4107 | } |
4200 | 4108 | ||
4201 | memset(&mystat, 0, sizeof(mystat)); | 4109 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
4202 | mem_cgroup_get_total_stat(memcg, &mystat); | 4110 | long long val = 0; |
4203 | for (i = 0; i < NR_MCS_STAT; i++) { | 4111 | |
4204 | if (i == MCS_SWAP && !do_swap_account) | 4112 | if (i == MEM_CGROUP_STAT_SWAPOUT && !do_swap_account) |
4205 | continue; | 4113 | continue; |
4206 | cb->fill(cb, memcg_stat_strings[i].total_name, mystat.stat[i]); | 4114 | for_each_mem_cgroup_tree(mi, memcg) |
4115 | val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; | ||
4116 | seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); | ||
4117 | } | ||
4118 | |||
4119 | for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { | ||
4120 | unsigned long long val = 0; | ||
4121 | |||
4122 | for_each_mem_cgroup_tree(mi, memcg) | ||
4123 | val += mem_cgroup_read_events(mi, i); | ||
4124 | seq_printf(m, "total_%s %llu\n", | ||
4125 | mem_cgroup_events_names[i], val); | ||
4126 | } | ||
4127 | |||
4128 | for (i = 0; i < NR_LRU_LISTS; i++) { | ||
4129 | unsigned long long val = 0; | ||
4130 | |||
4131 | for_each_mem_cgroup_tree(mi, memcg) | ||
4132 | val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE; | ||
4133 | seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val); | ||
4207 | } | 4134 | } |
4208 | 4135 | ||
4209 | #ifdef CONFIG_DEBUG_VM | 4136 | #ifdef CONFIG_DEBUG_VM |
4210 | { | 4137 | { |
4211 | int nid, zid; | 4138 | int nid, zid; |
4212 | struct mem_cgroup_per_zone *mz; | 4139 | struct mem_cgroup_per_zone *mz; |
4140 | struct zone_reclaim_stat *rstat; | ||
4213 | unsigned long recent_rotated[2] = {0, 0}; | 4141 | unsigned long recent_rotated[2] = {0, 0}; |
4214 | unsigned long recent_scanned[2] = {0, 0}; | 4142 | unsigned long recent_scanned[2] = {0, 0}; |
4215 | 4143 | ||
4216 | for_each_online_node(nid) | 4144 | for_each_online_node(nid) |
4217 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { | 4145 | for (zid = 0; zid < MAX_NR_ZONES; zid++) { |
4218 | mz = mem_cgroup_zoneinfo(memcg, nid, zid); | 4146 | mz = mem_cgroup_zoneinfo(memcg, nid, zid); |
4147 | rstat = &mz->lruvec.reclaim_stat; | ||
4219 | 4148 | ||
4220 | recent_rotated[0] += | 4149 | recent_rotated[0] += rstat->recent_rotated[0]; |
4221 | mz->reclaim_stat.recent_rotated[0]; | 4150 | recent_rotated[1] += rstat->recent_rotated[1]; |
4222 | recent_rotated[1] += | 4151 | recent_scanned[0] += rstat->recent_scanned[0]; |
4223 | mz->reclaim_stat.recent_rotated[1]; | 4152 | recent_scanned[1] += rstat->recent_scanned[1]; |
4224 | recent_scanned[0] += | ||
4225 | mz->reclaim_stat.recent_scanned[0]; | ||
4226 | recent_scanned[1] += | ||
4227 | mz->reclaim_stat.recent_scanned[1]; | ||
4228 | } | 4153 | } |
4229 | cb->fill(cb, "recent_rotated_anon", recent_rotated[0]); | 4154 | seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); |
4230 | cb->fill(cb, "recent_rotated_file", recent_rotated[1]); | 4155 | seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); |
4231 | cb->fill(cb, "recent_scanned_anon", recent_scanned[0]); | 4156 | seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); |
4232 | cb->fill(cb, "recent_scanned_file", recent_scanned[1]); | 4157 | seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); |
4233 | } | 4158 | } |
4234 | #endif | 4159 | #endif |
4235 | 4160 | ||
@@ -4291,7 +4216,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) | |||
4291 | usage = mem_cgroup_usage(memcg, swap); | 4216 | usage = mem_cgroup_usage(memcg, swap); |
4292 | 4217 | ||
4293 | /* | 4218 | /* |
4294 | * current_threshold points to threshold just below usage. | 4219 | * current_threshold points to threshold just below or equal to usage. |
4295 | * If it's not true, a threshold was crossed after last | 4220 | * If it's not true, a threshold was crossed after last |
4296 | * call of __mem_cgroup_threshold(). | 4221 | * call of __mem_cgroup_threshold(). |
4297 | */ | 4222 | */ |
@@ -4417,14 +4342,15 @@ static int mem_cgroup_usage_register_event(struct cgroup *cgrp, | |||
4417 | /* Find current threshold */ | 4342 | /* Find current threshold */ |
4418 | new->current_threshold = -1; | 4343 | new->current_threshold = -1; |
4419 | for (i = 0; i < size; i++) { | 4344 | for (i = 0; i < size; i++) { |
4420 | if (new->entries[i].threshold < usage) { | 4345 | if (new->entries[i].threshold <= usage) { |
4421 | /* | 4346 | /* |
4422 | * new->current_threshold will not be used until | 4347 | * new->current_threshold will not be used until |
4423 | * rcu_assign_pointer(), so it's safe to increment | 4348 | * rcu_assign_pointer(), so it's safe to increment |
4424 | * it here. | 4349 | * it here. |
4425 | */ | 4350 | */ |
4426 | ++new->current_threshold; | 4351 | ++new->current_threshold; |
4427 | } | 4352 | } else |
4353 | break; | ||
4428 | } | 4354 | } |
4429 | 4355 | ||
4430 | /* Free old spare buffer and save old primary buffer as spare */ | 4356 | /* Free old spare buffer and save old primary buffer as spare */ |
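Changing the comparison from '<' to '<=' (here and in the unregister path below) makes current_threshold point at the threshold just below or equal to usage, so a threshold that exactly matches the current usage counts as already crossed. A short walkthrough under assumed values:

	/* sorted thresholds: entries[0] = 4M, entries[1] = 8M, entries[2] = 16M
	 * usage = 8M:
	 *   entries[0] <= usage  -> current_threshold = 0
	 *   entries[1] <= usage  -> current_threshold = 1
	 *   entries[2] >  usage  -> break (array is sorted, nothing further can match)
	 * current_threshold ends at index 1, the 8M entry.
	 */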
@@ -4493,7 +4419,7 @@ static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp, | |||
4493 | continue; | 4419 | continue; |
4494 | 4420 | ||
4495 | new->entries[j] = thresholds->primary->entries[i]; | 4421 | new->entries[j] = thresholds->primary->entries[i]; |
4496 | if (new->entries[j].threshold < usage) { | 4422 | if (new->entries[j].threshold <= usage) { |
4497 | /* | 4423 | /* |
4498 | * new->current_threshold will not be used | 4424 | * new->current_threshold will not be used |
4499 | * until rcu_assign_pointer(), so it's safe to increment | 4425 | * until rcu_assign_pointer(), so it's safe to increment |
@@ -4607,46 +4533,23 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, | |||
4607 | return 0; | 4533 | return 0; |
4608 | } | 4534 | } |
4609 | 4535 | ||
4610 | #ifdef CONFIG_NUMA | ||
4611 | static const struct file_operations mem_control_numa_stat_file_operations = { | ||
4612 | .read = seq_read, | ||
4613 | .llseek = seq_lseek, | ||
4614 | .release = single_release, | ||
4615 | }; | ||
4616 | |||
4617 | static int mem_control_numa_stat_open(struct inode *unused, struct file *file) | ||
4618 | { | ||
4619 | struct cgroup *cont = file->f_dentry->d_parent->d_fsdata; | ||
4620 | |||
4621 | file->f_op = &mem_control_numa_stat_file_operations; | ||
4622 | return single_open(file, mem_control_numa_stat_show, cont); | ||
4623 | } | ||
4624 | #endif /* CONFIG_NUMA */ | ||
4625 | |||
4626 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | 4536 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM |
4627 | static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) | 4537 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
4628 | { | 4538 | { |
4629 | /* | 4539 | return mem_cgroup_sockets_init(memcg, ss); |
4630 | * Part of this would be better living in a separate allocation | ||
4631 | * function, leaving us with just the cgroup tree population work. | ||
4632 | * We, however, depend on state such as network's proto_list that | ||
4633 | * is only initialized after cgroup creation. I found the less | ||
4634 | * cumbersome way to deal with it to defer it all to populate time | ||
4635 | */ | ||
4636 | return mem_cgroup_sockets_init(cont, ss); | ||
4637 | }; | 4540 | }; |
4638 | 4541 | ||
4639 | static void kmem_cgroup_destroy(struct cgroup *cont) | 4542 | static void kmem_cgroup_destroy(struct mem_cgroup *memcg) |
4640 | { | 4543 | { |
4641 | mem_cgroup_sockets_destroy(cont); | 4544 | mem_cgroup_sockets_destroy(memcg); |
4642 | } | 4545 | } |
4643 | #else | 4546 | #else |
4644 | static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) | 4547 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
4645 | { | 4548 | { |
4646 | return 0; | 4549 | return 0; |
4647 | } | 4550 | } |
4648 | 4551 | ||
4649 | static void kmem_cgroup_destroy(struct cgroup *cont) | 4552 | static void kmem_cgroup_destroy(struct mem_cgroup *memcg) |
4650 | { | 4553 | { |
4651 | } | 4554 | } |
4652 | #endif | 4555 | #endif |
@@ -4655,7 +4558,7 @@ static struct cftype mem_cgroup_files[] = { | |||
4655 | { | 4558 | { |
4656 | .name = "usage_in_bytes", | 4559 | .name = "usage_in_bytes", |
4657 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 4560 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
4658 | .read_u64 = mem_cgroup_read, | 4561 | .read = mem_cgroup_read, |
4659 | .register_event = mem_cgroup_usage_register_event, | 4562 | .register_event = mem_cgroup_usage_register_event, |
4660 | .unregister_event = mem_cgroup_usage_unregister_event, | 4563 | .unregister_event = mem_cgroup_usage_unregister_event, |
4661 | }, | 4564 | }, |
@@ -4663,29 +4566,29 @@ static struct cftype mem_cgroup_files[] = { | |||
4663 | .name = "max_usage_in_bytes", | 4566 | .name = "max_usage_in_bytes", |
4664 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), | 4567 | .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), |
4665 | .trigger = mem_cgroup_reset, | 4568 | .trigger = mem_cgroup_reset, |
4666 | .read_u64 = mem_cgroup_read, | 4569 | .read = mem_cgroup_read, |
4667 | }, | 4570 | }, |
4668 | { | 4571 | { |
4669 | .name = "limit_in_bytes", | 4572 | .name = "limit_in_bytes", |
4670 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), | 4573 | .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), |
4671 | .write_string = mem_cgroup_write, | 4574 | .write_string = mem_cgroup_write, |
4672 | .read_u64 = mem_cgroup_read, | 4575 | .read = mem_cgroup_read, |
4673 | }, | 4576 | }, |
4674 | { | 4577 | { |
4675 | .name = "soft_limit_in_bytes", | 4578 | .name = "soft_limit_in_bytes", |
4676 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), | 4579 | .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), |
4677 | .write_string = mem_cgroup_write, | 4580 | .write_string = mem_cgroup_write, |
4678 | .read_u64 = mem_cgroup_read, | 4581 | .read = mem_cgroup_read, |
4679 | }, | 4582 | }, |
4680 | { | 4583 | { |
4681 | .name = "failcnt", | 4584 | .name = "failcnt", |
4682 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), | 4585 | .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), |
4683 | .trigger = mem_cgroup_reset, | 4586 | .trigger = mem_cgroup_reset, |
4684 | .read_u64 = mem_cgroup_read, | 4587 | .read = mem_cgroup_read, |
4685 | }, | 4588 | }, |
4686 | { | 4589 | { |
4687 | .name = "stat", | 4590 | .name = "stat", |
4688 | .read_map = mem_control_stat_show, | 4591 | .read_seq_string = mem_control_stat_show, |
4689 | }, | 4592 | }, |
4690 | { | 4593 | { |
4691 | .name = "force_empty", | 4594 | .name = "force_empty", |
@@ -4717,18 +4620,14 @@ static struct cftype mem_cgroup_files[] = { | |||
4717 | #ifdef CONFIG_NUMA | 4620 | #ifdef CONFIG_NUMA |
4718 | { | 4621 | { |
4719 | .name = "numa_stat", | 4622 | .name = "numa_stat", |
4720 | .open = mem_control_numa_stat_open, | 4623 | .read_seq_string = mem_control_numa_stat_show, |
4721 | .mode = S_IRUGO, | ||
4722 | }, | 4624 | }, |
4723 | #endif | 4625 | #endif |
4724 | }; | ||
4725 | |||
4726 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4626 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4727 | static struct cftype memsw_cgroup_files[] = { | ||
4728 | { | 4627 | { |
4729 | .name = "memsw.usage_in_bytes", | 4628 | .name = "memsw.usage_in_bytes", |
4730 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | 4629 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), |
4731 | .read_u64 = mem_cgroup_read, | 4630 | .read = mem_cgroup_read, |
4732 | .register_event = mem_cgroup_usage_register_event, | 4631 | .register_event = mem_cgroup_usage_register_event, |
4733 | .unregister_event = mem_cgroup_usage_unregister_event, | 4632 | .unregister_event = mem_cgroup_usage_unregister_event, |
4734 | }, | 4633 | }, |
@@ -4736,41 +4635,28 @@ static struct cftype memsw_cgroup_files[] = { | |||
4736 | .name = "memsw.max_usage_in_bytes", | 4635 | .name = "memsw.max_usage_in_bytes", |
4737 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | 4636 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), |
4738 | .trigger = mem_cgroup_reset, | 4637 | .trigger = mem_cgroup_reset, |
4739 | .read_u64 = mem_cgroup_read, | 4638 | .read = mem_cgroup_read, |
4740 | }, | 4639 | }, |
4741 | { | 4640 | { |
4742 | .name = "memsw.limit_in_bytes", | 4641 | .name = "memsw.limit_in_bytes", |
4743 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | 4642 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), |
4744 | .write_string = mem_cgroup_write, | 4643 | .write_string = mem_cgroup_write, |
4745 | .read_u64 = mem_cgroup_read, | 4644 | .read = mem_cgroup_read, |
4746 | }, | 4645 | }, |
4747 | { | 4646 | { |
4748 | .name = "memsw.failcnt", | 4647 | .name = "memsw.failcnt", |
4749 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | 4648 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), |
4750 | .trigger = mem_cgroup_reset, | 4649 | .trigger = mem_cgroup_reset, |
4751 | .read_u64 = mem_cgroup_read, | 4650 | .read = mem_cgroup_read, |
4752 | }, | 4651 | }, |
4753 | }; | ||
4754 | |||
4755 | static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) | ||
4756 | { | ||
4757 | if (!do_swap_account) | ||
4758 | return 0; | ||
4759 | return cgroup_add_files(cont, ss, memsw_cgroup_files, | ||
4760 | ARRAY_SIZE(memsw_cgroup_files)); | ||
4761 | }; | ||
4762 | #else | ||
4763 | static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) | ||
4764 | { | ||
4765 | return 0; | ||
4766 | } | ||
4767 | #endif | 4652 | #endif |
4653 | { }, /* terminate */ | ||
4654 | }; | ||
4768 | 4655 | ||
4769 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | 4656 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) |
4770 | { | 4657 | { |
4771 | struct mem_cgroup_per_node *pn; | 4658 | struct mem_cgroup_per_node *pn; |
4772 | struct mem_cgroup_per_zone *mz; | 4659 | struct mem_cgroup_per_zone *mz; |
4773 | enum lru_list lru; | ||
4774 | int zone, tmp = node; | 4660 | int zone, tmp = node; |
4775 | /* | 4661 | /* |
4776 | * This routine is called against possible nodes. | 4662 | * This routine is called against possible nodes. |
@@ -4788,8 +4674,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | |||
4788 | 4674 | ||
4789 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 4675 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
4790 | mz = &pn->zoneinfo[zone]; | 4676 | mz = &pn->zoneinfo[zone]; |
4791 | for_each_lru(lru) | 4677 | lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]); |
4792 | INIT_LIST_HEAD(&mz->lruvec.lists[lru]); | ||
4793 | mz->usage_in_excess = 0; | 4678 | mz->usage_in_excess = 0; |
4794 | mz->on_tree = false; | 4679 | mz->on_tree = false; |
4795 | mz->memcg = memcg; | 4680 | mz->memcg = memcg; |
@@ -4832,23 +4717,40 @@ out_free: | |||
4832 | } | 4717 | } |
4833 | 4718 | ||
4834 | /* | 4719 | /* |
4835 | * Helpers for freeing a vzalloc()ed mem_cgroup by RCU, | 4720 | * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU, |
4836 | * but in process context. The work_freeing structure is overlaid | 4721 | * but in process context. The work_freeing structure is overlaid |
4837 | * on the rcu_freeing structure, which itself is overlaid on memsw. | 4722 | * on the rcu_freeing structure, which itself is overlaid on memsw. |
4838 | */ | 4723 | */ |
4839 | static void vfree_work(struct work_struct *work) | 4724 | static void free_work(struct work_struct *work) |
4840 | { | 4725 | { |
4841 | struct mem_cgroup *memcg; | 4726 | struct mem_cgroup *memcg; |
4727 | int size = sizeof(struct mem_cgroup); | ||
4842 | 4728 | ||
4843 | memcg = container_of(work, struct mem_cgroup, work_freeing); | 4729 | memcg = container_of(work, struct mem_cgroup, work_freeing); |
4844 | vfree(memcg); | 4730 | /* |
4731 | * We need to make sure that (at least for now), the jump label | ||
4732 | * destruction code runs outside of the cgroup lock. This is because | ||
4733 | * get_online_cpus(), which is called from the static_branch update, | ||
4734 | * can't be called inside the cgroup_lock. cpusets are the ones | ||
4735 | * enforcing this dependency, so if they ever change, we might as well. | ||
4736 | * | ||
4737 | * schedule_work() will guarantee this happens. Be careful if you need | ||
4738 | * to move this code around, and make sure it is outside | ||
4739 | * the cgroup_lock. | ||
4740 | */ | ||
4741 | disarm_sock_keys(memcg); | ||
4742 | if (size < PAGE_SIZE) | ||
4743 | kfree(memcg); | ||
4744 | else | ||
4745 | vfree(memcg); | ||
4845 | } | 4746 | } |
4846 | static void vfree_rcu(struct rcu_head *rcu_head) | 4747 | |
4748 | static void free_rcu(struct rcu_head *rcu_head) | ||
4847 | { | 4749 | { |
4848 | struct mem_cgroup *memcg; | 4750 | struct mem_cgroup *memcg; |
4849 | 4751 | ||
4850 | memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing); | 4752 | memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing); |
4851 | INIT_WORK(&memcg->work_freeing, vfree_work); | 4753 | INIT_WORK(&memcg->work_freeing, free_work); |
4852 | schedule_work(&memcg->work_freeing); | 4754 | schedule_work(&memcg->work_freeing); |
4853 | } | 4755 | } |
4854 | 4756 | ||
@@ -4874,10 +4776,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
4874 | free_mem_cgroup_per_zone_info(memcg, node); | 4776 | free_mem_cgroup_per_zone_info(memcg, node); |
4875 | 4777 | ||
4876 | free_percpu(memcg->stat); | 4778 | free_percpu(memcg->stat); |
4877 | if (sizeof(struct mem_cgroup) < PAGE_SIZE) | 4779 | call_rcu(&memcg->rcu_freeing, free_rcu); |
4878 | kfree_rcu(memcg, rcu_freeing); | ||
4879 | else | ||
4880 | call_rcu(&memcg->rcu_freeing, vfree_rcu); | ||
4881 | } | 4780 | } |
4882 | 4781 | ||
4883 | static void mem_cgroup_get(struct mem_cgroup *memcg) | 4782 | static void mem_cgroup_get(struct mem_cgroup *memcg) |
@@ -5016,6 +4915,17 @@ mem_cgroup_create(struct cgroup *cont) | |||
5016 | memcg->move_charge_at_immigrate = 0; | 4915 | memcg->move_charge_at_immigrate = 0; |
5017 | mutex_init(&memcg->thresholds_lock); | 4916 | mutex_init(&memcg->thresholds_lock); |
5018 | spin_lock_init(&memcg->move_lock); | 4917 | spin_lock_init(&memcg->move_lock); |
4918 | |||
4919 | error = memcg_init_kmem(memcg, &mem_cgroup_subsys); | ||
4920 | if (error) { | ||
4921 | /* | ||
4922 | * We call put now because our (and parent's) refcnts | ||
4923 | * are already in place. mem_cgroup_put() will internally | ||
4924 | * call __mem_cgroup_free, so return directly | ||
4925 | */ | ||
4926 | mem_cgroup_put(memcg); | ||
4927 | return ERR_PTR(error); | ||
4928 | } | ||
5019 | return &memcg->css; | 4929 | return &memcg->css; |
5020 | free_out: | 4930 | free_out: |
5021 | __mem_cgroup_free(memcg); | 4931 | __mem_cgroup_free(memcg); |
@@ -5033,28 +4943,11 @@ static void mem_cgroup_destroy(struct cgroup *cont) | |||
5033 | { | 4943 | { |
5034 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 4944 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
5035 | 4945 | ||
5036 | kmem_cgroup_destroy(cont); | 4946 | kmem_cgroup_destroy(memcg); |
5037 | 4947 | ||
5038 | mem_cgroup_put(memcg); | 4948 | mem_cgroup_put(memcg); |
5039 | } | 4949 | } |
5040 | 4950 | ||
5041 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | ||
5042 | struct cgroup *cont) | ||
5043 | { | ||
5044 | int ret; | ||
5045 | |||
5046 | ret = cgroup_add_files(cont, ss, mem_cgroup_files, | ||
5047 | ARRAY_SIZE(mem_cgroup_files)); | ||
5048 | |||
5049 | if (!ret) | ||
5050 | ret = register_memsw_files(cont, ss); | ||
5051 | |||
5052 | if (!ret) | ||
5053 | ret = register_kmem_files(cont, ss); | ||
5054 | |||
5055 | return ret; | ||
5056 | } | ||
5057 | |||
5058 | #ifdef CONFIG_MMU | 4951 | #ifdef CONFIG_MMU |
5059 | /* Handlers for move charge at task migration. */ | 4952 | /* Handlers for move charge at task migration. */ |
5060 | #define PRECHARGE_COUNT_AT_ONCE 256 | 4953 | #define PRECHARGE_COUNT_AT_ONCE 256 |
@@ -5147,7 +5040,7 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, | |||
5147 | return NULL; | 5040 | return NULL; |
5148 | if (PageAnon(page)) { | 5041 | if (PageAnon(page)) { |
5149 | /* we don't move shared anon */ | 5042 | /* we don't move shared anon */ |
5150 | if (!move_anon() || page_mapcount(page) > 2) | 5043 | if (!move_anon()) |
5151 | return NULL; | 5044 | return NULL; |
5152 | } else if (!move_file()) | 5045 | } else if (!move_file()) |
5153 | /* we ignore mapcount for file pages */ | 5046 | /* we ignore mapcount for file pages */ |
@@ -5158,32 +5051,37 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, | |||
5158 | return page; | 5051 | return page; |
5159 | } | 5052 | } |
5160 | 5053 | ||
5054 | #ifdef CONFIG_SWAP | ||
5161 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | 5055 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, |
5162 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | 5056 | unsigned long addr, pte_t ptent, swp_entry_t *entry) |
5163 | { | 5057 | { |
5164 | int usage_count; | ||
5165 | struct page *page = NULL; | 5058 | struct page *page = NULL; |
5166 | swp_entry_t ent = pte_to_swp_entry(ptent); | 5059 | swp_entry_t ent = pte_to_swp_entry(ptent); |
5167 | 5060 | ||
5168 | if (!move_anon() || non_swap_entry(ent)) | 5061 | if (!move_anon() || non_swap_entry(ent)) |
5169 | return NULL; | 5062 | return NULL; |
5170 | usage_count = mem_cgroup_count_swap_user(ent, &page); | 5063 | /* |
5171 | if (usage_count > 1) { /* we don't move shared anon */ | 5064 | * Because lookup_swap_cache() updates some statistics counter, |
5172 | if (page) | 5065 | * we call find_get_page() with swapper_space directly. |
5173 | put_page(page); | 5066 | */ |
5174 | return NULL; | 5067 | page = find_get_page(&swapper_space, ent.val); |
5175 | } | ||
5176 | if (do_swap_account) | 5068 | if (do_swap_account) |
5177 | entry->val = ent.val; | 5069 | entry->val = ent.val; |
5178 | 5070 | ||
5179 | return page; | 5071 | return page; |
5180 | } | 5072 | } |
5073 | #else | ||
5074 | static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | ||
5075 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | ||
5076 | { | ||
5077 | return NULL; | ||
5078 | } | ||
5079 | #endif | ||
5181 | 5080 | ||
5182 | static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | 5081 | static struct page *mc_handle_file_pte(struct vm_area_struct *vma, |
5183 | unsigned long addr, pte_t ptent, swp_entry_t *entry) | 5082 | unsigned long addr, pte_t ptent, swp_entry_t *entry) |
5184 | { | 5083 | { |
5185 | struct page *page = NULL; | 5084 | struct page *page = NULL; |
5186 | struct inode *inode; | ||
5187 | struct address_space *mapping; | 5085 | struct address_space *mapping; |
5188 | pgoff_t pgoff; | 5086 | pgoff_t pgoff; |
5189 | 5087 | ||
@@ -5192,7 +5090,6 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
5192 | if (!move_file()) | 5090 | if (!move_file()) |
5193 | return NULL; | 5091 | return NULL; |
5194 | 5092 | ||
5195 | inode = vma->vm_file->f_path.dentry->d_inode; | ||
5196 | mapping = vma->vm_file->f_mapping; | 5093 | mapping = vma->vm_file->f_mapping; |
5197 | if (pte_none(ptent)) | 5094 | if (pte_none(ptent)) |
5198 | pgoff = linear_page_index(vma, addr); | 5095 | pgoff = linear_page_index(vma, addr); |
@@ -5491,8 +5388,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, | |||
5491 | if (!isolate_lru_page(page)) { | 5388 | if (!isolate_lru_page(page)) { |
5492 | pc = lookup_page_cgroup(page); | 5389 | pc = lookup_page_cgroup(page); |
5493 | if (!mem_cgroup_move_account(page, HPAGE_PMD_NR, | 5390 | if (!mem_cgroup_move_account(page, HPAGE_PMD_NR, |
5494 | pc, mc.from, mc.to, | 5391 | pc, mc.from, mc.to)) { |
5495 | false)) { | ||
5496 | mc.precharge -= HPAGE_PMD_NR; | 5392 | mc.precharge -= HPAGE_PMD_NR; |
5497 | mc.moved_charge += HPAGE_PMD_NR; | 5393 | mc.moved_charge += HPAGE_PMD_NR; |
5498 | } | 5394 | } |
@@ -5522,7 +5418,7 @@ retry: | |||
5522 | goto put; | 5418 | goto put; |
5523 | pc = lookup_page_cgroup(page); | 5419 | pc = lookup_page_cgroup(page); |
5524 | if (!mem_cgroup_move_account(page, 1, pc, | 5420 | if (!mem_cgroup_move_account(page, 1, pc, |
5525 | mc.from, mc.to, false)) { | 5421 | mc.from, mc.to)) { |
5526 | mc.precharge--; | 5422 | mc.precharge--; |
5527 | /* we uncharge from mc.from later. */ | 5423 | /* we uncharge from mc.from later. */ |
5528 | mc.moved_charge++; | 5424 | mc.moved_charge++; |
@@ -5533,8 +5429,7 @@ put: /* get_mctgt_type() gets the page */ | |||
5533 | break; | 5429 | break; |
5534 | case MC_TARGET_SWAP: | 5430 | case MC_TARGET_SWAP: |
5535 | ent = target.ent; | 5431 | ent = target.ent; |
5536 | if (!mem_cgroup_move_swap_account(ent, | 5432 | if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) { |
5537 | mc.from, mc.to, false)) { | ||
5538 | mc.precharge--; | 5433 | mc.precharge--; |
5539 | /* we fixup refcnts and charges later. */ | 5434 | /* we fixup refcnts and charges later. */ |
5540 | mc.moved_swap++; | 5435 | mc.moved_swap++; |
@@ -5610,7 +5505,6 @@ static void mem_cgroup_move_task(struct cgroup *cont, | |||
5610 | if (mm) { | 5505 | if (mm) { |
5611 | if (mc.to) | 5506 | if (mc.to) |
5612 | mem_cgroup_move_charge(mm); | 5507 | mem_cgroup_move_charge(mm); |
5613 | put_swap_token(mm); | ||
5614 | mmput(mm); | 5508 | mmput(mm); |
5615 | } | 5509 | } |
5616 | if (mc.to) | 5510 | if (mc.to) |
@@ -5638,12 +5532,13 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
5638 | .create = mem_cgroup_create, | 5532 | .create = mem_cgroup_create, |
5639 | .pre_destroy = mem_cgroup_pre_destroy, | 5533 | .pre_destroy = mem_cgroup_pre_destroy, |
5640 | .destroy = mem_cgroup_destroy, | 5534 | .destroy = mem_cgroup_destroy, |
5641 | .populate = mem_cgroup_populate, | ||
5642 | .can_attach = mem_cgroup_can_attach, | 5535 | .can_attach = mem_cgroup_can_attach, |
5643 | .cancel_attach = mem_cgroup_cancel_attach, | 5536 | .cancel_attach = mem_cgroup_cancel_attach, |
5644 | .attach = mem_cgroup_move_task, | 5537 | .attach = mem_cgroup_move_task, |
5538 | .base_cftypes = mem_cgroup_files, | ||
5645 | .early_init = 0, | 5539 | .early_init = 0, |
5646 | .use_id = 1, | 5540 | .use_id = 1, |
5541 | .__DEPRECATED_clear_css_refs = true, | ||
5647 | }; | 5542 | }; |
5648 | 5543 | ||
5649 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 5544 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
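Taken together with the removal of mem_cgroup_populate() and register_memsw_files() above, the control files are now declared in a single sentinel-terminated cftype array and hung off the subsystem's ->base_cftypes pointer instead of being added from a ->populate() callback. A minimal sketch of that registration style (placeholder names throughout, not the memcg table itself):

	static u64 example_read_u64(struct cgroup *cgrp, struct cftype *cft)
	{
		return 0;	/* placeholder handler */
	}

	static struct cftype example_files[] = {
		{
			.name = "value",
			.read_u64 = example_read_u64,
		},
		{ },	/* terminate: an empty entry ends the array */
	};

	struct cgroup_subsys example_subsys = {
		.name = "example",
		/* .create, .destroy, .subsys_id etc. omitted for brevity */
		.base_cftypes = example_files,
	};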