author		Tejun Heo <tj@kernel.org>	2015-11-03 17:29:03 -0500
committer	Tejun Heo <tj@kernel.org>	2015-11-03 17:29:03 -0500
commit		159b5bb46492e4dcef2070b12861030bc360402b (patch)
tree		93de7d6e94a059aade50ee5437de6a50ccd1cf7b /mm/memcontrol.c
parent		56e74338a535cbcc2f2da08b1ea1a92920194364 (diff)
parent		469eabb3aec03d9defed3462df743a223a5c8f54 (diff)
Merge branch 'for-4.3-fixes' into for-4.4
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	470
1 file changed, 85 insertions(+), 385 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1af057575ce9..6ddaeba34e09 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -111,56 +111,10 @@ static const char * const mem_cgroup_lru_names[] = {
 	"unevictable",
 };
 
-/*
- * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremated by the number of pages. This counter is used for
- * for trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- */
-enum mem_cgroup_events_target {
-	MEM_CGROUP_TARGET_THRESH,
-	MEM_CGROUP_TARGET_SOFTLIMIT,
-	MEM_CGROUP_TARGET_NUMAINFO,
-	MEM_CGROUP_NTARGETS,
-};
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 #define NUMAINFO_EVENTS_TARGET 1024
 
-struct mem_cgroup_stat_cpu {
-	long count[MEM_CGROUP_STAT_NSTATS];
-	unsigned long events[MEMCG_NR_EVENTS];
-	unsigned long nr_page_events;
-	unsigned long targets[MEM_CGROUP_NTARGETS];
-};
-
-struct reclaim_iter {
-	struct mem_cgroup *position;
-	/* scan generation, increased every round-trip */
-	unsigned int generation;
-};
-
-/*
- * per-zone information in memory controller.
- */
-struct mem_cgroup_per_zone {
-	struct lruvec lruvec;
-	unsigned long lru_size[NR_LRU_LISTS];
-
-	struct reclaim_iter iter[DEF_PRIORITY + 1];
-
-	struct rb_node tree_node;	/* RB tree node */
-	unsigned long usage_in_excess;	/* Set to the value by which */
-					/* the soft limit is exceeded*/
-	bool on_tree;
-	struct mem_cgroup *memcg;	/* Back pointer, we cannot */
-					/* use container_of */
-};
-
-struct mem_cgroup_per_node {
-	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
 /*
  * Cgroups above their limits are maintained in a RB-Tree, independent of
  * their hierarchy representation
@@ -181,32 +135,6 @@ struct mem_cgroup_tree {
 
 static struct mem_cgroup_tree soft_limit_tree __read_mostly;
 
-struct mem_cgroup_threshold {
-	struct eventfd_ctx *eventfd;
-	unsigned long threshold;
-};
-
-/* For threshold */
-struct mem_cgroup_threshold_ary {
-	/* An array index points to threshold just below or equal to usage. */
-	int current_threshold;
-	/* Size of entries[] */
-	unsigned int size;
-	/* Array of thresholds */
-	struct mem_cgroup_threshold entries[0];
-};
-
-struct mem_cgroup_thresholds {
-	/* Primary thresholds array */
-	struct mem_cgroup_threshold_ary *primary;
-	/*
-	 * Spare threshold array.
-	 * This is needed to make mem_cgroup_unregister_event() "never fail".
-	 * It must be able to store at least primary->size - 1 entries.
-	 */
-	struct mem_cgroup_threshold_ary *spare;
-};
-
 /* for OOM */
 struct mem_cgroup_eventfd_list {
 	struct list_head list;
@@ -256,113 +184,6 @@ struct mem_cgroup_event {
 static void mem_cgroup_threshold(struct mem_cgroup *memcg);
 static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 
-/*
- * The memory controller data structure. The memory controller controls both
- * page cache and RSS per cgroup. We would eventually like to provide
- * statistics based on the statistics developed by Rik Van Riel for clock-pro,
- * to help the administrator determine what knobs to tune.
- */
-struct mem_cgroup {
-	struct cgroup_subsys_state css;
-
-	/* Accounted resources */
-	struct page_counter memory;
-	struct page_counter memsw;
-	struct page_counter kmem;
-
-	/* Normal memory consumption range */
-	unsigned long low;
-	unsigned long high;
-
-	unsigned long soft_limit;
-
-	/* vmpressure notifications */
-	struct vmpressure vmpressure;
-
-	/* css_online() has been completed */
-	int initialized;
-
-	/*
-	 * Should the accounting and control be hierarchical, per subtree?
-	 */
-	bool use_hierarchy;
-
-	/* protected by memcg_oom_lock */
-	bool oom_lock;
-	int under_oom;
-
-	int swappiness;
-	/* OOM-Killer disable */
-	int oom_kill_disable;
-
-	/* protect arrays of thresholds */
-	struct mutex thresholds_lock;
-
-	/* thresholds for memory usage. RCU-protected */
-	struct mem_cgroup_thresholds thresholds;
-
-	/* thresholds for mem+swap usage. RCU-protected */
-	struct mem_cgroup_thresholds memsw_thresholds;
-
-	/* For oom notifier event fd */
-	struct list_head oom_notify;
-
-	/*
-	 * Should we move charges of a task when a task is moved into this
-	 * mem_cgroup ? And what type of charges should we move ?
-	 */
-	unsigned long move_charge_at_immigrate;
-	/*
-	 * set > 0 if pages under this cgroup are moving to other cgroup.
-	 */
-	atomic_t moving_account;
-	/* taken only while moving_account > 0 */
-	spinlock_t move_lock;
-	struct task_struct *move_lock_task;
-	unsigned long move_lock_flags;
-	/*
-	 * percpu counter.
-	 */
-	struct mem_cgroup_stat_cpu __percpu *stat;
-	spinlock_t pcp_counter_lock;
-
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-	struct cg_proto tcp_mem;
-#endif
-#if defined(CONFIG_MEMCG_KMEM)
-	/* Index in the kmem_cache->memcg_params.memcg_caches array */
-	int kmemcg_id;
-	bool kmem_acct_activated;
-	bool kmem_acct_active;
-#endif
-
-	int last_scanned_node;
-#if MAX_NUMNODES > 1
-	nodemask_t scan_nodes;
-	atomic_t numainfo_events;
-	atomic_t numainfo_updating;
-#endif
-
-#ifdef CONFIG_CGROUP_WRITEBACK
-	struct list_head cgwb_list;
-	struct wb_domain cgwb_domain;
-#endif
-
-	/* List of events which userspace want to receive */
-	struct list_head event_list;
-	spinlock_t event_list_lock;
-
-	struct mem_cgroup_per_node *nodeinfo[0];
-	/* WARNING: nodeinfo must be the last member here */
-};
-
-#ifdef CONFIG_MEMCG_KMEM
-bool memcg_kmem_is_active(struct mem_cgroup *memcg)
-{
-	return memcg->kmem_acct_active;
-}
-#endif
-
 /* Stuffs for move charges at task migration. */
 /*
  * Types of charges to be moved.
@@ -423,11 +244,6 @@ enum res_type {
  */
 static DEFINE_MUTEX(memcg_create_mutex);
 
-struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
-{
-	return s ? container_of(s, struct mem_cgroup, css) : NULL;
-}
-
 /* Some nice accessors for the vmpressure. */
 struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
 {
@@ -499,8 +315,7 @@ void sock_update_memcg(struct sock *sk)
 		rcu_read_lock();
 		memcg = mem_cgroup_from_task(current);
 		cg_proto = sk->sk_prot->proto_cgroup(memcg);
-		if (!mem_cgroup_is_root(memcg) &&
-		    memcg_proto_active(cg_proto) &&
+		if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) &&
 		    css_tryget_online(&memcg->css)) {
 			sk->sk_cgrp = cg_proto;
 		}
@@ -593,11 +408,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
 	return &memcg->nodeinfo[nid]->zoneinfo[zid];
 }
 
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
-{
-	return &memcg->css;
-}
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -631,6 +441,34 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
 	return &memcg->css;
 }
 
+/**
+ * page_cgroup_ino - return inode number of the memcg a page is charged to
+ * @page: the page
+ *
+ * Look up the closest online ancestor of the memory cgroup @page is charged to
+ * and return its inode number or 0 if @page is not charged to any cgroup. It
+ * is safe to call this function without holding a reference to @page.
+ *
+ * Note, this function is inherently racy, because there is nothing to prevent
+ * the cgroup inode from getting torn down and potentially reallocated a moment
+ * after page_cgroup_ino() returns, so it only should be used by callers that
+ * do not care (such as procfs interfaces).
+ */
+ino_t page_cgroup_ino(struct page *page)
+{
+	struct mem_cgroup *memcg;
+	unsigned long ino = 0;
+
+	rcu_read_lock();
+	memcg = READ_ONCE(page->mem_cgroup);
+	while (memcg && !(memcg->css.flags & CSS_ONLINE))
+		memcg = parent_mem_cgroup(memcg);
+	if (memcg)
+		ino = cgroup_ino(memcg->css.cgroup);
+	rcu_read_unlock();
+	return ino;
+}
+
 static struct mem_cgroup_per_zone *
 mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
 {
@@ -876,14 +714,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 	__this_cpu_add(memcg->stat->nr_page_events, nr_pages);
 }
 
-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
-{
-	struct mem_cgroup_per_zone *mz;
-
-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-	return mz->lru_size[lru];
-}
-
 static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 						  int nid,
 						  unsigned int lru_mask)
@@ -986,6 +816,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 
 	return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
 }
+EXPORT_SYMBOL(mem_cgroup_from_task);
 
 static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
@@ -1031,7 +862,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				   struct mem_cgroup *prev,
 				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
-	struct reclaim_iter *uninitialized_var(iter);
+	struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
 	struct cgroup_subsys_state *css = NULL;
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *pos = NULL;
@@ -1173,30 +1004,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 	     iter != NULL;				\
 	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
-void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
-{
-	struct mem_cgroup *memcg;
-
-	rcu_read_lock();
-	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	if (unlikely(!memcg))
-		goto out;
-
-	switch (idx) {
-	case PGFAULT:
-		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
-		break;
-	case PGMAJFAULT:
-		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
-		break;
-	default:
-		BUG();
-	}
-out:
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
-
 /**
  * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
  * @zone: zone of the wanted lruvec
@@ -1295,15 +1102,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 	VM_BUG_ON((long)(*lru_size) < 0);
 }
 
-bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
-{
-	if (root == memcg)
-		return true;
-	if (!root->use_hierarchy)
-		return false;
-	return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
-}
-
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *task_memcg;
@@ -1330,39 +1128,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 	return ret;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
-{
-	unsigned long inactive_ratio;
-	unsigned long inactive;
-	unsigned long active;
-	unsigned long gb;
-
-	inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
-	active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
-
-	gb = (inactive + active) >> (30 - PAGE_SHIFT);
-	if (gb)
-		inactive_ratio = int_sqrt(10 * gb);
-	else
-		inactive_ratio = 1;
-
-	return inactive * inactive_ratio < active;
-}
-
-bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
-{
-	struct mem_cgroup_per_zone *mz;
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return true;
-
-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-	memcg = mz->memcg;
-
-	return !!(memcg->css.flags & CSS_ONLINE);
-}
-
 #define mem_cgroup_from_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -1394,15 +1159,6 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 	return margin;
 }
 
-int mem_cgroup_swappiness(struct mem_cgroup *memcg)
-{
-	/* root ? */
-	if (mem_cgroup_disabled() || !memcg->css.parent)
-		return vm_swappiness;
-
-	return memcg->swappiness;
-}
-
 /*
  * A routine for checking "mem" is under move_account() or not.
  *
@@ -1545,6 +1301,12 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 				     int order)
 {
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.gfp_mask = gfp_mask,
+		.order = order,
+	};
 	struct mem_cgroup *iter;
 	unsigned long chosen_points = 0;
 	unsigned long totalpages;
@@ -1563,7 +1325,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		goto unlock;
 	}
 
-	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+	check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
 	totalpages = mem_cgroup_get_limit(memcg) ? : 1;
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
@@ -1571,8 +1333,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 		css_task_iter_start(&iter->css, &it);
 		while ((task = css_task_iter_next(&it))) {
-			switch (oom_scan_process_thread(task, totalpages, NULL,
-							false)) {
+			switch (oom_scan_process_thread(&oc, task, totalpages)) {
 			case OOM_SCAN_SELECT:
 				if (chosen)
 					put_task_struct(chosen);
@@ -1610,8 +1371,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
 	if (chosen) {
 		points = chosen_points * 1000 / totalpages;
-		oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-				 memcg, NULL, "Memory cgroup out of memory");
+		oom_kill_process(&oc, chosen, points, totalpages, memcg,
+				 "Memory cgroup out of memory");
 	}
 unlock:
 	mutex_unlock(&oom_lock);
@@ -2062,23 +1823,6 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(mem_cgroup_end_page_stat);
 
-/**
- * mem_cgroup_update_page_stat - update page state statistics
- * @memcg: memcg to account against
- * @idx: page state item to account
- * @val: number of pages (positive or negative)
- *
- * See mem_cgroup_begin_page_stat() for locking requirements.
- */
-void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
-				 enum mem_cgroup_stat_index idx, int val)
-{
-	VM_BUG_ON(!rcu_read_lock_held());
-
-	if (memcg)
-		this_cpu_add(memcg->stat->count[idx], val);
-}
-
 /*
  * size of first charge trial. "32" comes from vmscan.c's magic value.
  * TODO: maybe necessary to use big numbers in big irons.
@@ -2355,40 +2099,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 	css_put_many(&memcg->css, nr_pages);
 }
 
-/*
- * try_get_mem_cgroup_from_page - look up page's memcg association
- * @page: the page
- *
- * Look up, get a css reference, and return the memcg that owns @page.
- *
- * The page must be locked to prevent racing with swap-in and page
- * cache charges. If coming from an unlocked page table, the caller
- * must ensure the page is on the LRU or this can race with charging.
- */
-struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
-{
-	struct mem_cgroup *memcg;
-	unsigned short id;
-	swp_entry_t ent;
-
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-
-	memcg = page->mem_cgroup;
-	if (memcg) {
-		if (!css_tryget_online(&memcg->css))
-			memcg = NULL;
-	} else if (PageSwapCache(page)) {
-		ent.val = page_private(page);
-		id = lookup_swap_cgroup_id(ent);
-		rcu_read_lock();
-		memcg = mem_cgroup_from_id(id);
-		if (memcg && !css_tryget_online(&memcg->css))
-			memcg = NULL;
-		rcu_read_unlock();
-	}
-	return memcg;
-}
-
 static void lock_page_lru(struct page *page, int *isolated)
 {
 	struct zone *zone = page_zone(page);
@@ -2504,16 +2214,6 @@ void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
 	css_put_many(&memcg->css, nr_pages);
 }
 
-/*
- * helper for acessing a memcg's index. It will be used as an index in the
- * child cache array in kmem_cache, and also to derive its name. This function
- * will return -1 when this is not a kmem-limited memcg.
- */
-int memcg_cache_id(struct mem_cgroup *memcg)
-{
-	return memcg ? memcg->kmemcg_id : -1;
-}
-
 static int memcg_alloc_cache_id(void)
 {
 	int id, size;
@@ -5127,10 +4827,12 @@ static void mem_cgroup_clear_mc(void)
 static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
 				 struct cgroup_taskset *tset)
 {
-	struct task_struct *p = cgroup_taskset_first(tset);
-	int ret = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *from;
+	struct task_struct *p;
+	struct mm_struct *mm;
 	unsigned long move_flags;
+	int ret = 0;
 
 	/*
 	 * We are now commited to this value whatever it is. Changes in this
@@ -5138,36 +4840,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
  * So we need to save it, and keep it going.
  */
 	move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
-	if (move_flags) {
-		struct mm_struct *mm;
-		struct mem_cgroup *from = mem_cgroup_from_task(p);
-
-		VM_BUG_ON(from == memcg);
-
-		mm = get_task_mm(p);
-		if (!mm)
-			return 0;
-		/* We move charges only when we move a owner of the mm */
-		if (mm->owner == p) {
-			VM_BUG_ON(mc.from);
-			VM_BUG_ON(mc.to);
-			VM_BUG_ON(mc.precharge);
-			VM_BUG_ON(mc.moved_charge);
-			VM_BUG_ON(mc.moved_swap);
-
-			spin_lock(&mc.lock);
-			mc.from = from;
-			mc.to = memcg;
-			mc.flags = move_flags;
-			spin_unlock(&mc.lock);
-			/* We set mc.moving_task later */
-
-			ret = mem_cgroup_precharge_mc(mm);
-			if (ret)
-				mem_cgroup_clear_mc();
-		}
-		mmput(mm);
-	}
+	if (!move_flags)
+		return 0;
+
+	p = cgroup_taskset_first(tset);
+	from = mem_cgroup_from_task(p);
+
+	VM_BUG_ON(from == memcg);
+
+	mm = get_task_mm(p);
+	if (!mm)
+		return 0;
+	/* We move charges only when we move a owner of the mm */
+	if (mm->owner == p) {
+		VM_BUG_ON(mc.from);
+		VM_BUG_ON(mc.to);
+		VM_BUG_ON(mc.precharge);
+		VM_BUG_ON(mc.moved_charge);
+		VM_BUG_ON(mc.moved_swap);
+
+		spin_lock(&mc.lock);
+		mc.from = from;
+		mc.to = memcg;
+		mc.flags = move_flags;
+		spin_unlock(&mc.lock);
+		/* We set mc.moving_task later */
+
+		ret = mem_cgroup_precharge_mc(mm);
+		if (ret)
+			mem_cgroup_clear_mc();
+	}
+	mmput(mm);
 	return ret;
 }
 
@@ -5521,19 +5224,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
 };
 
 /**
- * mem_cgroup_events - count memory events against a cgroup
- * @memcg: the memory cgroup
- * @idx: the event index
- * @nr: the number of events to account for
- */
-void mem_cgroup_events(struct mem_cgroup *memcg,
-		       enum mem_cgroup_events_index idx,
-		       unsigned int nr)
-{
-	this_cpu_add(memcg->stat->events[idx], nr);
-}
-
-/**
  * mem_cgroup_low - check if memory consumption is below the normal range
  * @root: the highest ancestor to consider
  * @memcg: the memory cgroup to check
@@ -5605,8 +5295,20 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 		 * the page lock, which serializes swap cache removal, which
 		 * in turn serializes uncharging.
 		 */
+		VM_BUG_ON_PAGE(!PageLocked(page), page);
 		if (page->mem_cgroup)
 			goto out;
+
+		if (do_swap_account) {
+			swp_entry_t ent = { .val = page_private(page), };
+			unsigned short id = lookup_swap_cgroup_id(ent);
+
+			rcu_read_lock();
+			memcg = mem_cgroup_from_id(id);
+			if (memcg && !css_tryget_online(&memcg->css))
+				memcg = NULL;
+			rcu_read_unlock();
+		}
 	}
 
 	if (PageTransHuge(page)) {
@@ -5614,8 +5316,6 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 	}
 
-	if (do_swap_account && PageSwapCache(page))
-		memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
 		memcg = get_mem_cgroup_from_mm(mm);
 