author	Mel Gorman <mgorman@techsingularity.net>	2016-07-28 18:46:05 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 19:07:41 -0400
commit	ef8f2327996b5c20f11420f64e439e87c7a01604 (patch)
tree	0ea9bf78d88e1207005fc5310fe812d1edb0efc2 /mm/memcontrol.c
parent	a9dd0a83104c01269ea36a9b4ec42b51edf85427 (diff)
mm, memcg: move memcg limit enforcement from zones to nodes
Memcg needs adjustment after moving LRUs to the node. Limits are tracked
per memcg but the soft-limit excess is tracked per zone. As global page
reclaim is based on the node, it is easy to imagine a situation where a
zone soft limit is exceeded even though the memcg limit is fine. This
patch moves the soft limit tree to the node. Technically, all the
variable names should also change, but people are already familiar with
the meaning of "mz" even if "mn" would be a more appropriate name now.

Link: http://lkml.kernel.org/r/1467970510-21195-15-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
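For orientation: the effect of the change is that the soft-limit excess rb-tree is looked up by node id alone instead of by a (node, zone) pair. Below is a minimal, illustrative userspace sketch of that indexing, not the kernel code itself; rb_root, the lock type and the MAX_NUMNODES value are stand-ins, while the structure and function names mirror the ones in the diff that follows.

/*
 * Illustrative sketch only: one soft-limit tree per NUMA node.
 * rb_root, the lock type and MAX_NUMNODES are stand-ins here.
 */
#define MAX_NUMNODES 64			/* stand-in for the kernel constant */

struct rb_root { void *rb_node; };	/* stand-in for the kernel rb_root */

struct mem_cgroup_tree_per_node {
	struct rb_root rb_root;
	int lock;			/* stand-in for spinlock_t */
};

static struct {
	struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
} soft_limit_tree;

/* After the patch: the lookup takes only a node id, no zone index. */
static struct mem_cgroup_tree_per_node *soft_limit_tree_node(int nid)
{
	return soft_limit_tree.rb_tree_per_node[nid];
}

Before the patch, the equivalent lookup was soft_limit_tree_node_zone(nid, zid), indexing a per-zone array inside each per-node entry, as the removed lines in the diff below show.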
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	190
1 file changed, 83 insertions, 107 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c9ebec98e92a..9cbd40ebccd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = {
  * their hierarchy representation
  */
 
-struct mem_cgroup_tree_per_zone {
+struct mem_cgroup_tree_per_node {
 	struct rb_root rb_root;
 	spinlock_t lock;
 };
 
-struct mem_cgroup_tree_per_node {
-	struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
 struct mem_cgroup_tree {
 	struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
 };
@@ -374,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page)
 	return ino;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+static struct mem_cgroup_per_node *
+mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
 {
 	int nid = page_to_nid(page);
-	int zid = page_zonenum(page);
 
-	return &memcg->nodeinfo[nid]->zoneinfo[zid];
+	return memcg->nodeinfo[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
+static struct mem_cgroup_tree_per_node *
+soft_limit_tree_node(int nid)
 {
-	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+	return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
+static struct mem_cgroup_tree_per_node *
 soft_limit_tree_from_page(struct page *page)
 {
 	int nid = page_to_nid(page);
-	int zid = page_zonenum(page);
 
-	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+	return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
-					 struct mem_cgroup_tree_per_zone *mctz,
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
+					 struct mem_cgroup_tree_per_node *mctz,
 					 unsigned long new_usage_in_excess)
 {
 	struct rb_node **p = &mctz->rb_root.rb_node;
 	struct rb_node *parent = NULL;
-	struct mem_cgroup_per_zone *mz_node;
+	struct mem_cgroup_per_node *mz_node;
 
 	if (mz->on_tree)
 		return;
@@ -414,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
 		return;
 	while (*p) {
 		parent = *p;
-		mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+		mz_node = rb_entry(parent, struct mem_cgroup_per_node,
 					tree_node);
 		if (mz->usage_in_excess < mz_node->usage_in_excess)
 			p = &(*p)->rb_left;
@@ -430,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
 	mz->on_tree = true;
 }
 
-static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-					 struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+					 struct mem_cgroup_tree_per_node *mctz)
 {
 	if (!mz->on_tree)
 		return;
@@ -439,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
 	mz->on_tree = false;
 }
 
-static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-				       struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+				       struct mem_cgroup_tree_per_node *mctz)
 {
 	unsigned long flags;
 
@@ -464,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
 static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 {
 	unsigned long excess;
-	struct mem_cgroup_per_zone *mz;
-	struct mem_cgroup_tree_per_zone *mctz;
+	struct mem_cgroup_per_node *mz;
+	struct mem_cgroup_tree_per_node *mctz;
 
 	mctz = soft_limit_tree_from_page(page);
 	/*
@@ -473,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 	 * because their event counter is not touched.
 	 */
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		mz = mem_cgroup_page_zoneinfo(memcg, page);
+		mz = mem_cgroup_page_nodeinfo(memcg, page);
 		excess = soft_limit_excess(memcg);
 		/*
 		 * We have to update the tree if mz is on RB-tree or
@@ -498,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 
 static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 {
-	struct mem_cgroup_tree_per_zone *mctz;
-	struct mem_cgroup_per_zone *mz;
-	int nid, zid;
+	struct mem_cgroup_tree_per_node *mctz;
+	struct mem_cgroup_per_node *mz;
+	int nid;
 
 	for_each_node(nid) {
-		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-			mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-			mctz = soft_limit_tree_node_zone(nid, zid);
-			mem_cgroup_remove_exceeded(mz, mctz);
-		}
+		mz = mem_cgroup_nodeinfo(memcg, nid);
+		mctz = soft_limit_tree_node(nid);
+		mem_cgroup_remove_exceeded(mz, mctz);
 	}
 }
 
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
 	struct rb_node *rightmost = NULL;
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup_per_node *mz;
 
 retry:
 	mz = NULL;
@@ -523,7 +515,7 @@ retry:
 	if (!rightmost)
 		goto done;		/* Nothing to reclaim from */
 
-	mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+	mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
 	/*
 	 * Remove the node now but someone else can add it back,
 	 * we will to add it back at the end of reclaim to its correct
@@ -537,10 +529,10 @@ done:
 	return mz;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup_per_node *mz;
 
 	spin_lock_irq(&mctz->lock);
 	mz = __mem_cgroup_largest_soft_limit_node(mctz);
@@ -634,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 					   int nid, unsigned int lru_mask)
 {
 	unsigned long nr = 0;
-	int zid;
+	struct mem_cgroup_per_node *mz;
+	enum lru_list lru;
 
 	VM_BUG_ON((unsigned)nid >= nr_node_ids);
 
-	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-		struct mem_cgroup_per_zone *mz;
-		enum lru_list lru;
-
-		for_each_lru(lru) {
-			if (!(BIT(lru) & lru_mask))
-				continue;
-			mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-			nr += mz->lru_size[lru];
-		}
+	for_each_lru(lru) {
+		if (!(BIT(lru) & lru_mask))
+			continue;
+		mz = mem_cgroup_nodeinfo(memcg, nid);
+		nr += mz->lru_size[lru];
 	}
 	return nr;
 }
@@ -800,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	rcu_read_lock();
 
 	if (reclaim) {
-		struct mem_cgroup_per_zone *mz;
+		struct mem_cgroup_per_node *mz;
 
-		mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
+		mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
 		iter = &mz->iter[reclaim->priority];
 
 		if (prev && reclaim->generation != iter->generation)
@@ -901,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 {
 	struct mem_cgroup *memcg = dead_memcg;
 	struct mem_cgroup_reclaim_iter *iter;
-	struct mem_cgroup_per_zone *mz;
-	int nid, zid;
+	struct mem_cgroup_per_node *mz;
+	int nid;
 	int i;
 
 	while ((memcg = parent_mem_cgroup(memcg))) {
 		for_each_node(nid) {
-			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-				mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-				for (i = 0; i <= DEF_PRIORITY; i++) {
-					iter = &mz->iter[i];
-					cmpxchg(&iter->position,
-						dead_memcg, NULL);
-				}
+			mz = mem_cgroup_nodeinfo(memcg, nid);
+			for (i = 0; i <= DEF_PRIORITY; i++) {
+				iter = &mz->iter[i];
+				cmpxchg(&iter->position,
+					dead_memcg, NULL);
 			}
 		}
 	}
@@ -945,7 +931,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  */
 struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup_per_node *mz;
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
@@ -962,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
 	if (!memcg)
 		memcg = root_mem_cgroup;
 
-	mz = mem_cgroup_page_zoneinfo(memcg, page);
+	mz = mem_cgroup_page_nodeinfo(memcg, page);
 	lruvec = &mz->lruvec;
 out:
 	/*
@@ -989,7 +975,7 @@ out:
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 				enum zone_type zid, int nr_pages)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup_per_node *mz;
 	unsigned long *lru_size;
 	long size;
 	bool empty;
@@ -999,7 +985,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
 	if (mem_cgroup_disabled())
 		return;
 
-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	lru_size = mz->lru_size + lru;
 	empty = list_empty(lruvec->lists + lru);
 
@@ -1392,7 +1378,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 #endif
 
 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-				   struct zone *zone,
+				   pg_data_t *pgdat,
 				   gfp_t gfp_mask,
 				   unsigned long *total_scanned)
 {
@@ -1402,7 +1388,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
 	unsigned long excess;
 	unsigned long nr_scanned;
 	struct mem_cgroup_reclaim_cookie reclaim = {
-		.zone = zone,
+		.pgdat = pgdat,
 		.priority = 0,
 	};
 
@@ -1433,7 +1419,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
 			continue;
 		}
 		total += mem_cgroup_shrink_node(victim, gfp_mask, false,
-					zone, &nr_scanned);
+					pgdat, &nr_scanned);
 		*total_scanned += nr_scanned;
 		if (!soft_limit_excess(root_memcg))
 			break;
@@ -2560,22 +2546,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
 	return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
 					    unsigned long *total_scanned)
 {
 	unsigned long nr_reclaimed = 0;
-	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+	struct mem_cgroup_per_node *mz, *next_mz = NULL;
 	unsigned long reclaimed;
 	int loop = 0;
-	struct mem_cgroup_tree_per_zone *mctz;
+	struct mem_cgroup_tree_per_node *mctz;
 	unsigned long excess;
 	unsigned long nr_scanned;
 
 	if (order > 0)
 		return 0;
 
-	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+	mctz = soft_limit_tree_node(pgdat->node_id);
 	/*
 	 * This loop can run a while, specially if mem_cgroup's continuously
 	 * keep exceeding their soft limit and putting the system under
@@ -2590,7 +2576,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
 						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
@@ -3211,22 +3197,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_DEBUG_VM
 	{
-		int nid, zid;
-		struct mem_cgroup_per_zone *mz;
+		pg_data_t *pgdat;
+		struct mem_cgroup_per_node *mz;
 		struct zone_reclaim_stat *rstat;
 		unsigned long recent_rotated[2] = {0, 0};
 		unsigned long recent_scanned[2] = {0, 0};
 
-		for_each_online_node(nid)
-			for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-				mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-				rstat = &mz->lruvec.reclaim_stat;
+		for_each_online_pgdat(pgdat) {
+			mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+			rstat = &mz->lruvec.reclaim_stat;
 
-				recent_rotated[0] += rstat->recent_rotated[0];
-				recent_rotated[1] += rstat->recent_rotated[1];
-				recent_scanned[0] += rstat->recent_scanned[0];
-				recent_scanned[1] += rstat->recent_scanned[1];
-			}
+			recent_rotated[0] += rstat->recent_rotated[0];
+			recent_rotated[1] += rstat->recent_rotated[1];
+			recent_scanned[0] += rstat->recent_scanned[0];
+			recent_scanned[1] += rstat->recent_scanned[1];
+		}
 		seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
 		seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
 		seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
@@ -4106,11 +4091,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return idr_find(&mem_cgroup_idr, id);
 }
 
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	struct mem_cgroup_per_node *pn;
-	struct mem_cgroup_per_zone *mz;
-	int zone, tmp = node;
+	int tmp = node;
 	/*
 	 * This routine is called against possible nodes.
 	 * But it's BUG to call kmalloc() against offline node.
@@ -4125,18 +4109,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 	if (!pn)
 		return 1;
 
-	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-		mz = &pn->zoneinfo[zone];
-		lruvec_init(&mz->lruvec);
-		mz->usage_in_excess = 0;
-		mz->on_tree = false;
-		mz->memcg = memcg;
-	}
+	lruvec_init(&pn->lruvec);
+	pn->usage_in_excess = 0;
+	pn->on_tree = false;
+	pn->memcg = memcg;
+
 	memcg->nodeinfo[node] = pn;
 	return 0;
 }
 
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
 	kfree(memcg->nodeinfo[node]);
 }
@@ -4147,7 +4129,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 
 	memcg_wb_domain_exit(memcg);
 	for_each_node(node)
-		free_mem_cgroup_per_zone_info(memcg, node);
+		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->stat);
 	kfree(memcg);
 }
@@ -4176,7 +4158,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 		goto fail;
 
 	for_each_node(node)
-		if (alloc_mem_cgroup_per_zone_info(memcg, node))
+		if (alloc_mem_cgroup_per_node_info(memcg, node))
 			goto fail;
 
 	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
@@ -5779,18 +5761,12 @@ static int __init mem_cgroup_init(void)
 
 	for_each_node(node) {
 		struct mem_cgroup_tree_per_node *rtpn;
-		int zone;
 
 		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
 				    node_online(node) ? node : NUMA_NO_NODE);
 
-		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-			struct mem_cgroup_tree_per_zone *rtpz;
-
-			rtpz = &rtpn->rb_tree_per_zone[zone];
-			rtpz->rb_root = RB_ROOT;
-			spin_lock_init(&rtpz->lock);
-		}
+		rtpn->rb_root = RB_ROOT;
+		spin_lock_init(&rtpn->lock);
 		soft_limit_tree.rb_tree_per_node[node] = rtpn;
 	}
 