-rw-r--r--  include/linux/memcontrol.h  |  38
-rw-r--r--  include/linux/swap.h        |   2
-rw-r--r--  mm/memcontrol.c             | 190
-rw-r--r--  mm/vmscan.c                 |  19
-rw-r--r--  mm/workingset.c             |   6
5 files changed, 111 insertions, 144 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f4963ee4fdbc..b759827b2f1e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -60,7 +60,7 @@ enum mem_cgroup_stat_index {
 };
 
 struct mem_cgroup_reclaim_cookie {
-        struct zone *zone;
+        pg_data_t *pgdat;
         int priority;
         unsigned int generation;
 };
@@ -118,7 +118,7 @@ struct mem_cgroup_reclaim_iter {
 /*
  * per-zone information in memory controller.
  */
-struct mem_cgroup_per_zone {
+struct mem_cgroup_per_node {
         struct lruvec lruvec;
         unsigned long lru_size[NR_LRU_LISTS];
 
@@ -132,10 +132,6 @@ struct mem_cgroup_per_zone {
         /* use container_of */
 };
 
-struct mem_cgroup_per_node {
-        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
 struct mem_cgroup_threshold {
         struct eventfd_ctx *eventfd;
         unsigned long threshold;
@@ -314,19 +310,15 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 
-static inline struct mem_cgroup_per_zone *
-mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
+static struct mem_cgroup_per_node *
+mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
 {
-        int nid = zone_to_nid(zone);
-        int zid = zone_idx(zone);
-
-        return &memcg->nodeinfo[nid]->zoneinfo[zid];
+        return memcg->nodeinfo[nid];
 }
 
 /**
  * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
  * @node: node of the wanted lruvec
- * @zone: zone of the wanted lruvec
  * @memcg: memcg of the wanted lruvec
  *
  * Returns the lru list vector holding pages for a given @node or a given
@@ -334,9 +326,9 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
  * is disabled.
  */
 static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
-                                struct zone *zone, struct mem_cgroup *memcg)
+                                struct mem_cgroup *memcg)
 {
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
         struct lruvec *lruvec;
 
         if (mem_cgroup_disabled()) {
@@ -344,7 +336,7 @@ static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
                 goto out;
         }
 
-        mz = mem_cgroup_zone_zoneinfo(memcg, zone);
+        mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
         lruvec = &mz->lruvec;
 out:
         /*
@@ -352,8 +344,8 @@ out:
          * we have to be prepared to initialize lruvec->pgdat here;
          * and if offlined then reonlined, we need to reinitialize it.
          */
-        if (unlikely(lruvec->pgdat != zone->zone_pgdat))
-                lruvec->pgdat = zone->zone_pgdat;
+        if (unlikely(lruvec->pgdat != pgdat))
+                lruvec->pgdat = pgdat;
         return lruvec;
 }
 
@@ -446,9 +438,9 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 static inline
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
 
-        mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
         return mz->lru_size[lru];
 }
 
@@ -519,7 +511,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
         mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                             gfp_t gfp_mask,
                                             unsigned long *total_scanned);
 
@@ -611,7 +603,7 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
 }
 
 static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
-                                struct zone *zone, struct mem_cgroup *memcg)
+                                struct mem_cgroup *memcg)
 {
         return node_lruvec(pgdat);
 }
@@ -723,7 +715,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                             gfp_t gfp_mask,
                                             unsigned long *total_scanned)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ad616d7c381..2a23ddc96edd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -318,7 +318,7 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                                   bool may_swap);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
                                                 gfp_t gfp_mask, bool noswap,
-                                                struct zone *zone,
+                                                pg_data_t *pgdat,
                                                 unsigned long *nr_scanned);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c9ebec98e92a..9cbd40ebccd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = {
  * their hierarchy representation
  */
 
-struct mem_cgroup_tree_per_zone {
+struct mem_cgroup_tree_per_node {
         struct rb_root rb_root;
         spinlock_t lock;
 };
 
-struct mem_cgroup_tree_per_node {
-        struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
 struct mem_cgroup_tree {
         struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
 };
@@ -374,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page)
         return ino;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
+static struct mem_cgroup_per_node *
+mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
 {
         int nid = page_to_nid(page);
-        int zid = page_zonenum(page);
 
-        return &memcg->nodeinfo[nid]->zoneinfo[zid];
+        return memcg->nodeinfo[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
+static struct mem_cgroup_tree_per_node *
+soft_limit_tree_node(int nid)
 {
-        return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+        return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static struct mem_cgroup_tree_per_zone *
+static struct mem_cgroup_tree_per_node *
 soft_limit_tree_from_page(struct page *page)
 {
         int nid = page_to_nid(page);
-        int zid = page_zonenum(page);
 
-        return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
+        return soft_limit_tree.rb_tree_per_node[nid];
 }
 
-static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
-                                         struct mem_cgroup_tree_per_zone *mctz,
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
+                                         struct mem_cgroup_tree_per_node *mctz,
                                          unsigned long new_usage_in_excess)
 {
         struct rb_node **p = &mctz->rb_root.rb_node;
         struct rb_node *parent = NULL;
-        struct mem_cgroup_per_zone *mz_node;
+        struct mem_cgroup_per_node *mz_node;
 
         if (mz->on_tree)
                 return;
@@ -414,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
                 return;
         while (*p) {
                 parent = *p;
-                mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
+                mz_node = rb_entry(parent, struct mem_cgroup_per_node,
                                         tree_node);
                 if (mz->usage_in_excess < mz_node->usage_in_excess)
                         p = &(*p)->rb_left;
@@ -430,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz,
         mz->on_tree = true;
 }
 
-static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-                                         struct mem_cgroup_tree_per_zone *mctz)
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                         struct mem_cgroup_tree_per_node *mctz)
 {
         if (!mz->on_tree)
                 return;
@@ -439,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
         mz->on_tree = false;
 }
 
-static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
-                                       struct mem_cgroup_tree_per_zone *mctz)
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                       struct mem_cgroup_tree_per_node *mctz)
 {
         unsigned long flags;
 
@@ -464,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
 static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 {
         unsigned long excess;
-        struct mem_cgroup_per_zone *mz;
-        struct mem_cgroup_tree_per_zone *mctz;
+        struct mem_cgroup_per_node *mz;
+        struct mem_cgroup_tree_per_node *mctz;
 
         mctz = soft_limit_tree_from_page(page);
         /*
@@ -473,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
          * because their event counter is not touched.
          */
         for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-                mz = mem_cgroup_page_zoneinfo(memcg, page);
+                mz = mem_cgroup_page_nodeinfo(memcg, page);
                 excess = soft_limit_excess(memcg);
                 /*
                  * We have to update the tree if mz is on RB-tree or
@@ -498,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 
 static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 {
-        struct mem_cgroup_tree_per_zone *mctz;
-        struct mem_cgroup_per_zone *mz;
-        int nid, zid;
+        struct mem_cgroup_tree_per_node *mctz;
+        struct mem_cgroup_per_node *mz;
+        int nid;
 
         for_each_node(nid) {
-                for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                        mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                        mctz = soft_limit_tree_node_zone(nid, zid);
-                        mem_cgroup_remove_exceeded(mz, mctz);
-                }
+                mz = mem_cgroup_nodeinfo(memcg, nid);
+                mctz = soft_limit_tree_node(nid);
+                mem_cgroup_remove_exceeded(mz, mctz);
         }
 }
 
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
         struct rb_node *rightmost = NULL;
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
 
 retry:
         mz = NULL;
@@ -523,7 +515,7 @@ retry:
         if (!rightmost)
                 goto done;              /* Nothing to reclaim from */
 
-        mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
+        mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node);
         /*
          * Remove the node now but someone else can add it back,
          * we will to add it back at the end of reclaim to its correct
@@ -537,10 +529,10 @@ done:
         return mz;
 }
 
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
+static struct mem_cgroup_per_node *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 {
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
 
         spin_lock_irq(&mctz->lock);
         mz = __mem_cgroup_largest_soft_limit_node(mctz);
@@ -634,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                            int nid, unsigned int lru_mask)
 {
         unsigned long nr = 0;
-        int zid;
+        struct mem_cgroup_per_node *mz;
+        enum lru_list lru;
 
         VM_BUG_ON((unsigned)nid >= nr_node_ids);
 
-        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                struct mem_cgroup_per_zone *mz;
-                enum lru_list lru;
-
-                for_each_lru(lru) {
-                        if (!(BIT(lru) & lru_mask))
-                                continue;
-                        mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                        nr += mz->lru_size[lru];
-                }
+        for_each_lru(lru) {
+                if (!(BIT(lru) & lru_mask))
+                        continue;
+                mz = mem_cgroup_nodeinfo(memcg, nid);
+                nr += mz->lru_size[lru];
         }
         return nr;
 }
@@ -800,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
         rcu_read_lock();
 
         if (reclaim) {
-                struct mem_cgroup_per_zone *mz;
+                struct mem_cgroup_per_node *mz;
 
-                mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone);
+                mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
                 iter = &mz->iter[reclaim->priority];
 
                 if (prev && reclaim->generation != iter->generation)
@@ -901,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 {
         struct mem_cgroup *memcg = dead_memcg;
         struct mem_cgroup_reclaim_iter *iter;
-        struct mem_cgroup_per_zone *mz;
-        int nid, zid;
+        struct mem_cgroup_per_node *mz;
+        int nid;
         int i;
 
         while ((memcg = parent_mem_cgroup(memcg))) {
                 for_each_node(nid) {
-                        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                                mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                                for (i = 0; i <= DEF_PRIORITY; i++) {
-                                        iter = &mz->iter[i];
-                                        cmpxchg(&iter->position,
-                                                dead_memcg, NULL);
-                                }
+                        mz = mem_cgroup_nodeinfo(memcg, nid);
+                        for (i = 0; i <= DEF_PRIORITY; i++) {
+                                iter = &mz->iter[i];
+                                cmpxchg(&iter->position,
+                                        dead_memcg, NULL);
                         }
                 }
         }
@@ -945,7 +931,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  */
 struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
 {
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
         struct mem_cgroup *memcg;
         struct lruvec *lruvec;
 
@@ -962,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd
         if (!memcg)
                 memcg = root_mem_cgroup;
 
-        mz = mem_cgroup_page_zoneinfo(memcg, page);
+        mz = mem_cgroup_page_nodeinfo(memcg, page);
         lruvec = &mz->lruvec;
 out:
         /*
@@ -989,7 +975,7 @@ out:
 void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                                 enum zone_type zid, int nr_pages)
 {
-        struct mem_cgroup_per_zone *mz;
+        struct mem_cgroup_per_node *mz;
         unsigned long *lru_size;
         long size;
         bool empty;
@@ -999,7 +985,7 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
         if (mem_cgroup_disabled())
                 return;
 
-        mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+        mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
         lru_size = mz->lru_size + lru;
         empty = list_empty(lruvec->lists + lru);
 
@@ -1392,7 +1378,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 #endif
 
 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-                                   struct zone *zone,
+                                   pg_data_t *pgdat,
                                    gfp_t gfp_mask,
                                    unsigned long *total_scanned)
 {
@@ -1402,7 +1388,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
         unsigned long excess;
         unsigned long nr_scanned;
         struct mem_cgroup_reclaim_cookie reclaim = {
-                .zone = zone,
+                .pgdat = pgdat,
                 .priority = 0,
         };
 
@@ -1433,7 +1419,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
                         continue;
                 }
                 total += mem_cgroup_shrink_node(victim, gfp_mask, false,
-                                        zone, &nr_scanned);
+                                        pgdat, &nr_scanned);
                 *total_scanned += nr_scanned;
                 if (!soft_limit_excess(root_memcg))
                         break;
@@ -2560,22 +2546,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
         return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
                                             gfp_t gfp_mask,
                                             unsigned long *total_scanned)
 {
         unsigned long nr_reclaimed = 0;
-        struct mem_cgroup_per_zone *mz, *next_mz = NULL;
+        struct mem_cgroup_per_node *mz, *next_mz = NULL;
         unsigned long reclaimed;
         int loop = 0;
-        struct mem_cgroup_tree_per_zone *mctz;
+        struct mem_cgroup_tree_per_node *mctz;
         unsigned long excess;
         unsigned long nr_scanned;
 
         if (order > 0)
                 return 0;
 
-        mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
+        mctz = soft_limit_tree_node(pgdat->node_id);
         /*
          * This loop can run a while, specially if mem_cgroup's continuously
          * keep exceeding their soft limit and putting the system under
@@ -2590,7 +2576,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                         break;
 
                 nr_scanned = 0;
-                reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
+                reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
                                                     gfp_mask, &nr_scanned);
                 nr_reclaimed += reclaimed;
                 *total_scanned += nr_scanned;
@@ -3211,22 +3197,21 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_DEBUG_VM
         {
-                int nid, zid;
-                struct mem_cgroup_per_zone *mz;
+                pg_data_t *pgdat;
+                struct mem_cgroup_per_node *mz;
                 struct zone_reclaim_stat *rstat;
                 unsigned long recent_rotated[2] = {0, 0};
                 unsigned long recent_scanned[2] = {0, 0};
 
-                for_each_online_node(nid)
-                        for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-                                mz = &memcg->nodeinfo[nid]->zoneinfo[zid];
-                                rstat = &mz->lruvec.reclaim_stat;
+                for_each_online_pgdat(pgdat) {
+                        mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+                        rstat = &mz->lruvec.reclaim_stat;
 
                         recent_rotated[0] += rstat->recent_rotated[0];
                         recent_rotated[1] += rstat->recent_rotated[1];
                         recent_scanned[0] += rstat->recent_scanned[0];
                         recent_scanned[1] += rstat->recent_scanned[1];
                 }
                 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
                 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
                 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
@@ -4106,11 +4091,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
         return idr_find(&mem_cgroup_idr, id);
 }
 
-static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
         struct mem_cgroup_per_node *pn;
-        struct mem_cgroup_per_zone *mz;
-        int zone, tmp = node;
+        int tmp = node;
         /*
          * This routine is called against possible nodes.
          * But it's BUG to call kmalloc() against offline node.
@@ -4125,18 +4109,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
         if (!pn)
                 return 1;
 
-        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                mz = &pn->zoneinfo[zone];
-                lruvec_init(&mz->lruvec);
-                mz->usage_in_excess = 0;
-                mz->on_tree = false;
-                mz->memcg = memcg;
-        }
+        lruvec_init(&pn->lruvec);
+        pn->usage_in_excess = 0;
+        pn->on_tree = false;
+        pn->memcg = memcg;
+
         memcg->nodeinfo[node] = pn;
         return 0;
 }
 
-static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
+static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 {
         kfree(memcg->nodeinfo[node]);
 }
@@ -4147,7 +4129,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 
         memcg_wb_domain_exit(memcg);
         for_each_node(node)
-                free_mem_cgroup_per_zone_info(memcg, node);
+                free_mem_cgroup_per_node_info(memcg, node);
         free_percpu(memcg->stat);
         kfree(memcg);
 }
@@ -4176,7 +4158,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
                 goto fail;
 
         for_each_node(node)
-                if (alloc_mem_cgroup_per_zone_info(memcg, node))
+                if (alloc_mem_cgroup_per_node_info(memcg, node))
                         goto fail;
 
         if (memcg_wb_domain_init(memcg, GFP_KERNEL))
@@ -5779,18 +5761,12 @@ static int __init mem_cgroup_init(void)
 
         for_each_node(node) {
                 struct mem_cgroup_tree_per_node *rtpn;
-                int zone;
 
                 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
                                     node_online(node) ? node : NUMA_NO_NODE);
 
-                for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                        struct mem_cgroup_tree_per_zone *rtpz;
-
-                        rtpz = &rtpn->rb_tree_per_zone[zone];
-                        rtpz->rb_root = RB_ROOT;
-                        spin_lock_init(&rtpz->lock);
-                }
+                rtpn->rb_root = RB_ROOT;
+                spin_lock_init(&rtpn->lock);
                 soft_limit_tree.rb_tree_per_node[node] = rtpn;
         }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 46f7a71ed13b..9f6e673efba7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2229,8 +2229,7 @@ static inline void init_tlb_ubc(void)
 static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memcg,
                               struct scan_control *sc, unsigned long *lru_pages)
 {
-        struct zone *zone = &pgdat->node_zones[sc->reclaim_idx];
-        struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
+        struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
         unsigned long nr[NR_LRU_LISTS];
         unsigned long targets[NR_LRU_LISTS];
         unsigned long nr_to_scan;
@@ -2439,7 +2438,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
         do {
                 struct mem_cgroup *root = sc->target_mem_cgroup;
                 struct mem_cgroup_reclaim_cookie reclaim = {
-                        .zone = &pgdat->node_zones[classzone_idx],
+                        .pgdat = pgdat,
                         .priority = sc->priority,
                 };
                 unsigned long node_lru_pages = 0;
@@ -2647,7 +2646,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                          * and balancing, not for a memcg's limit.
                          */
                         nr_soft_scanned = 0;
-                        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
+                        nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone->zone_pgdat,
                                                 sc->order, sc->gfp_mask,
                                                 &nr_soft_scanned);
                         sc->nr_reclaimed += nr_soft_reclaimed;
@@ -2917,7 +2916,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 
 unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
                                                 gfp_t gfp_mask, bool noswap,
-                                                struct zone *zone,
+                                                pg_data_t *pgdat,
                                                 unsigned long *nr_scanned)
 {
         struct scan_control sc = {
@@ -2944,7 +2943,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-        shrink_node_memcg(zone->zone_pgdat, memcg, &sc, &lru_pages);
+        shrink_node_memcg(pgdat, memcg, &sc, &lru_pages);
 
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2994,7 +2993,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 #endif
 
 static void age_active_anon(struct pglist_data *pgdat,
-                                struct zone *zone, struct scan_control *sc)
+                                struct scan_control *sc)
 {
         struct mem_cgroup *memcg;
 
@@ -3003,7 +3002,7 @@ static void age_active_anon(struct pglist_data *pgdat,
 
         memcg = mem_cgroup_iter(NULL, NULL, NULL);
         do {
-                struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, zone, memcg);
+                struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);
 
                 if (inactive_list_is_low(lruvec, false))
                         shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
@@ -3193,7 +3192,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                  * pages are rotated regardless of classzone as this is
                  * about consistent aging.
                  */
-                age_active_anon(pgdat, &pgdat->node_zones[MAX_NR_ZONES - 1], &sc);
+                age_active_anon(pgdat, &sc);
 
                 /*
                  * If we're getting trouble reclaiming, start doing writepage
@@ -3205,7 +3204,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
                 /* Call soft limit reclaim before calling shrink_node. */
                 sc.nr_scanned = 0;
                 nr_soft_scanned = 0;
-                nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, sc.order,
+                nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(pgdat, sc.order,
                                                 sc.gfp_mask, &nr_soft_scanned);
                 sc.nr_reclaimed += nr_soft_reclaimed;
 
diff --git a/mm/workingset.c b/mm/workingset.c
index df0dacaf54ee..2af14bb5a349 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -218,7 +218,7 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
         VM_BUG_ON_PAGE(page_count(page), page);
         VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-        lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
+        lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
         eviction = atomic_long_inc_return(&lruvec->inactive_age);
         return pack_shadow(memcgid, zone, eviction);
 }
@@ -267,7 +267,7 @@ bool workingset_refault(void *shadow)
                 rcu_read_unlock();
                 return false;
         }
-        lruvec = mem_cgroup_lruvec(zone->zone_pgdat, zone, memcg);
+        lruvec = mem_cgroup_lruvec(zone->zone_pgdat, memcg);
         refault = atomic_long_read(&lruvec->inactive_age);
         active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
         rcu_read_unlock();
@@ -319,7 +319,7 @@ void workingset_activation(struct page *page)
         memcg = page_memcg_rcu(page);
         if (!mem_cgroup_disabled() && !memcg)
                 goto out;
-        lruvec = mem_cgroup_lruvec(page_pgdat(page), page_zone(page), memcg);
+        lruvec = mem_cgroup_lruvec(page_pgdat(page), memcg);
         atomic_long_inc(&lruvec->inactive_age);
 out:
         rcu_read_unlock();
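
For reference, a minimal sketch of how a reclaim-side caller looks once the lruvec lookup is keyed by node rather than zone. The prototypes follow the headers changed above; the wrapper function itself is hypothetical and not part of the patch:

#include <linux/memcontrol.h>   /* mem_cgroup_lruvec(), mem_cgroup_get_lru_size() */

/* Illustrative only: one lruvec per (memcg, node), no zone argument needed. */
static unsigned long example_inactive_file_size(struct pglist_data *pgdat,
                                                struct mem_cgroup *memcg)
{
        struct lruvec *lruvec = mem_cgroup_lruvec(pgdat, memcg);

        /* lru_size[] is now tracked in mem_cgroup_per_node. */
        return mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
}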