Diffstat (limited to 'mm/memcontrol.c'):
 -rw-r--r--  mm/memcontrol.c | 1182
 1 file changed, 516 insertions(+), 666 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ebd1e86bef1c..6aff93c98aca 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -33,6 +33,7 @@
33#include <linux/bit_spinlock.h> 33#include <linux/bit_spinlock.h>
34#include <linux/rcupdate.h> 34#include <linux/rcupdate.h>
35#include <linux/limits.h> 35#include <linux/limits.h>
36#include <linux/export.h>
36#include <linux/mutex.h> 37#include <linux/mutex.h>
37#include <linux/rbtree.h> 38#include <linux/rbtree.h>
38#include <linux/slab.h> 39#include <linux/slab.h>
@@ -201,52 +202,8 @@ struct mem_cgroup_eventfd_list {
201 struct eventfd_ctx *eventfd; 202 struct eventfd_ctx *eventfd;
202}; 203};
203 204
204static void mem_cgroup_threshold(struct mem_cgroup *mem); 205static void mem_cgroup_threshold(struct mem_cgroup *memcg);
205static void mem_cgroup_oom_notify(struct mem_cgroup *mem); 206static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
206
207enum {
208 SCAN_BY_LIMIT,
209 SCAN_BY_SYSTEM,
210 NR_SCAN_CONTEXT,
211 SCAN_BY_SHRINK, /* not recorded now */
212};
213
214enum {
215 SCAN,
216 SCAN_ANON,
217 SCAN_FILE,
218 ROTATE,
219 ROTATE_ANON,
220 ROTATE_FILE,
221 FREED,
222 FREED_ANON,
223 FREED_FILE,
224 ELAPSED,
225 NR_SCANSTATS,
226};
227
228struct scanstat {
229 spinlock_t lock;
230 unsigned long stats[NR_SCAN_CONTEXT][NR_SCANSTATS];
231 unsigned long rootstats[NR_SCAN_CONTEXT][NR_SCANSTATS];
232};
233
234const char *scanstat_string[NR_SCANSTATS] = {
235 "scanned_pages",
236 "scanned_anon_pages",
237 "scanned_file_pages",
238 "rotated_pages",
239 "rotated_anon_pages",
240 "rotated_file_pages",
241 "freed_pages",
242 "freed_anon_pages",
243 "freed_file_pages",
244 "elapsed_ns",
245};
246#define SCANSTAT_WORD_LIMIT "_by_limit"
247#define SCANSTAT_WORD_SYSTEM "_by_system"
248#define SCANSTAT_WORD_HIERARCHY "_under_hierarchy"
249
250 207
251/* 208/*
252 * The memory controller data structure. The memory controller controls both 209 * The memory controller data structure. The memory controller controls both
@@ -313,8 +270,7 @@ struct mem_cgroup {
313 270
314 /* For oom notifier event fd */ 271 /* For oom notifier event fd */
315 struct list_head oom_notify; 272 struct list_head oom_notify;
316 /* For recording LRU-scan statistics */ 273
317 struct scanstat scanstat;
318 /* 274 /*
319 * Should we move charges of a task when a task is moved into this 275 * Should we move charges of a task when a task is moved into this
320 * mem_cgroup ? And what type of charges should we move ? 276 * mem_cgroup ? And what type of charges should we move ?
@@ -407,29 +363,29 @@ enum charge_type {
407#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2 363#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
408#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT) 364#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
409 365
410static void mem_cgroup_get(struct mem_cgroup *mem); 366static void mem_cgroup_get(struct mem_cgroup *memcg);
411static void mem_cgroup_put(struct mem_cgroup *mem); 367static void mem_cgroup_put(struct mem_cgroup *memcg);
412static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); 368static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
413static void drain_all_stock_async(struct mem_cgroup *mem); 369static void drain_all_stock_async(struct mem_cgroup *memcg);
414 370
415static struct mem_cgroup_per_zone * 371static struct mem_cgroup_per_zone *
416mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) 372mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
417{ 373{
418 return &mem->info.nodeinfo[nid]->zoneinfo[zid]; 374 return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
419} 375}
420 376
421struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) 377struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
422{ 378{
423 return &mem->css; 379 return &memcg->css;
424} 380}
425 381
426static struct mem_cgroup_per_zone * 382static struct mem_cgroup_per_zone *
427page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page) 383page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
428{ 384{
429 int nid = page_to_nid(page); 385 int nid = page_to_nid(page);
430 int zid = page_zonenum(page); 386 int zid = page_zonenum(page);
431 387
432 return mem_cgroup_zoneinfo(mem, nid, zid); 388 return mem_cgroup_zoneinfo(memcg, nid, zid);
433} 389}
434 390
435static struct mem_cgroup_tree_per_zone * 391static struct mem_cgroup_tree_per_zone *
@@ -448,7 +404,7 @@ soft_limit_tree_from_page(struct page *page)
448} 404}
449 405
450static void 406static void
451__mem_cgroup_insert_exceeded(struct mem_cgroup *mem, 407__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
452 struct mem_cgroup_per_zone *mz, 408 struct mem_cgroup_per_zone *mz,
453 struct mem_cgroup_tree_per_zone *mctz, 409 struct mem_cgroup_tree_per_zone *mctz,
454 unsigned long long new_usage_in_excess) 410 unsigned long long new_usage_in_excess)
@@ -482,7 +438,7 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
482} 438}
483 439
484static void 440static void
485__mem_cgroup_remove_exceeded(struct mem_cgroup *mem, 441__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
486 struct mem_cgroup_per_zone *mz, 442 struct mem_cgroup_per_zone *mz,
487 struct mem_cgroup_tree_per_zone *mctz) 443 struct mem_cgroup_tree_per_zone *mctz)
488{ 444{
@@ -493,17 +449,17 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
493} 449}
494 450
495static void 451static void
496mem_cgroup_remove_exceeded(struct mem_cgroup *mem, 452mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
497 struct mem_cgroup_per_zone *mz, 453 struct mem_cgroup_per_zone *mz,
498 struct mem_cgroup_tree_per_zone *mctz) 454 struct mem_cgroup_tree_per_zone *mctz)
499{ 455{
500 spin_lock(&mctz->lock); 456 spin_lock(&mctz->lock);
501 __mem_cgroup_remove_exceeded(mem, mz, mctz); 457 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
502 spin_unlock(&mctz->lock); 458 spin_unlock(&mctz->lock);
503} 459}
504 460
505 461
506static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page) 462static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
507{ 463{
508 unsigned long long excess; 464 unsigned long long excess;
509 struct mem_cgroup_per_zone *mz; 465 struct mem_cgroup_per_zone *mz;
@@ -516,9 +472,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
516 * Necessary to update all ancestors when hierarchy is used. 472 * Necessary to update all ancestors when hierarchy is used.
517 * because their event counter is not touched. 473 * because their event counter is not touched.
518 */ 474 */
519 for (; mem; mem = parent_mem_cgroup(mem)) { 475 for (; memcg; memcg = parent_mem_cgroup(memcg)) {
520 mz = mem_cgroup_zoneinfo(mem, nid, zid); 476 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
521 excess = res_counter_soft_limit_excess(&mem->res); 477 excess = res_counter_soft_limit_excess(&memcg->res);
522 /* 478 /*
523 * We have to update the tree if mz is on RB-tree or 479 * We have to update the tree if mz is on RB-tree or
524 * mem is over its softlimit. 480 * mem is over its softlimit.
@@ -527,18 +483,18 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
527 spin_lock(&mctz->lock); 483 spin_lock(&mctz->lock);
528 /* if on-tree, remove it */ 484 /* if on-tree, remove it */
529 if (mz->on_tree) 485 if (mz->on_tree)
530 __mem_cgroup_remove_exceeded(mem, mz, mctz); 486 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
531 /* 487 /*
532 * Insert again. mz->usage_in_excess will be updated. 488 * Insert again. mz->usage_in_excess will be updated.
533 * If excess is 0, no tree ops. 489 * If excess is 0, no tree ops.
534 */ 490 */
535 __mem_cgroup_insert_exceeded(mem, mz, mctz, excess); 491 __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
536 spin_unlock(&mctz->lock); 492 spin_unlock(&mctz->lock);
537 } 493 }
538 } 494 }
539} 495}
540 496
541static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) 497static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
542{ 498{
543 int node, zone; 499 int node, zone;
544 struct mem_cgroup_per_zone *mz; 500 struct mem_cgroup_per_zone *mz;
@@ -546,9 +502,9 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
546 502
547 for_each_node_state(node, N_POSSIBLE) { 503 for_each_node_state(node, N_POSSIBLE) {
548 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 504 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
549 mz = mem_cgroup_zoneinfo(mem, node, zone); 505 mz = mem_cgroup_zoneinfo(memcg, node, zone);
550 mctz = soft_limit_tree_node_zone(node, zone); 506 mctz = soft_limit_tree_node_zone(node, zone);
551 mem_cgroup_remove_exceeded(mem, mz, mctz); 507 mem_cgroup_remove_exceeded(memcg, mz, mctz);
552 } 508 }
553 } 509 }
554} 510}
@@ -609,7 +565,7 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
609 * common workload, threashold and synchonization as vmstat[] should be 565 * common workload, threashold and synchonization as vmstat[] should be
610 * implemented. 566 * implemented.
611 */ 567 */
612static long mem_cgroup_read_stat(struct mem_cgroup *mem, 568static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
613 enum mem_cgroup_stat_index idx) 569 enum mem_cgroup_stat_index idx)
614{ 570{
615 long val = 0; 571 long val = 0;
@@ -617,81 +573,83 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
617 573
618 get_online_cpus(); 574 get_online_cpus();
619 for_each_online_cpu(cpu) 575 for_each_online_cpu(cpu)
620 val += per_cpu(mem->stat->count[idx], cpu); 576 val += per_cpu(memcg->stat->count[idx], cpu);
621#ifdef CONFIG_HOTPLUG_CPU 577#ifdef CONFIG_HOTPLUG_CPU
622 spin_lock(&mem->pcp_counter_lock); 578 spin_lock(&memcg->pcp_counter_lock);
623 val += mem->nocpu_base.count[idx]; 579 val += memcg->nocpu_base.count[idx];
624 spin_unlock(&mem->pcp_counter_lock); 580 spin_unlock(&memcg->pcp_counter_lock);
625#endif 581#endif
626 put_online_cpus(); 582 put_online_cpus();
627 return val; 583 return val;
628} 584}
629 585
630static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, 586static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
631 bool charge) 587 bool charge)
632{ 588{
633 int val = (charge) ? 1 : -1; 589 int val = (charge) ? 1 : -1;
634 this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); 590 this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
635} 591}
636 592
637void mem_cgroup_pgfault(struct mem_cgroup *mem, int val) 593void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
638{ 594{
639 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val); 595 this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
640} 596}
641 597
642void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val) 598void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
643{ 599{
644 this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val); 600 this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
645} 601}
646 602
647static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, 603static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
648 enum mem_cgroup_events_index idx) 604 enum mem_cgroup_events_index idx)
649{ 605{
650 unsigned long val = 0; 606 unsigned long val = 0;
651 int cpu; 607 int cpu;
652 608
653 for_each_online_cpu(cpu) 609 for_each_online_cpu(cpu)
654 val += per_cpu(mem->stat->events[idx], cpu); 610 val += per_cpu(memcg->stat->events[idx], cpu);
655#ifdef CONFIG_HOTPLUG_CPU 611#ifdef CONFIG_HOTPLUG_CPU
656 spin_lock(&mem->pcp_counter_lock); 612 spin_lock(&memcg->pcp_counter_lock);
657 val += mem->nocpu_base.events[idx]; 613 val += memcg->nocpu_base.events[idx];
658 spin_unlock(&mem->pcp_counter_lock); 614 spin_unlock(&memcg->pcp_counter_lock);
659#endif 615#endif
660 return val; 616 return val;
661} 617}
662 618
663static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, 619static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
664 bool file, int nr_pages) 620 bool file, int nr_pages)
665{ 621{
666 preempt_disable(); 622 preempt_disable();
667 623
668 if (file) 624 if (file)
669 __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages); 625 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
626 nr_pages);
670 else 627 else
671 __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages); 628 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
629 nr_pages);
672 630
673 /* pagein of a big page is an event. So, ignore page size */ 631 /* pagein of a big page is an event. So, ignore page size */
674 if (nr_pages > 0) 632 if (nr_pages > 0)
675 __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); 633 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
676 else { 634 else {
677 __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); 635 __this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
678 nr_pages = -nr_pages; /* for event */ 636 nr_pages = -nr_pages; /* for event */
679 } 637 }
680 638
681 __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); 639 __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
682 640
683 preempt_enable(); 641 preempt_enable();
684} 642}
685 643
686unsigned long 644unsigned long
687mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid, 645mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
688 unsigned int lru_mask) 646 unsigned int lru_mask)
689{ 647{
690 struct mem_cgroup_per_zone *mz; 648 struct mem_cgroup_per_zone *mz;
691 enum lru_list l; 649 enum lru_list l;
692 unsigned long ret = 0; 650 unsigned long ret = 0;
693 651
694 mz = mem_cgroup_zoneinfo(mem, nid, zid); 652 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
695 653
696 for_each_lru(l) { 654 for_each_lru(l) {
697 if (BIT(l) & lru_mask) 655 if (BIT(l) & lru_mask)
@@ -701,44 +659,45 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
701} 659}
702 660
703static unsigned long 661static unsigned long
704mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem, 662mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
705 int nid, unsigned int lru_mask) 663 int nid, unsigned int lru_mask)
706{ 664{
707 u64 total = 0; 665 u64 total = 0;
708 int zid; 666 int zid;
709 667
710 for (zid = 0; zid < MAX_NR_ZONES; zid++) 668 for (zid = 0; zid < MAX_NR_ZONES; zid++)
711 total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask); 669 total += mem_cgroup_zone_nr_lru_pages(memcg,
670 nid, zid, lru_mask);
712 671
713 return total; 672 return total;
714} 673}
715 674
716static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem, 675static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
717 unsigned int lru_mask) 676 unsigned int lru_mask)
718{ 677{
719 int nid; 678 int nid;
720 u64 total = 0; 679 u64 total = 0;
721 680
722 for_each_node_state(nid, N_HIGH_MEMORY) 681 for_each_node_state(nid, N_HIGH_MEMORY)
723 total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask); 682 total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
724 return total; 683 return total;
725} 684}
726 685
727static bool __memcg_event_check(struct mem_cgroup *mem, int target) 686static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
728{ 687{
729 unsigned long val, next; 688 unsigned long val, next;
730 689
731 val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); 690 val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
732 next = this_cpu_read(mem->stat->targets[target]); 691 next = __this_cpu_read(memcg->stat->targets[target]);
733 /* from time_after() in jiffies.h */ 692 /* from time_after() in jiffies.h */
734 return ((long)next - (long)val < 0); 693 return ((long)next - (long)val < 0);
735} 694}
736 695
737static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) 696static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
738{ 697{
739 unsigned long val, next; 698 unsigned long val, next;
740 699
741 val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); 700 val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
742 701
743 switch (target) { 702 switch (target) {
744 case MEM_CGROUP_TARGET_THRESH: 703 case MEM_CGROUP_TARGET_THRESH:
@@ -754,34 +713,36 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
754 return; 713 return;
755 } 714 }
756 715
757 this_cpu_write(mem->stat->targets[target], next); 716 __this_cpu_write(memcg->stat->targets[target], next);
758} 717}
759 718
760/* 719/*
761 * Check events in order. 720 * Check events in order.
762 * 721 *
763 */ 722 */
764static void memcg_check_events(struct mem_cgroup *mem, struct page *page) 723static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
765{ 724{
725 preempt_disable();
766 /* threshold event is triggered in finer grain than soft limit */ 726 /* threshold event is triggered in finer grain than soft limit */
767 if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) { 727 if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
768 mem_cgroup_threshold(mem); 728 mem_cgroup_threshold(memcg);
769 __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); 729 __mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
770 if (unlikely(__memcg_event_check(mem, 730 if (unlikely(__memcg_event_check(memcg,
771 MEM_CGROUP_TARGET_SOFTLIMIT))) { 731 MEM_CGROUP_TARGET_SOFTLIMIT))) {
772 mem_cgroup_update_tree(mem, page); 732 mem_cgroup_update_tree(memcg, page);
773 __mem_cgroup_target_update(mem, 733 __mem_cgroup_target_update(memcg,
774 MEM_CGROUP_TARGET_SOFTLIMIT); 734 MEM_CGROUP_TARGET_SOFTLIMIT);
775 } 735 }
776#if MAX_NUMNODES > 1 736#if MAX_NUMNODES > 1
777 if (unlikely(__memcg_event_check(mem, 737 if (unlikely(__memcg_event_check(memcg,
778 MEM_CGROUP_TARGET_NUMAINFO))) { 738 MEM_CGROUP_TARGET_NUMAINFO))) {
779 atomic_inc(&mem->numainfo_events); 739 atomic_inc(&memcg->numainfo_events);
780 __mem_cgroup_target_update(mem, 740 __mem_cgroup_target_update(memcg,
781 MEM_CGROUP_TARGET_NUMAINFO); 741 MEM_CGROUP_TARGET_NUMAINFO);
782 } 742 }
783#endif 743#endif
784 } 744 }
745 preempt_enable();
785} 746}
786 747
787static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) 748static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
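/*
 * Illustrative sketch, not part of the patch above: why memcg_check_events()
 * now brackets the whole sequence in preempt_disable()/preempt_enable() and
 * the helpers switch from this_cpu_*() to __this_cpu_*().  With per-access
 * this_cpu_read(), the task can migrate between reading the event counter
 * and reading the target, so the two values may come from different CPUs
 * and the time_after()-style comparison is meaningless.  One preemption-
 * disabled section keeps every access on the same CPU, at which point the
 * raw __this_cpu_*() forms are sufficient.  "threshold" below is a
 * hypothetical increment, not a field from memcontrol.c.
 */
static void event_target_update_sketch(struct mem_cgroup *memcg, int target,
                                       unsigned long threshold)
{
        unsigned long val, next;

        preempt_disable();      /* no migration between the reads below */
        val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
        next = __this_cpu_read(memcg->stat->targets[target]);
        if ((long)next - (long)val < 0)         /* same trick as time_after() */
                __this_cpu_write(memcg->stat->targets[target], val + threshold);
        preempt_enable();
}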
@@ -807,7 +768,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
807 768
808struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) 769struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
809{ 770{
810 struct mem_cgroup *mem = NULL; 771 struct mem_cgroup *memcg = NULL;
811 772
812 if (!mm) 773 if (!mm)
813 return NULL; 774 return NULL;
@@ -818,25 +779,25 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
818 */ 779 */
819 rcu_read_lock(); 780 rcu_read_lock();
820 do { 781 do {
821 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 782 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
822 if (unlikely(!mem)) 783 if (unlikely(!memcg))
823 break; 784 break;
824 } while (!css_tryget(&mem->css)); 785 } while (!css_tryget(&memcg->css));
825 rcu_read_unlock(); 786 rcu_read_unlock();
826 return mem; 787 return memcg;
827} 788}
828 789
829/* The caller has to guarantee "mem" exists before calling this */ 790/* The caller has to guarantee "mem" exists before calling this */
830static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) 791static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
831{ 792{
832 struct cgroup_subsys_state *css; 793 struct cgroup_subsys_state *css;
833 int found; 794 int found;
834 795
835 if (!mem) /* ROOT cgroup has the smallest ID */ 796 if (!memcg) /* ROOT cgroup has the smallest ID */
836 return root_mem_cgroup; /*css_put/get against root is ignored*/ 797 return root_mem_cgroup; /*css_put/get against root is ignored*/
837 if (!mem->use_hierarchy) { 798 if (!memcg->use_hierarchy) {
838 if (css_tryget(&mem->css)) 799 if (css_tryget(&memcg->css))
839 return mem; 800 return memcg;
840 return NULL; 801 return NULL;
841 } 802 }
842 rcu_read_lock(); 803 rcu_read_lock();
@@ -844,13 +805,13 @@ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
844 * searching a memory cgroup which has the smallest ID under given 805 * searching a memory cgroup which has the smallest ID under given
845 * ROOT cgroup. (ID >= 1) 806 * ROOT cgroup. (ID >= 1)
846 */ 807 */
847 css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found); 808 css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
848 if (css && css_tryget(css)) 809 if (css && css_tryget(css))
849 mem = container_of(css, struct mem_cgroup, css); 810 memcg = container_of(css, struct mem_cgroup, css);
850 else 811 else
851 mem = NULL; 812 memcg = NULL;
852 rcu_read_unlock(); 813 rcu_read_unlock();
853 return mem; 814 return memcg;
854} 815}
855 816
856static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, 817static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
@@ -904,29 +865,29 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
904 for_each_mem_cgroup_tree_cond(iter, NULL, true) 865 for_each_mem_cgroup_tree_cond(iter, NULL, true)
905 866
906 867
907static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) 868static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
908{ 869{
909 return (mem == root_mem_cgroup); 870 return (memcg == root_mem_cgroup);
910} 871}
911 872
912void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx) 873void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
913{ 874{
914 struct mem_cgroup *mem; 875 struct mem_cgroup *memcg;
915 876
916 if (!mm) 877 if (!mm)
917 return; 878 return;
918 879
919 rcu_read_lock(); 880 rcu_read_lock();
920 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 881 memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
921 if (unlikely(!mem)) 882 if (unlikely(!memcg))
922 goto out; 883 goto out;
923 884
924 switch (idx) { 885 switch (idx) {
925 case PGMAJFAULT: 886 case PGMAJFAULT:
926 mem_cgroup_pgmajfault(mem, 1); 887 mem_cgroup_pgmajfault(memcg, 1);
927 break; 888 break;
928 case PGFAULT: 889 case PGFAULT:
929 mem_cgroup_pgfault(mem, 1); 890 mem_cgroup_pgfault(memcg, 1);
930 break; 891 break;
931 default: 892 default:
932 BUG(); 893 BUG();
@@ -1035,6 +996,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
1035 return; 996 return;
1036 pc = lookup_page_cgroup(page); 997 pc = lookup_page_cgroup(page);
1037 VM_BUG_ON(PageCgroupAcctLRU(pc)); 998 VM_BUG_ON(PageCgroupAcctLRU(pc));
999 /*
1000 * putback: charge:
1001 * SetPageLRU SetPageCgroupUsed
1002 * smp_mb smp_mb
1003 * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1004 *
1005 * Ensure that one of the two sides adds the page to the memcg
1006 * LRU during a race.
1007 */
1008 smp_mb();
1038 if (!PageCgroupUsed(pc)) 1009 if (!PageCgroupUsed(pc))
1039 return; 1010 return;
1040 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ 1011 /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
@@ -1086,7 +1057,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
1086 unsigned long flags; 1057 unsigned long flags;
1087 struct zone *zone = page_zone(page); 1058 struct zone *zone = page_zone(page);
1088 struct page_cgroup *pc = lookup_page_cgroup(page); 1059 struct page_cgroup *pc = lookup_page_cgroup(page);
1089 1060 /*
1061 * putback: charge:
1062 * SetPageLRU SetPageCgroupUsed
1063 * smp_mb smp_mb
1064 * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1065 *
1066 * Ensure that one of the two sides adds the page to the memcg
1067 * LRU during a race.
1068 */
1069 smp_mb();
1090 /* taking care of that the page is added to LRU while we commit it */ 1070 /* taking care of that the page is added to LRU while we commit it */
1091 if (likely(!PageLRU(page))) 1071 if (likely(!PageLRU(page)))
1092 return; 1072 return;
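/*
 * Illustrative sketch of the barrier pairing described by the two comments
 * added above (generic flag names, not from memcontrol.c).  Each side sets
 * its own flag, issues a full barrier, then tests the other side's flag;
 * whatever the interleaving, at least one side sees both flags set and takes
 * care of linking the page onto the memcg LRU, so a charged page that is on
 * the zone LRU cannot end up missing from the memcg LRU.
 */
static int flag_lru;            /* stands in for PageLRU()        */
static int flag_used;           /* stands in for PageCgroupUsed() */

static bool putback_side(void)  /* roughly the mem_cgroup_add_lru_list() path */
{
        flag_lru = 1;           /* SetPageLRU */
        smp_mb();
        return flag_used;       /* if set, this side adds to the memcg LRU */
}

static bool charge_side(void)   /* roughly the lru_add_after_commit() path */
{
        flag_used = 1;          /* SetPageCgroupUsed */
        smp_mb();
        return flag_lru;        /* if set, this side adds to the memcg LRU */
}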
@@ -1108,21 +1088,21 @@ void mem_cgroup_move_lists(struct page *page,
1108} 1088}
1109 1089
1110/* 1090/*
1111 * Checks whether given mem is same or in the root_mem's 1091 * Checks whether given mem is same or in the root_mem_cgroup's
1112 * hierarchy subtree 1092 * hierarchy subtree
1113 */ 1093 */
1114static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem, 1094static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1115 struct mem_cgroup *mem) 1095 struct mem_cgroup *memcg)
1116{ 1096{
1117 if (root_mem != mem) { 1097 if (root_memcg != memcg) {
1118 return (root_mem->use_hierarchy && 1098 return (root_memcg->use_hierarchy &&
1119 css_is_ancestor(&mem->css, &root_mem->css)); 1099 css_is_ancestor(&memcg->css, &root_memcg->css));
1120 } 1100 }
1121 1101
1122 return true; 1102 return true;
1123} 1103}
1124 1104
1125int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) 1105int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg)
1126{ 1106{
1127 int ret; 1107 int ret;
1128 struct mem_cgroup *curr = NULL; 1108 struct mem_cgroup *curr = NULL;
@@ -1136,25 +1116,29 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
1136 if (!curr) 1116 if (!curr)
1137 return 0; 1117 return 0;
1138 /* 1118 /*
1139 * We should check use_hierarchy of "mem" not "curr". Because checking 1119 * We should check use_hierarchy of "memcg" not "curr". Because checking
1140 * use_hierarchy of "curr" here make this function true if hierarchy is 1120 * use_hierarchy of "curr" here make this function true if hierarchy is
1141 * enabled in "curr" and "curr" is a child of "mem" in *cgroup* 1121 * enabled in "curr" and "curr" is a child of "memcg" in *cgroup*
1142 * hierarchy(even if use_hierarchy is disabled in "mem"). 1122 * hierarchy(even if use_hierarchy is disabled in "memcg").
1143 */ 1123 */
1144 ret = mem_cgroup_same_or_subtree(mem, curr); 1124 ret = mem_cgroup_same_or_subtree(memcg, curr);
1145 css_put(&curr->css); 1125 css_put(&curr->css);
1146 return ret; 1126 return ret;
1147} 1127}
1148 1128
1149static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) 1129int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone)
1150{ 1130{
1151 unsigned long active; 1131 unsigned long inactive_ratio;
1132 int nid = zone_to_nid(zone);
1133 int zid = zone_idx(zone);
1152 unsigned long inactive; 1134 unsigned long inactive;
1135 unsigned long active;
1153 unsigned long gb; 1136 unsigned long gb;
1154 unsigned long inactive_ratio;
1155 1137
1156 inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); 1138 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1157 active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); 1139 BIT(LRU_INACTIVE_ANON));
1140 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1141 BIT(LRU_ACTIVE_ANON));
1158 1142
1159 gb = (inactive + active) >> (30 - PAGE_SHIFT); 1143 gb = (inactive + active) >> (30 - PAGE_SHIFT);
1160 if (gb) 1144 if (gb)
@@ -1162,39 +1146,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_
1162 else 1146 else
1163 inactive_ratio = 1; 1147 inactive_ratio = 1;
1164 1148
1165 if (present_pages) { 1149 return inactive * inactive_ratio < active;
1166 present_pages[0] = inactive;
1167 present_pages[1] = active;
1168 }
1169
1170 return inactive_ratio;
1171} 1150}
1172 1151
1173int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) 1152int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone)
1174{
1175 unsigned long active;
1176 unsigned long inactive;
1177 unsigned long present_pages[2];
1178 unsigned long inactive_ratio;
1179
1180 inactive_ratio = calc_inactive_ratio(memcg, present_pages);
1181
1182 inactive = present_pages[0];
1183 active = present_pages[1];
1184
1185 if (inactive * inactive_ratio < active)
1186 return 1;
1187
1188 return 0;
1189}
1190
1191int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg)
1192{ 1153{
1193 unsigned long active; 1154 unsigned long active;
1194 unsigned long inactive; 1155 unsigned long inactive;
1156 int zid = zone_idx(zone);
1157 int nid = zone_to_nid(zone);
1195 1158
1196 inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); 1159 inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1197 active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); 1160 BIT(LRU_INACTIVE_FILE));
1161 active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
1162 BIT(LRU_ACTIVE_FILE));
1198 1163
1199 return (active > inactive); 1164 return (active > inactive);
1200} 1165}
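/*
 * Worked example for the two *_is_low() helpers above.  The inactive_ratio
 * computed just outside this hunk is assumed to be the kernel's usual
 * int_sqrt(10 * gb); the page counts below are purely illustrative.
 */
static int inactive_anon_low_example(void)
{
        unsigned long inactive = 140000;        /* ~0.53GB of 4KB pages */
        unsigned long active = 910000;          /* ~3.47GB of 4KB pages */
        unsigned long gb = (inactive + active) >> (30 - PAGE_SHIFT);   /* 4 */
        unsigned long inactive_ratio = gb ? int_sqrt(10 * gb) : 1;     /* 6 */

        /* 140000 * 6 = 840000 < 910000, so inactive anon counts as "low" */
        return inactive * inactive_ratio < active;
}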
@@ -1230,7 +1195,8 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
1230unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 1195unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
1231 struct list_head *dst, 1196 struct list_head *dst,
1232 unsigned long *scanned, int order, 1197 unsigned long *scanned, int order,
1233 int mode, struct zone *z, 1198 isolate_mode_t mode,
1199 struct zone *z,
1234 struct mem_cgroup *mem_cont, 1200 struct mem_cgroup *mem_cont,
1235 int active, int file) 1201 int active, int file)
1236{ 1202{
@@ -1298,13 +1264,13 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
1298 * Returns the maximum amount of memory @mem can be charged with, in 1264 * Returns the maximum amount of memory @mem can be charged with, in
1299 * pages. 1265 * pages.
1300 */ 1266 */
1301static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) 1267static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1302{ 1268{
1303 unsigned long long margin; 1269 unsigned long long margin;
1304 1270
1305 margin = res_counter_margin(&mem->res); 1271 margin = res_counter_margin(&memcg->res);
1306 if (do_swap_account) 1272 if (do_swap_account)
1307 margin = min(margin, res_counter_margin(&mem->memsw)); 1273 margin = min(margin, res_counter_margin(&memcg->memsw));
1308 return margin >> PAGE_SHIFT; 1274 return margin >> PAGE_SHIFT;
1309} 1275}
1310 1276
@@ -1319,33 +1285,33 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
1319 return memcg->swappiness; 1285 return memcg->swappiness;
1320} 1286}
1321 1287
1322static void mem_cgroup_start_move(struct mem_cgroup *mem) 1288static void mem_cgroup_start_move(struct mem_cgroup *memcg)
1323{ 1289{
1324 int cpu; 1290 int cpu;
1325 1291
1326 get_online_cpus(); 1292 get_online_cpus();
1327 spin_lock(&mem->pcp_counter_lock); 1293 spin_lock(&memcg->pcp_counter_lock);
1328 for_each_online_cpu(cpu) 1294 for_each_online_cpu(cpu)
1329 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; 1295 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1;
1330 mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; 1296 memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1;
1331 spin_unlock(&mem->pcp_counter_lock); 1297 spin_unlock(&memcg->pcp_counter_lock);
1332 put_online_cpus(); 1298 put_online_cpus();
1333 1299
1334 synchronize_rcu(); 1300 synchronize_rcu();
1335} 1301}
1336 1302
1337static void mem_cgroup_end_move(struct mem_cgroup *mem) 1303static void mem_cgroup_end_move(struct mem_cgroup *memcg)
1338{ 1304{
1339 int cpu; 1305 int cpu;
1340 1306
1341 if (!mem) 1307 if (!memcg)
1342 return; 1308 return;
1343 get_online_cpus(); 1309 get_online_cpus();
1344 spin_lock(&mem->pcp_counter_lock); 1310 spin_lock(&memcg->pcp_counter_lock);
1345 for_each_online_cpu(cpu) 1311 for_each_online_cpu(cpu)
1346 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; 1312 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1;
1347 mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; 1313 memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1;
1348 spin_unlock(&mem->pcp_counter_lock); 1314 spin_unlock(&memcg->pcp_counter_lock);
1349 put_online_cpus(); 1315 put_online_cpus();
1350} 1316}
1351/* 1317/*
@@ -1360,13 +1326,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem)
1360 * waiting at hith-memory prressure caused by "move". 1326 * waiting at hith-memory prressure caused by "move".
1361 */ 1327 */
1362 1328
1363static bool mem_cgroup_stealed(struct mem_cgroup *mem) 1329static bool mem_cgroup_stealed(struct mem_cgroup *memcg)
1364{ 1330{
1365 VM_BUG_ON(!rcu_read_lock_held()); 1331 VM_BUG_ON(!rcu_read_lock_held());
1366 return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; 1332 return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0;
1367} 1333}
1368 1334
1369static bool mem_cgroup_under_move(struct mem_cgroup *mem) 1335static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
1370{ 1336{
1371 struct mem_cgroup *from; 1337 struct mem_cgroup *from;
1372 struct mem_cgroup *to; 1338 struct mem_cgroup *to;
@@ -1381,17 +1347,17 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem)
1381 if (!from) 1347 if (!from)
1382 goto unlock; 1348 goto unlock;
1383 1349
1384 ret = mem_cgroup_same_or_subtree(mem, from) 1350 ret = mem_cgroup_same_or_subtree(memcg, from)
1385 || mem_cgroup_same_or_subtree(mem, to); 1351 || mem_cgroup_same_or_subtree(memcg, to);
1386unlock: 1352unlock:
1387 spin_unlock(&mc.lock); 1353 spin_unlock(&mc.lock);
1388 return ret; 1354 return ret;
1389} 1355}
1390 1356
1391static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) 1357static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
1392{ 1358{
1393 if (mc.moving_task && current != mc.moving_task) { 1359 if (mc.moving_task && current != mc.moving_task) {
1394 if (mem_cgroup_under_move(mem)) { 1360 if (mem_cgroup_under_move(memcg)) {
1395 DEFINE_WAIT(wait); 1361 DEFINE_WAIT(wait);
1396 prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE); 1362 prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
1397 /* moving charge context might have finished. */ 1363 /* moving charge context might have finished. */
@@ -1475,12 +1441,12 @@ done:
1475 * This function returns the number of memcg under hierarchy tree. Returns 1441 * This function returns the number of memcg under hierarchy tree. Returns
1476 * 1(self count) if no children. 1442 * 1(self count) if no children.
1477 */ 1443 */
1478static int mem_cgroup_count_children(struct mem_cgroup *mem) 1444static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1479{ 1445{
1480 int num = 0; 1446 int num = 0;
1481 struct mem_cgroup *iter; 1447 struct mem_cgroup *iter;
1482 1448
1483 for_each_mem_cgroup_tree(iter, mem) 1449 for_each_mem_cgroup_tree(iter, memcg)
1484 num++; 1450 num++;
1485 return num; 1451 return num;
1486} 1452}
@@ -1510,21 +1476,21 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
1510 * that to reclaim free pages from. 1476 * that to reclaim free pages from.
1511 */ 1477 */
1512static struct mem_cgroup * 1478static struct mem_cgroup *
1513mem_cgroup_select_victim(struct mem_cgroup *root_mem) 1479mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
1514{ 1480{
1515 struct mem_cgroup *ret = NULL; 1481 struct mem_cgroup *ret = NULL;
1516 struct cgroup_subsys_state *css; 1482 struct cgroup_subsys_state *css;
1517 int nextid, found; 1483 int nextid, found;
1518 1484
1519 if (!root_mem->use_hierarchy) { 1485 if (!root_memcg->use_hierarchy) {
1520 css_get(&root_mem->css); 1486 css_get(&root_memcg->css);
1521 ret = root_mem; 1487 ret = root_memcg;
1522 } 1488 }
1523 1489
1524 while (!ret) { 1490 while (!ret) {
1525 rcu_read_lock(); 1491 rcu_read_lock();
1526 nextid = root_mem->last_scanned_child + 1; 1492 nextid = root_memcg->last_scanned_child + 1;
1527 css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css, 1493 css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
1528 &found); 1494 &found);
1529 if (css && css_tryget(css)) 1495 if (css && css_tryget(css))
1530 ret = container_of(css, struct mem_cgroup, css); 1496 ret = container_of(css, struct mem_cgroup, css);
@@ -1533,9 +1499,9 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
1533 /* Updates scanning parameter */ 1499 /* Updates scanning parameter */
1534 if (!css) { 1500 if (!css) {
1535 /* this means start scan from ID:1 */ 1501 /* this means start scan from ID:1 */
1536 root_mem->last_scanned_child = 0; 1502 root_memcg->last_scanned_child = 0;
1537 } else 1503 } else
1538 root_mem->last_scanned_child = found; 1504 root_memcg->last_scanned_child = found;
1539 } 1505 }
1540 1506
1541 return ret; 1507 return ret;
@@ -1551,14 +1517,14 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
1551 * reclaimable pages on a node. Returns true if there are any reclaimable 1517 * reclaimable pages on a node. Returns true if there are any reclaimable
1552 * pages in the node. 1518 * pages in the node.
1553 */ 1519 */
1554static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, 1520static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
1555 int nid, bool noswap) 1521 int nid, bool noswap)
1556{ 1522{
1557 if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE)) 1523 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
1558 return true; 1524 return true;
1559 if (noswap || !total_swap_pages) 1525 if (noswap || !total_swap_pages)
1560 return false; 1526 return false;
1561 if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON)) 1527 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
1562 return true; 1528 return true;
1563 return false; 1529 return false;
1564 1530
@@ -1571,29 +1537,29 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
1571 * nodes based on the zonelist. So update the list loosely once per 10 secs. 1537 * nodes based on the zonelist. So update the list loosely once per 10 secs.
1572 * 1538 *
1573 */ 1539 */
1574static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) 1540static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
1575{ 1541{
1576 int nid; 1542 int nid;
1577 /* 1543 /*
1578 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET 1544 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
1579 * pagein/pageout changes since the last update. 1545 * pagein/pageout changes since the last update.
1580 */ 1546 */
1581 if (!atomic_read(&mem->numainfo_events)) 1547 if (!atomic_read(&memcg->numainfo_events))
1582 return; 1548 return;
1583 if (atomic_inc_return(&mem->numainfo_updating) > 1) 1549 if (atomic_inc_return(&memcg->numainfo_updating) > 1)
1584 return; 1550 return;
1585 1551
1586 /* make a nodemask where this memcg uses memory from */ 1552 /* make a nodemask where this memcg uses memory from */
1587 mem->scan_nodes = node_states[N_HIGH_MEMORY]; 1553 memcg->scan_nodes = node_states[N_HIGH_MEMORY];
1588 1554
1589 for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { 1555 for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
1590 1556
1591 if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) 1557 if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
1592 node_clear(nid, mem->scan_nodes); 1558 node_clear(nid, memcg->scan_nodes);
1593 } 1559 }
1594 1560
1595 atomic_set(&mem->numainfo_events, 0); 1561 atomic_set(&memcg->numainfo_events, 0);
1596 atomic_set(&mem->numainfo_updating, 0); 1562 atomic_set(&memcg->numainfo_updating, 0);
1597} 1563}
1598 1564
1599/* 1565/*
@@ -1608,16 +1574,16 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
1608 * 1574 *
1609 * Now, we use round-robin. Better algorithm is welcomed. 1575 * Now, we use round-robin. Better algorithm is welcomed.
1610 */ 1576 */
1611int mem_cgroup_select_victim_node(struct mem_cgroup *mem) 1577int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1612{ 1578{
1613 int node; 1579 int node;
1614 1580
1615 mem_cgroup_may_update_nodemask(mem); 1581 mem_cgroup_may_update_nodemask(memcg);
1616 node = mem->last_scanned_node; 1582 node = memcg->last_scanned_node;
1617 1583
1618 node = next_node(node, mem->scan_nodes); 1584 node = next_node(node, memcg->scan_nodes);
1619 if (node == MAX_NUMNODES) 1585 if (node == MAX_NUMNODES)
1620 node = first_node(mem->scan_nodes); 1586 node = first_node(memcg->scan_nodes);
1621 /* 1587 /*
1622 * We call this when we hit limit, not when pages are added to LRU. 1588 * We call this when we hit limit, not when pages are added to LRU.
1623 * No LRU may hold pages because all pages are UNEVICTABLE or 1589 * No LRU may hold pages because all pages are UNEVICTABLE or
@@ -1627,7 +1593,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
1627 if (unlikely(node == MAX_NUMNODES)) 1593 if (unlikely(node == MAX_NUMNODES))
1628 node = numa_node_id(); 1594 node = numa_node_id();
1629 1595
1630 mem->last_scanned_node = node; 1596 memcg->last_scanned_node = node;
1631 return node; 1597 return node;
1632} 1598}
1633 1599
@@ -1637,7 +1603,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
1637 * unused nodes. But scan_nodes is lazily updated and may not cotain 1603 * unused nodes. But scan_nodes is lazily updated and may not cotain
1638 * enough new information. We need to do double check. 1604 * enough new information. We need to do double check.
1639 */ 1605 */
1640bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1606bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1641{ 1607{
1642 int nid; 1608 int nid;
1643 1609
@@ -1645,12 +1611,12 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1645 * quick check...making use of scan_node. 1611 * quick check...making use of scan_node.
1646 * We can skip unused nodes. 1612 * We can skip unused nodes.
1647 */ 1613 */
1648 if (!nodes_empty(mem->scan_nodes)) { 1614 if (!nodes_empty(memcg->scan_nodes)) {
1649 for (nid = first_node(mem->scan_nodes); 1615 for (nid = first_node(memcg->scan_nodes);
1650 nid < MAX_NUMNODES; 1616 nid < MAX_NUMNODES;
1651 nid = next_node(nid, mem->scan_nodes)) { 1617 nid = next_node(nid, memcg->scan_nodes)) {
1652 1618
1653 if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1619 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1654 return true; 1620 return true;
1655 } 1621 }
1656 } 1622 }
@@ -1658,77 +1624,39 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
1658 * Check rest of nodes. 1624 * Check rest of nodes.
1659 */ 1625 */
1660 for_each_node_state(nid, N_HIGH_MEMORY) { 1626 for_each_node_state(nid, N_HIGH_MEMORY) {
1661 if (node_isset(nid, mem->scan_nodes)) 1627 if (node_isset(nid, memcg->scan_nodes))
1662 continue; 1628 continue;
1663 if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) 1629 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1664 return true; 1630 return true;
1665 } 1631 }
1666 return false; 1632 return false;
1667} 1633}
1668 1634
1669#else 1635#else
1670int mem_cgroup_select_victim_node(struct mem_cgroup *mem) 1636int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1671{ 1637{
1672 return 0; 1638 return 0;
1673} 1639}
1674 1640
1675bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) 1641bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1676{ 1642{
1677 return test_mem_cgroup_node_reclaimable(mem, 0, noswap); 1643 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
1678} 1644}
1679#endif 1645#endif
1680 1646
1681static void __mem_cgroup_record_scanstat(unsigned long *stats,
1682 struct memcg_scanrecord *rec)
1683{
1684
1685 stats[SCAN] += rec->nr_scanned[0] + rec->nr_scanned[1];
1686 stats[SCAN_ANON] += rec->nr_scanned[0];
1687 stats[SCAN_FILE] += rec->nr_scanned[1];
1688
1689 stats[ROTATE] += rec->nr_rotated[0] + rec->nr_rotated[1];
1690 stats[ROTATE_ANON] += rec->nr_rotated[0];
1691 stats[ROTATE_FILE] += rec->nr_rotated[1];
1692
1693 stats[FREED] += rec->nr_freed[0] + rec->nr_freed[1];
1694 stats[FREED_ANON] += rec->nr_freed[0];
1695 stats[FREED_FILE] += rec->nr_freed[1];
1696
1697 stats[ELAPSED] += rec->elapsed;
1698}
1699
1700static void mem_cgroup_record_scanstat(struct memcg_scanrecord *rec)
1701{
1702 struct mem_cgroup *mem;
1703 int context = rec->context;
1704
1705 if (context >= NR_SCAN_CONTEXT)
1706 return;
1707
1708 mem = rec->mem;
1709 spin_lock(&mem->scanstat.lock);
1710 __mem_cgroup_record_scanstat(mem->scanstat.stats[context], rec);
1711 spin_unlock(&mem->scanstat.lock);
1712
1713 mem = rec->root;
1714 spin_lock(&mem->scanstat.lock);
1715 __mem_cgroup_record_scanstat(mem->scanstat.rootstats[context], rec);
1716 spin_unlock(&mem->scanstat.lock);
1717}
1718
1719/* 1647/*
1720 * Scan the hierarchy if needed to reclaim memory. We remember the last child 1648 * Scan the hierarchy if needed to reclaim memory. We remember the last child
1721 * we reclaimed from, so that we don't end up penalizing one child extensively 1649 * we reclaimed from, so that we don't end up penalizing one child extensively
1722 * based on its position in the children list. 1650 * based on its position in the children list.
1723 * 1651 *
1724 * root_mem is the original ancestor that we've been reclaim from. 1652 * root_memcg is the original ancestor that we've been reclaim from.
1725 * 1653 *
1726 * We give up and return to the caller when we visit root_mem twice. 1654 * We give up and return to the caller when we visit root_memcg twice.
1727 * (other groups can be removed while we're walking....) 1655 * (other groups can be removed while we're walking....)
1728 * 1656 *
1729 * If shrink==true, for avoiding to free too much, this returns immedieately. 1657 * If shrink==true, for avoiding to free too much, this returns immedieately.
1730 */ 1658 */
1731static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, 1659static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
1732 struct zone *zone, 1660 struct zone *zone,
1733 gfp_t gfp_mask, 1661 gfp_t gfp_mask,
1734 unsigned long reclaim_options, 1662 unsigned long reclaim_options,
@@ -1740,28 +1668,18 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1740 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; 1668 bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
1741 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; 1669 bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
1742 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; 1670 bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
1743 struct memcg_scanrecord rec;
1744 unsigned long excess; 1671 unsigned long excess;
1745 unsigned long scanned; 1672 unsigned long nr_scanned;
1746 1673
1747 excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; 1674 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
1748 1675
1749 /* If memsw_is_minimum==1, swap-out is of-no-use. */ 1676 /* If memsw_is_minimum==1, swap-out is of-no-use. */
1750 if (!check_soft && !shrink && root_mem->memsw_is_minimum) 1677 if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
1751 noswap = true; 1678 noswap = true;
1752 1679
1753 if (shrink)
1754 rec.context = SCAN_BY_SHRINK;
1755 else if (check_soft)
1756 rec.context = SCAN_BY_SYSTEM;
1757 else
1758 rec.context = SCAN_BY_LIMIT;
1759
1760 rec.root = root_mem;
1761
1762 while (1) { 1680 while (1) {
1763 victim = mem_cgroup_select_victim(root_mem); 1681 victim = mem_cgroup_select_victim(root_memcg);
1764 if (victim == root_mem) { 1682 if (victim == root_memcg) {
1765 loop++; 1683 loop++;
1766 /* 1684 /*
1767 * We are not draining per cpu cached charges during 1685 * We are not draining per cpu cached charges during
@@ -1770,7 +1688,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1770 * charges will not give any. 1688 * charges will not give any.
1771 */ 1689 */
1772 if (!check_soft && loop >= 1) 1690 if (!check_soft && loop >= 1)
1773 drain_all_stock_async(root_mem); 1691 drain_all_stock_async(root_memcg);
1774 if (loop >= 2) { 1692 if (loop >= 2) {
1775 /* 1693 /*
1776 * If we have not been able to reclaim 1694 * If we have not been able to reclaim
@@ -1799,23 +1717,14 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1799 css_put(&victim->css); 1717 css_put(&victim->css);
1800 continue; 1718 continue;
1801 } 1719 }
1802 rec.mem = victim;
1803 rec.nr_scanned[0] = 0;
1804 rec.nr_scanned[1] = 0;
1805 rec.nr_rotated[0] = 0;
1806 rec.nr_rotated[1] = 0;
1807 rec.nr_freed[0] = 0;
1808 rec.nr_freed[1] = 0;
1809 rec.elapsed = 0;
1810 /* we use swappiness of local cgroup */ 1720 /* we use swappiness of local cgroup */
1811 if (check_soft) { 1721 if (check_soft) {
1812 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask, 1722 ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
1813 noswap, zone, &rec, &scanned); 1723 noswap, zone, &nr_scanned);
1814 *total_scanned += scanned; 1724 *total_scanned += nr_scanned;
1815 } else 1725 } else
1816 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, 1726 ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
1817 noswap, &rec); 1727 noswap);
1818 mem_cgroup_record_scanstat(&rec);
1819 css_put(&victim->css); 1728 css_put(&victim->css);
1820 /* 1729 /*
1821 * At shrinking usage, we can't check we should stop here or 1730 * At shrinking usage, we can't check we should stop here or
@@ -1826,9 +1735,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1826 return ret; 1735 return ret;
1827 total += ret; 1736 total += ret;
1828 if (check_soft) { 1737 if (check_soft) {
1829 if (!res_counter_soft_limit_excess(&root_mem->res)) 1738 if (!res_counter_soft_limit_excess(&root_memcg->res))
1830 return total; 1739 return total;
1831 } else if (mem_cgroup_margin(root_mem)) 1740 } else if (mem_cgroup_margin(root_memcg))
1832 return total; 1741 return total;
1833 } 1742 }
1834 return total; 1743 return total;
@@ -1839,12 +1748,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
1839 * If someone is running, return false. 1748 * If someone is running, return false.
1840 * Has to be called with memcg_oom_lock 1749 * Has to be called with memcg_oom_lock
1841 */ 1750 */
1842static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) 1751static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
1843{ 1752{
1844 struct mem_cgroup *iter, *failed = NULL; 1753 struct mem_cgroup *iter, *failed = NULL;
1845 bool cond = true; 1754 bool cond = true;
1846 1755
1847 for_each_mem_cgroup_tree_cond(iter, mem, cond) { 1756 for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
1848 if (iter->oom_lock) { 1757 if (iter->oom_lock) {
1849 /* 1758 /*
1850 * this subtree of our hierarchy is already locked 1759 * this subtree of our hierarchy is already locked
@@ -1864,7 +1773,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
1864 * what we set up to the failing subtree 1773 * what we set up to the failing subtree
1865 */ 1774 */
1866 cond = true; 1775 cond = true;
1867 for_each_mem_cgroup_tree_cond(iter, mem, cond) { 1776 for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
1868 if (iter == failed) { 1777 if (iter == failed) {
1869 cond = false; 1778 cond = false;
1870 continue; 1779 continue;
@@ -1877,24 +1786,24 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem)
1877/* 1786/*
1878 * Has to be called with memcg_oom_lock 1787 * Has to be called with memcg_oom_lock
1879 */ 1788 */
1880static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) 1789static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
1881{ 1790{
1882 struct mem_cgroup *iter; 1791 struct mem_cgroup *iter;
1883 1792
1884 for_each_mem_cgroup_tree(iter, mem) 1793 for_each_mem_cgroup_tree(iter, memcg)
1885 iter->oom_lock = false; 1794 iter->oom_lock = false;
1886 return 0; 1795 return 0;
1887} 1796}
1888 1797
1889static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem) 1798static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
1890{ 1799{
1891 struct mem_cgroup *iter; 1800 struct mem_cgroup *iter;
1892 1801
1893 for_each_mem_cgroup_tree(iter, mem) 1802 for_each_mem_cgroup_tree(iter, memcg)
1894 atomic_inc(&iter->under_oom); 1803 atomic_inc(&iter->under_oom);
1895} 1804}
1896 1805
1897static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) 1806static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
1898{ 1807{
1899 struct mem_cgroup *iter; 1808 struct mem_cgroup *iter;
1900 1809
@@ -1903,7 +1812,7 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem)
1903 * mem_cgroup_oom_lock() may not be called. We have to use 1812 * mem_cgroup_oom_lock() may not be called. We have to use
1904 * atomic_add_unless() here. 1813 * atomic_add_unless() here.
1905 */ 1814 */
1906 for_each_mem_cgroup_tree(iter, mem) 1815 for_each_mem_cgroup_tree(iter, memcg)
1907 atomic_add_unless(&iter->under_oom, -1, 0); 1816 atomic_add_unless(&iter->under_oom, -1, 0);
1908} 1817}
1909 1818
@@ -1918,85 +1827,85 @@ struct oom_wait_info {
1918static int memcg_oom_wake_function(wait_queue_t *wait, 1827static int memcg_oom_wake_function(wait_queue_t *wait,
1919 unsigned mode, int sync, void *arg) 1828 unsigned mode, int sync, void *arg)
1920{ 1829{
1921 struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg, 1830 struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg,
1922 *oom_wait_mem; 1831 *oom_wait_memcg;
1923 struct oom_wait_info *oom_wait_info; 1832 struct oom_wait_info *oom_wait_info;
1924 1833
1925 oom_wait_info = container_of(wait, struct oom_wait_info, wait); 1834 oom_wait_info = container_of(wait, struct oom_wait_info, wait);
1926 oom_wait_mem = oom_wait_info->mem; 1835 oom_wait_memcg = oom_wait_info->mem;
1927 1836
1928 /* 1837 /*
1929 * Both of oom_wait_info->mem and wake_mem are stable under us. 1838 * Both of oom_wait_info->mem and wake_mem are stable under us.
1930 * Then we can use css_is_ancestor without taking care of RCU. 1839 * Then we can use css_is_ancestor without taking care of RCU.
1931 */ 1840 */
1932 if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem) 1841 if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
1933 && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem)) 1842 && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
1934 return 0; 1843 return 0;
1935 return autoremove_wake_function(wait, mode, sync, arg); 1844 return autoremove_wake_function(wait, mode, sync, arg);
1936} 1845}
1937 1846
1938static void memcg_wakeup_oom(struct mem_cgroup *mem) 1847static void memcg_wakeup_oom(struct mem_cgroup *memcg)
1939{ 1848{
1940 /* for filtering, pass "mem" as argument. */ 1849 /* for filtering, pass "memcg" as argument. */
1941 __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem); 1850 __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
1942} 1851}
1943 1852
1944static void memcg_oom_recover(struct mem_cgroup *mem) 1853static void memcg_oom_recover(struct mem_cgroup *memcg)
1945{ 1854{
1946 if (mem && atomic_read(&mem->under_oom)) 1855 if (memcg && atomic_read(&memcg->under_oom))
1947 memcg_wakeup_oom(mem); 1856 memcg_wakeup_oom(memcg);
1948} 1857}
1949 1858
1950/* 1859/*
1951 * try to call OOM killer. returns false if we should exit memory-reclaim loop. 1860 * try to call OOM killer. returns false if we should exit memory-reclaim loop.
1952 */ 1861 */
1953bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) 1862bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask)
1954{ 1863{
1955 struct oom_wait_info owait; 1864 struct oom_wait_info owait;
1956 bool locked, need_to_kill; 1865 bool locked, need_to_kill;
1957 1866
1958 owait.mem = mem; 1867 owait.mem = memcg;
1959 owait.wait.flags = 0; 1868 owait.wait.flags = 0;
1960 owait.wait.func = memcg_oom_wake_function; 1869 owait.wait.func = memcg_oom_wake_function;
1961 owait.wait.private = current; 1870 owait.wait.private = current;
1962 INIT_LIST_HEAD(&owait.wait.task_list); 1871 INIT_LIST_HEAD(&owait.wait.task_list);
1963 need_to_kill = true; 1872 need_to_kill = true;
1964 mem_cgroup_mark_under_oom(mem); 1873 mem_cgroup_mark_under_oom(memcg);
1965 1874
1966 /* At first, try to OOM lock hierarchy under mem.*/ 1875 /* At first, try to OOM lock hierarchy under memcg.*/
1967 spin_lock(&memcg_oom_lock); 1876 spin_lock(&memcg_oom_lock);
1968 locked = mem_cgroup_oom_lock(mem); 1877 locked = mem_cgroup_oom_lock(memcg);
1969 /* 1878 /*
1970 * Even if signal_pending(), we can't quit charge() loop without 1879 * Even if signal_pending(), we can't quit charge() loop without
1971 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL 1880 * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
1972 * under OOM is always welcomed, use TASK_KILLABLE here. 1881 * under OOM is always welcomed, use TASK_KILLABLE here.
1973 */ 1882 */
1974 prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); 1883 prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
1975 if (!locked || mem->oom_kill_disable) 1884 if (!locked || memcg->oom_kill_disable)
1976 need_to_kill = false; 1885 need_to_kill = false;
1977 if (locked) 1886 if (locked)
1978 mem_cgroup_oom_notify(mem); 1887 mem_cgroup_oom_notify(memcg);
1979 spin_unlock(&memcg_oom_lock); 1888 spin_unlock(&memcg_oom_lock);
1980 1889
1981 if (need_to_kill) { 1890 if (need_to_kill) {
1982 finish_wait(&memcg_oom_waitq, &owait.wait); 1891 finish_wait(&memcg_oom_waitq, &owait.wait);
1983 mem_cgroup_out_of_memory(mem, mask); 1892 mem_cgroup_out_of_memory(memcg, mask);
1984 } else { 1893 } else {
1985 schedule(); 1894 schedule();
1986 finish_wait(&memcg_oom_waitq, &owait.wait); 1895 finish_wait(&memcg_oom_waitq, &owait.wait);
1987 } 1896 }
1988 spin_lock(&memcg_oom_lock); 1897 spin_lock(&memcg_oom_lock);
1989 if (locked) 1898 if (locked)
1990 mem_cgroup_oom_unlock(mem); 1899 mem_cgroup_oom_unlock(memcg);
1991 memcg_wakeup_oom(mem); 1900 memcg_wakeup_oom(memcg);
1992 spin_unlock(&memcg_oom_lock); 1901 spin_unlock(&memcg_oom_lock);
1993 1902
1994 mem_cgroup_unmark_under_oom(mem); 1903 mem_cgroup_unmark_under_oom(memcg);
1995 1904
1996 if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) 1905 if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
1997 return false; 1906 return false;
1998 /* Give chance to dying process */ 1907 /* Give chance to dying process */
1999 schedule_timeout(1); 1908 schedule_timeout_uninterruptible(1);
2000 return true; 1909 return true;
2001} 1910}
2002 1911
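The memcg->oom_kill_disable test above is what the memory.oom_control file toggles from user space. A minimal sketch of disabling the per-memcg OOM killer, assuming the v1 memory controller is mounted at /sys/fs/cgroup/memory and a group called "mygroup" exists (both the path and the group name are assumptions, not part of this patch):

#include <stdio.h>

int main(void)
{
	/* assumed path; adjust to the actual mount point and group */
	const char *path = "/sys/fs/cgroup/memory/mygroup/memory.oom_control";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* sets memcg->oom_kill_disable */
	fclose(f);
	return 0;
}

With the flag set, a task that hits the limit sleeps in the TASK_KILLABLE wait above instead of calling mem_cgroup_out_of_memory().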
@@ -2027,7 +1936,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask)
2027void mem_cgroup_update_page_stat(struct page *page, 1936void mem_cgroup_update_page_stat(struct page *page,
2028 enum mem_cgroup_page_stat_item idx, int val) 1937 enum mem_cgroup_page_stat_item idx, int val)
2029{ 1938{
2030 struct mem_cgroup *mem; 1939 struct mem_cgroup *memcg;
2031 struct page_cgroup *pc = lookup_page_cgroup(page); 1940 struct page_cgroup *pc = lookup_page_cgroup(page);
2032 bool need_unlock = false; 1941 bool need_unlock = false;
2033 unsigned long uninitialized_var(flags); 1942 unsigned long uninitialized_var(flags);
@@ -2036,16 +1945,16 @@ void mem_cgroup_update_page_stat(struct page *page,
2036 return; 1945 return;
2037 1946
2038 rcu_read_lock(); 1947 rcu_read_lock();
2039 mem = pc->mem_cgroup; 1948 memcg = pc->mem_cgroup;
2040 if (unlikely(!mem || !PageCgroupUsed(pc))) 1949 if (unlikely(!memcg || !PageCgroupUsed(pc)))
2041 goto out; 1950 goto out;
2042 /* pc->mem_cgroup is unstable ? */ 1951 /* pc->mem_cgroup is unstable ? */
2043 if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { 1952 if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) {
2044  /* take a lock against charge moving to access pc->mem_cgroup */ 1953  /* take a lock against charge moving to access pc->mem_cgroup */
2045 move_lock_page_cgroup(pc, &flags); 1954 move_lock_page_cgroup(pc, &flags);
2046 need_unlock = true; 1955 need_unlock = true;
2047 mem = pc->mem_cgroup; 1956 memcg = pc->mem_cgroup;
2048 if (!mem || !PageCgroupUsed(pc)) 1957 if (!memcg || !PageCgroupUsed(pc))
2049 goto out; 1958 goto out;
2050 } 1959 }
2051 1960
@@ -2061,7 +1970,7 @@ void mem_cgroup_update_page_stat(struct page *page,
2061 BUG(); 1970 BUG();
2062 } 1971 }
2063 1972
2064 this_cpu_add(mem->stat->count[idx], val); 1973 this_cpu_add(memcg->stat->count[idx], val);
2065 1974
2066out: 1975out:
2067 if (unlikely(need_unlock)) 1976 if (unlikely(need_unlock))
@@ -2092,13 +2001,13 @@ static DEFINE_MUTEX(percpu_charge_mutex);
2092 * cgroup which is not current target, returns false. This stock will be 2001 * cgroup which is not current target, returns false. This stock will be
2093 * refilled. 2002 * refilled.
2094 */ 2003 */
2095static bool consume_stock(struct mem_cgroup *mem) 2004static bool consume_stock(struct mem_cgroup *memcg)
2096{ 2005{
2097 struct memcg_stock_pcp *stock; 2006 struct memcg_stock_pcp *stock;
2098 bool ret = true; 2007 bool ret = true;
2099 2008
2100 stock = &get_cpu_var(memcg_stock); 2009 stock = &get_cpu_var(memcg_stock);
2101 if (mem == stock->cached && stock->nr_pages) 2010 if (memcg == stock->cached && stock->nr_pages)
2102 stock->nr_pages--; 2011 stock->nr_pages--;
2103 else /* need to call res_counter_charge */ 2012 else /* need to call res_counter_charge */
2104 ret = false; 2013 ret = false;
@@ -2139,24 +2048,24 @@ static void drain_local_stock(struct work_struct *dummy)
2139 * Cache charges(val), taken from the res_counter, in the local per-cpu area. 2048 * Cache charges(val), taken from the res_counter, in the local per-cpu area.
2140 * They will be consumed by the consume_stock() function later. 2049 * They will be consumed by the consume_stock() function later.
2141 */ 2050 */
2142static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) 2051static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
2143{ 2052{
2144 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); 2053 struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
2145 2054
2146 if (stock->cached != mem) { /* reset if necessary */ 2055 if (stock->cached != memcg) { /* reset if necessary */
2147 drain_stock(stock); 2056 drain_stock(stock);
2148 stock->cached = mem; 2057 stock->cached = memcg;
2149 } 2058 }
2150 stock->nr_pages += nr_pages; 2059 stock->nr_pages += nr_pages;
2151 put_cpu_var(memcg_stock); 2060 put_cpu_var(memcg_stock);
2152} 2061}
2153 2062
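As a purely illustrative user-space model (all names below are invented; this is not the kernel code), the consume/refill pair above behaves like a small pre-charged batch that is drawn down one page at a time and topped up with whatever a batched charge left over:

#include <stdio.h>

struct stock {
	int cached;		/* id of the group the stock belongs to */
	unsigned int nr_pages;	/* pre-charged pages left in the cache */
};

/* consume one pre-charged page if the stock matches the target group */
static int consume_stock(struct stock *s, int group)
{
	if (s->cached == group && s->nr_pages) {
		s->nr_pages--;
		return 1;
	}
	return 0;	/* caller must charge the real counter instead */
}

/* park left-over charge for a group, dropping the old owner's stock first */
static void refill_stock(struct stock *s, int group, unsigned int nr)
{
	if (s->cached != group) {
		s->nr_pages = 0;	/* "drain" */
		s->cached = group;
	}
	s->nr_pages += nr;
}

int main(void)
{
	struct stock s = { 0, 0 };

	refill_stock(&s, 1, 31);	/* batch charge minus the page just used */
	printf("hit=%d left=%u\n", consume_stock(&s, 1), s.nr_pages);
	printf("hit=%d (other group)\n", consume_stock(&s, 2));
	return 0;
}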
2154/* 2063/*
2155 * Drains all per-CPU charge caches for the given root_mem, i.e. the 2064 * Drains all per-CPU charge caches for the given root_memcg, i.e. the
2156 * whole subtree of the hierarchy under it. The sync flag says whether 2065 * whole subtree of the hierarchy under it. The sync flag says whether
2157 * we should block until the work is done. 2066 * we should block until the work is done.
2158 */ 2067 */
2159static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) 2068static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
2160{ 2069{
2161 int cpu, curcpu; 2070 int cpu, curcpu;
2162 2071
@@ -2165,12 +2074,12 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
2165 curcpu = get_cpu(); 2074 curcpu = get_cpu();
2166 for_each_online_cpu(cpu) { 2075 for_each_online_cpu(cpu) {
2167 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); 2076 struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
2168 struct mem_cgroup *mem; 2077 struct mem_cgroup *memcg;
2169 2078
2170 mem = stock->cached; 2079 memcg = stock->cached;
2171 if (!mem || !stock->nr_pages) 2080 if (!memcg || !stock->nr_pages)
2172 continue; 2081 continue;
2173 if (!mem_cgroup_same_or_subtree(root_mem, mem)) 2082 if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
2174 continue; 2083 continue;
2175 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { 2084 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
2176 if (cpu == curcpu) 2085 if (cpu == curcpu)
@@ -2199,23 +2108,23 @@ out:
2199 * expects some charges will be back to res_counter later but cannot wait for 2108 * expects some charges will be back to res_counter later but cannot wait for
2200 * it. 2109 * it.
2201 */ 2110 */
2202static void drain_all_stock_async(struct mem_cgroup *root_mem) 2111static void drain_all_stock_async(struct mem_cgroup *root_memcg)
2203{ 2112{
2204 /* 2113 /*
2205 * If someone else is already draining, avoid adding more kworker runs. 2114 * If someone else is already draining, avoid adding more kworker runs.
2206 */ 2115 */
2207 if (!mutex_trylock(&percpu_charge_mutex)) 2116 if (!mutex_trylock(&percpu_charge_mutex))
2208 return; 2117 return;
2209 drain_all_stock(root_mem, false); 2118 drain_all_stock(root_memcg, false);
2210 mutex_unlock(&percpu_charge_mutex); 2119 mutex_unlock(&percpu_charge_mutex);
2211} 2120}
2212 2121
2213/* This is a synchronous drain interface. */ 2122/* This is a synchronous drain interface. */
2214static void drain_all_stock_sync(struct mem_cgroup *root_mem) 2123static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
2215{ 2124{
2216 /* called when force_empty is called */ 2125 /* called when force_empty is called */
2217 mutex_lock(&percpu_charge_mutex); 2126 mutex_lock(&percpu_charge_mutex);
2218 drain_all_stock(root_mem, true); 2127 drain_all_stock(root_memcg, true);
2219 mutex_unlock(&percpu_charge_mutex); 2128 mutex_unlock(&percpu_charge_mutex);
2220} 2129}
2221 2130
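drain_all_stock_sync() sits on the memory.force_empty path (see mem_cgroup_force_empty() further down). A hedged sketch of emptying a group from user space before removing it, with an assumed mount point and group name:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* assumed paths; the group must have no tasks left in it */
	FILE *f = fopen("/sys/fs/cgroup/memory/mygroup/memory.force_empty", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("0\n", f);	/* the write triggers mem_cgroup_force_empty() */
	fclose(f);
	if (rmdir("/sys/fs/cgroup/memory/mygroup"))
		perror("rmdir");
	return 0;
}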
@@ -2223,35 +2132,35 @@ static void drain_all_stock_sync(struct mem_cgroup *root_mem)
2223 * This function drains the percpu counter values from a dead CPU and 2132 * This function drains the percpu counter values from a dead CPU and
2224 * moves them to the local CPU. Note that this function can be preempted. 2133 * moves them to the local CPU. Note that this function can be preempted.
2225 */ 2134 */
2226static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) 2135static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
2227{ 2136{
2228 int i; 2137 int i;
2229 2138
2230 spin_lock(&mem->pcp_counter_lock); 2139 spin_lock(&memcg->pcp_counter_lock);
2231 for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { 2140 for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) {
2232 long x = per_cpu(mem->stat->count[i], cpu); 2141 long x = per_cpu(memcg->stat->count[i], cpu);
2233 2142
2234 per_cpu(mem->stat->count[i], cpu) = 0; 2143 per_cpu(memcg->stat->count[i], cpu) = 0;
2235 mem->nocpu_base.count[i] += x; 2144 memcg->nocpu_base.count[i] += x;
2236 } 2145 }
2237 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { 2146 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
2238 unsigned long x = per_cpu(mem->stat->events[i], cpu); 2147 unsigned long x = per_cpu(memcg->stat->events[i], cpu);
2239 2148
2240 per_cpu(mem->stat->events[i], cpu) = 0; 2149 per_cpu(memcg->stat->events[i], cpu) = 0;
2241 mem->nocpu_base.events[i] += x; 2150 memcg->nocpu_base.events[i] += x;
2242 } 2151 }
2243 /* need to clear ON_MOVE value, works as a kind of lock. */ 2152 /* need to clear ON_MOVE value, works as a kind of lock. */
2244 per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; 2153 per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0;
2245 spin_unlock(&mem->pcp_counter_lock); 2154 spin_unlock(&memcg->pcp_counter_lock);
2246} 2155}
2247 2156
2248static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) 2157static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu)
2249{ 2158{
2250 int idx = MEM_CGROUP_ON_MOVE; 2159 int idx = MEM_CGROUP_ON_MOVE;
2251 2160
2252 spin_lock(&mem->pcp_counter_lock); 2161 spin_lock(&memcg->pcp_counter_lock);
2253 per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; 2162 per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx];
2254 spin_unlock(&mem->pcp_counter_lock); 2163 spin_unlock(&memcg->pcp_counter_lock);
2255} 2164}
2256 2165
2257static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, 2166static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb,
@@ -2289,7 +2198,7 @@ enum {
2289 CHARGE_OOM_DIE, /* the current is killed because of OOM */ 2198 CHARGE_OOM_DIE, /* the current is killed because of OOM */
2290}; 2199};
2291 2200
2292static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, 2201static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2293 unsigned int nr_pages, bool oom_check) 2202 unsigned int nr_pages, bool oom_check)
2294{ 2203{
2295 unsigned long csize = nr_pages * PAGE_SIZE; 2204 unsigned long csize = nr_pages * PAGE_SIZE;
@@ -2298,16 +2207,16 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
2298 unsigned long flags = 0; 2207 unsigned long flags = 0;
2299 int ret; 2208 int ret;
2300 2209
2301 ret = res_counter_charge(&mem->res, csize, &fail_res); 2210 ret = res_counter_charge(&memcg->res, csize, &fail_res);
2302 2211
2303 if (likely(!ret)) { 2212 if (likely(!ret)) {
2304 if (!do_swap_account) 2213 if (!do_swap_account)
2305 return CHARGE_OK; 2214 return CHARGE_OK;
2306 ret = res_counter_charge(&mem->memsw, csize, &fail_res); 2215 ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
2307 if (likely(!ret)) 2216 if (likely(!ret))
2308 return CHARGE_OK; 2217 return CHARGE_OK;
2309 2218
2310 res_counter_uncharge(&mem->res, csize); 2219 res_counter_uncharge(&memcg->res, csize);
2311 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); 2220 mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
2312 flags |= MEM_CGROUP_RECLAIM_NOSWAP; 2221 flags |= MEM_CGROUP_RECLAIM_NOSWAP;
2313 } else 2222 } else
@@ -2365,12 +2274,12 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
2365static int __mem_cgroup_try_charge(struct mm_struct *mm, 2274static int __mem_cgroup_try_charge(struct mm_struct *mm,
2366 gfp_t gfp_mask, 2275 gfp_t gfp_mask,
2367 unsigned int nr_pages, 2276 unsigned int nr_pages,
2368 struct mem_cgroup **memcg, 2277 struct mem_cgroup **ptr,
2369 bool oom) 2278 bool oom)
2370{ 2279{
2371 unsigned int batch = max(CHARGE_BATCH, nr_pages); 2280 unsigned int batch = max(CHARGE_BATCH, nr_pages);
2372 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2281 int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2373 struct mem_cgroup *mem = NULL; 2282 struct mem_cgroup *memcg = NULL;
2374 int ret; 2283 int ret;
2375 2284
2376 /* 2285 /*
@@ -2388,17 +2297,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
2388 * thread group leader migrates. It's possible that mm is not 2297 * thread group leader migrates. It's possible that mm is not
2389 * set, if so charge the init_mm (happens for pagecache usage). 2298 * set, if so charge the init_mm (happens for pagecache usage).
2390 */ 2299 */
2391 if (!*memcg && !mm) 2300 if (!*ptr && !mm)
2392 goto bypass; 2301 goto bypass;
2393again: 2302again:
2394 if (*memcg) { /* css should be a valid one */ 2303 if (*ptr) { /* css should be a valid one */
2395 mem = *memcg; 2304 memcg = *ptr;
2396 VM_BUG_ON(css_is_removed(&mem->css)); 2305 VM_BUG_ON(css_is_removed(&memcg->css));
2397 if (mem_cgroup_is_root(mem)) 2306 if (mem_cgroup_is_root(memcg))
2398 goto done; 2307 goto done;
2399 if (nr_pages == 1 && consume_stock(mem)) 2308 if (nr_pages == 1 && consume_stock(memcg))
2400 goto done; 2309 goto done;
2401 css_get(&mem->css); 2310 css_get(&memcg->css);
2402 } else { 2311 } else {
2403 struct task_struct *p; 2312 struct task_struct *p;
2404 2313
@@ -2406,7 +2315,7 @@ again:
2406 p = rcu_dereference(mm->owner); 2315 p = rcu_dereference(mm->owner);
2407 /* 2316 /*
2408 * Because we don't have task_lock(), "p" can exit. 2317 * Because we don't have task_lock(), "p" can exit.
2409 * In that case, "mem" can point to root or p can be NULL with 2318 * In that case, "memcg" can point to root or p can be NULL with
2410 * race with swapoff. Then, we have a small risk of mis-accounting. 2319 * race with swapoff. Then, we have a small risk of mis-accounting.
2411 * But this kind of mis-accounting by race always happens because 2320 * But this kind of mis-accounting by race always happens because
2412 * we don't have cgroup_mutex(). It's overkill and we allow that 2321 * we don't have cgroup_mutex(). It's overkill and we allow that
@@ -2414,12 +2323,12 @@ again:
2414 * (*) swapoff et al. will charge against mm-struct, not against 2323 * (*) swapoff et al. will charge against mm-struct, not against
2415 * task-struct. So, mm->owner can be NULL. 2324 * task-struct. So, mm->owner can be NULL.
2416 */ 2325 */
2417 mem = mem_cgroup_from_task(p); 2326 memcg = mem_cgroup_from_task(p);
2418 if (!mem || mem_cgroup_is_root(mem)) { 2327 if (!memcg || mem_cgroup_is_root(memcg)) {
2419 rcu_read_unlock(); 2328 rcu_read_unlock();
2420 goto done; 2329 goto done;
2421 } 2330 }
2422 if (nr_pages == 1 && consume_stock(mem)) { 2331 if (nr_pages == 1 && consume_stock(memcg)) {
2423 /* 2332 /*
2424 * It seems dangerous to access memcg without css_get(). 2333 * It seems dangerous to access memcg without css_get().
2425 * But considering how consume_stock works, it's not 2334 * But considering how consume_stock works, it's not
@@ -2432,7 +2341,7 @@ again:
2432 goto done; 2341 goto done;
2433 } 2342 }
2434 /* after here, we may be blocked. we need to get refcnt */ 2343 /* after here, we may be blocked. we need to get refcnt */
2435 if (!css_tryget(&mem->css)) { 2344 if (!css_tryget(&memcg->css)) {
2436 rcu_read_unlock(); 2345 rcu_read_unlock();
2437 goto again; 2346 goto again;
2438 } 2347 }
@@ -2444,7 +2353,7 @@ again:
2444 2353
2445 /* If killed, bypass charge */ 2354 /* If killed, bypass charge */
2446 if (fatal_signal_pending(current)) { 2355 if (fatal_signal_pending(current)) {
2447 css_put(&mem->css); 2356 css_put(&memcg->css);
2448 goto bypass; 2357 goto bypass;
2449 } 2358 }
2450 2359
@@ -2454,43 +2363,43 @@ again:
2454 nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; 2363 nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
2455 } 2364 }
2456 2365
2457 ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check); 2366 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
2458 switch (ret) { 2367 switch (ret) {
2459 case CHARGE_OK: 2368 case CHARGE_OK:
2460 break; 2369 break;
2461 case CHARGE_RETRY: /* not in OOM situation but retry */ 2370 case CHARGE_RETRY: /* not in OOM situation but retry */
2462 batch = nr_pages; 2371 batch = nr_pages;
2463 css_put(&mem->css); 2372 css_put(&memcg->css);
2464 mem = NULL; 2373 memcg = NULL;
2465 goto again; 2374 goto again;
2466 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ 2375 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
2467 css_put(&mem->css); 2376 css_put(&memcg->css);
2468 goto nomem; 2377 goto nomem;
2469 case CHARGE_NOMEM: /* OOM routine works */ 2378 case CHARGE_NOMEM: /* OOM routine works */
2470 if (!oom) { 2379 if (!oom) {
2471 css_put(&mem->css); 2380 css_put(&memcg->css);
2472 goto nomem; 2381 goto nomem;
2473 } 2382 }
2474 /* If oom, we never return -ENOMEM */ 2383 /* If oom, we never return -ENOMEM */
2475 nr_oom_retries--; 2384 nr_oom_retries--;
2476 break; 2385 break;
2477 case CHARGE_OOM_DIE: /* Killed by OOM Killer */ 2386 case CHARGE_OOM_DIE: /* Killed by OOM Killer */
2478 css_put(&mem->css); 2387 css_put(&memcg->css);
2479 goto bypass; 2388 goto bypass;
2480 } 2389 }
2481 } while (ret != CHARGE_OK); 2390 } while (ret != CHARGE_OK);
2482 2391
2483 if (batch > nr_pages) 2392 if (batch > nr_pages)
2484 refill_stock(mem, batch - nr_pages); 2393 refill_stock(memcg, batch - nr_pages);
2485 css_put(&mem->css); 2394 css_put(&memcg->css);
2486done: 2395done:
2487 *memcg = mem; 2396 *ptr = memcg;
2488 return 0; 2397 return 0;
2489nomem: 2398nomem:
2490 *memcg = NULL; 2399 *ptr = NULL;
2491 return -ENOMEM; 2400 return -ENOMEM;
2492bypass: 2401bypass:
2493 *memcg = NULL; 2402 *ptr = NULL;
2494 return 0; 2403 return 0;
2495} 2404}
2496 2405
@@ -2499,15 +2408,15 @@ bypass:
2499 * This function is for that: it does the uncharge and puts the css refcnt 2408 * This function is for that: it does the uncharge and puts the css refcnt
2500 * gotten by try_charge(). 2409 * gotten by try_charge().
2501 */ 2410 */
2502static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, 2411static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2503 unsigned int nr_pages) 2412 unsigned int nr_pages)
2504{ 2413{
2505 if (!mem_cgroup_is_root(mem)) { 2414 if (!mem_cgroup_is_root(memcg)) {
2506 unsigned long bytes = nr_pages * PAGE_SIZE; 2415 unsigned long bytes = nr_pages * PAGE_SIZE;
2507 2416
2508 res_counter_uncharge(&mem->res, bytes); 2417 res_counter_uncharge(&memcg->res, bytes);
2509 if (do_swap_account) 2418 if (do_swap_account)
2510 res_counter_uncharge(&mem->memsw, bytes); 2419 res_counter_uncharge(&memcg->memsw, bytes);
2511 } 2420 }
2512} 2421}
2513 2422
@@ -2532,7 +2441,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2532 2441
2533struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) 2442struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2534{ 2443{
2535 struct mem_cgroup *mem = NULL; 2444 struct mem_cgroup *memcg = NULL;
2536 struct page_cgroup *pc; 2445 struct page_cgroup *pc;
2537 unsigned short id; 2446 unsigned short id;
2538 swp_entry_t ent; 2447 swp_entry_t ent;
@@ -2542,23 +2451,23 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2542 pc = lookup_page_cgroup(page); 2451 pc = lookup_page_cgroup(page);
2543 lock_page_cgroup(pc); 2452 lock_page_cgroup(pc);
2544 if (PageCgroupUsed(pc)) { 2453 if (PageCgroupUsed(pc)) {
2545 mem = pc->mem_cgroup; 2454 memcg = pc->mem_cgroup;
2546 if (mem && !css_tryget(&mem->css)) 2455 if (memcg && !css_tryget(&memcg->css))
2547 mem = NULL; 2456 memcg = NULL;
2548 } else if (PageSwapCache(page)) { 2457 } else if (PageSwapCache(page)) {
2549 ent.val = page_private(page); 2458 ent.val = page_private(page);
2550 id = lookup_swap_cgroup(ent); 2459 id = lookup_swap_cgroup(ent);
2551 rcu_read_lock(); 2460 rcu_read_lock();
2552 mem = mem_cgroup_lookup(id); 2461 memcg = mem_cgroup_lookup(id);
2553 if (mem && !css_tryget(&mem->css)) 2462 if (memcg && !css_tryget(&memcg->css))
2554 mem = NULL; 2463 memcg = NULL;
2555 rcu_read_unlock(); 2464 rcu_read_unlock();
2556 } 2465 }
2557 unlock_page_cgroup(pc); 2466 unlock_page_cgroup(pc);
2558 return mem; 2467 return memcg;
2559} 2468}
2560 2469
2561static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, 2470static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2562 struct page *page, 2471 struct page *page,
2563 unsigned int nr_pages, 2472 unsigned int nr_pages,
2564 struct page_cgroup *pc, 2473 struct page_cgroup *pc,
@@ -2567,14 +2476,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2567 lock_page_cgroup(pc); 2476 lock_page_cgroup(pc);
2568 if (unlikely(PageCgroupUsed(pc))) { 2477 if (unlikely(PageCgroupUsed(pc))) {
2569 unlock_page_cgroup(pc); 2478 unlock_page_cgroup(pc);
2570 __mem_cgroup_cancel_charge(mem, nr_pages); 2479 __mem_cgroup_cancel_charge(memcg, nr_pages);
2571 return; 2480 return;
2572 } 2481 }
2573 /* 2482 /*
2574 * we don't need page_cgroup_lock for tail pages, because they are not 2483 * we don't need page_cgroup_lock for tail pages, because they are not
2575 * accessed by any other context at this point. 2484 * accessed by any other context at this point.
2576 */ 2485 */
2577 pc->mem_cgroup = mem; 2486 pc->mem_cgroup = memcg;
2578 /* 2487 /*
2579 * We access a page_cgroup asynchronously without lock_page_cgroup(). 2488 * We access a page_cgroup asynchronously without lock_page_cgroup().
2580 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup 2489 * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
@@ -2597,14 +2506,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
2597 break; 2506 break;
2598 } 2507 }
2599 2508
2600 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); 2509 mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
2601 unlock_page_cgroup(pc); 2510 unlock_page_cgroup(pc);
2602 /* 2511 /*
2603 * "charge_statistics" updated event counter. Then, check it. 2512 * "charge_statistics" updated event counter. Then, check it.
2604 * Insert the ancestor (and the ancestor's ancestors) into the softlimit RB-tree 2513 * Insert the ancestor (and the ancestor's ancestors) into the softlimit RB-tree
2605 * if they exceed the softlimit. 2514 * if they exceed the softlimit.
2606 */ 2515 */
2607 memcg_check_events(mem, page); 2516 memcg_check_events(memcg, page);
2608} 2517}
2609 2518
2610#ifdef CONFIG_TRANSPARENT_HUGEPAGE 2519#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2791,7 +2700,7 @@ out:
2791static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 2700static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2792 gfp_t gfp_mask, enum charge_type ctype) 2701 gfp_t gfp_mask, enum charge_type ctype)
2793{ 2702{
2794 struct mem_cgroup *mem = NULL; 2703 struct mem_cgroup *memcg = NULL;
2795 unsigned int nr_pages = 1; 2704 unsigned int nr_pages = 1;
2796 struct page_cgroup *pc; 2705 struct page_cgroup *pc;
2797 bool oom = true; 2706 bool oom = true;
@@ -2810,11 +2719,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2810 pc = lookup_page_cgroup(page); 2719 pc = lookup_page_cgroup(page);
2811 BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ 2720 BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
2812 2721
2813 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom); 2722 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
2814 if (ret || !mem) 2723 if (ret || !memcg)
2815 return ret; 2724 return ret;
2816 2725
2817 __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype); 2726 __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
2818 return 0; 2727 return 0;
2819} 2728}
2820 2729
@@ -2843,7 +2752,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
2843 enum charge_type ctype); 2752 enum charge_type ctype);
2844 2753
2845static void 2754static void
2846__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, 2755__mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
2847 enum charge_type ctype) 2756 enum charge_type ctype)
2848{ 2757{
2849 struct page_cgroup *pc = lookup_page_cgroup(page); 2758 struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -2853,7 +2762,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
2853 * LRU. Take care of it. 2762 * LRU. Take care of it.
2854 */ 2763 */
2855 mem_cgroup_lru_del_before_commit(page); 2764 mem_cgroup_lru_del_before_commit(page);
2856 __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); 2765 __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
2857 mem_cgroup_lru_add_after_commit(page); 2766 mem_cgroup_lru_add_after_commit(page);
2858 return; 2767 return;
2859} 2768}
@@ -2861,7 +2770,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem,
2861int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 2770int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2862 gfp_t gfp_mask) 2771 gfp_t gfp_mask)
2863{ 2772{
2864 struct mem_cgroup *mem = NULL; 2773 struct mem_cgroup *memcg = NULL;
2865 int ret; 2774 int ret;
2866 2775
2867 if (mem_cgroup_disabled()) 2776 if (mem_cgroup_disabled())
@@ -2873,8 +2782,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2873 mm = &init_mm; 2782 mm = &init_mm;
2874 2783
2875 if (page_is_file_cache(page)) { 2784 if (page_is_file_cache(page)) {
2876 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true); 2785 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
2877 if (ret || !mem) 2786 if (ret || !memcg)
2878 return ret; 2787 return ret;
2879 2788
2880 /* 2789 /*
@@ -2882,15 +2791,15 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2882 * put that would remove them from the LRU list, make 2791 * put that would remove them from the LRU list, make
2883 * sure that they get relinked properly. 2792 * sure that they get relinked properly.
2884 */ 2793 */
2885 __mem_cgroup_commit_charge_lrucare(page, mem, 2794 __mem_cgroup_commit_charge_lrucare(page, memcg,
2886 MEM_CGROUP_CHARGE_TYPE_CACHE); 2795 MEM_CGROUP_CHARGE_TYPE_CACHE);
2887 return ret; 2796 return ret;
2888 } 2797 }
2889 /* shmem */ 2798 /* shmem */
2890 if (PageSwapCache(page)) { 2799 if (PageSwapCache(page)) {
2891 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); 2800 ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
2892 if (!ret) 2801 if (!ret)
2893 __mem_cgroup_commit_charge_swapin(page, mem, 2802 __mem_cgroup_commit_charge_swapin(page, memcg,
2894 MEM_CGROUP_CHARGE_TYPE_SHMEM); 2803 MEM_CGROUP_CHARGE_TYPE_SHMEM);
2895 } else 2804 } else
2896 ret = mem_cgroup_charge_common(page, mm, gfp_mask, 2805 ret = mem_cgroup_charge_common(page, mm, gfp_mask,
@@ -2909,7 +2818,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2909 struct page *page, 2818 struct page *page,
2910 gfp_t mask, struct mem_cgroup **ptr) 2819 gfp_t mask, struct mem_cgroup **ptr)
2911{ 2820{
2912 struct mem_cgroup *mem; 2821 struct mem_cgroup *memcg;
2913 int ret; 2822 int ret;
2914 2823
2915 *ptr = NULL; 2824 *ptr = NULL;
@@ -2927,12 +2836,12 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2927 */ 2836 */
2928 if (!PageSwapCache(page)) 2837 if (!PageSwapCache(page))
2929 goto charge_cur_mm; 2838 goto charge_cur_mm;
2930 mem = try_get_mem_cgroup_from_page(page); 2839 memcg = try_get_mem_cgroup_from_page(page);
2931 if (!mem) 2840 if (!memcg)
2932 goto charge_cur_mm; 2841 goto charge_cur_mm;
2933 *ptr = mem; 2842 *ptr = memcg;
2934 ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); 2843 ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
2935 css_put(&mem->css); 2844 css_put(&memcg->css);
2936 return ret; 2845 return ret;
2937charge_cur_mm: 2846charge_cur_mm:
2938 if (unlikely(!mm)) 2847 if (unlikely(!mm))
@@ -2992,16 +2901,16 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
2992 MEM_CGROUP_CHARGE_TYPE_MAPPED); 2901 MEM_CGROUP_CHARGE_TYPE_MAPPED);
2993} 2902}
2994 2903
2995void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) 2904void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
2996{ 2905{
2997 if (mem_cgroup_disabled()) 2906 if (mem_cgroup_disabled())
2998 return; 2907 return;
2999 if (!mem) 2908 if (!memcg)
3000 return; 2909 return;
3001 __mem_cgroup_cancel_charge(mem, 1); 2910 __mem_cgroup_cancel_charge(memcg, 1);
3002} 2911}
3003 2912
3004static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, 2913static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
3005 unsigned int nr_pages, 2914 unsigned int nr_pages,
3006 const enum charge_type ctype) 2915 const enum charge_type ctype)
3007{ 2916{
@@ -3019,7 +2928,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
3019 * uncharges. Then, it's ok to ignore memcg's refcnt. 2928 * uncharges. Then, it's ok to ignore memcg's refcnt.
3020 */ 2929 */
3021 if (!batch->memcg) 2930 if (!batch->memcg)
3022 batch->memcg = mem; 2931 batch->memcg = memcg;
3023 /* 2932 /*
3024 * do_batch > 0 when unmapping pages or inode invalidate/truncate. 2933 * do_batch > 0 when unmapping pages or inode invalidate/truncate.
3025 * In those cases, all pages freed continuously can be expected to be in 2934 * In those cases, all pages freed continuously can be expected to be in
@@ -3039,7 +2948,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
3039 * merge a series of uncharges into a single res_counter uncharge. 2948 * merge a series of uncharges into a single res_counter uncharge.
3040 * If not, we uncharge the res_counter one by one. 2949 * If not, we uncharge the res_counter one by one.
3041 */ 2950 */
3042 if (batch->memcg != mem) 2951 if (batch->memcg != memcg)
3043 goto direct_uncharge; 2952 goto direct_uncharge;
3044 /* remember freed charge and uncharge it later */ 2953 /* remember freed charge and uncharge it later */
3045 batch->nr_pages++; 2954 batch->nr_pages++;
@@ -3047,11 +2956,11 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
3047 batch->memsw_nr_pages++; 2956 batch->memsw_nr_pages++;
3048 return; 2957 return;
3049direct_uncharge: 2958direct_uncharge:
3050 res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE); 2959 res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
3051 if (uncharge_memsw) 2960 if (uncharge_memsw)
3052 res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE); 2961 res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
3053 if (unlikely(batch->memcg != mem)) 2962 if (unlikely(batch->memcg != memcg))
3054 memcg_oom_recover(mem); 2963 memcg_oom_recover(memcg);
3055 return; 2964 return;
3056} 2965}
3057 2966
@@ -3061,7 +2970,7 @@ direct_uncharge:
3061static struct mem_cgroup * 2970static struct mem_cgroup *
3062__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) 2971__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3063{ 2972{
3064 struct mem_cgroup *mem = NULL; 2973 struct mem_cgroup *memcg = NULL;
3065 unsigned int nr_pages = 1; 2974 unsigned int nr_pages = 1;
3066 struct page_cgroup *pc; 2975 struct page_cgroup *pc;
3067 2976
@@ -3084,7 +2993,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3084 2993
3085 lock_page_cgroup(pc); 2994 lock_page_cgroup(pc);
3086 2995
3087 mem = pc->mem_cgroup; 2996 memcg = pc->mem_cgroup;
3088 2997
3089 if (!PageCgroupUsed(pc)) 2998 if (!PageCgroupUsed(pc))
3090 goto unlock_out; 2999 goto unlock_out;
@@ -3107,7 +3016,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3107 break; 3016 break;
3108 } 3017 }
3109 3018
3110 mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages); 3019 mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages);
3111 3020
3112 ClearPageCgroupUsed(pc); 3021 ClearPageCgroupUsed(pc);
3113 /* 3022 /*
@@ -3119,18 +3028,18 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
3119 3028
3120 unlock_page_cgroup(pc); 3029 unlock_page_cgroup(pc);
3121 /* 3030 /*
3122 * even after unlock, we have mem->res.usage here and this memcg 3031 * even after unlock, we have memcg->res.usage here and this memcg
3123 * will never be freed. 3032 * will never be freed.
3124 */ 3033 */
3125 memcg_check_events(mem, page); 3034 memcg_check_events(memcg, page);
3126 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { 3035 if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
3127 mem_cgroup_swap_statistics(mem, true); 3036 mem_cgroup_swap_statistics(memcg, true);
3128 mem_cgroup_get(mem); 3037 mem_cgroup_get(memcg);
3129 } 3038 }
3130 if (!mem_cgroup_is_root(mem)) 3039 if (!mem_cgroup_is_root(memcg))
3131 mem_cgroup_do_uncharge(mem, nr_pages, ctype); 3040 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3132 3041
3133 return mem; 3042 return memcg;
3134 3043
3135unlock_out: 3044unlock_out:
3136 unlock_page_cgroup(pc); 3045 unlock_page_cgroup(pc);
@@ -3320,7 +3229,7 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3320int mem_cgroup_prepare_migration(struct page *page, 3229int mem_cgroup_prepare_migration(struct page *page,
3321 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) 3230 struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
3322{ 3231{
3323 struct mem_cgroup *mem = NULL; 3232 struct mem_cgroup *memcg = NULL;
3324 struct page_cgroup *pc; 3233 struct page_cgroup *pc;
3325 enum charge_type ctype; 3234 enum charge_type ctype;
3326 int ret = 0; 3235 int ret = 0;
@@ -3334,8 +3243,8 @@ int mem_cgroup_prepare_migration(struct page *page,
3334 pc = lookup_page_cgroup(page); 3243 pc = lookup_page_cgroup(page);
3335 lock_page_cgroup(pc); 3244 lock_page_cgroup(pc);
3336 if (PageCgroupUsed(pc)) { 3245 if (PageCgroupUsed(pc)) {
3337 mem = pc->mem_cgroup; 3246 memcg = pc->mem_cgroup;
3338 css_get(&mem->css); 3247 css_get(&memcg->css);
3339 /* 3248 /*
3340 * When migrating an anonymous page, its mapcount goes down 3249 * When migrating an anonymous page, its mapcount goes down
3341 * to 0 and uncharge() will be called. But, even if it's fully 3250 * to 0 and uncharge() will be called. But, even if it's fully
@@ -3373,12 +3282,12 @@ int mem_cgroup_prepare_migration(struct page *page,
3373 * If the page is not charged at this point, 3282 * If the page is not charged at this point,
3374 * we return here. 3283 * we return here.
3375 */ 3284 */
3376 if (!mem) 3285 if (!memcg)
3377 return 0; 3286 return 0;
3378 3287
3379 *ptr = mem; 3288 *ptr = memcg;
3380 ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); 3289 ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
3381 css_put(&mem->css);/* drop extra refcnt */ 3290 css_put(&memcg->css);/* drop extra refcnt */
3382 if (ret || *ptr == NULL) { 3291 if (ret || *ptr == NULL) {
3383 if (PageAnon(page)) { 3292 if (PageAnon(page)) {
3384 lock_page_cgroup(pc); 3293 lock_page_cgroup(pc);
@@ -3404,21 +3313,21 @@ int mem_cgroup_prepare_migration(struct page *page,
3404 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; 3313 ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
3405 else 3314 else
3406 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; 3315 ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
3407 __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); 3316 __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
3408 return ret; 3317 return ret;
3409} 3318}
3410 3319
3411/* remove redundant charge if migration failed */ 3320/* remove redundant charge if migration failed */
3412void mem_cgroup_end_migration(struct mem_cgroup *mem, 3321void mem_cgroup_end_migration(struct mem_cgroup *memcg,
3413 struct page *oldpage, struct page *newpage, bool migration_ok) 3322 struct page *oldpage, struct page *newpage, bool migration_ok)
3414{ 3323{
3415 struct page *used, *unused; 3324 struct page *used, *unused;
3416 struct page_cgroup *pc; 3325 struct page_cgroup *pc;
3417 3326
3418 if (!mem) 3327 if (!memcg)
3419 return; 3328 return;
3420 /* blocks rmdir() */ 3329 /* blocks rmdir() */
3421 cgroup_exclude_rmdir(&mem->css); 3330 cgroup_exclude_rmdir(&memcg->css);
3422 if (!migration_ok) { 3331 if (!migration_ok) {
3423 used = oldpage; 3332 used = oldpage;
3424 unused = newpage; 3333 unused = newpage;
@@ -3454,7 +3363,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
3454 * So, rmdir()->pre_destroy() can be called while we do this charge. 3363 * So, rmdir()->pre_destroy() can be called while we do this charge.
3455 * In that case, we need to call pre_destroy() again. check it here. 3364 * In that case, we need to call pre_destroy() again. check it here.
3456 */ 3365 */
3457 cgroup_release_and_wakeup_rmdir(&mem->css); 3366 cgroup_release_and_wakeup_rmdir(&memcg->css);
3458} 3367}
3459 3368
3460#ifdef CONFIG_DEBUG_VM 3369#ifdef CONFIG_DEBUG_VM
@@ -3533,7 +3442,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
3533 /* 3442 /*
3534 * Rather than hiding all this in some function, do it in an 3443 * Rather than hiding all this in some function, do it in an
3535 * open-coded manner so you can see what it really does. 3444 * open-coded manner so you can see what it really does.
3536 * We have to guarantee mem->res.limit < mem->memsw.limit. 3445 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
3537 */ 3446 */
3538 mutex_lock(&set_limit_mutex); 3447 mutex_lock(&set_limit_mutex);
3539 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); 3448 memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
@@ -3595,7 +3504,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
3595 /* 3504 /*
3596 * Rather than hiding all this in some function, do it in an 3505 * Rather than hiding all this in some function, do it in an
3597 * open-coded manner so you can see what it really does. 3506 * open-coded manner so you can see what it really does.
3598 * We have to guarantee mem->res.limit < mem->memsw.limit. 3507 * We have to guarantee memcg->res.limit < memcg->memsw.limit.
3599 */ 3508 */
3600 mutex_lock(&set_limit_mutex); 3509 mutex_lock(&set_limit_mutex);
3601 memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); 3510 memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -3733,7 +3642,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
3733 * This routine traverses the page_cgroups in the given list and drops them all. 3642 * This routine traverses the page_cgroups in the given list and drops them all.
3734 * *And* it doesn't reclaim the pages themselves, it just removes the page_cgroups. 3643 * *And* it doesn't reclaim the pages themselves, it just removes the page_cgroups.
3735 */ 3644 */
3736static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, 3645static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3737 int node, int zid, enum lru_list lru) 3646 int node, int zid, enum lru_list lru)
3738{ 3647{
3739 struct zone *zone; 3648 struct zone *zone;
@@ -3744,7 +3653,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3744 int ret = 0; 3653 int ret = 0;
3745 3654
3746 zone = &NODE_DATA(node)->node_zones[zid]; 3655 zone = &NODE_DATA(node)->node_zones[zid];
3747 mz = mem_cgroup_zoneinfo(mem, node, zid); 3656 mz = mem_cgroup_zoneinfo(memcg, node, zid);
3748 list = &mz->lists[lru]; 3657 list = &mz->lists[lru];
3749 3658
3750 loop = MEM_CGROUP_ZSTAT(mz, lru); 3659 loop = MEM_CGROUP_ZSTAT(mz, lru);
@@ -3771,7 +3680,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3771 3680
3772 page = lookup_cgroup_page(pc); 3681 page = lookup_cgroup_page(pc);
3773 3682
3774 ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); 3683 ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
3775 if (ret == -ENOMEM) 3684 if (ret == -ENOMEM)
3776 break; 3685 break;
3777 3686
@@ -3792,14 +3701,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
3792 * make the mem_cgroup's charge 0 if there is no task. 3701 * make the mem_cgroup's charge 0 if there is no task.
3793 * This enables deleting this mem_cgroup. 3702 * This enables deleting this mem_cgroup.
3794 */ 3703 */
3795static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) 3704static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
3796{ 3705{
3797 int ret; 3706 int ret;
3798 int node, zid, shrink; 3707 int node, zid, shrink;
3799 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 3708 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
3800 struct cgroup *cgrp = mem->css.cgroup; 3709 struct cgroup *cgrp = memcg->css.cgroup;
3801 3710
3802 css_get(&mem->css); 3711 css_get(&memcg->css);
3803 3712
3804 shrink = 0; 3713 shrink = 0;
3805 /* should free all ? */ 3714 /* should free all ? */
@@ -3815,14 +3724,14 @@ move_account:
3815 goto out; 3724 goto out;
3816 /* This is for putting all *used* pages on the LRU. */ 3725 /* This is for putting all *used* pages on the LRU. */
3817 lru_add_drain_all(); 3726 lru_add_drain_all();
3818 drain_all_stock_sync(mem); 3727 drain_all_stock_sync(memcg);
3819 ret = 0; 3728 ret = 0;
3820 mem_cgroup_start_move(mem); 3729 mem_cgroup_start_move(memcg);
3821 for_each_node_state(node, N_HIGH_MEMORY) { 3730 for_each_node_state(node, N_HIGH_MEMORY) {
3822 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { 3731 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
3823 enum lru_list l; 3732 enum lru_list l;
3824 for_each_lru(l) { 3733 for_each_lru(l) {
3825 ret = mem_cgroup_force_empty_list(mem, 3734 ret = mem_cgroup_force_empty_list(memcg,
3826 node, zid, l); 3735 node, zid, l);
3827 if (ret) 3736 if (ret)
3828 break; 3737 break;
@@ -3831,16 +3740,16 @@ move_account:
3831 if (ret) 3740 if (ret)
3832 break; 3741 break;
3833 } 3742 }
3834 mem_cgroup_end_move(mem); 3743 mem_cgroup_end_move(memcg);
3835 memcg_oom_recover(mem); 3744 memcg_oom_recover(memcg);
3836 /* it seems parent cgroup doesn't have enough mem */ 3745 /* it seems parent cgroup doesn't have enough mem */
3837 if (ret == -ENOMEM) 3746 if (ret == -ENOMEM)
3838 goto try_to_free; 3747 goto try_to_free;
3839 cond_resched(); 3748 cond_resched();
3840 /* "ret" should also be checked to ensure all lists are empty. */ 3749 /* "ret" should also be checked to ensure all lists are empty. */
3841 } while (mem->res.usage > 0 || ret); 3750 } while (memcg->res.usage > 0 || ret);
3842out: 3751out:
3843 css_put(&mem->css); 3752 css_put(&memcg->css);
3844 return ret; 3753 return ret;
3845 3754
3846try_to_free: 3755try_to_free:
@@ -3853,19 +3762,15 @@ try_to_free:
3853 lru_add_drain_all(); 3762 lru_add_drain_all();
3854 /* try to free all pages in this cgroup */ 3763 /* try to free all pages in this cgroup */
3855 shrink = 1; 3764 shrink = 1;
3856 while (nr_retries && mem->res.usage > 0) { 3765 while (nr_retries && memcg->res.usage > 0) {
3857 struct memcg_scanrecord rec;
3858 int progress; 3766 int progress;
3859 3767
3860 if (signal_pending(current)) { 3768 if (signal_pending(current)) {
3861 ret = -EINTR; 3769 ret = -EINTR;
3862 goto out; 3770 goto out;
3863 } 3771 }
3864 rec.context = SCAN_BY_SHRINK; 3772 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
3865 rec.mem = mem; 3773 false);
3866 rec.root = mem;
3867 progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL,
3868 false, &rec);
3869 if (!progress) { 3774 if (!progress) {
3870 nr_retries--; 3775 nr_retries--;
3871 /* maybe some writeback is necessary */ 3776 /* maybe some writeback is necessary */
@@ -3893,12 +3798,12 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3893 u64 val) 3798 u64 val)
3894{ 3799{
3895 int retval = 0; 3800 int retval = 0;
3896 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 3801 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3897 struct cgroup *parent = cont->parent; 3802 struct cgroup *parent = cont->parent;
3898 struct mem_cgroup *parent_mem = NULL; 3803 struct mem_cgroup *parent_memcg = NULL;
3899 3804
3900 if (parent) 3805 if (parent)
3901 parent_mem = mem_cgroup_from_cont(parent); 3806 parent_memcg = mem_cgroup_from_cont(parent);
3902 3807
3903 cgroup_lock(); 3808 cgroup_lock();
3904 /* 3809 /*
@@ -3909,10 +3814,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3909 * For the root cgroup, parent_mem is NULL; we allow the value to be 3814 * For the root cgroup, parent_mem is NULL; we allow the value to be
3910 * set if there are no children. 3815 * set if there are no children.
3911 */ 3816 */
3912 if ((!parent_mem || !parent_mem->use_hierarchy) && 3817 if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
3913 (val == 1 || val == 0)) { 3818 (val == 1 || val == 0)) {
3914 if (list_empty(&cont->children)) 3819 if (list_empty(&cont->children))
3915 mem->use_hierarchy = val; 3820 memcg->use_hierarchy = val;
3916 else 3821 else
3917 retval = -EBUSY; 3822 retval = -EBUSY;
3918 } else 3823 } else
@@ -3923,14 +3828,14 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3923} 3828}
3924 3829
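Because of the list_empty(&cont->children) check above, use_hierarchy can only be changed while the group has no children. A hedged user-space sketch, with assumed mount point and group names:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>

int main(void)
{
	FILE *f;

	/* assumed mount point and group names */
	mkdir("/sys/fs/cgroup/memory/parent", 0755);
	f = fopen("/sys/fs/cgroup/memory/parent/memory.use_hierarchy", "w");
	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", f);	/* must happen before any child group exists */
	fclose(f);
	mkdir("/sys/fs/cgroup/memory/parent/child", 0755);	/* child inherits it */
	return 0;
}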
3925 3830
3926static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, 3831static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
3927 enum mem_cgroup_stat_index idx) 3832 enum mem_cgroup_stat_index idx)
3928{ 3833{
3929 struct mem_cgroup *iter; 3834 struct mem_cgroup *iter;
3930 long val = 0; 3835 long val = 0;
3931 3836
3932 /* Per-cpu values can be negative, use a signed accumulator */ 3837 /* Per-cpu values can be negative, use a signed accumulator */
3933 for_each_mem_cgroup_tree(iter, mem) 3838 for_each_mem_cgroup_tree(iter, memcg)
3934 val += mem_cgroup_read_stat(iter, idx); 3839 val += mem_cgroup_read_stat(iter, idx);
3935 3840
3936 if (val < 0) /* race ? */ 3841 if (val < 0) /* race ? */
@@ -3938,29 +3843,29 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem,
3938 return val; 3843 return val;
3939} 3844}
3940 3845
3941static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) 3846static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
3942{ 3847{
3943 u64 val; 3848 u64 val;
3944 3849
3945 if (!mem_cgroup_is_root(mem)) { 3850 if (!mem_cgroup_is_root(memcg)) {
3946 if (!swap) 3851 if (!swap)
3947 return res_counter_read_u64(&mem->res, RES_USAGE); 3852 return res_counter_read_u64(&memcg->res, RES_USAGE);
3948 else 3853 else
3949 return res_counter_read_u64(&mem->memsw, RES_USAGE); 3854 return res_counter_read_u64(&memcg->memsw, RES_USAGE);
3950 } 3855 }
3951 3856
3952 val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE); 3857 val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
3953 val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS); 3858 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
3954 3859
3955 if (swap) 3860 if (swap)
3956 val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT); 3861 val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
3957 3862
3958 return val << PAGE_SHIFT; 3863 return val << PAGE_SHIFT;
3959} 3864}
3960 3865
3961static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 3866static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3962{ 3867{
3963 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 3868 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3964 u64 val; 3869 u64 val;
3965 int type, name; 3870 int type, name;
3966 3871
@@ -3969,15 +3874,15 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3969 switch (type) { 3874 switch (type) {
3970 case _MEM: 3875 case _MEM:
3971 if (name == RES_USAGE) 3876 if (name == RES_USAGE)
3972 val = mem_cgroup_usage(mem, false); 3877 val = mem_cgroup_usage(memcg, false);
3973 else 3878 else
3974 val = res_counter_read_u64(&mem->res, name); 3879 val = res_counter_read_u64(&memcg->res, name);
3975 break; 3880 break;
3976 case _MEMSWAP: 3881 case _MEMSWAP:
3977 if (name == RES_USAGE) 3882 if (name == RES_USAGE)
3978 val = mem_cgroup_usage(mem, true); 3883 val = mem_cgroup_usage(memcg, true);
3979 else 3884 else
3980 val = res_counter_read_u64(&mem->memsw, name); 3885 val = res_counter_read_u64(&memcg->memsw, name);
3981 break; 3886 break;
3982 default: 3887 default:
3983 BUG(); 3888 BUG();
@@ -4065,24 +3970,24 @@ out:
4065 3970
4066static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 3971static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
4067{ 3972{
4068 struct mem_cgroup *mem; 3973 struct mem_cgroup *memcg;
4069 int type, name; 3974 int type, name;
4070 3975
4071 mem = mem_cgroup_from_cont(cont); 3976 memcg = mem_cgroup_from_cont(cont);
4072 type = MEMFILE_TYPE(event); 3977 type = MEMFILE_TYPE(event);
4073 name = MEMFILE_ATTR(event); 3978 name = MEMFILE_ATTR(event);
4074 switch (name) { 3979 switch (name) {
4075 case RES_MAX_USAGE: 3980 case RES_MAX_USAGE:
4076 if (type == _MEM) 3981 if (type == _MEM)
4077 res_counter_reset_max(&mem->res); 3982 res_counter_reset_max(&memcg->res);
4078 else 3983 else
4079 res_counter_reset_max(&mem->memsw); 3984 res_counter_reset_max(&memcg->memsw);
4080 break; 3985 break;
4081 case RES_FAILCNT: 3986 case RES_FAILCNT:
4082 if (type == _MEM) 3987 if (type == _MEM)
4083 res_counter_reset_failcnt(&mem->res); 3988 res_counter_reset_failcnt(&memcg->res);
4084 else 3989 else
4085 res_counter_reset_failcnt(&mem->memsw); 3990 res_counter_reset_failcnt(&memcg->memsw);
4086 break; 3991 break;
4087 } 3992 }
4088 3993
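mem_cgroup_reset() is reached by writing to the corresponding max_usage/failcnt files; the written value itself is ignored. A hedged sketch with assumed paths:

#include <stdio.h>

/* writing anything to these files resets the counter; paths are assumed */
static void reset(const char *path)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return;
	}
	fputs("0\n", f);
	fclose(f);
}

int main(void)
{
	reset("/sys/fs/cgroup/memory/mygroup/memory.max_usage_in_bytes");
	reset("/sys/fs/cgroup/memory/mygroup/memory.failcnt");
	return 0;
}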
@@ -4099,7 +4004,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
4099static int mem_cgroup_move_charge_write(struct cgroup *cgrp, 4004static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4100 struct cftype *cft, u64 val) 4005 struct cftype *cft, u64 val)
4101{ 4006{
4102 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4007 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4103 4008
4104 if (val >= (1 << NR_MOVE_TYPE)) 4009 if (val >= (1 << NR_MOVE_TYPE))
4105 return -EINVAL; 4010 return -EINVAL;
@@ -4109,7 +4014,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4109 * inconsistent. 4014 * inconsistent.
4110 */ 4015 */
4111 cgroup_lock(); 4016 cgroup_lock();
4112 mem->move_charge_at_immigrate = val; 4017 memcg->move_charge_at_immigrate = val;
4113 cgroup_unlock(); 4018 cgroup_unlock();
4114 4019
4115 return 0; 4020 return 0;
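memory.move_charge_at_immigrate, handled above, takes a bitmask: bit 0 moves anonymous charges and bit 1 moves file charges when a task migrates into the group. A hedged sketch with an assumed path:

#include <stdio.h>

int main(void)
{
	/* assumed path; value is a bitmask: 0x1 anon pages, 0x2 file pages */
	FILE *f = fopen("/sys/fs/cgroup/memory/mygroup/memory.move_charge_at_immigrate", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("3\n", f);	/* move both anon and file charges on task migration */
	fclose(f);
	return 0;
}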
@@ -4166,49 +4071,49 @@ struct {
4166 4071
4167 4072
4168static void 4073static void
4169mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) 4074mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4170{ 4075{
4171 s64 val; 4076 s64 val;
4172 4077
4173 /* per cpu stat */ 4078 /* per cpu stat */
4174 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); 4079 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE);
4175 s->stat[MCS_CACHE] += val * PAGE_SIZE; 4080 s->stat[MCS_CACHE] += val * PAGE_SIZE;
4176 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); 4081 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS);
4177 s->stat[MCS_RSS] += val * PAGE_SIZE; 4082 s->stat[MCS_RSS] += val * PAGE_SIZE;
4178 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); 4083 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
4179 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; 4084 s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE;
4180 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN); 4085 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN);
4181 s->stat[MCS_PGPGIN] += val; 4086 s->stat[MCS_PGPGIN] += val;
4182 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT); 4087 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT);
4183 s->stat[MCS_PGPGOUT] += val; 4088 s->stat[MCS_PGPGOUT] += val;
4184 if (do_swap_account) { 4089 if (do_swap_account) {
4185 val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); 4090 val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT);
4186 s->stat[MCS_SWAP] += val * PAGE_SIZE; 4091 s->stat[MCS_SWAP] += val * PAGE_SIZE;
4187 } 4092 }
4188 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT); 4093 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT);
4189 s->stat[MCS_PGFAULT] += val; 4094 s->stat[MCS_PGFAULT] += val;
4190 val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); 4095 val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT);
4191 s->stat[MCS_PGMAJFAULT] += val; 4096 s->stat[MCS_PGMAJFAULT] += val;
4192 4097
4193 /* per zone stat */ 4098 /* per zone stat */
4194 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); 4099 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON));
4195 s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; 4100 s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE;
4196 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON)); 4101 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON));
4197 s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; 4102 s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE;
4198 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE)); 4103 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE));
4199 s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; 4104 s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE;
4200 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE)); 4105 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE));
4201 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; 4106 s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
4202 val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE)); 4107 val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
4203 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; 4108 s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
4204} 4109}
4205 4110
4206static void 4111static void
4207mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) 4112mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s)
4208{ 4113{
4209 struct mem_cgroup *iter; 4114 struct mem_cgroup *iter;
4210 4115
4211 for_each_mem_cgroup_tree(iter, mem) 4116 for_each_mem_cgroup_tree(iter, memcg)
4212 mem_cgroup_get_local_stat(iter, s); 4117 mem_cgroup_get_local_stat(iter, s);
4213} 4118}
4214 4119
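These helpers feed memory.stat: the local counters come from mem_cgroup_get_local_stat() and the "total_*" lines are the hierarchical sums built by mem_cgroup_get_total_stat(). A hedged sketch of dumping the file, with an assumed path:

#include <stdio.h>

int main(void)
{
	char line[256];
	/* assumed path */
	FILE *f = fopen("/sys/fs/cgroup/memory/mygroup/memory.stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* prints e.g. "cache ...", "rss ...", then the hierarchical "total_*" lines */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}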
@@ -4294,8 +4199,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
4294 } 4199 }
4295 4200
4296#ifdef CONFIG_DEBUG_VM 4201#ifdef CONFIG_DEBUG_VM
4297 cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL));
4298
4299 { 4202 {
4300 int nid, zid; 4203 int nid, zid;
4301 struct mem_cgroup_per_zone *mz; 4204 struct mem_cgroup_per_zone *mz;
@@ -4432,20 +4335,20 @@ static int compare_thresholds(const void *a, const void *b)
4432 return _a->threshold - _b->threshold; 4335 return _a->threshold - _b->threshold;
4433} 4336}
4434 4337
4435static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) 4338static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
4436{ 4339{
4437 struct mem_cgroup_eventfd_list *ev; 4340 struct mem_cgroup_eventfd_list *ev;
4438 4341
4439 list_for_each_entry(ev, &mem->oom_notify, list) 4342 list_for_each_entry(ev, &memcg->oom_notify, list)
4440 eventfd_signal(ev->eventfd, 1); 4343 eventfd_signal(ev->eventfd, 1);
4441 return 0; 4344 return 0;
4442} 4345}
4443 4346
4444static void mem_cgroup_oom_notify(struct mem_cgroup *mem) 4347static void mem_cgroup_oom_notify(struct mem_cgroup *memcg)
4445{ 4348{
4446 struct mem_cgroup *iter; 4349 struct mem_cgroup *iter;
4447 4350
4448 for_each_mem_cgroup_tree(iter, mem) 4351 for_each_mem_cgroup_tree(iter, memcg)
4449 mem_cgroup_oom_notify_cb(iter); 4352 mem_cgroup_oom_notify_cb(iter);
4450} 4353}
4451 4354
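The eventfd_signal() calls above fire notifications that user space registers through cgroup.event_control. A hedged sketch of waiting for an OOM event, with assumed paths and minimal error handling:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
	char buf[64];
	uint64_t hits;
	/* assumed paths; both files must belong to the same group */
	int efd = eventfd(0, 0);
	int ofd = open("/sys/fs/cgroup/memory/mygroup/memory.oom_control", O_RDONLY);
	int cfd = open("/sys/fs/cgroup/memory/mygroup/cgroup.event_control", O_WRONLY);

	if (efd < 0 || ofd < 0 || cfd < 0) {
		perror("setup");
		return 1;
	}
	snprintf(buf, sizeof(buf), "%d %d", efd, ofd);
	if (write(cfd, buf, strlen(buf)) < 0) {	/* "<eventfd> <oom_control fd>" */
		perror("register");
		return 1;
	}
	if (read(efd, &hits, sizeof(hits)) == sizeof(hits))	/* blocks until OOM */
		printf("oom events: %llu\n", (unsigned long long)hits);
	return 0;
}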
@@ -4635,7 +4538,7 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
4635static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, 4538static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4636 struct cftype *cft, struct eventfd_ctx *eventfd) 4539 struct cftype *cft, struct eventfd_ctx *eventfd)
4637{ 4540{
4638 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4541 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4639 struct mem_cgroup_eventfd_list *ev, *tmp; 4542 struct mem_cgroup_eventfd_list *ev, *tmp;
4640 int type = MEMFILE_TYPE(cft->private); 4543 int type = MEMFILE_TYPE(cft->private);
4641 4544
@@ -4643,7 +4546,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4643 4546
4644 spin_lock(&memcg_oom_lock); 4547 spin_lock(&memcg_oom_lock);
4645 4548
4646 list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) { 4549 list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) {
4647 if (ev->eventfd == eventfd) { 4550 if (ev->eventfd == eventfd) {
4648 list_del(&ev->list); 4551 list_del(&ev->list);
4649 kfree(ev); 4552 kfree(ev);
@@ -4656,11 +4559,11 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4656static int mem_cgroup_oom_control_read(struct cgroup *cgrp, 4559static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
4657 struct cftype *cft, struct cgroup_map_cb *cb) 4560 struct cftype *cft, struct cgroup_map_cb *cb)
4658{ 4561{
4659 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4562 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4660 4563
4661 cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable); 4564 cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable);
4662 4565
4663 if (atomic_read(&mem->under_oom)) 4566 if (atomic_read(&memcg->under_oom))
4664 cb->fill(cb, "under_oom", 1); 4567 cb->fill(cb, "under_oom", 1);
4665 else 4568 else
4666 cb->fill(cb, "under_oom", 0); 4569 cb->fill(cb, "under_oom", 0);
@@ -4670,7 +4573,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
4670static int mem_cgroup_oom_control_write(struct cgroup *cgrp, 4573static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4671 struct cftype *cft, u64 val) 4574 struct cftype *cft, u64 val)
4672{ 4575{
4673 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); 4576 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
4674 struct mem_cgroup *parent; 4577 struct mem_cgroup *parent;
4675 4578
4676 /* cannot set to root cgroup and only 0 and 1 are allowed */ 4579 /* cannot set to root cgroup and only 0 and 1 are allowed */
@@ -4682,13 +4585,13 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4682 cgroup_lock(); 4585 cgroup_lock();
4683 /* oom-kill-disable is a flag for subhierarchy. */ 4586 /* oom-kill-disable is a flag for subhierarchy. */
4684 if ((parent->use_hierarchy) || 4587 if ((parent->use_hierarchy) ||
4685 (mem->use_hierarchy && !list_empty(&cgrp->children))) { 4588 (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
4686 cgroup_unlock(); 4589 cgroup_unlock();
4687 return -EINVAL; 4590 return -EINVAL;
4688 } 4591 }
4689 mem->oom_kill_disable = val; 4592 memcg->oom_kill_disable = val;
4690 if (!val) 4593 if (!val)
4691 memcg_oom_recover(mem); 4594 memcg_oom_recover(memcg);
4692 cgroup_unlock(); 4595 cgroup_unlock();
4693 return 0; 4596 return 0;
4694} 4597}
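For reference, the oom_kill_disable flag set above is toggled from userspace by writing to memory.oom_control, and reading the file returns the key/value lines produced by the cb->fill() calls in mem_cgroup_oom_control_read(). A rough sketch, again assuming a hypothetical group "mygrp"; note the write is rejected with -EINVAL on the root cgroup or when the hierarchy checks above fail:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/memory/mygrp/memory.oom_control";
	char buf[128];
	ssize_t n;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1)	/* oom_kill_disable = 1 */
		return 1;
	close(fd);

	fd = open(path, O_RDONLY);
	/* prints roughly "oom_kill_disable 1" and "under_oom 0|1" */
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);
	}
	close(fd);
	return 0;
}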
@@ -4709,54 +4612,6 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4709} 4612}
4710#endif /* CONFIG_NUMA */ 4613#endif /* CONFIG_NUMA */
4711 4614
4712static int mem_cgroup_vmscan_stat_read(struct cgroup *cgrp,
4713 struct cftype *cft,
4714 struct cgroup_map_cb *cb)
4715{
4716 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4717 char string[64];
4718 int i;
4719
4720 for (i = 0; i < NR_SCANSTATS; i++) {
4721 strcpy(string, scanstat_string[i]);
4722 strcat(string, SCANSTAT_WORD_LIMIT);
4723 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_LIMIT][i]);
4724 }
4725
4726 for (i = 0; i < NR_SCANSTATS; i++) {
4727 strcpy(string, scanstat_string[i]);
4728 strcat(string, SCANSTAT_WORD_SYSTEM);
4729 cb->fill(cb, string, mem->scanstat.stats[SCAN_BY_SYSTEM][i]);
4730 }
4731
4732 for (i = 0; i < NR_SCANSTATS; i++) {
4733 strcpy(string, scanstat_string[i]);
4734 strcat(string, SCANSTAT_WORD_LIMIT);
4735 strcat(string, SCANSTAT_WORD_HIERARCHY);
4736 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_LIMIT][i]);
4737 }
4738 for (i = 0; i < NR_SCANSTATS; i++) {
4739 strcpy(string, scanstat_string[i]);
4740 strcat(string, SCANSTAT_WORD_SYSTEM);
4741 strcat(string, SCANSTAT_WORD_HIERARCHY);
4742 cb->fill(cb, string, mem->scanstat.rootstats[SCAN_BY_SYSTEM][i]);
4743 }
4744 return 0;
4745}
4746
4747static int mem_cgroup_reset_vmscan_stat(struct cgroup *cgrp,
4748 unsigned int event)
4749{
4750 struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
4751
4752 spin_lock(&mem->scanstat.lock);
4753 memset(&mem->scanstat.stats, 0, sizeof(mem->scanstat.stats));
4754 memset(&mem->scanstat.rootstats, 0, sizeof(mem->scanstat.rootstats));
4755 spin_unlock(&mem->scanstat.lock);
4756 return 0;
4757}
4758
4759
4760static struct cftype mem_cgroup_files[] = { 4615static struct cftype mem_cgroup_files[] = {
4761 { 4616 {
4762 .name = "usage_in_bytes", 4617 .name = "usage_in_bytes",
@@ -4827,11 +4682,6 @@ static struct cftype mem_cgroup_files[] = {
4827 .mode = S_IRUGO, 4682 .mode = S_IRUGO,
4828 }, 4683 },
4829#endif 4684#endif
4830 {
4831 .name = "vmscan_stat",
4832 .read_map = mem_cgroup_vmscan_stat_read,
4833 .trigger = mem_cgroup_reset_vmscan_stat,
4834 },
4835}; 4685};
4836 4686
4837#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4687#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -4877,7 +4727,7 @@ static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
4877} 4727}
4878#endif 4728#endif
4879 4729
4880static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 4730static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4881{ 4731{
4882 struct mem_cgroup_per_node *pn; 4732 struct mem_cgroup_per_node *pn;
4883 struct mem_cgroup_per_zone *mz; 4733 struct mem_cgroup_per_zone *mz;
@@ -4897,21 +4747,21 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
4897 if (!pn) 4747 if (!pn)
4898 return 1; 4748 return 1;
4899 4749
4900 mem->info.nodeinfo[node] = pn;
4901 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 4750 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
4902 mz = &pn->zoneinfo[zone]; 4751 mz = &pn->zoneinfo[zone];
4903 for_each_lru(l) 4752 for_each_lru(l)
4904 INIT_LIST_HEAD(&mz->lists[l]); 4753 INIT_LIST_HEAD(&mz->lists[l]);
4905 mz->usage_in_excess = 0; 4754 mz->usage_in_excess = 0;
4906 mz->on_tree = false; 4755 mz->on_tree = false;
4907 mz->mem = mem; 4756 mz->mem = memcg;
4908 } 4757 }
4758 memcg->info.nodeinfo[node] = pn;
4909 return 0; 4759 return 0;
4910} 4760}
4911 4761
4912static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 4762static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4913{ 4763{
4914 kfree(mem->info.nodeinfo[node]); 4764 kfree(memcg->info.nodeinfo[node]);
4915} 4765}
4916 4766
4917static struct mem_cgroup *mem_cgroup_alloc(void) 4767static struct mem_cgroup *mem_cgroup_alloc(void)
@@ -4953,51 +4803,51 @@ out_free:
4953 * Removal of cgroup itself succeeds regardless of refs from swap. 4803 * Removal of cgroup itself succeeds regardless of refs from swap.
4954 */ 4804 */
4955 4805
4956static void __mem_cgroup_free(struct mem_cgroup *mem) 4806static void __mem_cgroup_free(struct mem_cgroup *memcg)
4957{ 4807{
4958 int node; 4808 int node;
4959 4809
4960 mem_cgroup_remove_from_trees(mem); 4810 mem_cgroup_remove_from_trees(memcg);
4961 free_css_id(&mem_cgroup_subsys, &mem->css); 4811 free_css_id(&mem_cgroup_subsys, &memcg->css);
4962 4812
4963 for_each_node_state(node, N_POSSIBLE) 4813 for_each_node_state(node, N_POSSIBLE)
4964 free_mem_cgroup_per_zone_info(mem, node); 4814 free_mem_cgroup_per_zone_info(memcg, node);
4965 4815
4966 free_percpu(mem->stat); 4816 free_percpu(memcg->stat);
4967 if (sizeof(struct mem_cgroup) < PAGE_SIZE) 4817 if (sizeof(struct mem_cgroup) < PAGE_SIZE)
4968 kfree(mem); 4818 kfree(memcg);
4969 else 4819 else
4970 vfree(mem); 4820 vfree(memcg);
4971} 4821}
4972 4822
4973static void mem_cgroup_get(struct mem_cgroup *mem) 4823static void mem_cgroup_get(struct mem_cgroup *memcg)
4974{ 4824{
4975 atomic_inc(&mem->refcnt); 4825 atomic_inc(&memcg->refcnt);
4976} 4826}
4977 4827
4978static void __mem_cgroup_put(struct mem_cgroup *mem, int count) 4828static void __mem_cgroup_put(struct mem_cgroup *memcg, int count)
4979{ 4829{
4980 if (atomic_sub_and_test(count, &mem->refcnt)) { 4830 if (atomic_sub_and_test(count, &memcg->refcnt)) {
4981 struct mem_cgroup *parent = parent_mem_cgroup(mem); 4831 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
4982 __mem_cgroup_free(mem); 4832 __mem_cgroup_free(memcg);
4983 if (parent) 4833 if (parent)
4984 mem_cgroup_put(parent); 4834 mem_cgroup_put(parent);
4985 } 4835 }
4986} 4836}
4987 4837
4988static void mem_cgroup_put(struct mem_cgroup *mem) 4838static void mem_cgroup_put(struct mem_cgroup *memcg)
4989{ 4839{
4990 __mem_cgroup_put(mem, 1); 4840 __mem_cgroup_put(memcg, 1);
4991} 4841}
4992 4842
4993/* 4843/*
4994 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. 4844 * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
4995 */ 4845 */
4996static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) 4846static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
4997{ 4847{
4998 if (!mem->res.parent) 4848 if (!memcg->res.parent)
4999 return NULL; 4849 return NULL;
5000 return mem_cgroup_from_res_counter(mem->res.parent, res); 4850 return mem_cgroup_from_res_counter(memcg->res.parent, res);
5001} 4851}
5002 4852
5003#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4853#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
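The refcounting above cascades: when __mem_cgroup_put() drops the last reference on a memcg it frees it and then puts the reference that memcg held on its parent (see parent_mem_cgroup()), which may free the parent in turn. A simplified stand-alone illustration of that release pattern, using toy types rather than kernel code and a plain loop in place of the recursive put:

#include <stdatomic.h>
#include <stdlib.h>

struct node {
	atomic_int refcnt;
	struct node *parent;	/* reference held on the parent while alive */
};

void node_put(struct node *n)
{
	while (n) {
		struct node *parent = n->parent;

		if (atomic_fetch_sub(&n->refcnt, 1) != 1)
			break;		/* still referenced elsewhere */
		free(n);		/* last reference: release this node ... */
		n = parent;		/* ... then drop the ref it held upward */
	}
}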
@@ -5040,16 +4890,16 @@ static int mem_cgroup_soft_limit_tree_init(void)
5040static struct cgroup_subsys_state * __ref 4890static struct cgroup_subsys_state * __ref
5041mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 4891mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5042{ 4892{
5043 struct mem_cgroup *mem, *parent; 4893 struct mem_cgroup *memcg, *parent;
5044 long error = -ENOMEM; 4894 long error = -ENOMEM;
5045 int node; 4895 int node;
5046 4896
5047 mem = mem_cgroup_alloc(); 4897 memcg = mem_cgroup_alloc();
5048 if (!mem) 4898 if (!memcg)
5049 return ERR_PTR(error); 4899 return ERR_PTR(error);
5050 4900
5051 for_each_node_state(node, N_POSSIBLE) 4901 for_each_node_state(node, N_POSSIBLE)
5052 if (alloc_mem_cgroup_per_zone_info(mem, node)) 4902 if (alloc_mem_cgroup_per_zone_info(memcg, node))
5053 goto free_out; 4903 goto free_out;
5054 4904
5055 /* root ? */ 4905 /* root ? */
@@ -5057,7 +4907,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5057 int cpu; 4907 int cpu;
5058 enable_swap_cgroup(); 4908 enable_swap_cgroup();
5059 parent = NULL; 4909 parent = NULL;
5060 root_mem_cgroup = mem; 4910 root_mem_cgroup = memcg;
5061 if (mem_cgroup_soft_limit_tree_init()) 4911 if (mem_cgroup_soft_limit_tree_init())
5062 goto free_out; 4912 goto free_out;
5063 for_each_possible_cpu(cpu) { 4913 for_each_possible_cpu(cpu) {
@@ -5068,13 +4918,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5068 hotcpu_notifier(memcg_cpu_hotplug_callback, 0); 4918 hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
5069 } else { 4919 } else {
5070 parent = mem_cgroup_from_cont(cont->parent); 4920 parent = mem_cgroup_from_cont(cont->parent);
5071 mem->use_hierarchy = parent->use_hierarchy; 4921 memcg->use_hierarchy = parent->use_hierarchy;
5072 mem->oom_kill_disable = parent->oom_kill_disable; 4922 memcg->oom_kill_disable = parent->oom_kill_disable;
5073 } 4923 }
5074 4924
5075 if (parent && parent->use_hierarchy) { 4925 if (parent && parent->use_hierarchy) {
5076 res_counter_init(&mem->res, &parent->res); 4926 res_counter_init(&memcg->res, &parent->res);
5077 res_counter_init(&mem->memsw, &parent->memsw); 4927 res_counter_init(&memcg->memsw, &parent->memsw);
5078 /* 4928 /*
5079 * We increment refcnt of the parent to ensure that we can 4929 * We increment refcnt of the parent to ensure that we can
5080 * safely access it on res_counter_charge/uncharge. 4930 * safely access it on res_counter_charge/uncharge.
@@ -5083,22 +4933,21 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
5083 */ 4933 */
5084 mem_cgroup_get(parent); 4934 mem_cgroup_get(parent);
5085 } else { 4935 } else {
5086 res_counter_init(&mem->res, NULL); 4936 res_counter_init(&memcg->res, NULL);
5087 res_counter_init(&mem->memsw, NULL); 4937 res_counter_init(&memcg->memsw, NULL);
5088 } 4938 }
5089 mem->last_scanned_child = 0; 4939 memcg->last_scanned_child = 0;
5090 mem->last_scanned_node = MAX_NUMNODES; 4940 memcg->last_scanned_node = MAX_NUMNODES;
5091 INIT_LIST_HEAD(&mem->oom_notify); 4941 INIT_LIST_HEAD(&memcg->oom_notify);
5092 4942
5093 if (parent) 4943 if (parent)
5094 mem->swappiness = mem_cgroup_swappiness(parent); 4944 memcg->swappiness = mem_cgroup_swappiness(parent);
5095 atomic_set(&mem->refcnt, 1); 4945 atomic_set(&memcg->refcnt, 1);
5096 mem->move_charge_at_immigrate = 0; 4946 memcg->move_charge_at_immigrate = 0;
5097 mutex_init(&mem->thresholds_lock); 4947 mutex_init(&memcg->thresholds_lock);
5098 spin_lock_init(&mem->scanstat.lock); 4948 return &memcg->css;
5099 return &mem->css;
5100free_out: 4949free_out:
5101 __mem_cgroup_free(mem); 4950 __mem_cgroup_free(memcg);
5102 root_mem_cgroup = NULL; 4951 root_mem_cgroup = NULL;
5103 return ERR_PTR(error); 4952 return ERR_PTR(error);
5104} 4953}
@@ -5106,17 +4955,17 @@ free_out:
5106static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, 4955static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
5107 struct cgroup *cont) 4956 struct cgroup *cont)
5108{ 4957{
5109 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 4958 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5110 4959
5111 return mem_cgroup_force_empty(mem, false); 4960 return mem_cgroup_force_empty(memcg, false);
5112} 4961}
5113 4962
5114static void mem_cgroup_destroy(struct cgroup_subsys *ss, 4963static void mem_cgroup_destroy(struct cgroup_subsys *ss,
5115 struct cgroup *cont) 4964 struct cgroup *cont)
5116{ 4965{
5117 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 4966 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5118 4967
5119 mem_cgroup_put(mem); 4968 mem_cgroup_put(memcg);
5120} 4969}
5121 4970
5122static int mem_cgroup_populate(struct cgroup_subsys *ss, 4971static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -5139,9 +4988,9 @@ static int mem_cgroup_do_precharge(unsigned long count)
5139{ 4988{
5140 int ret = 0; 4989 int ret = 0;
5141 int batch_count = PRECHARGE_COUNT_AT_ONCE; 4990 int batch_count = PRECHARGE_COUNT_AT_ONCE;
5142 struct mem_cgroup *mem = mc.to; 4991 struct mem_cgroup *memcg = mc.to;
5143 4992
5144 if (mem_cgroup_is_root(mem)) { 4993 if (mem_cgroup_is_root(memcg)) {
5145 mc.precharge += count; 4994 mc.precharge += count;
5146 /* we don't need css_get for root */ 4995 /* we don't need css_get for root */
5147 return ret; 4996 return ret;
@@ -5150,16 +4999,16 @@ static int mem_cgroup_do_precharge(unsigned long count)
5150 if (count > 1) { 4999 if (count > 1) {
5151 struct res_counter *dummy; 5000 struct res_counter *dummy;
5152 /* 5001 /*
5153 * "mem" cannot be under rmdir() because we've already checked 5002 * "memcg" cannot be under rmdir() because we've already checked
5154 * by cgroup_lock_live_cgroup() that it is not removed and we 5003 * by cgroup_lock_live_cgroup() that it is not removed and we
5155 * are still under the same cgroup_mutex. So we can postpone 5004 * are still under the same cgroup_mutex. So we can postpone
5156 * css_get(). 5005 * css_get().
5157 */ 5006 */
5158 if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) 5007 if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
5159 goto one_by_one; 5008 goto one_by_one;
5160 if (do_swap_account && res_counter_charge(&mem->memsw, 5009 if (do_swap_account && res_counter_charge(&memcg->memsw,
5161 PAGE_SIZE * count, &dummy)) { 5010 PAGE_SIZE * count, &dummy)) {
5162 res_counter_uncharge(&mem->res, PAGE_SIZE * count); 5011 res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
5163 goto one_by_one; 5012 goto one_by_one;
5164 } 5013 }
5165 mc.precharge += count; 5014 mc.precharge += count;
@@ -5176,8 +5025,9 @@ one_by_one:
5176 batch_count = PRECHARGE_COUNT_AT_ONCE; 5025 batch_count = PRECHARGE_COUNT_AT_ONCE;
5177 cond_resched(); 5026 cond_resched();
5178 } 5027 }
5179 ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false); 5028 ret = __mem_cgroup_try_charge(NULL,
5180 if (ret || !mem) 5029 GFP_KERNEL, 1, &memcg, false);
5030 if (ret || !memcg)
5181 /* mem_cgroup_clear_mc() will do uncharge later */ 5031 /* mem_cgroup_clear_mc() will do uncharge later */
5182 return -ENOMEM; 5032 return -ENOMEM;
5183 mc.precharge++; 5033 mc.precharge++;
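mem_cgroup_do_precharge() first tries to reserve the whole batch in a single res_counter_charge() against res (and memsw when swap accounting is enabled), unwinding the res charge if the memsw charge fails, and only then falls back to charging page by page. A toy stand-alone sketch of that reserve-or-roll-back step; struct counter and counter_charge()/counter_uncharge() are illustrative stand-ins for the res_counter API, not the real interface:

#define PAGE_SIZE 4096UL

struct counter { unsigned long usage, limit; };

int counter_charge(struct counter *c, unsigned long bytes)
{
	if (c->usage + bytes > c->limit)
		return -1;		/* over limit: charge refused */
	c->usage += bytes;
	return 0;
}

void counter_uncharge(struct counter *c, unsigned long bytes)
{
	c->usage -= bytes;
}

/* Reserve 'count' pages against both counters at once, undoing the first
 * charge if the second fails, so the caller can fall back to one-by-one. */
int precharge_batch(struct counter *res, struct counter *memsw,
		    unsigned long count)
{
	if (counter_charge(res, PAGE_SIZE * count))
		return -1;
	if (memsw && counter_charge(memsw, PAGE_SIZE * count)) {
		counter_uncharge(res, PAGE_SIZE * count);
		return -1;
	}
	return 0;
}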
@@ -5451,13 +5301,13 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5451 struct task_struct *p) 5301 struct task_struct *p)
5452{ 5302{
5453 int ret = 0; 5303 int ret = 0;
5454 struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup); 5304 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
5455 5305
5456 if (mem->move_charge_at_immigrate) { 5306 if (memcg->move_charge_at_immigrate) {
5457 struct mm_struct *mm; 5307 struct mm_struct *mm;
5458 struct mem_cgroup *from = mem_cgroup_from_task(p); 5308 struct mem_cgroup *from = mem_cgroup_from_task(p);
5459 5309
5460 VM_BUG_ON(from == mem); 5310 VM_BUG_ON(from == memcg);
5461 5311
5462 mm = get_task_mm(p); 5312 mm = get_task_mm(p);
5463 if (!mm) 5313 if (!mm)
@@ -5472,7 +5322,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
5472 mem_cgroup_start_move(from); 5322 mem_cgroup_start_move(from);
5473 spin_lock(&mc.lock); 5323 spin_lock(&mc.lock);
5474 mc.from = from; 5324 mc.from = from;
5475 mc.to = mem; 5325 mc.to = memcg;
5476 spin_unlock(&mc.lock); 5326 spin_unlock(&mc.lock);
5477 /* We set mc.moving_task later */ 5327 /* We set mc.moving_task later */
5478 5328