aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/memcontrol.h36
-rw-r--r--mm/memcontrol.c155
-rw-r--r--mm/memory.c12
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/swapfile.c6
5 files changed, 170 insertions, 41 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fbe14d39521..c592f315cd02 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,8 +27,17 @@ struct mm_struct;
27 27
28#ifdef CONFIG_CGROUP_MEM_RES_CTLR 28#ifdef CONFIG_CGROUP_MEM_RES_CTLR
29 29
30extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, 30extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
31 gfp_t gfp_mask); 31 gfp_t gfp_mask);
32extern int mem_cgroup_charge_migrate_fixup(struct page *page,
33 struct mm_struct *mm, gfp_t gfp_mask);
34/* for swap handling */
35extern int mem_cgroup_try_charge(struct mm_struct *mm,
36 gfp_t gfp_mask, struct mem_cgroup **ptr);
37extern void mem_cgroup_commit_charge_swapin(struct page *page,
38 struct mem_cgroup *ptr);
39extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
40
32extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 41extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
33 gfp_t gfp_mask); 42 gfp_t gfp_mask);
34extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru); 43extern void mem_cgroup_move_lists(struct page *page, enum lru_list lru);
@@ -71,7 +80,9 @@ extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone,
71 80
72 81
73#else /* CONFIG_CGROUP_MEM_RES_CTLR */ 82#else /* CONFIG_CGROUP_MEM_RES_CTLR */
74static inline int mem_cgroup_charge(struct page *page, 83struct mem_cgroup;
84
85static inline int mem_cgroup_newpage_charge(struct page *page,
75 struct mm_struct *mm, gfp_t gfp_mask) 86 struct mm_struct *mm, gfp_t gfp_mask)
76{ 87{
77 return 0; 88 return 0;
@@ -83,6 +94,27 @@ static inline int mem_cgroup_cache_charge(struct page *page,
83 return 0; 94 return 0;
84} 95}
85 96
97static inline int mem_cgroup_charge_migrate_fixup(struct page *page,
98 struct mm_struct *mm, gfp_t gfp_mask)
99{
100 return 0;
101}
102
103static inline int mem_cgroup_try_charge(struct mm_struct *mm,
104 gfp_t gfp_mask, struct mem_cgroup **ptr)
105{
106 return 0;
107}
108
109static inline void mem_cgroup_commit_charge_swapin(struct page *page,
110 struct mem_cgroup *ptr)
111{
112}
113
114static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
115{
116}
117
86static inline void mem_cgroup_uncharge_page(struct page *page) 118static inline void mem_cgroup_uncharge_page(struct page *page)
87{ 119{
88} 120}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51ee96545579..f568b1964551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -467,35 +467,31 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
467 return nr_taken; 467 return nr_taken;
468} 468}
469 469
470/* 470
471 * Charge the memory controller for page usage. 471/**
472 * Return 472 * mem_cgroup_try_charge - get charge of PAGE_SIZE.
473 * 0 if the charge was successful 473 * @mm: an mm_struct which is charged against. (when *memcg is NULL)
474 * < 0 if the cgroup is over its limit 474 * @gfp_mask: gfp_mask for reclaim.
475 * @memcg: a pointer to memory cgroup which is charged against.
476 *
477 * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated
478 * memory cgroup from @mm is got and stored in *memcg.
479 *
480 * Returns 0 if success. -ENOMEM at failure.
475 */ 481 */
476static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, 482
477 gfp_t gfp_mask, enum charge_type ctype, 483int mem_cgroup_try_charge(struct mm_struct *mm,
478 struct mem_cgroup *memcg) 484 gfp_t gfp_mask, struct mem_cgroup **memcg)
479{ 485{
480 struct mem_cgroup *mem; 486 struct mem_cgroup *mem;
481 struct page_cgroup *pc; 487 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
482 unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
483 struct mem_cgroup_per_zone *mz;
484 unsigned long flags;
485
486 pc = lookup_page_cgroup(page);
487 /* can happen at boot */
488 if (unlikely(!pc))
489 return 0;
490 prefetchw(pc);
491 /* 488 /*
492 * We always charge the cgroup the mm_struct belongs to. 489 * We always charge the cgroup the mm_struct belongs to.
493 * The mm_struct's mem_cgroup changes on task migration if the 490 * The mm_struct's mem_cgroup changes on task migration if the
494 * thread group leader migrates. It's possible that mm is not 491 * thread group leader migrates. It's possible that mm is not
495 * set, if so charge the init_mm (happens for pagecache usage). 492 * set, if so charge the init_mm (happens for pagecache usage).
496 */ 493 */
497 494 if (likely(!*memcg)) {
498 if (likely(!memcg)) {
499 rcu_read_lock(); 495 rcu_read_lock();
500 mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); 496 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
501 if (unlikely(!mem)) { 497 if (unlikely(!mem)) {
@@ -506,15 +502,17 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
506 * For every charge from the cgroup, increment reference count 502 * For every charge from the cgroup, increment reference count
507 */ 503 */
508 css_get(&mem->css); 504 css_get(&mem->css);
505 *memcg = mem;
509 rcu_read_unlock(); 506 rcu_read_unlock();
510 } else { 507 } else {
511 mem = memcg; 508 mem = *memcg;
512 css_get(&memcg->css); 509 css_get(&mem->css);
513 } 510 }
514 511
512
515 while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) { 513 while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
516 if (!(gfp_mask & __GFP_WAIT)) 514 if (!(gfp_mask & __GFP_WAIT))
517 goto out; 515 goto nomem;
518 516
519 if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) 517 if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
520 continue; 518 continue;
@@ -531,18 +529,37 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
531 529
532 if (!nr_retries--) { 530 if (!nr_retries--) {
533 mem_cgroup_out_of_memory(mem, gfp_mask); 531 mem_cgroup_out_of_memory(mem, gfp_mask);
534 goto out; 532 goto nomem;
535 } 533 }
536 } 534 }
535 return 0;
536nomem:
537 css_put(&mem->css);
538 return -ENOMEM;
539}
540
541/*
542 * commit a charge got by mem_cgroup_try_charge() and makes page_cgroup to be
543 * USED state. If already USED, uncharge and return.
544 */
545
546static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
547 struct page_cgroup *pc,
548 enum charge_type ctype)
549{
550 struct mem_cgroup_per_zone *mz;
551 unsigned long flags;
537 552
553 /* try_charge() can return NULL to *memcg, taking care of it. */
554 if (!mem)
555 return;
538 556
539 lock_page_cgroup(pc); 557 lock_page_cgroup(pc);
540 if (unlikely(PageCgroupUsed(pc))) { 558 if (unlikely(PageCgroupUsed(pc))) {
541 unlock_page_cgroup(pc); 559 unlock_page_cgroup(pc);
542 res_counter_uncharge(&mem->res, PAGE_SIZE); 560 res_counter_uncharge(&mem->res, PAGE_SIZE);
543 css_put(&mem->css); 561 css_put(&mem->css);
544 562 return;
545 goto done;
546 } 563 }
547 pc->mem_cgroup = mem; 564 pc->mem_cgroup = mem;
548 /* 565 /*
@@ -557,15 +574,39 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
557 __mem_cgroup_add_list(mz, pc); 574 __mem_cgroup_add_list(mz, pc);
558 spin_unlock_irqrestore(&mz->lru_lock, flags); 575 spin_unlock_irqrestore(&mz->lru_lock, flags);
559 unlock_page_cgroup(pc); 576 unlock_page_cgroup(pc);
577}
560 578
561done: 579/*
580 * Charge the memory controller for page usage.
581 * Return
582 * 0 if the charge was successful
583 * < 0 if the cgroup is over its limit
584 */
585static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
586 gfp_t gfp_mask, enum charge_type ctype,
587 struct mem_cgroup *memcg)
588{
589 struct mem_cgroup *mem;
590 struct page_cgroup *pc;
591 int ret;
592
593 pc = lookup_page_cgroup(page);
594 /* can happen at boot */
595 if (unlikely(!pc))
596 return 0;
597 prefetchw(pc);
598
599 mem = memcg;
600 ret = mem_cgroup_try_charge(mm, gfp_mask, &mem);
601 if (ret)
602 return ret;
603
604 __mem_cgroup_commit_charge(mem, pc, ctype);
562 return 0; 605 return 0;
563out:
564 css_put(&mem->css);
565 return -ENOMEM;
566} 606}
567 607
568int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) 608int mem_cgroup_newpage_charge(struct page *page,
609 struct mm_struct *mm, gfp_t gfp_mask)
569{ 610{
570 if (mem_cgroup_subsys.disabled) 611 if (mem_cgroup_subsys.disabled)
571 return 0; 612 return 0;
@@ -586,6 +627,34 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
586 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); 627 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
587} 628}
588 629
630/*
631 * same as mem_cgroup_newpage_charge(), now.
632 * But what we assume is different from newpage, and this is special case.
633 * treat this in special function. easy for maintenance.
634 */
635
636int mem_cgroup_charge_migrate_fixup(struct page *page,
637 struct mm_struct *mm, gfp_t gfp_mask)
638{
639 if (mem_cgroup_subsys.disabled)
640 return 0;
641
642 if (PageCompound(page))
643 return 0;
644
645 if (page_mapped(page) || (page->mapping && !PageAnon(page)))
646 return 0;
647
648 if (unlikely(!mm))
649 mm = &init_mm;
650
651 return mem_cgroup_charge_common(page, mm, gfp_mask,
652 MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
653}
654
655
656
657
589int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 658int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
590 gfp_t gfp_mask) 659 gfp_t gfp_mask)
591{ 660{
@@ -628,6 +697,30 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
628 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); 697 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
629} 698}
630 699
700
701void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
702{
703 struct page_cgroup *pc;
704
705 if (mem_cgroup_subsys.disabled)
706 return;
707 if (!ptr)
708 return;
709 pc = lookup_page_cgroup(page);
710 __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
711}
712
713void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
714{
715 if (mem_cgroup_subsys.disabled)
716 return;
717 if (!mem)
718 return;
719 res_counter_uncharge(&mem->res, PAGE_SIZE);
720 css_put(&mem->css);
721}
722
723
631/* 724/*
632 * uncharge if !page_mapped(page) 725 * uncharge if !page_mapped(page)
633 */ 726 */
diff --git a/mm/memory.c b/mm/memory.c
index 3f8fa06b963b..7f210f160990 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2000,7 +2000,7 @@ gotten:
2000 cow_user_page(new_page, old_page, address, vma); 2000 cow_user_page(new_page, old_page, address, vma);
2001 __SetPageUptodate(new_page); 2001 __SetPageUptodate(new_page);
2002 2002
2003 if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) 2003 if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
2004 goto oom_free_new; 2004 goto oom_free_new;
2005 2005
2006 /* 2006 /*
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2392 struct page *page; 2392 struct page *page;
2393 swp_entry_t entry; 2393 swp_entry_t entry;
2394 pte_t pte; 2394 pte_t pte;
2395 struct mem_cgroup *ptr = NULL;
2395 int ret = 0; 2396 int ret = 0;
2396 2397
2397 if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) 2398 if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2430 lock_page(page); 2431 lock_page(page);
2431 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2432 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2432 2433
2433 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2434 if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr) == -ENOMEM) {
2434 ret = VM_FAULT_OOM; 2435 ret = VM_FAULT_OOM;
2435 unlock_page(page); 2436 unlock_page(page);
2436 goto out; 2437 goto out;
@@ -2460,6 +2461,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2460 flush_icache_page(vma, page); 2461 flush_icache_page(vma, page);
2461 set_pte_at(mm, address, page_table, pte); 2462 set_pte_at(mm, address, page_table, pte);
2462 page_add_anon_rmap(page, vma, address); 2463 page_add_anon_rmap(page, vma, address);
2464 mem_cgroup_commit_charge_swapin(page, ptr);
2463 2465
2464 swap_free(entry); 2466 swap_free(entry);
2465 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) 2467 if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2480,7 +2482,7 @@ unlock:
2480out: 2482out:
2481 return ret; 2483 return ret;
2482out_nomap: 2484out_nomap:
2483 mem_cgroup_uncharge_page(page); 2485 mem_cgroup_cancel_charge_swapin(ptr);
2484 pte_unmap_unlock(page_table, ptl); 2486 pte_unmap_unlock(page_table, ptl);
2485 unlock_page(page); 2487 unlock_page(page);
2486 page_cache_release(page); 2488 page_cache_release(page);
@@ -2510,7 +2512,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2510 goto oom; 2512 goto oom;
2511 __SetPageUptodate(page); 2513 __SetPageUptodate(page);
2512 2514
2513 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) 2515 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))
2514 goto oom_free_page; 2516 goto oom_free_page;
2515 2517
2516 entry = mk_pte(page, vma->vm_page_prot); 2518 entry = mk_pte(page, vma->vm_page_prot);
@@ -2601,7 +2603,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2601 ret = VM_FAULT_OOM; 2603 ret = VM_FAULT_OOM;
2602 goto out; 2604 goto out;
2603 } 2605 }
2604 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { 2606 if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
2605 ret = VM_FAULT_OOM; 2607 ret = VM_FAULT_OOM;
2606 page_cache_release(page); 2608 page_cache_release(page);
2607 goto out; 2609 goto out;
diff --git a/mm/migrate.c b/mm/migrate.c
index 55373983c9c6..246dcb973ae7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -133,7 +133,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
133 * be reliable, and this charge can actually fail: oh well, we don't 133 * be reliable, and this charge can actually fail: oh well, we don't
134 * make the situation any worse by proceeding as if it had succeeded. 134 * make the situation any worse by proceeding as if it had succeeded.
135 */ 135 */
136 mem_cgroup_charge(new, mm, GFP_ATOMIC); 136 mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC);
137 137
138 get_page(new); 138 get_page(new);
139 pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); 139 pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eec5ca758a23..fb926efb5167 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -690,17 +690,18 @@ unsigned int count_swap_pages(int type, int free)
690static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, 690static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
691 unsigned long addr, swp_entry_t entry, struct page *page) 691 unsigned long addr, swp_entry_t entry, struct page *page)
692{ 692{
693 struct mem_cgroup *ptr = NULL;
693 spinlock_t *ptl; 694 spinlock_t *ptl;
694 pte_t *pte; 695 pte_t *pte;
695 int ret = 1; 696 int ret = 1;
696 697
697 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) 698 if (mem_cgroup_try_charge(vma->vm_mm, GFP_KERNEL, &ptr))
698 ret = -ENOMEM; 699 ret = -ENOMEM;
699 700
700 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 701 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
701 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { 702 if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
702 if (ret > 0) 703 if (ret > 0)
703 mem_cgroup_uncharge_page(page); 704 mem_cgroup_cancel_charge_swapin(ptr);
704 ret = 0; 705 ret = 0;
705 goto out; 706 goto out;
706 } 707 }
@@ -710,6 +711,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
710 set_pte_at(vma->vm_mm, addr, pte, 711 set_pte_at(vma->vm_mm, addr, pte,
711 pte_mkold(mk_pte(page, vma->vm_page_prot))); 712 pte_mkold(mk_pte(page, vma->vm_page_prot)));
712 page_add_anon_rmap(page, vma, addr); 713 page_add_anon_rmap(page, vma, addr);
714 mem_cgroup_commit_charge_swapin(page, ptr);
713 swap_free(entry); 715 swap_free(entry);
714 /* 716 /*
715 * Move the page to the active list so it is not 717 * Move the page to the active list so it is not