author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:07:48 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-08 11:31:04 -0500
commit	7a81b88cb53e335ff7d019e6398c95792c817d93 (patch)
tree	6ebca4d509a541ac707e10f9369916549e90c0ad /mm/memcontrol.c
parent	0b82ac37b889ec881b645860da3775118effb3ca (diff)
memcg: introduce charge-commit-cancel style of functions
There is a small race in do_swap_page().  When the page being swapped in
is charged, its mapcount can be greater than 0.  But, at the same time,
some other process sharing it can call unmap, drop the mapcount from 1 to
0, and uncharge the page.

	CPUA					CPUB
						mapcount == 1.
	(1) charge if mapcount == 0
						zap_pte_range()
						(2) mapcount 1 => 0.
						(3) uncharge(). (success)
	(4) set page's rmap()
	    mapcount 0 => 1

Then, this swap page's account is leaked.

To fix this, I added a new interface:

  - charge
	account to res_counter by PAGE_SIZE and try to free pages if necessary.
  - commit
	register page_cgroup and add to LRU if necessary.
  - cancel
	uncharge PAGE_SIZE because of do_swap_page() failure.

	CPUA
	(1) charge (always)
	(2) set page's rmap (mapcount > 0)
	(3) commit the charge (or not) after set_pte().

This protocol uses the PCG_USED bit on page_cgroup to avoid over-accounting.
The usual mem_cgroup_charge_common() does charge -> commit in one step.

This patch also adds the following functions to clarify all charges:

  - mem_cgroup_newpage_charge()
	replacement for mem_cgroup_charge(), called against newly allocated
	anonymous pages.

  - mem_cgroup_charge_migrate_fixup()
	called only from remove_migration_ptes().  We'll have to rewrite this
	later (this patch just keeps the old behavior); this function will be
	removed by an additional patch to make migration clearer.

This is good for clarifying "what we do".

Then, we have the following 4 charge points:

  - newpage
  - swap-in
  - add-to-cache
  - migration

[akpm@linux-foundation.org: add missing inline directives to stubs]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
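For illustration only (not part of this patch, whose diff below is limited to
mm/memcontrol.c): a rough sketch of how a caller such as do_swap_page() is
expected to drive the charge-commit-cancel protocol.  install_pte_or_fail() is
a hypothetical stand-in for the pte/rmap setup that can fail; the real caller
conversion lives in mm/memory.c and is not shown here.

/*
 * Sketch of a swap-in caller using the new interface.  Locking and the
 * real fault-handling details are omitted; install_pte_or_fail() is a
 * hypothetical helper representing the pte/rmap setup step.
 */
static int swapin_charge_sketch(struct mm_struct *mm, struct page *page,
				gfp_t gfp_mask)
{
	struct mem_cgroup *ptr = NULL;

	/* (1) charge: always reserve PAGE_SIZE against the memcg first */
	if (mem_cgroup_try_charge(mm, gfp_mask, &ptr))
		return -ENOMEM;

	if (install_pte_or_fail(mm, page)) {
		/* failure path: give back the charge taken in (1) */
		mem_cgroup_cancel_charge_swapin(ptr);
		return -ENOMEM;
	}

	/*
	 * (2) rmap is now set; (3) commit marks the page_cgroup USED,
	 * or uncharges again if another CPU already marked it USED.
	 */
	mem_cgroup_commit_charge_swapin(page, ptr);
	return 0;
}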
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	155
1 files changed, 124 insertions, 31 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51ee96545579..f568b1964551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -467,35 +467,31 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 	return nr_taken;
 }
 
-/*
- * Charge the memory controller for page usage.
- * Return
- * 0 if the charge was successful
- * < 0 if the cgroup is over its limit
+
+/**
+ * mem_cgroup_try_charge - get charge of PAGE_SIZE.
+ * @mm: an mm_struct which is charged against. (when *memcg is NULL)
+ * @gfp_mask: gfp_mask for reclaim.
+ * @memcg: a pointer to memory cgroup which is charged against.
+ *
+ * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated
+ * memory cgroup from @mm is got and stored in *memcg.
+ *
+ * Returns 0 if success. -ENOMEM at failure.
  */
-static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask, enum charge_type ctype,
-				struct mem_cgroup *memcg)
+
+int mem_cgroup_try_charge(struct mm_struct *mm,
+			gfp_t gfp_mask, struct mem_cgroup **memcg)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc;
-	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct mem_cgroup_per_zone *mz;
-	unsigned long flags;
-
-	pc = lookup_page_cgroup(page);
-	/* can happen at boot */
-	if (unlikely(!pc))
-		return 0;
-	prefetchw(pc);
+	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 	/*
 	 * We always charge the cgroup the mm_struct belongs to.
 	 * The mm_struct's mem_cgroup changes on task migration if the
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-
-	if (likely(!memcg)) {
+	if (likely(!*memcg)) {
 		rcu_read_lock();
 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!mem)) {
@@ -506,15 +502,17 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		 * For every charge from the cgroup, increment reference count
 		 */
 		css_get(&mem->css);
+		*memcg = mem;
 		rcu_read_unlock();
 	} else {
-		mem = memcg;
-		css_get(&memcg->css);
+		mem = *memcg;
+		css_get(&mem->css);
 	}
 
+
 	while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) {
 		if (!(gfp_mask & __GFP_WAIT))
-			goto out;
+			goto nomem;
 
 		if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
 			continue;
@@ -531,18 +529,37 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 
 		if (!nr_retries--) {
 			mem_cgroup_out_of_memory(mem, gfp_mask);
-			goto out;
+			goto nomem;
 		}
 	}
+	return 0;
+nomem:
+	css_put(&mem->css);
+	return -ENOMEM;
+}
+
+/*
+ * commit a charge got by mem_cgroup_try_charge() and makes page_cgroup to be
+ * USED state. If already USED, uncharge and return.
+ */
+
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				     struct page_cgroup *pc,
+				     enum charge_type ctype)
+{
+	struct mem_cgroup_per_zone *mz;
+	unsigned long flags;
 
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
 
 	lock_page_cgroup(pc);
 	if (unlikely(PageCgroupUsed(pc))) {
 		unlock_page_cgroup(pc);
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
-
-		goto done;
+		return;
 	}
 	pc->mem_cgroup = mem;
 	/*
@@ -557,15 +574,39 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	__mem_cgroup_add_list(mz, pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	unlock_page_cgroup(pc);
+}
 
-done:
+/*
+ * Charge the memory controller for page usage.
+ * Return
+ * 0 if the charge was successful
+ * < 0 if the cgroup is over its limit
+ */
+static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask, enum charge_type ctype,
+				struct mem_cgroup *memcg)
+{
+	struct mem_cgroup *mem;
+	struct page_cgroup *pc;
+	int ret;
+
+	pc = lookup_page_cgroup(page);
+	/* can happen at boot */
+	if (unlikely(!pc))
+		return 0;
+	prefetchw(pc);
+
+	mem = memcg;
+	ret = mem_cgroup_try_charge(mm, gfp_mask, &mem);
+	if (ret)
+		return ret;
+
+	__mem_cgroup_commit_charge(mem, pc, ctype);
 	return 0;
-out:
-	css_put(&mem->css);
-	return -ENOMEM;
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+int mem_cgroup_newpage_charge(struct page *page,
+			struct mm_struct *mm, gfp_t gfp_mask)
 {
 	if (mem_cgroup_subsys.disabled)
 		return 0;
@@ -586,6 +627,34 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
 }
 
+/*
+ * same as mem_cgroup_newpage_charge(), now.
+ * But what we assume is different from newpage, and this is special case.
+ * treat this in special function. easy for maintenance.
+ */
+
+int mem_cgroup_charge_migrate_fixup(struct page *page,
+				struct mm_struct *mm, gfp_t gfp_mask)
+{
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+
+	if (PageCompound(page))
+		return 0;
+
+	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
+		return 0;
+
+	if (unlikely(!mm))
+		mm = &init_mm;
+
+	return mem_cgroup_charge_common(page, mm, gfp_mask,
+				MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL);
+}
+
+
+
+
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
@@ -628,6 +697,30 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
+
+void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_subsys.disabled)
+		return;
+	if (!ptr)
+		return;
+	pc = lookup_page_cgroup(page);
+	__mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED);
+}
+
+void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem)
+{
+	if (mem_cgroup_subsys.disabled)
+		return;
+	if (!mem)
+		return;
+	res_counter_uncharge(&mem->res, PAGE_SIZE);
+	css_put(&mem->css);
+}
+
+
 /*
  * uncharge if !page_mapped(page)
  */