Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 155 |
-rw-r--r-- | mm/memory.c     |  12 |
-rw-r--r-- | mm/migrate.c    |   2 |
-rw-r--r-- | mm/swapfile.c   |   6 |
4 files changed, 136 insertions, 39 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 51ee96545579..f568b1964551 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -467,35 +467,31 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
467 | return nr_taken; | 467 | return nr_taken; |
468 | } | 468 | } |
469 | 469 | ||
470 | /* | 470 | |
471 | * Charge the memory controller for page usage. | 471 | /** |
472 | * Return | 472 | * mem_cgroup_try_charge - get charge of PAGE_SIZE. |
473 | * 0 if the charge was successful | 473 | * @mm: an mm_struct which is charged against. (when *memcg is NULL) |
474 | * < 0 if the cgroup is over its limit | 474 | * @gfp_mask: gfp_mask for reclaim. |
475 | * @memcg: a pointer to memory cgroup which is charged against. | ||
476 | * | ||
477 | * charge against memory cgroup pointed by *memcg. if *memcg == NULL, estimated | ||
478 | * memory cgroup from @mm is got and stored in *memcg. | ||
479 | * | ||
480 | * Returns 0 if success. -ENOMEM at failure. | ||
475 | */ | 481 | */ |
476 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | 482 | |
477 | gfp_t gfp_mask, enum charge_type ctype, | 483 | int mem_cgroup_try_charge(struct mm_struct *mm, |
478 | struct mem_cgroup *memcg) | 484 | gfp_t gfp_mask, struct mem_cgroup **memcg) |
479 | { | 485 | { |
480 | struct mem_cgroup *mem; | 486 | struct mem_cgroup *mem; |
481 | struct page_cgroup *pc; | 487 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
482 | unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | ||
483 | struct mem_cgroup_per_zone *mz; | ||
484 | unsigned long flags; | ||
485 | |||
486 | pc = lookup_page_cgroup(page); | ||
487 | /* can happen at boot */ | ||
488 | if (unlikely(!pc)) | ||
489 | return 0; | ||
490 | prefetchw(pc); | ||
491 | /* | 488 | /* |
492 | * We always charge the cgroup the mm_struct belongs to. | 489 | * We always charge the cgroup the mm_struct belongs to. |
493 | * The mm_struct's mem_cgroup changes on task migration if the | 490 | * The mm_struct's mem_cgroup changes on task migration if the |
494 | * thread group leader migrates. It's possible that mm is not | 491 | * thread group leader migrates. It's possible that mm is not |
495 | * set, if so charge the init_mm (happens for pagecache usage). | 492 | * set, if so charge the init_mm (happens for pagecache usage). |
496 | */ | 493 | */ |
497 | 494 | if (likely(!*memcg)) { | |
498 | if (likely(!memcg)) { | ||
499 | rcu_read_lock(); | 495 | rcu_read_lock(); |
500 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | 496 | mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); |
501 | if (unlikely(!mem)) { | 497 | if (unlikely(!mem)) { |
@@ -506,15 +502,17 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
506 | * For every charge from the cgroup, increment reference count | 502 | * For every charge from the cgroup, increment reference count |
507 | */ | 503 | */ |
508 | css_get(&mem->css); | 504 | css_get(&mem->css); |
505 | *memcg = mem; | ||
509 | rcu_read_unlock(); | 506 | rcu_read_unlock(); |
510 | } else { | 507 | } else { |
511 | mem = memcg; | 508 | mem = *memcg; |
512 | css_get(&memcg->css); | 509 | css_get(&mem->css); |
513 | } | 510 | } |
514 | 511 | ||
512 | |||
515 | while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) { | 513 | while (unlikely(res_counter_charge(&mem->res, PAGE_SIZE))) { |
516 | if (!(gfp_mask & __GFP_WAIT)) | 514 | if (!(gfp_mask & __GFP_WAIT)) |
517 | goto out; | 515 | goto nomem; |
518 | 516 | ||
519 | if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) | 517 | if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) |
520 | continue; | 518 | continue; |
@@ -531,18 +529,37 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
531 | 529 | ||
532 | if (!nr_retries--) { | 530 | if (!nr_retries--) { |
533 | mem_cgroup_out_of_memory(mem, gfp_mask); | 531 | mem_cgroup_out_of_memory(mem, gfp_mask); |
534 | goto out; | 532 | goto nomem; |
535 | } | 533 | } |
536 | } | 534 | } |
535 | return 0; | ||
536 | nomem: | ||
537 | css_put(&mem->css); | ||
538 | return -ENOMEM; | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * commit a charge got by mem_cgroup_try_charge() and makes page_cgroup to be | ||
543 | * USED state. If already USED, uncharge and return. | ||
544 | */ | ||
545 | |||
546 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | ||
547 | struct page_cgroup *pc, | ||
548 | enum charge_type ctype) | ||
549 | { | ||
550 | struct mem_cgroup_per_zone *mz; | ||
551 | unsigned long flags; | ||
537 | 552 | ||
553 | /* try_charge() can return NULL to *memcg, taking care of it. */ | ||
554 | if (!mem) | ||
555 | return; | ||
538 | 556 | ||
539 | lock_page_cgroup(pc); | 557 | lock_page_cgroup(pc); |
540 | if (unlikely(PageCgroupUsed(pc))) { | 558 | if (unlikely(PageCgroupUsed(pc))) { |
541 | unlock_page_cgroup(pc); | 559 | unlock_page_cgroup(pc); |
542 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 560 | res_counter_uncharge(&mem->res, PAGE_SIZE); |
543 | css_put(&mem->css); | 561 | css_put(&mem->css); |
544 | 562 | return; | |
545 | goto done; | ||
546 | } | 563 | } |
547 | pc->mem_cgroup = mem; | 564 | pc->mem_cgroup = mem; |
548 | /* | 565 | /* |
@@ -557,15 +574,39 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
557 | __mem_cgroup_add_list(mz, pc); | 574 | __mem_cgroup_add_list(mz, pc); |
558 | spin_unlock_irqrestore(&mz->lru_lock, flags); | 575 | spin_unlock_irqrestore(&mz->lru_lock, flags); |
559 | unlock_page_cgroup(pc); | 576 | unlock_page_cgroup(pc); |
577 | } | ||
560 | 578 | ||
561 | done: | 579 | /* |
580 | * Charge the memory controller for page usage. | ||
581 | * Return | ||
582 | * 0 if the charge was successful | ||
583 | * < 0 if the cgroup is over its limit | ||
584 | */ | ||
585 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | ||
586 | gfp_t gfp_mask, enum charge_type ctype, | ||
587 | struct mem_cgroup *memcg) | ||
588 | { | ||
589 | struct mem_cgroup *mem; | ||
590 | struct page_cgroup *pc; | ||
591 | int ret; | ||
592 | |||
593 | pc = lookup_page_cgroup(page); | ||
594 | /* can happen at boot */ | ||
595 | if (unlikely(!pc)) | ||
596 | return 0; | ||
597 | prefetchw(pc); | ||
598 | |||
599 | mem = memcg; | ||
600 | ret = mem_cgroup_try_charge(mm, gfp_mask, &mem); | ||
601 | if (ret) | ||
602 | return ret; | ||
603 | |||
604 | __mem_cgroup_commit_charge(mem, pc, ctype); | ||
562 | return 0; | 605 | return 0; |
563 | out: | ||
564 | css_put(&mem->css); | ||
565 | return -ENOMEM; | ||
566 | } | 606 | } |
567 | 607 | ||
568 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) | 608 | int mem_cgroup_newpage_charge(struct page *page, |
609 | struct mm_struct *mm, gfp_t gfp_mask) | ||
569 | { | 610 | { |
570 | if (mem_cgroup_subsys.disabled) | 611 | if (mem_cgroup_subsys.disabled) |
571 | return 0; | 612 | return 0; |
@@ -586,6 +627,34 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
586 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); | 627 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); |
587 | } | 628 | } |
588 | 629 | ||
630 | /* | ||
631 | * same as mem_cgroup_newpage_charge(), now. | ||
632 | * But what we assume is different from newpage, and this is special case. | ||
633 | * treat this in special function. easy for maintenance. | ||
634 | */ | ||
635 | |||
636 | int mem_cgroup_charge_migrate_fixup(struct page *page, | ||
637 | struct mm_struct *mm, gfp_t gfp_mask) | ||
638 | { | ||
639 | if (mem_cgroup_subsys.disabled) | ||
640 | return 0; | ||
641 | |||
642 | if (PageCompound(page)) | ||
643 | return 0; | ||
644 | |||
645 | if (page_mapped(page) || (page->mapping && !PageAnon(page))) | ||
646 | return 0; | ||
647 | |||
648 | if (unlikely(!mm)) | ||
649 | mm = &init_mm; | ||
650 | |||
651 | return mem_cgroup_charge_common(page, mm, gfp_mask, | ||
652 | MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); | ||
653 | } | ||
654 | |||
655 | |||
656 | |||
657 | |||
589 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 658 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
590 | gfp_t gfp_mask) | 659 | gfp_t gfp_mask) |
591 | { | 660 | { |
@@ -628,6 +697,30 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
628 | MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); | 697 | MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); |
629 | } | 698 | } |
630 | 699 | ||
700 | |||
701 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | ||
702 | { | ||
703 | struct page_cgroup *pc; | ||
704 | |||
705 | if (mem_cgroup_subsys.disabled) | ||
706 | return; | ||
707 | if (!ptr) | ||
708 | return; | ||
709 | pc = lookup_page_cgroup(page); | ||
710 | __mem_cgroup_commit_charge(ptr, pc, MEM_CGROUP_CHARGE_TYPE_MAPPED); | ||
711 | } | ||
712 | |||
713 | void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | ||
714 | { | ||
715 | if (mem_cgroup_subsys.disabled) | ||
716 | return; | ||
717 | if (!mem) | ||
718 | return; | ||
719 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
720 | css_put(&mem->css); | ||
721 | } | ||
722 | |||
723 | |||
631 | /* | 724 | /* |
632 | * uncharge if !page_mapped(page) | 725 | * uncharge if !page_mapped(page) |
633 | */ | 726 | */ |
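
The memcontrol.c hunks above split charging into a try/commit/cancel sequence: mem_cgroup_try_charge() only reserves PAGE_SIZE against the memcg (looked up via mm->owner when *memcg is NULL), __mem_cgroup_commit_charge() later marks the page_cgroup USED and links it onto the memcg's per-zone LRU, and mem_cgroup_cancel_charge_swapin() drops a reservation that will never be committed. Below is a minimal sketch of the intended calling pattern, modeled on the do_swap_page() change that follows; the ok_to_map flag is only a placeholder for the caller's own success condition, and locking/fault details are elided:

	struct mem_cgroup *ptr = NULL;

	/* Reserve PAGE_SIZE; with *ptr == NULL the memcg comes from mm->owner. */
	if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr))
		return VM_FAULT_OOM;

	/* ... install the pte, add the anon rmap, etc. ... */

	if (ok_to_map)
		/* Mark the page_cgroup USED and put it on the memcg's LRU. */
		mem_cgroup_commit_charge_swapin(page, ptr);
	else
		/* Roll back: res_counter_uncharge() plus css_put() on the memcg. */
		mem_cgroup_cancel_charge_swapin(ptr);
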
diff --git a/mm/memory.c b/mm/memory.c
index 3f8fa06b963b..7f210f160990 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2000,7 +2000,7 @@ gotten:
2000 | cow_user_page(new_page, old_page, address, vma); | 2000 | cow_user_page(new_page, old_page, address, vma); |
2001 | __SetPageUptodate(new_page); | 2001 | __SetPageUptodate(new_page); |
2002 | 2002 | ||
2003 | if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) | 2003 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) |
2004 | goto oom_free_new; | 2004 | goto oom_free_new; |
2005 | 2005 | ||
2006 | /* | 2006 | /* |
@@ -2392,6 +2392,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2392 | struct page *page; | 2392 | struct page *page; |
2393 | swp_entry_t entry; | 2393 | swp_entry_t entry; |
2394 | pte_t pte; | 2394 | pte_t pte; |
2395 | struct mem_cgroup *ptr = NULL; | ||
2395 | int ret = 0; | 2396 | int ret = 0; |
2396 | 2397 | ||
2397 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) | 2398 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) |
@@ -2430,7 +2431,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2430 | lock_page(page); | 2431 | lock_page(page); |
2431 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2432 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2432 | 2433 | ||
2433 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | 2434 | if (mem_cgroup_try_charge(mm, GFP_KERNEL, &ptr) == -ENOMEM) { |
2434 | ret = VM_FAULT_OOM; | 2435 | ret = VM_FAULT_OOM; |
2435 | unlock_page(page); | 2436 | unlock_page(page); |
2436 | goto out; | 2437 | goto out; |
@@ -2460,6 +2461,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2460 | flush_icache_page(vma, page); | 2461 | flush_icache_page(vma, page); |
2461 | set_pte_at(mm, address, page_table, pte); | 2462 | set_pte_at(mm, address, page_table, pte); |
2462 | page_add_anon_rmap(page, vma, address); | 2463 | page_add_anon_rmap(page, vma, address); |
2464 | mem_cgroup_commit_charge_swapin(page, ptr); | ||
2463 | 2465 | ||
2464 | swap_free(entry); | 2466 | swap_free(entry); |
2465 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) | 2467 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
@@ -2480,7 +2482,7 @@ unlock:
2480 | out: | 2482 | out: |
2481 | return ret; | 2483 | return ret; |
2482 | out_nomap: | 2484 | out_nomap: |
2483 | mem_cgroup_uncharge_page(page); | 2485 | mem_cgroup_cancel_charge_swapin(ptr); |
2484 | pte_unmap_unlock(page_table, ptl); | 2486 | pte_unmap_unlock(page_table, ptl); |
2485 | unlock_page(page); | 2487 | unlock_page(page); |
2486 | page_cache_release(page); | 2488 | page_cache_release(page); |
@@ -2510,7 +2512,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2510 | goto oom; | 2512 | goto oom; |
2511 | __SetPageUptodate(page); | 2513 | __SetPageUptodate(page); |
2512 | 2514 | ||
2513 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) | 2515 | if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) |
2514 | goto oom_free_page; | 2516 | goto oom_free_page; |
2515 | 2517 | ||
2516 | entry = mk_pte(page, vma->vm_page_prot); | 2518 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -2601,7 +2603,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2601 | ret = VM_FAULT_OOM; | 2603 | ret = VM_FAULT_OOM; |
2602 | goto out; | 2604 | goto out; |
2603 | } | 2605 | } |
2604 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { | 2606 | if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) { |
2605 | ret = VM_FAULT_OOM; | 2607 | ret = VM_FAULT_OOM; |
2606 | page_cache_release(page); | 2608 | page_cache_release(page); |
2607 | goto out; | 2609 | goto out; |
diff --git a/mm/migrate.c b/mm/migrate.c
index 55373983c9c6..246dcb973ae7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -133,7 +133,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
133 | * be reliable, and this charge can actually fail: oh well, we don't | 133 | * be reliable, and this charge can actually fail: oh well, we don't |
134 | * make the situation any worse by proceeding as if it had succeeded. | 134 | * make the situation any worse by proceeding as if it had succeeded. |
135 | */ | 135 | */ |
136 | mem_cgroup_charge(new, mm, GFP_ATOMIC); | 136 | mem_cgroup_charge_migrate_fixup(new, mm, GFP_ATOMIC); |
137 | 137 | ||
138 | get_page(new); | 138 | get_page(new); |
139 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 139 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
diff --git a/mm/swapfile.c b/mm/swapfile.c
index eec5ca758a23..fb926efb5167 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -690,17 +690,18 @@ unsigned int count_swap_pages(int type, int free)
690 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | 690 | static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, |
691 | unsigned long addr, swp_entry_t entry, struct page *page) | 691 | unsigned long addr, swp_entry_t entry, struct page *page) |
692 | { | 692 | { |
693 | struct mem_cgroup *ptr = NULL; | ||
693 | spinlock_t *ptl; | 694 | spinlock_t *ptl; |
694 | pte_t *pte; | 695 | pte_t *pte; |
695 | int ret = 1; | 696 | int ret = 1; |
696 | 697 | ||
697 | if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) | 698 | if (mem_cgroup_try_charge(vma->vm_mm, GFP_KERNEL, &ptr)) |
698 | ret = -ENOMEM; | 699 | ret = -ENOMEM; |
699 | 700 | ||
700 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 701 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
701 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { | 702 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { |
702 | if (ret > 0) | 703 | if (ret > 0) |
703 | mem_cgroup_uncharge_page(page); | 704 | mem_cgroup_cancel_charge_swapin(ptr); |
704 | ret = 0; | 705 | ret = 0; |
705 | goto out; | 706 | goto out; |
706 | } | 707 | } |
@@ -710,6 +711,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
710 | set_pte_at(vma->vm_mm, addr, pte, | 711 | set_pte_at(vma->vm_mm, addr, pte, |
711 | pte_mkold(mk_pte(page, vma->vm_page_prot))); | 712 | pte_mkold(mk_pte(page, vma->vm_page_prot))); |
712 | page_add_anon_rmap(page, vma, addr); | 713 | page_add_anon_rmap(page, vma, addr); |
714 | mem_cgroup_commit_charge_swapin(page, ptr); | ||
713 | swap_free(entry); | 715 | swap_free(entry); |
714 | /* | 716 | /* |
715 | * Move the page to the active list so it is not | 717 | * Move the page to the active list so it is not |
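
The unuse_pte() hunks above are the clearest illustration of why the charge was split: the reservation is taken before the pte lock, and if the pte no longer matches the swap entry the caller only has to cancel the reservation rather than uncharge a page that was never committed. A condensed sketch of that flow, assuming the surrounding unuse_pte() context shown in the hunks (pte unmapping and the out label elided):

	struct mem_cgroup *ptr = NULL;
	int ret = 1;

	if (mem_cgroup_try_charge(vma->vm_mm, GFP_KERNEL, &ptr))
		ret = -ENOMEM;

	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
		/* Lost the race: drop the reservation, nothing was committed. */
		if (ret > 0)
			mem_cgroup_cancel_charge_swapin(ptr);
		ret = 0;
		goto out;
	}
	/* ... set_pte_at(), page_add_anon_rmap() ... */
	mem_cgroup_commit_charge_swapin(page, ptr);
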