diff options
-rw-r--r-- | mm/memcontrol.c | 152 |
1 files changed, 98 insertions, 54 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 22f088f22102..f5fb9917787c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -253,6 +253,7 @@ static struct move_charge_struct { | |||
253 | struct mem_cgroup *from; | 253 | struct mem_cgroup *from; |
254 | struct mem_cgroup *to; | 254 | struct mem_cgroup *to; |
255 | unsigned long precharge; | 255 | unsigned long precharge; |
256 | unsigned long moved_charge; | ||
256 | } mc; | 257 | } mc; |
257 | 258 | ||
258 | /* | 259 | /* |
@@ -1536,14 +1537,23 @@ nomem: | |||
1536 | * This function is for that: it does the uncharge and puts the css refcnt | 1537 | * This function is for that: it does the uncharge and puts the css refcnt |
1537 | * obtained by try_charge(). | 1538 | * obtained by try_charge(). |
1538 | */ | 1539 | */ |
1539 | static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) | 1540 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, |
1541 | unsigned long count) | ||
1540 | { | 1542 | { |
1541 | if (!mem_cgroup_is_root(mem)) { | 1543 | if (!mem_cgroup_is_root(mem)) { |
1542 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1544 | res_counter_uncharge(&mem->res, PAGE_SIZE * count); |
1543 | if (do_swap_account) | 1545 | if (do_swap_account) |
1544 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | 1546 | res_counter_uncharge(&mem->memsw, PAGE_SIZE * count); |
1547 | VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags)); | ||
1548 | WARN_ON_ONCE(count > INT_MAX); | ||
1549 | __css_put(&mem->css, (int)count); | ||
1545 | } | 1550 | } |
1546 | css_put(&mem->css); | 1551 | /* we don't need css_put for root */ |
1552 | } | ||
1553 | |||
1554 | static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) | ||
1555 | { | ||
1556 | __mem_cgroup_cancel_charge(mem, 1); | ||
1547 | } | 1557 | } |
1548 | 1558 | ||
1549 | /* | 1559 | /* |
@@ -1646,17 +1656,20 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1646 | * @pc: page_cgroup of the page. | 1656 | * @pc: page_cgroup of the page. |
1647 | * @from: mem_cgroup which the page is moved from. | 1657 | * @from: mem_cgroup which the page is moved from. |
1648 | * @to: mem_cgroup which the page is moved to. @from != @to. | 1658 | * @to: mem_cgroup which the page is moved to. @from != @to. |
1659 | * @uncharge: whether we should call uncharge and css_put against @from. | ||
1649 | * | 1660 | * |
1650 | * The caller must confirm following. | 1661 | * The caller must confirm following. |
1651 | * - page is not on LRU (isolate_page() is useful.) | 1662 | * - page is not on LRU (isolate_page() is useful.) |
1652 | * - the pc is locked, used, and ->mem_cgroup points to @from. | 1663 | * - the pc is locked, used, and ->mem_cgroup points to @from. |
1653 | * | 1664 | * |
1654 | * This function does "uncharge" from old cgroup but doesn't do "charge" to | 1665 | * This function doesn't do "charge" nor css_get to new cgroup. It should be |
1655 | * new cgroup. It should be done by a caller. | 1666 | * done by a caller (__mem_cgroup_try_charge would be useful). If @uncharge is |
1667 | * true, this function does "uncharge" from old cgroup, but it doesn't if | ||
1668 | * @uncharge is false, so a caller should do "uncharge". | ||
1656 | */ | 1669 | */ |
1657 | 1670 | ||
1658 | static void __mem_cgroup_move_account(struct page_cgroup *pc, | 1671 | static void __mem_cgroup_move_account(struct page_cgroup *pc, |
1659 | struct mem_cgroup *from, struct mem_cgroup *to) | 1672 | struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) |
1660 | { | 1673 | { |
1661 | struct page *page; | 1674 | struct page *page; |
1662 | int cpu; | 1675 | int cpu; |
@@ -1669,10 +1682,6 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1669 | VM_BUG_ON(!PageCgroupUsed(pc)); | 1682 | VM_BUG_ON(!PageCgroupUsed(pc)); |
1670 | VM_BUG_ON(pc->mem_cgroup != from); | 1683 | VM_BUG_ON(pc->mem_cgroup != from); |
1671 | 1684 | ||
1672 | if (!mem_cgroup_is_root(from)) | ||
1673 | res_counter_uncharge(&from->res, PAGE_SIZE); | ||
1674 | mem_cgroup_charge_statistics(from, pc, false); | ||
1675 | |||
1676 | page = pc->page; | 1685 | page = pc->page; |
1677 | if (page_mapped(page) && !PageAnon(page)) { | 1686 | if (page_mapped(page) && !PageAnon(page)) { |
1678 | cpu = smp_processor_id(); | 1687 | cpu = smp_processor_id(); |
@@ -1688,12 +1697,12 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1688 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, | 1697 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, |
1689 | 1); | 1698 | 1); |
1690 | } | 1699 | } |
1700 | mem_cgroup_charge_statistics(from, pc, false); | ||
1701 | if (uncharge) | ||
1702 | /* This is not "cancel", but cancel_charge does all we need. */ | ||
1703 | mem_cgroup_cancel_charge(from); | ||
1691 | 1704 | ||
1692 | if (do_swap_account && !mem_cgroup_is_root(from)) | 1705 | /* caller should have done css_get */ |
1693 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | ||
1694 | css_put(&from->css); | ||
1695 | |||
1696 | css_get(&to->css); | ||
1697 | pc->mem_cgroup = to; | 1706 | pc->mem_cgroup = to; |
1698 | mem_cgroup_charge_statistics(to, pc, true); | 1707 | mem_cgroup_charge_statistics(to, pc, true); |
1699 | /* | 1708 | /* |
@@ -1710,12 +1719,12 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1710 | * __mem_cgroup_move_account() | 1719 | * __mem_cgroup_move_account() |
1711 | */ | 1720 | */ |
1712 | static int mem_cgroup_move_account(struct page_cgroup *pc, | 1721 | static int mem_cgroup_move_account(struct page_cgroup *pc, |
1713 | struct mem_cgroup *from, struct mem_cgroup *to) | 1722 | struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) |
1714 | { | 1723 | { |
1715 | int ret = -EINVAL; | 1724 | int ret = -EINVAL; |
1716 | lock_page_cgroup(pc); | 1725 | lock_page_cgroup(pc); |
1717 | if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { | 1726 | if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { |
1718 | __mem_cgroup_move_account(pc, from, to); | 1727 | __mem_cgroup_move_account(pc, from, to, uncharge); |
1719 | ret = 0; | 1728 | ret = 0; |
1720 | } | 1729 | } |
1721 | unlock_page_cgroup(pc); | 1730 | unlock_page_cgroup(pc); |
@@ -1751,11 +1760,9 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
1751 | if (ret || !parent) | 1760 | if (ret || !parent) |
1752 | goto put_back; | 1761 | goto put_back; |
1753 | 1762 | ||
1754 | ret = mem_cgroup_move_account(pc, child, parent); | 1763 | ret = mem_cgroup_move_account(pc, child, parent, true); |
1755 | if (!ret) | 1764 | if (ret) |
1756 | css_put(&parent->css); /* drop extra refcnt by try_charge() */ | 1765 | mem_cgroup_cancel_charge(parent); |
1757 | else | ||
1758 | mem_cgroup_cancel_charge(parent); /* does css_put */ | ||
1759 | put_back: | 1766 | put_back: |
1760 | putback_lru_page(page); | 1767 | putback_lru_page(page); |
1761 | put: | 1768 | put: |
@@ -3438,16 +3445,58 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss, | |||
3438 | } | 3445 | } |
3439 | 3446 | ||
3440 | /* Handlers for move charge at task migration. */ | 3447 | /* Handlers for move charge at task migration. */ |
3441 | static int mem_cgroup_do_precharge(void) | 3448 | #define PRECHARGE_COUNT_AT_ONCE 256 |
3449 | static int mem_cgroup_do_precharge(unsigned long count) | ||
3442 | { | 3450 | { |
3443 | int ret = -ENOMEM; | 3451 | int ret = 0; |
3452 | int batch_count = PRECHARGE_COUNT_AT_ONCE; | ||
3444 | struct mem_cgroup *mem = mc.to; | 3453 | struct mem_cgroup *mem = mc.to; |
3445 | 3454 | ||
3446 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, NULL); | 3455 | if (mem_cgroup_is_root(mem)) { |
3447 | if (ret || !mem) | 3456 | mc.precharge += count; |
3448 | return -ENOMEM; | 3457 | /* we don't need css_get for root */ |
3449 | 3458 | return ret; | |
3450 | mc.precharge++; | 3459 | } |
3460 | /* try to charge at once */ | ||
3461 | if (count > 1) { | ||
3462 | struct res_counter *dummy; | ||
3463 | /* | ||
3464 | * "mem" cannot be under rmdir() because we've already checked | ||
3465 | * by cgroup_lock_live_group() that it is not removed and we | ||
3466 | * are still under the same cgroup_mutex. So we can postpone | ||
3467 | * css_get(). | ||
3468 | */ | ||
3469 | if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) | ||
3470 | goto one_by_one; | ||
3471 | if (do_swap_account && res_counter_charge(&mem->memsw, | ||
3472 | PAGE_SIZE * count, &dummy)) { | ||
3473 | res_counter_uncharge(&mem->res, PAGE_SIZE * count); | ||
3474 | goto one_by_one; | ||
3475 | } | ||
3476 | mc.precharge += count; | ||
3477 | VM_BUG_ON(test_bit(CSS_ROOT, &mem->css.flags)); | ||
3478 | WARN_ON_ONCE(count > INT_MAX); | ||
3479 | __css_get(&mem->css, (int)count); | ||
3480 | return ret; | ||
3481 | } | ||
3482 | one_by_one: | ||
3483 | /* fall back to one by one charge */ | ||
3484 | while (count--) { | ||
3485 | if (signal_pending(current)) { | ||
3486 | ret = -EINTR; | ||
3487 | break; | ||
3488 | } | ||
3489 | if (!batch_count--) { | ||
3490 | batch_count = PRECHARGE_COUNT_AT_ONCE; | ||
3491 | cond_resched(); | ||
3492 | } | ||
3493 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, | ||
3494 | false, NULL); | ||
3495 | if (ret || !mem) | ||
3496 | /* mem_cgroup_clear_mc() will do uncharge later */ | ||
3497 | return -ENOMEM; | ||
3498 | mc.precharge++; | ||
3499 | } | ||
3451 | return ret; | 3500 | return ret; |
3452 | } | 3501 | } |
3453 | 3502 | ||
@@ -3570,34 +3619,25 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
3570 | return precharge; | 3619 | return precharge; |
3571 | } | 3620 | } |
3572 | 3621 | ||
3573 | #define PRECHARGE_AT_ONCE 256 | ||
3574 | static int mem_cgroup_precharge_mc(struct mm_struct *mm) | 3622 | static int mem_cgroup_precharge_mc(struct mm_struct *mm) |
3575 | { | 3623 | { |
3576 | int ret = 0; | 3624 | return mem_cgroup_do_precharge(mem_cgroup_count_precharge(mm)); |
3577 | int count = PRECHARGE_AT_ONCE; | ||
3578 | unsigned long precharge = mem_cgroup_count_precharge(mm); | ||
3579 | |||
3580 | while (!ret && precharge--) { | ||
3581 | if (signal_pending(current)) { | ||
3582 | ret = -EINTR; | ||
3583 | break; | ||
3584 | } | ||
3585 | if (!count--) { | ||
3586 | count = PRECHARGE_AT_ONCE; | ||
3587 | cond_resched(); | ||
3588 | } | ||
3589 | ret = mem_cgroup_do_precharge(); | ||
3590 | } | ||
3591 | |||
3592 | return ret; | ||
3593 | } | 3625 | } |
3594 | 3626 | ||
3595 | static void mem_cgroup_clear_mc(void) | 3627 | static void mem_cgroup_clear_mc(void) |
3596 | { | 3628 | { |
3597 | /* we must uncharge all the leftover precharges from mc.to */ | 3629 | /* we must uncharge all the leftover precharges from mc.to */ |
3598 | while (mc.precharge) { | 3630 | if (mc.precharge) { |
3599 | mem_cgroup_cancel_charge(mc.to); | 3631 | __mem_cgroup_cancel_charge(mc.to, mc.precharge); |
3600 | mc.precharge--; | 3632 | mc.precharge = 0; |
3633 | } | ||
3634 | /* | ||
3635 | * we didn't uncharge from mc.from at mem_cgroup_move_account(), so | ||
3636 | * we must uncharge here. | ||
3637 | */ | ||
3638 | if (mc.moved_charge) { | ||
3639 | __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); | ||
3640 | mc.moved_charge = 0; | ||
3601 | } | 3641 | } |
3602 | mc.from = NULL; | 3642 | mc.from = NULL; |
3603 | mc.to = NULL; | 3643 | mc.to = NULL; |
@@ -3625,9 +3665,11 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
3625 | VM_BUG_ON(mc.from); | 3665 | VM_BUG_ON(mc.from); |
3626 | VM_BUG_ON(mc.to); | 3666 | VM_BUG_ON(mc.to); |
3627 | VM_BUG_ON(mc.precharge); | 3667 | VM_BUG_ON(mc.precharge); |
3668 | VM_BUG_ON(mc.moved_charge); | ||
3628 | mc.from = from; | 3669 | mc.from = from; |
3629 | mc.to = mem; | 3670 | mc.to = mem; |
3630 | mc.precharge = 0; | 3671 | mc.precharge = 0; |
3672 | mc.moved_charge = 0; | ||
3631 | 3673 | ||
3632 | ret = mem_cgroup_precharge_mc(mm); | 3674 | ret = mem_cgroup_precharge_mc(mm); |
3633 | if (ret) | 3675 | if (ret) |
@@ -3674,9 +3716,11 @@ retry: | |||
3674 | if (isolate_lru_page(page)) | 3716 | if (isolate_lru_page(page)) |
3675 | goto put; | 3717 | goto put; |
3676 | pc = lookup_page_cgroup(page); | 3718 | pc = lookup_page_cgroup(page); |
3677 | if (!mem_cgroup_move_account(pc, mc.from, mc.to)) { | 3719 | if (!mem_cgroup_move_account(pc, |
3678 | css_put(&mc.to->css); | 3720 | mc.from, mc.to, false)) { |
3679 | mc.precharge--; | 3721 | mc.precharge--; |
3722 | /* we uncharge from mc.from later. */ | ||
3723 | mc.moved_charge++; | ||
3680 | } | 3724 | } |
3681 | putback_lru_page(page); | 3725 | putback_lru_page(page); |
3682 | put: /* is_target_pte_for_mc() gets the page */ | 3726 | put: /* is_target_pte_for_mc() gets the page */ |
@@ -3696,7 +3740,7 @@ put: /* is_target_pte_for_mc() gets the page */ | |||
3696 | * charges to mc.to if we have failed in charge once in attach() | 3740 | * charges to mc.to if we have failed in charge once in attach() |
3697 | * phase. | 3741 | * phase. |
3698 | */ | 3742 | */ |
3699 | ret = mem_cgroup_do_precharge(); | 3743 | ret = mem_cgroup_do_precharge(1); |
3700 | if (!ret) | 3744 | if (!ret) |
3701 | goto retry; | 3745 | goto retry; |
3702 | } | 3746 | } |