aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>2010-03-10 18:22:17 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-03-12 18:52:36 -0500
commit024914477e15ef8b17f271ec47f1bb8a589f0806 (patch)
tree9a6a8b4224c94fcdd1b8c3127b301ee3537f8cc2 /mm
parent8033b97c9b5ef063e3f4bf2efe1cd0a22093aaff (diff)
memcg: move charges of anonymous swap
This patch is another core part of this move-charge-at-task-migration feature. It enables moving charges of anonymous swaps. To move the charge of swap, we need to exchange swap_cgroup's record. In the current implementation, swap_cgroup's record is protected by: - page lock: if the entry is on swap cache. - swap_lock: if the entry is not on swap cache. This works well in usual swap-in/out activity. But this behavior makes the feature of moving swap charge check many conditions to exchange swap_cgroup's record safely. So I changed modification of swap_cgroup's record (swap_cgroup_record()) to use xchg, and defined a new function to cmpxchg swap_cgroup's record. This patch also enables moving charge of non pte_present but not uncharged swap caches, which can exist on the swap-out path, by getting the target pages via find_get_page() as do_mincore() does. [kosaki.motohiro@jp.fujitsu.com: fix ia64 build] [akpm@linux-foundation.org: fix typos] Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Paul Menage <menage@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/memcontrol.c183
-rw-r--r--mm/page_cgroup.c34
-rw-r--r--mm/swapfile.c31
3 files changed, 210 insertions, 38 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 589084f00b70..e883198baf81 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -33,6 +33,7 @@
33#include <linux/rbtree.h> 33#include <linux/rbtree.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/swap.h> 35#include <linux/swap.h>
36#include <linux/swapops.h>
36#include <linux/spinlock.h> 37#include <linux/spinlock.h>
37#include <linux/fs.h> 38#include <linux/fs.h>
38#include <linux/seq_file.h> 39#include <linux/seq_file.h>
@@ -2270,6 +2271,54 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
2270 } 2271 }
2271 rcu_read_unlock(); 2272 rcu_read_unlock();
2272} 2273}
2274
2275/**
2276 * mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
2277 * @entry: swap entry to be moved
2278 * @from: mem_cgroup which the entry is moved from
2279 * @to: mem_cgroup which the entry is moved to
2280 *
2281 * It succeeds only when the swap_cgroup's record for this entry is the same
2282 * as the mem_cgroup's id of @from.
2283 *
2284 * Returns 0 on success, -EINVAL on failure.
2285 *
2286 * The caller must have charged to @to, IOW, called res_counter_charge() about
2287 * both res and memsw, and called css_get().
2288 */
2289static int mem_cgroup_move_swap_account(swp_entry_t entry,
2290 struct mem_cgroup *from, struct mem_cgroup *to)
2291{
2292 unsigned short old_id, new_id;
2293
2294 old_id = css_id(&from->css);
2295 new_id = css_id(&to->css);
2296
2297 if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
2298 if (!mem_cgroup_is_root(from))
2299 res_counter_uncharge(&from->memsw, PAGE_SIZE);
2300 mem_cgroup_swap_statistics(from, false);
2301 mem_cgroup_put(from);
2302 /*
2303 * we charged both to->res and to->memsw, so we should uncharge
2304 * to->res.
2305 */
2306 if (!mem_cgroup_is_root(to))
2307 res_counter_uncharge(&to->res, PAGE_SIZE);
2308 mem_cgroup_swap_statistics(to, true);
2309 mem_cgroup_get(to);
2310 css_put(&to->css);
2311
2312 return 0;
2313 }
2314 return -EINVAL;
2315}
2316#else
2317static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
2318 struct mem_cgroup *from, struct mem_cgroup *to)
2319{
2320 return -EINVAL;
2321}
2273#endif 2322#endif
2274 2323
2275/* 2324/*
@@ -2949,6 +2998,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
2949 return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate; 2998 return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
2950} 2999}
2951 3000
3001#ifdef CONFIG_MMU
2952static int mem_cgroup_move_charge_write(struct cgroup *cgrp, 3002static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
2953 struct cftype *cft, u64 val) 3003 struct cftype *cft, u64 val)
2954{ 3004{
@@ -2967,6 +3017,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
2967 3017
2968 return 0; 3018 return 0;
2969} 3019}
3020#else
3021static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
3022 struct cftype *cft, u64 val)
3023{
3024 return -ENOSYS;
3025}
3026#endif
2970 3027
2971 3028
2972/* For read statistics */ 3029/* For read statistics */
@@ -3489,6 +3546,7 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
3489 return ret; 3546 return ret;
3490} 3547}
3491 3548
3549#ifdef CONFIG_MMU
3492/* Handlers for move charge at task migration. */ 3550/* Handlers for move charge at task migration. */
3493#define PRECHARGE_COUNT_AT_ONCE 256 3551#define PRECHARGE_COUNT_AT_ONCE 256
3494static int mem_cgroup_do_precharge(unsigned long count) 3552static int mem_cgroup_do_precharge(unsigned long count)
@@ -3544,77 +3602,124 @@ one_by_one:
3544 } 3602 }
3545 return ret; 3603 return ret;
3546} 3604}
3605#else /* !CONFIG_MMU */
3606static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
3607 struct cgroup *cgroup,
3608 struct task_struct *p,
3609 bool threadgroup)
3610{
3611 return 0;
3612}
3613static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
3614 struct cgroup *cgroup,
3615 struct task_struct *p,
3616 bool threadgroup)
3617{
3618}
3619static void mem_cgroup_move_task(struct cgroup_subsys *ss,
3620 struct cgroup *cont,
3621 struct cgroup *old_cont,
3622 struct task_struct *p,
3623 bool threadgroup)
3624{
3625}
3626#endif
3547 3627
3548/** 3628/**
3549 * is_target_pte_for_mc - check a pte whether it is valid for move charge 3629 * is_target_pte_for_mc - check a pte whether it is valid for move charge
3550 * @vma: the vma the pte to be checked belongs 3630 * @vma: the vma the pte to be checked belongs
3551 * @addr: the address corresponding to the pte to be checked 3631 * @addr: the address corresponding to the pte to be checked
3552 * @ptent: the pte to be checked 3632 * @ptent: the pte to be checked
3553 * @target: the pointer the target page will be stored(can be NULL) 3633 * @target: the pointer the target page or swap ent will be stored(can be NULL)
3554 * 3634 *
3555 * Returns 3635 * Returns
3556 * 0(MC_TARGET_NONE): if the pte is not a target for move charge. 3636 * 0(MC_TARGET_NONE): if the pte is not a target for move charge.
3557 * 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for 3637 * 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
3558 * move charge. if @target is not NULL, the page is stored in target->page 3638 * move charge. if @target is not NULL, the page is stored in target->page
3559 * with extra refcnt got(Callers should handle it). 3639 * with extra refcnt got(Callers should handle it).
3640 * 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
3641 * target for charge migration. if @target is not NULL, the entry is stored
3642 * in target->ent.
3560 * 3643 *
3561 * Called with pte lock held. 3644 * Called with pte lock held.
3562 */ 3645 */
3563/* We add a new member later. */
3564union mc_target { 3646union mc_target {
3565 struct page *page; 3647 struct page *page;
3648 swp_entry_t ent;
3566}; 3649};
3567 3650
3568/* We add a new type later. */
3569enum mc_target_type { 3651enum mc_target_type {
3570 MC_TARGET_NONE, /* not used */ 3652 MC_TARGET_NONE, /* not used */
3571 MC_TARGET_PAGE, 3653 MC_TARGET_PAGE,
3654 MC_TARGET_SWAP,
3572}; 3655};
3573 3656
3574static int is_target_pte_for_mc(struct vm_area_struct *vma, 3657static int is_target_pte_for_mc(struct vm_area_struct *vma,
3575 unsigned long addr, pte_t ptent, union mc_target *target) 3658 unsigned long addr, pte_t ptent, union mc_target *target)
3576{ 3659{
3577 struct page *page; 3660 struct page *page = NULL;
3578 struct page_cgroup *pc; 3661 struct page_cgroup *pc;
3579 int ret = 0; 3662 int ret = 0;
3663 swp_entry_t ent = { .val = 0 };
3664 int usage_count = 0;
3580 bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON, 3665 bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
3581 &mc.to->move_charge_at_immigrate); 3666 &mc.to->move_charge_at_immigrate);
3582 3667
3583 if (!pte_present(ptent)) 3668 if (!pte_present(ptent)) {
3584 return 0; 3669 /* TODO: handle swap of shmes/tmpfs */
3585 3670 if (pte_none(ptent) || pte_file(ptent))
3586 page = vm_normal_page(vma, addr, ptent); 3671 return 0;
3587 if (!page || !page_mapped(page)) 3672 else if (is_swap_pte(ptent)) {
3588 return 0; 3673 ent = pte_to_swp_entry(ptent);
3589 /* 3674 if (!move_anon || non_swap_entry(ent))
3590 * TODO: We don't move charges of file(including shmem/tmpfs) pages for 3675 return 0;
3591 * now. 3676 usage_count = mem_cgroup_count_swap_user(ent, &page);
3592 */ 3677 }
3593 if (!move_anon || !PageAnon(page)) 3678 } else {
3594 return 0; 3679 page = vm_normal_page(vma, addr, ptent);
3595 /* 3680 if (!page || !page_mapped(page))
3596 * TODO: We don't move charges of shared(used by multiple processes) 3681 return 0;
3597 * pages for now. 3682 /*
3598 */ 3683 * TODO: We don't move charges of file(including shmem/tmpfs)
3599 if (page_mapcount(page) > 1) 3684 * pages for now.
3600 return 0; 3685 */
3601 if (!get_page_unless_zero(page)) 3686 if (!move_anon || !PageAnon(page))
3687 return 0;
3688 if (!get_page_unless_zero(page))
3689 return 0;
3690 usage_count = page_mapcount(page);
3691 }
3692 if (usage_count > 1) {
3693 /*
3694 * TODO: We don't move charges of shared(used by multiple
3695 * processes) pages for now.
3696 */
3697 if (page)
3698 put_page(page);
3602 return 0; 3699 return 0;
3603 3700 }
3604 pc = lookup_page_cgroup(page); 3701 if (page) {
3605 /* 3702 pc = lookup_page_cgroup(page);
3606 * Do only loose check w/o page_cgroup lock. mem_cgroup_move_account() 3703 /*
3607 * checks the pc is valid or not under the lock. 3704 * Do only loose check w/o page_cgroup lock.
3608 */ 3705 * mem_cgroup_move_account() checks the pc is valid or not under
3609 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { 3706 * the lock.
3610 ret = MC_TARGET_PAGE; 3707 */
3708 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
3709 ret = MC_TARGET_PAGE;
3710 if (target)
3711 target->page = page;
3712 }
3713 if (!ret || !target)
3714 put_page(page);
3715 }
3716 /* fall through: check the swap entry if the page was not a target */
3717 if (ent.val && do_swap_account && !ret &&
3718 css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
3719 ret = MC_TARGET_SWAP;
3611 if (target) 3720 if (target)
3612 target->page = page; 3721 target->ent = ent;
3613 } 3722 }
3614
3615 if (!ret || !target)
3616 put_page(page);
3617
3618 return ret; 3723 return ret;
3619} 3724}
3620 3725
@@ -3754,6 +3859,7 @@ retry:
3754 int type; 3859 int type;
3755 struct page *page; 3860 struct page *page;
3756 struct page_cgroup *pc; 3861 struct page_cgroup *pc;
3862 swp_entry_t ent;
3757 3863
3758 if (!mc.precharge) 3864 if (!mc.precharge)
3759 break; 3865 break;
@@ -3775,6 +3881,11 @@ retry:
3775put: /* is_target_pte_for_mc() gets the page */ 3881put: /* is_target_pte_for_mc() gets the page */
3776 put_page(page); 3882 put_page(page);
3777 break; 3883 break;
3884 case MC_TARGET_SWAP:
3885 ent = target.ent;
3886 if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to))
3887 mc.precharge--;
3888 break;
3778 default: 3889 default:
3779 break; 3890 break;
3780 } 3891 }
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3d535d594826..3dd88539a0e6 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -335,6 +335,37 @@ not_enough_page:
335} 335}
336 336
337/** 337/**
338 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
339 * @ent: swap entry to be cmpxchged
340 * @old: old id
341 * @new: new id
342 *
343 * Returns old id at success, 0 at failure.
344 * (There is no mem_cgroup useing 0 as its id)
345 */
346unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
347 unsigned short old, unsigned short new)
348{
349 int type = swp_type(ent);
350 unsigned long offset = swp_offset(ent);
351 unsigned long idx = offset / SC_PER_PAGE;
352 unsigned long pos = offset & SC_POS_MASK;
353 struct swap_cgroup_ctrl *ctrl;
354 struct page *mappage;
355 struct swap_cgroup *sc;
356
357 ctrl = &swap_cgroup_ctrl[type];
358
359 mappage = ctrl->map[idx];
360 sc = page_address(mappage);
361 sc += pos;
362 if (cmpxchg(&sc->id, old, new) == old)
363 return old;
364 else
365 return 0;
366}
367
368/**
338 * swap_cgroup_record - record mem_cgroup for this swp_entry. 369 * swap_cgroup_record - record mem_cgroup for this swp_entry.
339 * @ent: swap entry to be recorded into 370 * @ent: swap entry to be recorded into
340 * @mem: mem_cgroup to be recorded 371 * @mem: mem_cgroup to be recorded
@@ -358,8 +389,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
358 mappage = ctrl->map[idx]; 389 mappage = ctrl->map[idx];
359 sc = page_address(mappage); 390 sc = page_address(mappage);
360 sc += pos; 391 sc += pos;
361 old = sc->id; 392 old = xchg(&sc->id, id);
362 sc->id = id;
363 393
364 return old; 394 return old;
365} 395}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 84374d8cf814..6cd0a8f90dc7 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -723,6 +723,37 @@ int free_swap_and_cache(swp_entry_t entry)
723 return p != NULL; 723 return p != NULL;
724} 724}
725 725
726#ifdef CONFIG_CGROUP_MEM_RES_CTLR
727/**
728 * mem_cgroup_count_swap_user - count the user of a swap entry
729 * @ent: the swap entry to be checked
730 * @pagep: the pointer for the swap cache page of the entry to be stored
731 *
732 * Returns the number of users of the swap entry. The number is valid only
733 * for swaps of anonymous pages.
734 * If the entry is found in the swap cache, the page is stored to @pagep with
735 * its refcount incremented; otherwise NULL is stored to @pagep.
736 */
737int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
738{
739 struct page *page;
740 struct swap_info_struct *p;
741 int count = 0;
742
743 page = find_get_page(&swapper_space, ent.val);
744 if (page)
745 count += page_mapcount(page);
746 p = swap_info_get(ent);
747 if (p) {
748 count += swap_count(p->swap_map[swp_offset(ent)]);
749 spin_unlock(&swap_lock);
750 }
751
752 *pagep = page;
753 return count;
754}
755#endif
756
726#ifdef CONFIG_HIBERNATION 757#ifdef CONFIG_HIBERNATION
727/* 758/*
728 * Find the swap type that corresponds to given device (if any). 759 * Find the swap type that corresponds to given device (if any).