Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 294 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 284 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 59ffaf511d77..22f088f22102 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -21,6 +21,7 @@
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/hugetlb.h>
 #include <linux/pagemap.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
@@ -243,9 +244,17 @@ struct mem_cgroup {
  * left-shifted bitmap of these types.
  */
 enum move_type {
+	MOVE_CHARGE_TYPE_ANON,	/* private anonymous page and swap of it */
 	NR_MOVE_TYPE,
 };
 
+/* "mc" and its members are protected by cgroup_mutex */
+static struct move_charge_struct {
+	struct mem_cgroup *from;
+	struct mem_cgroup *to;
+	unsigned long precharge;
+} mc;
+
 /*
  * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
  * limit reclaim to prevent infinite loops, if they ever occur.
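Since "mc" is a single global protected by cgroup_mutex, at most one charge-moving migration can be in flight at a time. MOVE_CHARGE_TYPE_ANON occupies bit 0 of the per-cgroup move_charge_at_immigrate bitmap, so enabling the feature from userspace amounts to writing 1 to the destination group's memory.move_charge_at_immigrate file (see the usage sketch at the end of this page).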
@@ -1513,7 +1522,7 @@ charged:
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
 	 * if they exceeds softlimit.
 	 */
-	if (mem_cgroup_soft_limit_check(mem))
+	if (page && mem_cgroup_soft_limit_check(mem))
 		mem_cgroup_update_tree(mem, page);
 done:
 	return 0;
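The new "page && " guard is needed because mem_cgroup_do_precharge(), added below, calls __mem_cgroup_try_charge() with a NULL page: on that path there is no page to insert into the soft-limit tree.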
@@ -1690,8 +1699,9 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
-	 * this function is just force_empty() and it's garanteed that
-	 * "to" is never removed. So, we don't check rmdir status here.
+	 * this function is just force_empty() and move charge, so it's
+	 * garanteed that "to" is never removed. So, we don't check rmdir
+	 * status here.
 	 */
 }
 
@@ -3428,11 +3438,171 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
 }
 
 /* Handlers for move charge at task migration. */
-static int mem_cgroup_can_move_charge(void)
+static int mem_cgroup_do_precharge(void)
 {
+	int ret = -ENOMEM;
+	struct mem_cgroup *mem = mc.to;
+
+	ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, NULL);
+	if (ret || !mem)
+		return -ENOMEM;
+
+	mc.precharge++;
+	return ret;
+}
+
+/**
+ * is_target_pte_for_mc - check a pte whether it is valid for move charge
+ * @vma: the vma the pte to be checked belongs
+ * @addr: the address corresponding to the pte to be checked
+ * @ptent: the pte to be checked
+ * @target: the pointer the target page will be stored(can be NULL)
+ *
+ * Returns
+ *   0(MC_TARGET_NONE): if the pte is not a target for move charge.
+ *   1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
+ *     move charge. if @target is not NULL, the page is stored in target->page
+ *     with extra refcnt got(Callers should handle it).
+ *
+ * Called with pte lock held.
+ */
+/* We add a new member later. */
+union mc_target {
+	struct page	*page;
+};
+
+/* We add a new type later. */
+enum mc_target_type {
+	MC_TARGET_NONE,	/* not used */
+	MC_TARGET_PAGE,
+};
+
+static int is_target_pte_for_mc(struct vm_area_struct *vma,
+		unsigned long addr, pte_t ptent, union mc_target *target)
+{
+	struct page *page;
+	struct page_cgroup *pc;
+	int ret = 0;
+	bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
+					&mc.to->move_charge_at_immigrate);
+
+	if (!pte_present(ptent))
+		return 0;
+
+	page = vm_normal_page(vma, addr, ptent);
+	if (!page || !page_mapped(page))
+		return 0;
+	/*
+	 * TODO: We don't move charges of file(including shmem/tmpfs) pages for
+	 * now.
+	 */
+	if (!move_anon || !PageAnon(page))
+		return 0;
+	/*
+	 * TODO: We don't move charges of shared(used by multiple processes)
+	 * pages for now.
+	 */
+	if (page_mapcount(page) > 1)
+		return 0;
+	if (!get_page_unless_zero(page))
+		return 0;
+
+	pc = lookup_page_cgroup(page);
+	/*
+	 * Do only loose check w/o page_cgroup lock. mem_cgroup_move_account()
+	 * checks the pc is valid or not under the lock.
+	 */
+	if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+		ret = MC_TARGET_PAGE;
+		if (target)
+			target->page = page;
+	}
+
+	if (!ret || !target)
+		put_page(page);
+
+	return ret;
+}
+
+static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
+					unsigned long addr, unsigned long end,
+					struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->private;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		if (is_target_pte_for_mc(vma, addr, *pte, NULL))
+			mc.precharge++;	/* increment precharge temporarily */
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+
 	return 0;
 }
 
+static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
+{
+	unsigned long precharge;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		struct mm_walk mem_cgroup_count_precharge_walk = {
+			.pmd_entry = mem_cgroup_count_precharge_pte_range,
+			.mm = mm,
+			.private = vma,
+		};
+		if (is_vm_hugetlb_page(vma))
+			continue;
+		/* TODO: We don't move charges of shmem/tmpfs pages for now. */
+		if (vma->vm_flags & VM_SHARED)
+			continue;
+		walk_page_range(vma->vm_start, vma->vm_end,
+					&mem_cgroup_count_precharge_walk);
+	}
+	up_read(&mm->mmap_sem);
+
+	precharge = mc.precharge;
+	mc.precharge = 0;
+
+	return precharge;
+}
+
+#define PRECHARGE_AT_ONCE	256
+static int mem_cgroup_precharge_mc(struct mm_struct *mm)
+{
+	int ret = 0;
+	int count = PRECHARGE_AT_ONCE;
+	unsigned long precharge = mem_cgroup_count_precharge(mm);
+
+	while (!ret && precharge--) {
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+		if (!count--) {
+			count = PRECHARGE_AT_ONCE;
+			cond_resched();
+		}
+		ret = mem_cgroup_do_precharge();
+	}
+
+	return ret;
+}
+
+static void mem_cgroup_clear_mc(void)
+{
+	/* we must uncharge all the leftover precharges from mc.to */
+	while (mc.precharge) {
+		mem_cgroup_cancel_charge(mc.to);
+		mc.precharge--;
+	}
+	mc.from = NULL;
+	mc.to = NULL;
+}
+
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 				struct cgroup *cgroup,
 				struct task_struct *p,
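The precharge is built in two passes: mem_cgroup_count_precharge() walks the mm read-only and counts candidate ptes (hugetlb and shared vmas are skipped), then mem_cgroup_precharge_mc() charges mc.to once per candidate, rescheduling every PRECHARGE_AT_ONCE (256) charges and backing out with -EINTR if a signal is pending.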
@@ -3450,11 +3620,19 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 		mm = get_task_mm(p);
 		if (!mm)
 			return 0;
-
 		/* We move charges only when we move a owner of the mm */
-		if (mm->owner == p)
-			ret = mem_cgroup_can_move_charge();
-
+		if (mm->owner == p) {
+			VM_BUG_ON(mc.from);
+			VM_BUG_ON(mc.to);
+			VM_BUG_ON(mc.precharge);
+			mc.from = from;
+			mc.to = mem;
+			mc.precharge = 0;
+
+			ret = mem_cgroup_precharge_mc(mm);
+			if (ret)
+				mem_cgroup_clear_mc();
+		}
 		mmput(mm);
 	}
 	return ret;
@@ -3465,10 +3643,95 @@ static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
 				struct task_struct *p,
 				bool threadgroup)
 {
+	mem_cgroup_clear_mc();
 }
 
-static void mem_cgroup_move_charge(void)
+static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				struct mm_walk *walk)
 {
+	int ret = 0;
+	struct vm_area_struct *vma = walk->private;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+retry:
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; addr += PAGE_SIZE) {
+		pte_t ptent = *(pte++);
+		union mc_target target;
+		int type;
+		struct page *page;
+		struct page_cgroup *pc;
+
+		if (!mc.precharge)
+			break;
+
+		type = is_target_pte_for_mc(vma, addr, ptent, &target);
+		switch (type) {
+		case MC_TARGET_PAGE:
+			page = target.page;
+			if (isolate_lru_page(page))
+				goto put;
+			pc = lookup_page_cgroup(page);
+			if (!mem_cgroup_move_account(pc, mc.from, mc.to)) {
+				css_put(&mc.to->css);
+				mc.precharge--;
+			}
+			putback_lru_page(page);
+put:			/* is_target_pte_for_mc() gets the page */
+			put_page(page);
+			break;
+		default:
+			break;
+		}
+	}
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+
+	if (addr != end) {
+		/*
+		 * We have consumed all precharges we got in can_attach().
+		 * We try charge one by one, but don't do any additional
+		 * charges to mc.to if we have failed in charge once in attach()
+		 * phase.
+		 */
+		ret = mem_cgroup_do_precharge();
+		if (!ret)
+			goto retry;
+	}
+
+	return ret;
+}
+
+static void mem_cgroup_move_charge(struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+
+	lru_add_drain_all();
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		int ret;
+		struct mm_walk mem_cgroup_move_charge_walk = {
+			.pmd_entry = mem_cgroup_move_charge_pte_range,
+			.mm = mm,
+			.private = vma,
+		};
+		if (is_vm_hugetlb_page(vma))
+			continue;
+		/* TODO: We don't move charges of shmem/tmpfs pages for now. */
+		if (vma->vm_flags & VM_SHARED)
+			continue;
+		ret = walk_page_range(vma->vm_start, vma->vm_end,
+						&mem_cgroup_move_charge_walk);
+		if (ret)
+			/*
+			 * means we have consumed all precharges and failed in
+			 * doing additional charge. Just abandon here.
+			 */
+			break;
+	}
+	up_read(&mm->mmap_sem);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
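The bookkeeping above is easy to lose in the walk code, so here is a small userspace model of it (a sketch only: struct mem_cgroup and the usage arithmetic are stand-ins, not kernel code). The invariant is that every charge taken against mc.to in can_attach() is either consumed by a moved page or cancelled by mem_cgroup_clear_mc():

#include <assert.h>
#include <stdio.h>

/* Stand-in for the kernel's res_counter-backed mem_cgroup. */
struct mem_cgroup { long usage; };

/* Mirrors the global move_charge_struct "mc" from the patch. */
static struct {
	struct mem_cgroup *from, *to;
	unsigned long precharge;
} mc;

/* Models mem_cgroup_do_precharge(): charge "to" and remember it. */
static void do_precharge(void)
{
	mc.to->usage++;			/* __mem_cgroup_try_charge(NULL, ...) */
	mc.precharge++;
}

/* Models the MC_TARGET_PAGE case in mem_cgroup_move_charge_pte_range(). */
static void move_one_page(void)
{
	assert(mc.precharge > 0);	/* the walk stops when precharge hits 0 */
	mc.from->usage--;		/* mem_cgroup_move_account(): from -> to */
	mc.precharge--;			/* ...consuming one precharge for "to" */
}

/* Models mem_cgroup_clear_mc(): return every unused precharge. */
static void clear_mc(void)
{
	while (mc.precharge) {
		mc.to->usage--;		/* mem_cgroup_cancel_charge(mc.to) */
		mc.precharge--;
	}
	mc.from = mc.to = NULL;
}

int main(void)
{
	struct mem_cgroup from = { .usage = 8 }, to = { .usage = 0 };
	int i;

	mc.from = &from;
	mc.to = &to;

	for (i = 0; i < 8; i++)		/* can_attach(): 8 candidate ptes */
		do_precharge();
	for (i = 0; i < 5; i++)		/* move_task(): only 5 still qualify */
		move_one_page();
	clear_mc();			/* 3 leftovers are uncharged */

	printf("from=%ld to=%ld (expect 3 and 5)\n", from.usage, to.usage);
	return 0;
}

Precharging up front is what lets the move path decrement a counter under the pte lock instead of calling into reclaim there; only when the precharges run out does the retry path above fall back to charging one page at a time.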
@@ -3477,7 +3740,18 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct task_struct *p,
 				bool threadgroup)
 {
-	mem_cgroup_move_charge();
+	struct mm_struct *mm;
+
+	if (!mc.to)
+		/* no need to move charge */
+		return;
+
+	mm = get_task_mm(p);
+	if (mm) {
+		mem_cgroup_move_charge(mm);
+		mmput(mm);
+	}
+	mem_cgroup_clear_mc();
 }
 
 struct cgroup_subsys mem_cgroup_subsys = {
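With that in place, the feature is driven entirely from cgroupfs. A minimal userspace sketch follows, assuming the v1 memory controller is mounted at /cgroup/memory and a destination group "B" already exists (both are assumptions of this example, not part of the patch):

/* Sketch: enable anon move charge on a destination memcg and migrate a
 * task into it. Adjust the paths for your own cgroup setup. */
#include <stdio.h>
#include <stdlib.h>

static void write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF || fclose(f) == EOF) {
		perror(path);
		exit(1);
	}
}

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	/* Bit 0 = MOVE_CHARGE_TYPE_ANON: private anonymous pages (and,
	 * per the enum comment above, their swap) follow the task. */
	write_file("/cgroup/memory/B/memory.move_charge_at_immigrate", "1");
	/* Attaching the mm owner triggers can_attach() -> precharge,
	 * then move_task() -> mem_cgroup_move_charge(). */
	write_file("/cgroup/memory/B/tasks", argv[1]);
	return 0;
}

Note that the pid written must be the mm owner: per the can_attach() hunk above, charges are moved only when the task being attached owns the mm.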