Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 63 ++++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 27 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9be3cf8a5da4..a9a534a38ac0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -269,13 +269,14 @@ enum move_type {
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
-	spinlock_t	  lock; /* for from, to, moving_task */
+	spinlock_t	  lock; /* for from, to */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
 	unsigned long moved_charge;
 	unsigned long moved_swap;
 	struct task_struct *moving_task;	/* a task moving charges */
+	struct mm_struct *mm;
 	wait_queue_head_t waitq;		/* a waitq for other context */
 } mc = {
 	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
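The struct change is the heart of the patch: the new mc.mm field remembers which mm had its mmap_sem taken for read in mem_cgroup_can_attach(), so that mem_cgroup_clear_mc() can release it later, and the lock comment shrinks because moving_task is no longer written under mc.lock. A minimal userspace sketch of this stash-the-held-lock pattern, using pthreads; mm_like, can_attach and clear_mc are illustrative stand-ins, not kernel API:

#include <pthread.h>
#include <stddef.h>

/* Illustrative stand-in for mm_struct: it just owns a reader/writer lock. */
struct mm_like {
	pthread_rwlock_t mmap_sem;
};

/* Analog of move_charge_struct: the attach callback records which
 * mm's lock it took so the clear callback can release it later. */
static struct {
	struct mm_like *mm;	/* non-NULL => mmap_sem held for read */
} mc;

static void can_attach(struct mm_like *mm)
{
	pthread_rwlock_rdlock(&mm->mmap_sem);
	mc.mm = mm;		/* remember the owner for clear_mc() */
}

static void clear_mc(void)
{
	if (mc.mm) {
		pthread_rwlock_unlock(&mc.mm->mmap_sem);
		mc.mm = NULL;
	}
}

int main(void)
{
	struct mm_like m;

	pthread_rwlock_init(&m.mmap_sem, NULL);
	can_attach(&m);
	clear_mc();		/* balanced: lock released exactly once */
	return 0;
}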
@@ -1646,6 +1647,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		if (likely(!ret))
 			return CHARGE_OK;
 
+		res_counter_uncharge(&mem->res, csize);
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
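The single added line fixes a counter imbalance: the charge against mem->res has already succeeded by the time the memsw (memory+swap) charge fails, so the res charge must be rolled back before retrying reclaim, or mem->res drifts upward on every failed attempt. A hedged sketch of the undo-step-one-if-step-two-fails pairing, with simplified counters standing in for the res_counter API:

#include <assert.h>
#include <stdbool.h>

struct counter { long usage, limit; };

/* Try to add csize; fail without side effects if over the limit. */
static bool counter_charge(struct counter *c, long csize)
{
	if (c->usage + csize > c->limit)
		return false;
	c->usage += csize;
	return true;
}

static void counter_uncharge(struct counter *c, long csize)
{
	c->usage -= csize;
}

/* Charge two paired counters; on partial failure, roll back the
 * first so usage stays balanced (the leak the patch fixes). */
static bool charge_both(struct counter *res, struct counter *memsw, long csize)
{
	if (!counter_charge(res, csize))
		return false;
	if (!counter_charge(memsw, csize)) {
		counter_uncharge(res, csize);	/* the added uncharge */
		return false;
	}
	return true;
}

int main(void)
{
	struct counter res = { 0, 100 }, memsw = { 0, 0 };

	assert(!charge_both(&res, &memsw, 10));
	assert(res.usage == 0);	/* rolled back, nothing leaked */
	return 0;
}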
@@ -1729,19 +1731,18 @@ again:
 
 	rcu_read_lock();
 	p = rcu_dereference(mm->owner);
-	VM_BUG_ON(!p);
 	/*
-	 * because we don't have task_lock(), "p" can exit while
-	 * we're here. In that case, "mem" can point to root
-	 * cgroup but never be NULL. (and task_struct itself is freed
-	 * by RCU, cgroup itself is RCU safe.) Then, we have small
-	 * risk here to get wrong cgroup. But such kind of mis-account
-	 * by race always happens because we don't have cgroup_mutex().
-	 * It's overkill and we allow that small race, here.
+	 * Because we don't have task_lock(), "p" can exit.
+	 * In that case, "mem" can point to root, or "p" can be NULL due
+	 * to a race with swapoff (*). Then, we have a small risk of
+	 * mis-accounting. But such mis-accounting by race always happens
+	 * because we don't have cgroup_mutex(); taking it would be
+	 * overkill, so we allow that small race here.
+	 * (*) swapoff et al. charge against the mm_struct, not against
+	 *     the task_struct, so mm->owner can be NULL.
 	 */
 	mem = mem_cgroup_from_task(p);
-	VM_BUG_ON(!mem);
-	if (mem_cgroup_is_root(mem)) {
+	if (!mem || mem_cgroup_is_root(mem)) {
 		rcu_read_unlock();
 		goto done;
 	}
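Dropping the two VM_BUG_ON()s and folding the NULL test into the root test makes the lookup tolerant of mm->owner == NULL, which the new comment attributes to swapoff charging against an mm rather than a task. A toy illustration of the combined bail-out; every type and helper here is invented for the sketch, not the memcg API:

#include <stddef.h>
#include <stdio.h>

struct memcg { int is_root; };
struct task { struct memcg *cg; };

/* Illustrative lookup: tolerates a NULL task (e.g. an mm whose
 * owner already exited), returning NULL instead of crashing. */
static struct memcg *memcg_from_task(struct task *p)
{
	return p ? p->cg : NULL;
}

int main(void)
{
	struct memcg *mem = memcg_from_task(NULL);

	/* One combined check covers both "no owner" and "root". */
	if (!mem || mem->is_root)
		puts("done: nothing to account");
	return 0;
}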
@@ -4445,7 +4446,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 	unsigned long precharge;
 	struct vm_area_struct *vma;
 
-	down_read(&mm->mmap_sem);
+	/* We already hold the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		struct mm_walk mem_cgroup_count_precharge_walk = {
 			.pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4457,7 +4458,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 		walk_page_range(vma->vm_start, vma->vm_end,
 					&mem_cgroup_count_precharge_walk);
 	}
-	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
 	mc.precharge = 0;
@@ -4508,11 +4508,16 @@ static void mem_cgroup_clear_mc(void)
 
 		mc.moved_swap = 0;
 	}
+	if (mc.mm) {
+		up_read(&mc.mm->mmap_sem);
+		mmput(mc.mm);
+	}
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
-	mc.moving_task = NULL;
 	spin_unlock(&mc.lock);
+	mc.moving_task = NULL;
+	mc.mm = NULL;
 	memcg_oom_recover(from);
 	memcg_oom_recover(to);
 	wake_up_all(&mc.waitq);
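Note the reordering in the teardown: mc.from and mc.to stay inside the mc.lock critical section because other tasks test them under that spinlock, while moving_task and mm move outside it, presumably because only the mover itself writes them once cgroup_mutex serializes attaches. A compressed pthread sketch of that split, with illustrative names:

#include <pthread.h>
#include <stddef.h>

struct memcg;

static pthread_mutex_t mc_lock = PTHREAD_MUTEX_INITIALIZER;
static struct memcg *mc_from, *mc_to;	/* read by other tasks under mc_lock */
static void *mc_moving_task;		/* written only by the mover */

static void clear_mc(void)
{
	pthread_mutex_lock(&mc_lock);
	mc_from = NULL;
	mc_to = NULL;
	pthread_mutex_unlock(&mc_lock);
	/* only the mover writes this field, so no lock is needed */
	mc_moving_task = NULL;
}

int main(void)
{
	clear_mc();
	return 0;
}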
@@ -4537,26 +4542,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 			return 0;
 		/* We move charges only when we move an owner of the mm */
 		if (mm->owner == p) {
+			/*
+			 * We do all the move charge work under one mmap_sem to
+			 * avoid deadlock with down_write(&mmap_sem)
+			 * -> try_charge() -> if (mc.moving_task) -> sleep.
+			 */
+			down_read(&mm->mmap_sem);
+
 			VM_BUG_ON(mc.from);
 			VM_BUG_ON(mc.to);
 			VM_BUG_ON(mc.precharge);
 			VM_BUG_ON(mc.moved_charge);
 			VM_BUG_ON(mc.moved_swap);
 			VM_BUG_ON(mc.moving_task);
+			VM_BUG_ON(mc.mm);
+
 			spin_lock(&mc.lock);
 			mc.from = from;
 			mc.to = mem;
 			mc.precharge = 0;
 			mc.moved_charge = 0;
 			mc.moved_swap = 0;
-			mc.moving_task = current;
 			spin_unlock(&mc.lock);
+			mc.moving_task = current;
+			mc.mm = mm;
 
 			ret = mem_cgroup_precharge_mc(mm);
 			if (ret)
 				mem_cgroup_clear_mc();
-		}
-		mmput(mm);
+			/* We call up_read() and mmput() in clear_mc(). */
+		} else
+			mmput(mm);
 	}
 	return ret;
 }
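The new comment compresses the deadlock this patch prevents: a task holding mmap_sem for write can enter the charge path and sleep waiting for mc.moving_task to clear, while the mover, if it tried to take down_read(&mmap_sem) afterwards, would block behind that writer forever. Taking one down_read() up front and holding it for the entire move means the mover never re-acquires mmap_sem while others wait on it. A condensed userspace rendering of the fixed ordering, with pthreads and an atomic flag standing in for mc.moving_task and the waitqueue:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
static atomic_bool moving;	/* analog of mc.moving_task != NULL */

/*
 * Fixed ordering: take the read lock ONCE, before the move becomes
 * visible, and hold it for the whole move.  If the mover instead
 * dropped and retook mmap_sem between the precharge and move phases,
 * a writer could slip in, call the charge path, sleep on "moving",
 * and block the mover's reacquisition: a cycle with no exit.
 */
static void mover(void)
{
	pthread_rwlock_rdlock(&mmap_sem);
	atomic_store(&moving, true);
	/* ... precharge and walk every VMA under this one read hold ... */
	atomic_store(&moving, false);
	pthread_rwlock_unlock(&mmap_sem);
}

int main(void)
{
	mover();
	return 0;
}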
@@ -4644,7 +4660,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 
 	lru_add_drain_all();
-	down_read(&mm->mmap_sem);
+	/* We already hold the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		int ret;
 		struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4663,7 +4679,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 			 */
 			break;
 	}
-	up_read(&mm->mmap_sem);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4672,17 +4687,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct task_struct *p,
 				bool threadgroup)
 {
-	struct mm_struct *mm;
-
-	if (!mc.to)
+	if (!mc.mm)
 		/* no need to move charge */
 		return;
 
-	mm = get_task_mm(p);
-	if (mm) {
-		mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
+	mem_cgroup_move_charge(mc.mm);
 	mem_cgroup_clear_mc();
 }
 #else	/* !CONFIG_MMU */
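With the mm pinned and its mmap_sem held since can_attach(), mem_cgroup_move_task() no longer needs get_task_mm(): it consumes mc.mm directly and leaves both the unlock and the mmput() to mem_cgroup_clear_mc(). A sketch of that acquire-early, release-late lifecycle; the refcount and helpers are invented for illustration:

#include <assert.h>
#include <stddef.h>

struct mm_like { int refs; };

static struct mm_like *mc_mm;	/* pinned by the attach step */

static void attach(struct mm_like *mm)
{
	mm->refs++;		/* analog of the get_task_mm() reference */
	mc_mm = mm;
}

/* Analog of the reworked mem_cgroup_move_task(): use the mm pinned
 * at attach time instead of looking it up again from the task. */
static void move_task(void)
{
	if (!mc_mm)
		return;		/* no need to move charge */
	/* ... move charges against mc_mm here ... */
}

static void clear_mc(void)
{
	if (mc_mm) {
		mc_mm->refs--;	/* analog of mmput() */
		mc_mm = NULL;
	}
}

int main(void)
{
	struct mm_like m = { 1 };

	attach(&m);
	move_task();
	clear_mc();
	assert(m.refs == 1);	/* reference balanced */
	return 0;
}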