 mm/memcontrol.c | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 62d1880f6992..26218df8d19d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -278,13 +278,14 @@ enum move_type {
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
-	spinlock_t lock; /* for from, to, moving_task */
+	spinlock_t lock; /* for from, to */
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long precharge;
 	unsigned long moved_charge;
 	unsigned long moved_swap;
 	struct task_struct *moving_task;	/* a task moving charges */
+	struct mm_struct *mm;
 	wait_queue_head_t waitq;		/* a waitq for other context */
 } mc = {
 	.lock = __SPIN_LOCK_UNLOCKED(mc.lock),
@@ -4631,7 +4632,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 	unsigned long precharge;
 	struct vm_area_struct *vma;
 
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		struct mm_walk mem_cgroup_count_precharge_walk = {
 			.pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4643,7 +4644,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 		walk_page_range(vma->vm_start, vma->vm_end,
 					&mem_cgroup_count_precharge_walk);
 	}
-	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
 	mc.precharge = 0;
@@ -4694,11 +4694,16 @@ static void mem_cgroup_clear_mc(void)
 
 		mc.moved_swap = 0;
 	}
+	if (mc.mm) {
+		up_read(&mc.mm->mmap_sem);
+		mmput(mc.mm);
+	}
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
-	mc.moving_task = NULL;
 	spin_unlock(&mc.lock);
+	mc.moving_task = NULL;
+	mc.mm = NULL;
 	mem_cgroup_end_move(from);
 	memcg_oom_recover(from);
 	memcg_oom_recover(to);
@@ -4724,12 +4729,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 			return 0;
 		/* We move charges only when we move a owner of the mm */
 		if (mm->owner == p) {
+			/*
+			 * We do all the move charge works under one mmap_sem to
+			 * avoid deadlock with down_write(&mmap_sem)
+			 * -> try_charge() -> if (mc.moving_task) -> sleep.
+			 */
+			down_read(&mm->mmap_sem);
+
 			VM_BUG_ON(mc.from);
 			VM_BUG_ON(mc.to);
 			VM_BUG_ON(mc.precharge);
 			VM_BUG_ON(mc.moved_charge);
 			VM_BUG_ON(mc.moved_swap);
 			VM_BUG_ON(mc.moving_task);
+			VM_BUG_ON(mc.mm);
+
 			mem_cgroup_start_move(from);
 			spin_lock(&mc.lock);
 			mc.from = from;
@@ -4737,14 +4751,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
 			mc.precharge = 0;
 			mc.moved_charge = 0;
 			mc.moved_swap = 0;
-			mc.moving_task = current;
 			spin_unlock(&mc.lock);
+			mc.moving_task = current;
+			mc.mm = mm;
 
 			ret = mem_cgroup_precharge_mc(mm);
 			if (ret)
 				mem_cgroup_clear_mc();
-		}
-		mmput(mm);
+			/* We call up_read() and mmput() in clear_mc(). */
+		} else
+			mmput(mm);
 	}
 	return ret;
 }
@@ -4832,7 +4848,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 
 	lru_add_drain_all();
-	down_read(&mm->mmap_sem);
+	/* We've already held the mmap_sem */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		int ret;
 		struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4851,7 +4867,6 @@
 		 */
 		break;
 	}
-	up_read(&mm->mmap_sem);
 }
 
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4860,17 +4875,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 				struct task_struct *p,
 				bool threadgroup)
 {
-	struct mm_struct *mm;
-
-	if (!mc.to)
+	if (!mc.mm)
 		/* no need to move charge */
 		return;
 
-	mm = get_task_mm(p);
-	if (mm) {
-		mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
+	mem_cgroup_move_charge(mc.mm);
 	mem_cgroup_clear_mc();
 }
 #else /* !CONFIG_MMU */
