about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2008-03-04 17:29:06 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-04 19:35:14 -0500
commit: 98837c7f82ef78aa38f40462aa2fcac68fd3acbf (patch)
tree: 6d76841b18b57a9202d7699ba4dca5b39fdd7aea
parent: 61469f1d51777fc3b6d8d70da8373ee77ee13349 (diff)
memcg: fix VM_BUG_ON from page migration
Page migration gave me free_hot_cold_page's VM_BUG_ON page->page_cgroup.
remove_migration_pte was calling mem_cgroup_charge on the new page whenever it
found a swap pte, before it had determined it to be a migration entry. That
left a surplus reference count on the page_cgroup, so it was still attached
when the page was later freed.

Move that mem_cgroup_charge down to where we're sure it's a migration entry.
We were already under i_mmap_lock or anon_vma->lock, so its GFP_KERNEL was
already inappropriate: change that to GFP_ATOMIC.

It's essential that remove_migration_pte removes all the migration entries;
other crashes follow if it does not. So proceed even when the charge fails:
normally it cannot, but after a mem_cgroup_force_empty it might - see the
comment in the code.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/migrate.c | 19
1 files changed, 14 insertions, 5 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index a73504ff5ab9..4e0eccca5e26 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -153,11 +153,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
153 return; 153 return;
154 } 154 }
155 155
156 if (mem_cgroup_charge(new, mm, GFP_KERNEL)) {
157 pte_unmap(ptep);
158 return;
159 }
160
161 ptl = pte_lockptr(mm, pmd); 156 ptl = pte_lockptr(mm, pmd);
162 spin_lock(ptl); 157 spin_lock(ptl);
163 pte = *ptep; 158 pte = *ptep;
@@ -169,6 +164,20 @@ static void remove_migration_pte(struct vm_area_struct *vma,
169 if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) 164 if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
170 goto out; 165 goto out;
171 166
167 /*
168 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
169 * Failure is not an option here: we're now expected to remove every
170 * migration pte, and will cause crashes otherwise. Normally this
171 * is not an issue: mem_cgroup_prepare_migration bumped up the old
172 * page_cgroup count for safety, that's now attached to the new page,
173 * so this charge should just be another incrementation of the count,
174 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
175 * there's been a force_empty, those reference counts may no longer
176 * be reliable, and this charge can actually fail: oh well, we don't
177 * make the situation any worse by proceeding as if it had succeeded.
178 */
179 mem_cgroup_charge(new, mm, GFP_ATOMIC);
180
172 get_page(new); 181 get_page(new);
173 pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); 182 pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
174 if (is_write_migration_entry(entry)) 183 if (is_write_migration_entry(entry))