diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-03-04 17:29:06 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-03-04 19:35:14 -0500 |
commit | 98837c7f82ef78aa38f40462aa2fcac68fd3acbf (patch) | |
tree | 6d76841b18b57a9202d7699ba4dca5b39fdd7aea | |
parent | 61469f1d51777fc3b6d8d70da8373ee77ee13349 (diff) |
memcg: fix VM_BUG_ON from page migration
Page migration gave me free_hot_cold_page's VM_BUG_ON page->page_cgroup.
remove_migration_pte was calling mem_cgroup_charge on the new page whenever it
found a swap pte, before it had determined it to be a migration entry. That
left a surplus reference count on the page_cgroup, so it was still attached
when the page was later freed.
Move that mem_cgroup_charge down to where we're sure it's a migration entry.
We were already under i_mmap_lock or anon_vma->lock, so its GFP_KERNEL was
already inappropriate: change that to GFP_ATOMIC.
It's essential that remove_migration_pte removes all the migration entries:
other crashes follow if not. So proceed even when the charge fails: normally
it cannot, but after a mem_cgroup_force_empty it might - see the comment in the code.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/migrate.c | 19 |
1 files changed, 14 insertions, 5 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index a73504ff5ab9..4e0eccca5e26 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -153,11 +153,6 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
153 | return; | 153 | return; |
154 | } | 154 | } |
155 | 155 | ||
156 | if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { | ||
157 | pte_unmap(ptep); | ||
158 | return; | ||
159 | } | ||
160 | |||
161 | ptl = pte_lockptr(mm, pmd); | 156 | ptl = pte_lockptr(mm, pmd); |
162 | spin_lock(ptl); | 157 | spin_lock(ptl); |
163 | pte = *ptep; | 158 | pte = *ptep; |
@@ -169,6 +164,20 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
169 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) | 164 | if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old) |
170 | goto out; | 165 | goto out; |
171 | 166 | ||
167 | /* | ||
168 | * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge. | ||
169 | * Failure is not an option here: we're now expected to remove every | ||
170 | * migration pte, and will cause crashes otherwise. Normally this | ||
171 | * is not an issue: mem_cgroup_prepare_migration bumped up the old | ||
172 | * page_cgroup count for safety, that's now attached to the new page, | ||
173 | * so this charge should just be another incrementation of the count, | ||
174 | * to keep in balance with rmap.c's mem_cgroup_uncharging. But if | ||
175 | * there's been a force_empty, those reference counts may no longer | ||
176 | * be reliable, and this charge can actually fail: oh well, we don't | ||
177 | * make the situation any worse by proceeding as if it had succeeded. | ||
178 | */ | ||
179 | mem_cgroup_charge(new, mm, GFP_ATOMIC); | ||
180 | |||
172 | get_page(new); | 181 | get_page(new); |
173 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 182 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
174 | if (is_write_migration_entry(entry)) | 183 | if (is_write_migration_entry(entry)) |