author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2008-02-07 03:14:08 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:19 -0500
commit		9175e0311ec9e6d1bf1f6dfecf9268baf08765e6 (patch)
tree		36ca9f47f8bc18345980270dc9d355f7c107cf6e
parent		f1a9ee758de7de1e040de849fdef46e6802ea117 (diff)
bugfix for memory controller: add helper function for assigning cgroup to page
This patch adds the following helper functions, mainly for cleanup:
- clear_page_cgroup(page, pc)
- page_cgroup_assign_new_page_cgroup(page, pc)
The "check page->cgroup again after lock_page_cgroup()" manner is
implemented in a straightforward way by these helpers.
A comment in mem_cgroup_uncharge() will be removed by the force-empty patch.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
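The "check page->cgroup again after lock_page_cgroup()" manner described in the log is the usual lock-then-revalidate idiom: the charge path allocates and charges optimistically outside the lock, then the helper takes the per-page lock, re-checks whether another task already attached a page_cgroup, and reports failure so the caller can back out and retry; the uncharge path clears the pointer only if it still matches the pc being freed. What follows is a minimal userspace sketch of that contract only, not the kernel code itself: the struct layout, the pthread mutex standing in for the per-page bit spinlock, and the try_assign()/try_clear() names are illustrative assumptions.

/* Minimal userspace model of the assign/clear helpers' contract.
 * A pthread mutex stands in for the per-page bit spinlock; names
 * and types here are illustrative, not the kernel's. */
#include <stdio.h>
#include <pthread.h>

struct page_like {
	pthread_mutex_t lock;	/* stands in for lock_page_cgroup() */
	void *cgroup;		/* stands in for page->page_cgroup */
};

/* Attach pc only if nothing is attached yet; 0 on success, 1 if we lost the race. */
static int try_assign(struct page_like *page, void *pc)
{
	int ret = 0;

	pthread_mutex_lock(&page->lock);
	if (!page->cgroup)
		page->cgroup = pc;
	else			/* someone else attached their pc first */
		ret = 1;
	pthread_mutex_unlock(&page->lock);
	return ret;
}

/* Clear the association only if it still points at pc; return the old value
 * so the caller can detect a lost race by checking try_clear(page, pc) == pc. */
static void *try_clear(struct page_like *page, void *pc)
{
	void *old;

	pthread_mutex_lock(&page->lock);
	old = page->cgroup;
	if (old == pc)
		page->cgroup = NULL;
	pthread_mutex_unlock(&page->lock);
	return old;
}

int main(void)
{
	struct page_like page = { PTHREAD_MUTEX_INITIALIZER, NULL };
	int pc;			/* any distinct address serves as a stand-in page_cgroup */

	if (try_assign(&page, &pc) == 0)
		printf("attached\n");
	if (try_assign(&page, &pc) == 1)
		printf("second attach refused; a real caller would back out and retry\n");
	if (try_clear(&page, &pc) == &pc)
		printf("cleared with no race\n");
	return 0;
}

Built with cc -pthread, the sketch prints all three messages; the point is only that both helpers make their decision after taking the lock, so a caller that lost the race learns it from the return value and can retry the charge or skip the teardown, which is how the hunks below use page_cgroup_assign_new_page_cgroup() and clear_page_cgroup().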
-rw-r--r-- | mm/memcontrol.c | 105
1 file changed, 76 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fadd4896a14..3270ce7375db 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,48 @@ static void __always_inline unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
+/*
+ * Tie a new page_cgroup to the struct page under lock_page_cgroup().
+ * This can fail if the page is already tied to a page_cgroup.
+ * Returns 0 on success.
+ */
+static inline int
+page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	int ret = 0;
+
+	lock_page_cgroup(page);
+	if (!page_get_page_cgroup(page))
+		page_assign_page_cgroup(page, pc);
+	else /* The page is already tied to another pc. */
+		ret = 1;
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+/*
+ * Clear the page->page_cgroup member under lock_page_cgroup().
+ * If the given "pc" value differs from the current page->page_cgroup,
+ * page->page_cgroup is not cleared.
+ * Returns the value of page->page_cgroup at the time the lock was taken;
+ * a caller can detect a failed clear by checking
+ *  clear_page_cgroup(page, pc) == pc
+ */
+
+static inline struct page_cgroup *
+clear_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	struct page_cgroup *ret;
+	/* lock and clear */
+	lock_page_cgroup(page);
+	ret = page_get_page_cgroup(page);
+	if (likely(ret == pc))
+		page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	if (active)
@@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc, *race_pc;
+	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 
@@ -293,8 +335,10 @@ retry:
 			unlock_page_cgroup(page);
 			cpu_relax();
 			goto retry;
-		} else
+		} else {
+			unlock_page_cgroup(page);
 			goto done;
+		}
 	}
 
 	unlock_page_cgroup(page);
@@ -364,31 +408,26 @@ noreclaim:
 		goto free_pc;
 	}
 
-	lock_page_cgroup(page);
-	/*
-	 * Check if somebody else beat us to allocating the page_cgroup
-	 */
-	race_pc = page_get_page_cgroup(page);
-	if (race_pc) {
-		kfree(pc);
-		pc = race_pc;
-		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
-		goto done;
-	}
-
 	atomic_set(&pc->ref_cnt, 1);
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	page_assign_page_cgroup(page, pc);
+	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+		/*
+		 * Another charge was added to this page already.
+		 * Retrying takes lock_page_cgroup(page) again, reads
+		 * page->cgroup and increments the refcnt, so just retry.
+		 */
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		css_put(&mem->css);
+		kfree(pc);
+		goto retry;
+	}
 
 	spin_lock_irqsave(&mem->lru_lock, flags);
 	list_add(&pc->lru, &mem->active_list);
 	spin_unlock_irqrestore(&mem->lru_lock, flags);
 
 done:
-	unlock_page_cgroup(page);
 	return 0;
 free_pc:
 	kfree(pc);
@@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
-		lock_page_cgroup(page);
-		mem = pc->mem_cgroup;
-		css_put(&mem->css);
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-
-		spin_lock_irqsave(&mem->lru_lock, flags);
-		list_del_init(&pc->lru);
-		spin_unlock_irqrestore(&mem->lru_lock, flags);
-		kfree(pc);
+		/*
+		 * get page->cgroup and clear it under lock.
+		 */
+		if (clear_page_cgroup(page, pc) == pc) {
+			mem = pc->mem_cgroup;
+			css_put(&mem->css);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
+			spin_lock_irqsave(&mem->lru_lock, flags);
+			list_del_init(&pc->lru);
+			spin_unlock_irqrestore(&mem->lru_lock, flags);
+			kfree(pc);
+		} else {
+			/*
+			 * Note: This will be removed when the force-empty
+			 * patch is applied; just show a warning here.
+			 */
+			printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
+			dump_stack();
+		}
 	}
 }
 