author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2008-02-07 03:14:08 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:19 -0500
commit	9175e0311ec9e6d1bf1f6dfecf9268baf08765e6
tree	36ca9f47f8bc18345980270dc9d355f7c107cf6e
parent	f1a9ee758de7de1e040de849fdef46e6802ea117
bugfix for memory controller: add helper function for assigning cgroup to page
This patch adds the following helper functions, mainly as a cleanup:

  - clear_page_cgroup(page, pc)
  - page_cgroup_assign_new_page_cgroup(page, pc)

The "check page->cgroup again after lock_page_cgroup()" pattern is
implemented in a straightforward way.

The comment in mem_cgroup_uncharge() will be removed by the force-empty
patch.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 mm/memcontrol.c | 105 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 76 insertions(+), 29 deletions(-)
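As a side note before the patch itself: the lock/re-check idiom that both new helpers encapsulate can be modelled outside the kernel. Below is a minimal userspace sketch, with a pthread mutex standing in for the per-page bit spinlock; all struct and function names here are illustrative stand-ins, not the kernel's types.

/*
 * Minimal userspace model of the "lock, re-check page->page_cgroup,
 * then assign or clear" pattern.  Illustrative only: model_page and
 * model_page_cgroup are stand-ins for the kernel's struct page and
 * struct page_cgroup, and a pthread mutex replaces the per-page
 * PAGE_CGROUP_LOCK_BIT bit spinlock.
 */
#include <stdio.h>
#include <pthread.h>

struct model_page_cgroup;

struct model_page {
	pthread_mutex_t cgroup_lock;            /* stands in for the bit spinlock */
	struct model_page_cgroup *page_cgroup;
};

struct model_page_cgroup {
	struct model_page *page;
	int ref_cnt;                             /* mirrors pc->ref_cnt */
};

/* Tie a new page_cgroup to the page; returns 0 on success, 1 if already tied. */
static int assign_new_page_cgroup(struct model_page *page,
				  struct model_page_cgroup *pc)
{
	int ret = 0;

	pthread_mutex_lock(&page->cgroup_lock);
	if (!page->page_cgroup)
		page->page_cgroup = pc;
	else		/* someone else won the race */
		ret = 1;
	pthread_mutex_unlock(&page->cgroup_lock);
	return ret;
}

/*
 * Clear page->page_cgroup only if it still equals "pc".  Returns the
 * value seen under the lock, so the caller detects success by checking
 * clear_page_cgroup(page, pc) == pc.
 */
static struct model_page_cgroup *
clear_page_cgroup(struct model_page *page, struct model_page_cgroup *pc)
{
	struct model_page_cgroup *ret;

	pthread_mutex_lock(&page->cgroup_lock);
	ret = page->page_cgroup;
	if (ret == pc)
		page->page_cgroup = NULL;
	pthread_mutex_unlock(&page->cgroup_lock);
	return ret;
}

int main(void)
{
	struct model_page page = { .cgroup_lock = PTHREAD_MUTEX_INITIALIZER };
	struct model_page_cgroup pc = { &page, 1 };

	printf("assign:       %d\n", assign_new_page_cgroup(&page, &pc)); /* 0: tied */
	printf("assign again: %d\n", assign_new_page_cgroup(&page, &pc)); /* 1: lost race */
	printf("clear ok:     %d\n", clear_page_cgroup(&page, &pc) == &pc); /* 1: cleared */
	printf("clear again:  %d\n", clear_page_cgroup(&page, &pc) == &pc); /* 0: already gone */
	return 0;
}

Built with "cc -pthread sketch.c", the output shows assignment succeeding once, the second assignment losing the race, and the clear succeeding only while the recorded pc still matches.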
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fadd4896a14..3270ce7375db 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,48 @@ static void __always_inline unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
+/*
+ * Tie a new page_cgroup to the struct page under lock_page_cgroup().
+ * This can fail if the page is already tied to another page_cgroup.
+ * Returns 0 on success.
+ */
+static inline int
+page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	int ret = 0;
+
+	lock_page_cgroup(page);
+	if (!page_get_page_cgroup(page))
+		page_assign_page_cgroup(page, pc);
+	else	/* The page is tied to another pc. */
+		ret = 1;
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+/*
+ * Clear the page->page_cgroup member under lock_page_cgroup().
+ * If the given "pc" differs from the current page->page_cgroup,
+ * page->page_cgroup is not cleared.
+ * Returns the value of page->page_cgroup seen while the lock was held,
+ * so a caller can detect a failed clear by checking
+ *  clear_page_cgroup(page, pc) == pc
+ */
+
+static inline struct page_cgroup *
+clear_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	struct page_cgroup *ret;
+	/* lock and clear */
+	lock_page_cgroup(page);
+	ret = page_get_page_cgroup(page);
+	if (likely(ret == pc))
+		page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	if (active)
@@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc, *race_pc;
+	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 
@@ -293,8 +335,10 @@ retry:
 				unlock_page_cgroup(page);
 				cpu_relax();
 				goto retry;
-			} else
+			} else {
+				unlock_page_cgroup(page);
 				goto done;
+			}
 		}
 
 		unlock_page_cgroup(page);
@@ -364,31 +408,26 @@ noreclaim:
 		goto free_pc;
 	}
 
-	lock_page_cgroup(page);
-	/*
-	 * Check if somebody else beat us to allocating the page_cgroup
-	 */
-	race_pc = page_get_page_cgroup(page);
-	if (race_pc) {
-		kfree(pc);
-		pc = race_pc;
-		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
-		goto done;
-	}
-
 	atomic_set(&pc->ref_cnt, 1);
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	page_assign_page_cgroup(page, pc);
+	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+		/*
+		 * Another charge has already been added to this page.
+		 * Retrying takes lock_page_cgroup(page) again, reads
+		 * page->cgroup and increments the refcnt, so just retry.
+		 */
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		css_put(&mem->css);
+		kfree(pc);
+		goto retry;
+	}
 
 	spin_lock_irqsave(&mem->lru_lock, flags);
 	list_add(&pc->lru, &mem->active_list);
 	spin_unlock_irqrestore(&mem->lru_lock, flags);
 
 done:
-	unlock_page_cgroup(page);
 	return 0;
 free_pc:
 	kfree(pc);
@@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
-		lock_page_cgroup(page);
-		mem = pc->mem_cgroup;
-		css_put(&mem->css);
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-
-		spin_lock_irqsave(&mem->lru_lock, flags);
-		list_del_init(&pc->lru);
-		spin_unlock_irqrestore(&mem->lru_lock, flags);
-		kfree(pc);
+		/*
+		 * Get page->cgroup and clear it under the lock.
+		 */
+		if (clear_page_cgroup(page, pc) == pc) {
+			mem = pc->mem_cgroup;
+			css_put(&mem->css);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
+			spin_lock_irqsave(&mem->lru_lock, flags);
+			list_del_init(&pc->lru);
+			spin_unlock_irqrestore(&mem->lru_lock, flags);
+			kfree(pc);
+		} else {
+			/*
+			 * Note: this will be removed when the force-empty
+			 * patch is applied; just show a warning here.
+			 */
+			printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
+			dump_stack();
+		}
 	}
 }
 