aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaisuke Nishimura <nishimura@mxp.nes.nec.co.jp>2009-05-28 17:34:28 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-05-29 11:40:02 -0400
commite767e0561d7fd2333df1921f1ab4176211f9036b (patch)
tree3b936733f80ceb1ee61ce99f927d002d2296250e
parentbd6daba909d8484bd2ccf6017db4028d7a420927 (diff)
memcg: fix deadlock between lock_page_cgroup and mapping tree_lock
mapping->tree_lock can be acquired from interrupt context. Then, following dead lock can occur. Assume "A" as a page. CPU0: lock_page_cgroup(A) interrupted -> take mapping->tree_lock. CPU1: take mapping->tree_lock -> lock_page_cgroup(A) This patch tries to fix above deadlock by moving memcg's hook to out of mapping->tree_lock. charge/uncharge of pagecache/swapcache is protected by page lock, not tree_lock. After this patch, lock_page_cgroup() is not called under mapping->tree_lock. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/swap.h5
-rw-r--r--mm/filemap.c6
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/swap_state.c4
-rw-r--r--mm/truncate.c1
-rw-r--r--mm/vmscan.c2
6 files changed, 15 insertions, 7 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 62d81435347a..d476aad3ff57 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page,
437 return 0; 437 return 0;
438} 438}
439 439
440static inline void
441mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
442{
443}
444
440#endif /* CONFIG_SWAP */ 445#endif /* CONFIG_SWAP */
441#endif /* __KERNEL__*/ 446#endif /* __KERNEL__*/
442#endif /* _LINUX_SWAP_H */ 447#endif /* _LINUX_SWAP_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 379ff0bcbf6e..1b60f30cebfa 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page)
121 mapping->nrpages--; 121 mapping->nrpages--;
122 __dec_zone_page_state(page, NR_FILE_PAGES); 122 __dec_zone_page_state(page, NR_FILE_PAGES);
123 BUG_ON(page_mapped(page)); 123 BUG_ON(page_mapped(page));
124 mem_cgroup_uncharge_cache_page(page);
125 124
126 /* 125 /*
127 * Some filesystems seem to re-dirty the page even after 126 * Some filesystems seem to re-dirty the page even after
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page)
145 spin_lock_irq(&mapping->tree_lock); 144 spin_lock_irq(&mapping->tree_lock);
146 __remove_from_page_cache(page); 145 __remove_from_page_cache(page);
147 spin_unlock_irq(&mapping->tree_lock); 146 spin_unlock_irq(&mapping->tree_lock);
147 mem_cgroup_uncharge_cache_page(page);
148} 148}
149 149
150static int sync_page(void *word) 150static int sync_page(void *word)
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
476 if (likely(!error)) { 476 if (likely(!error)) {
477 mapping->nrpages++; 477 mapping->nrpages++;
478 __inc_zone_page_state(page, NR_FILE_PAGES); 478 __inc_zone_page_state(page, NR_FILE_PAGES);
479 spin_unlock_irq(&mapping->tree_lock);
479 } else { 480 } else {
480 page->mapping = NULL; 481 page->mapping = NULL;
482 spin_unlock_irq(&mapping->tree_lock);
481 mem_cgroup_uncharge_cache_page(page); 483 mem_cgroup_uncharge_cache_page(page);
482 page_cache_release(page); 484 page_cache_release(page);
483 } 485 }
484
485 spin_unlock_irq(&mapping->tree_lock);
486 radix_tree_preload_end(); 486 radix_tree_preload_end();
487 } else 487 } else
488 mem_cgroup_uncharge_cache_page(page); 488 mem_cgroup_uncharge_cache_page(page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 01c2d8f14685..4a747a27a22f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
1488 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); 1488 __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
1489} 1489}
1490 1490
1491#ifdef CONFIG_SWAP
1491/* 1492/*
1492 * called from __delete_from_swap_cache() and drop "page" account. 1493 * called after __delete_from_swap_cache() and drop "page" account.
1493 * memcg information is recorded to swap_cgroup of "ent" 1494 * memcg information is recorded to swap_cgroup of "ent"
1494 */ 1495 */
1495void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) 1496void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
@@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
1506 if (memcg) 1507 if (memcg)
1507 css_put(&memcg->css); 1508 css_put(&memcg->css);
1508} 1509}
1510#endif
1509 1511
1510#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 1512#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
1511/* 1513/*
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 3ecea98ecb45..1416e7e9e02d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
109 */ 109 */
110void __delete_from_swap_cache(struct page *page) 110void __delete_from_swap_cache(struct page *page)
111{ 111{
112 swp_entry_t ent = {.val = page_private(page)};
113
114 VM_BUG_ON(!PageLocked(page)); 112 VM_BUG_ON(!PageLocked(page));
115 VM_BUG_ON(!PageSwapCache(page)); 113 VM_BUG_ON(!PageSwapCache(page));
116 VM_BUG_ON(PageWriteback(page)); 114 VM_BUG_ON(PageWriteback(page));
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page)
121 total_swapcache_pages--; 119 total_swapcache_pages--;
122 __dec_zone_page_state(page, NR_FILE_PAGES); 120 __dec_zone_page_state(page, NR_FILE_PAGES);
123 INC_CACHE_INFO(del_total); 121 INC_CACHE_INFO(del_total);
124 mem_cgroup_uncharge_swapcache(page, ent);
125} 122}
126 123
127/** 124/**
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page)
191 __delete_from_swap_cache(page); 188 __delete_from_swap_cache(page);
192 spin_unlock_irq(&swapper_space.tree_lock); 189 spin_unlock_irq(&swapper_space.tree_lock);
193 190
191 mem_cgroup_uncharge_swapcache(page, entry);
194 swap_free(entry); 192 swap_free(entry);
195 page_cache_release(page); 193 page_cache_release(page);
196} 194}
diff --git a/mm/truncate.c b/mm/truncate.c
index 55206fab7b99..12e1579f9165 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
359 BUG_ON(page_has_private(page)); 359 BUG_ON(page_has_private(page));
360 __remove_from_page_cache(page); 360 __remove_from_page_cache(page);
361 spin_unlock_irq(&mapping->tree_lock); 361 spin_unlock_irq(&mapping->tree_lock);
362 mem_cgroup_uncharge_cache_page(page);
362 page_cache_release(page); /* pagecache ref */ 363 page_cache_release(page); /* pagecache ref */
363 return 1; 364 return 1;
364failed: 365failed:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5fa3eda1f03f..d254306562cd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
470 swp_entry_t swap = { .val = page_private(page) }; 470 swp_entry_t swap = { .val = page_private(page) };
471 __delete_from_swap_cache(page); 471 __delete_from_swap_cache(page);
472 spin_unlock_irq(&mapping->tree_lock); 472 spin_unlock_irq(&mapping->tree_lock);
473 mem_cgroup_uncharge_swapcache(page, swap);
473 swap_free(swap); 474 swap_free(swap);
474 } else { 475 } else {
475 __remove_from_page_cache(page); 476 __remove_from_page_cache(page);
476 spin_unlock_irq(&mapping->tree_lock); 477 spin_unlock_irq(&mapping->tree_lock);
478 mem_cgroup_uncharge_cache_page(page);
477 } 479 }
478 480
479 return 1; 481 return 1;