diff options
author | Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> | 2009-05-28 17:34:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-05-29 11:40:02 -0400 |
commit | e767e0561d7fd2333df1921f1ab4176211f9036b (patch) | |
tree | 3b936733f80ceb1ee61ce99f927d002d2296250e | |
parent | bd6daba909d8484bd2ccf6017db4028d7a420927 (diff) |
memcg: fix deadlock between lock_page_cgroup and mapping tree_lock
mapping->tree_lock can be acquired from interrupt context. Then,
following dead lock can occur.
Assume "A" as a page.
CPU0:
lock_page_cgroup(A)
interrupted
-> take mapping->tree_lock.
CPU1:
take mapping->tree_lock
-> lock_page_cgroup(A)
This patch tries to fix above deadlock by moving memcg's hook to out of
mapping->tree_lock. charge/uncharge of pagecache/swapcache is protected
by page lock, not tree_lock.
After this patch, lock_page_cgroup() is not called under mapping->tree_lock.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/swap.h | 5 | ||||
-rw-r--r-- | mm/filemap.c | 6 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/swap_state.c | 4 | ||||
-rw-r--r-- | mm/truncate.c | 1 | ||||
-rw-r--r-- | mm/vmscan.c | 2 |
6 files changed, 15 insertions, 7 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h index 62d81435347a..d476aad3ff57 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -437,6 +437,11 @@ static inline int mem_cgroup_cache_charge_swapin(struct page *page, | |||
437 | return 0; | 437 | return 0; |
438 | } | 438 | } |
439 | 439 | ||
440 | static inline void | ||
441 | mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | ||
442 | { | ||
443 | } | ||
444 | |||
440 | #endif /* CONFIG_SWAP */ | 445 | #endif /* CONFIG_SWAP */ |
441 | #endif /* __KERNEL__*/ | 446 | #endif /* __KERNEL__*/ |
442 | #endif /* _LINUX_SWAP_H */ | 447 | #endif /* _LINUX_SWAP_H */ |
diff --git a/mm/filemap.c b/mm/filemap.c index 379ff0bcbf6e..1b60f30cebfa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page) | |||
121 | mapping->nrpages--; | 121 | mapping->nrpages--; |
122 | __dec_zone_page_state(page, NR_FILE_PAGES); | 122 | __dec_zone_page_state(page, NR_FILE_PAGES); |
123 | BUG_ON(page_mapped(page)); | 123 | BUG_ON(page_mapped(page)); |
124 | mem_cgroup_uncharge_cache_page(page); | ||
125 | 124 | ||
126 | /* | 125 | /* |
127 | * Some filesystems seem to re-dirty the page even after | 126 | * Some filesystems seem to re-dirty the page even after |
@@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page) | |||
145 | spin_lock_irq(&mapping->tree_lock); | 144 | spin_lock_irq(&mapping->tree_lock); |
146 | __remove_from_page_cache(page); | 145 | __remove_from_page_cache(page); |
147 | spin_unlock_irq(&mapping->tree_lock); | 146 | spin_unlock_irq(&mapping->tree_lock); |
147 | mem_cgroup_uncharge_cache_page(page); | ||
148 | } | 148 | } |
149 | 149 | ||
150 | static int sync_page(void *word) | 150 | static int sync_page(void *word) |
@@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, | |||
476 | if (likely(!error)) { | 476 | if (likely(!error)) { |
477 | mapping->nrpages++; | 477 | mapping->nrpages++; |
478 | __inc_zone_page_state(page, NR_FILE_PAGES); | 478 | __inc_zone_page_state(page, NR_FILE_PAGES); |
479 | spin_unlock_irq(&mapping->tree_lock); | ||
479 | } else { | 480 | } else { |
480 | page->mapping = NULL; | 481 | page->mapping = NULL; |
482 | spin_unlock_irq(&mapping->tree_lock); | ||
481 | mem_cgroup_uncharge_cache_page(page); | 483 | mem_cgroup_uncharge_cache_page(page); |
482 | page_cache_release(page); | 484 | page_cache_release(page); |
483 | } | 485 | } |
484 | |||
485 | spin_unlock_irq(&mapping->tree_lock); | ||
486 | radix_tree_preload_end(); | 486 | radix_tree_preload_end(); |
487 | } else | 487 | } else |
488 | mem_cgroup_uncharge_cache_page(page); | 488 | mem_cgroup_uncharge_cache_page(page); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 01c2d8f14685..4a747a27a22f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) | |||
1488 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); | 1488 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); |
1489 | } | 1489 | } |
1490 | 1490 | ||
1491 | #ifdef CONFIG_SWAP | ||
1491 | /* | 1492 | /* |
1492 | * called from __delete_from_swap_cache() and drop "page" account. | 1493 | * called after __delete_from_swap_cache() and drop "page" account. |
1493 | * memcg information is recorded to swap_cgroup of "ent" | 1494 | * memcg information is recorded to swap_cgroup of "ent" |
1494 | */ | 1495 | */ |
1495 | void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | 1496 | void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) |
@@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | |||
1506 | if (memcg) | 1507 | if (memcg) |
1507 | css_put(&memcg->css); | 1508 | css_put(&memcg->css); |
1508 | } | 1509 | } |
1510 | #endif | ||
1509 | 1511 | ||
1510 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 1512 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
1511 | /* | 1513 | /* |
diff --git a/mm/swap_state.c b/mm/swap_state.c index 3ecea98ecb45..1416e7e9e02d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | |||
109 | */ | 109 | */ |
110 | void __delete_from_swap_cache(struct page *page) | 110 | void __delete_from_swap_cache(struct page *page) |
111 | { | 111 | { |
112 | swp_entry_t ent = {.val = page_private(page)}; | ||
113 | |||
114 | VM_BUG_ON(!PageLocked(page)); | 112 | VM_BUG_ON(!PageLocked(page)); |
115 | VM_BUG_ON(!PageSwapCache(page)); | 113 | VM_BUG_ON(!PageSwapCache(page)); |
116 | VM_BUG_ON(PageWriteback(page)); | 114 | VM_BUG_ON(PageWriteback(page)); |
@@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page) | |||
121 | total_swapcache_pages--; | 119 | total_swapcache_pages--; |
122 | __dec_zone_page_state(page, NR_FILE_PAGES); | 120 | __dec_zone_page_state(page, NR_FILE_PAGES); |
123 | INC_CACHE_INFO(del_total); | 121 | INC_CACHE_INFO(del_total); |
124 | mem_cgroup_uncharge_swapcache(page, ent); | ||
125 | } | 122 | } |
126 | 123 | ||
127 | /** | 124 | /** |
@@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page) | |||
191 | __delete_from_swap_cache(page); | 188 | __delete_from_swap_cache(page); |
192 | spin_unlock_irq(&swapper_space.tree_lock); | 189 | spin_unlock_irq(&swapper_space.tree_lock); |
193 | 190 | ||
191 | mem_cgroup_uncharge_swapcache(page, entry); | ||
194 | swap_free(entry); | 192 | swap_free(entry); |
195 | page_cache_release(page); | 193 | page_cache_release(page); |
196 | } | 194 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index 55206fab7b99..12e1579f9165 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
359 | BUG_ON(page_has_private(page)); | 359 | BUG_ON(page_has_private(page)); |
360 | __remove_from_page_cache(page); | 360 | __remove_from_page_cache(page); |
361 | spin_unlock_irq(&mapping->tree_lock); | 361 | spin_unlock_irq(&mapping->tree_lock); |
362 | mem_cgroup_uncharge_cache_page(page); | ||
362 | page_cache_release(page); /* pagecache ref */ | 363 | page_cache_release(page); /* pagecache ref */ |
363 | return 1; | 364 | return 1; |
364 | failed: | 365 | failed: |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fa3eda1f03f..d254306562cd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) | |||
470 | swp_entry_t swap = { .val = page_private(page) }; | 470 | swp_entry_t swap = { .val = page_private(page) }; |
471 | __delete_from_swap_cache(page); | 471 | __delete_from_swap_cache(page); |
472 | spin_unlock_irq(&mapping->tree_lock); | 472 | spin_unlock_irq(&mapping->tree_lock); |
473 | mem_cgroup_uncharge_swapcache(page, swap); | ||
473 | swap_free(swap); | 474 | swap_free(swap); |
474 | } else { | 475 | } else { |
475 | __remove_from_page_cache(page); | 476 | __remove_from_page_cache(page); |
476 | spin_unlock_irq(&mapping->tree_lock); | 477 | spin_unlock_irq(&mapping->tree_lock); |
478 | mem_cgroup_uncharge_cache_page(page); | ||
477 | } | 479 | } |
478 | 480 | ||
479 | return 1; | 481 | return 1; |