author		Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>	2009-09-21 20:02:50 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:35 -0400
commit		31a5639623a487d6db996c8138c9e53fef2e2d91 (patch)
tree		4fff22b4a0333df084a6cd89e8ecbc332dbfa299 /mm
parent		8fbb398f5c78832ee61e0d5ed0793fa8857bd853 (diff)
mm: add_to_swap_cache() must not sleep
After commit 355cfa73 ("mm: modify swap_map and add SWAP_HAS_CACHE flag"),
read_swap_cache_async() busy-waits while an entry does not yet exist in the
swap cache but has its SWAP_HAS_CACHE flag set.
Such entries can exist on the add and delete paths of the swap cache. On the
add path, add_to_swap_cache() is called soon after the SWAP_HAS_CACHE flag is
set, and on the delete path, swapcache_free() (which clears the SWAP_HAS_CACHE
flag) is called soon after __delete_from_swap_cache(). So the busy-wait works
well in most cases.
But this mechanism can cause a soft lockup if add_to_swap_cache() sleeps while
read_swap_cache_async() tries to swap in the same entry on the same CPU.
This patch calls radix_tree_preload() before swapcache_prepare() and divides
add_to_swap_cache() into two parts: the radix_tree_preload() part and the
radix_tree_insert() part (defined as __add_to_swap_cache()).
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
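
In short, the split leaves add_to_swap_cache() as a thin wrapper that performs the possibly-sleeping allocation first and only then inserts under the tree lock. The sketch below is condensed from the diff that follows; the insertion details inside __add_to_swap_cache() are elided.

    /*
     * Condensed sketch of the post-patch structure (see the full diff below).
     * __add_to_swap_cache() only does the radix-tree insertion under
     * swapper_space.tree_lock and never sleeps, because the caller has
     * already preloaded the radix-tree node.
     */
    int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
    {
    	int error;
    
    	/* May sleep: allocate the radix-tree node up front. */
    	error = radix_tree_preload(gfp_mask);
    	if (!error) {
    		/* Atomic insertion using the preloaded node. */
    		error = __add_to_swap_cache(page, entry);
    		radix_tree_preload_end();
    	}
    	return error;
    }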
Diffstat (limited to 'mm')
-rw-r--r--	mm/swap_state.c | 70
1 file changed, 46 insertions(+), 24 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ae6b8b78c80..b076a1a5a0aa 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,10 +67,10 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -78,28 +78,37 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	VM_BUG_ON(PageSwapCache(page));
 	VM_BUG_ON(!PageSwapBacked(page));
 
+	page_cache_get(page);
+	SetPageSwapCache(page);
+	set_page_private(page, entry.val);
+
+	spin_lock_irq(&swapper_space.tree_lock);
+	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+	if (likely(!error)) {
+		total_swapcache_pages++;
+		__inc_zone_page_state(page, NR_FILE_PAGES);
+		INC_CACHE_INFO(add_total);
+	}
+	spin_unlock_irq(&swapper_space.tree_lock);
+
+	if (unlikely(error)) {
+		set_page_private(page, 0UL);
+		ClearPageSwapCache(page);
+		page_cache_release(page);
+	}
+
+	return error;
+}
+
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+{
+	int error;
+
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
-		page_cache_get(page);
-		SetPageSwapCache(page);
-		set_page_private(page, entry.val);
-
-		spin_lock_irq(&swapper_space.tree_lock);
-		error = radix_tree_insert(&swapper_space.page_tree,
-						entry.val, page);
-		if (likely(!error)) {
-			total_swapcache_pages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-			INC_CACHE_INFO(add_total);
-		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		error = __add_to_swap_cache(page, entry);
 		radix_tree_preload_end();
-
-		if (unlikely(error)) {
-			set_page_private(page, 0UL);
-			ClearPageSwapCache(page);
-			page_cache_release(page);
-		}
 	}
 	return error;
 }
@@ -290,13 +299,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		}
 
 		/*
+		 * call radix_tree_preload() while we can wait.
+		 */
+		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
+		if (err)
+			break;
+
+		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
 		err = swapcache_prepare(entry);
-		if (err == -EEXIST)	/* seems racy */
+		if (err == -EEXIST) {	/* seems racy */
+			radix_tree_preload_end();
 			continue;
-		if (err)		/* swp entry is obsolete ? */
+		}
+		if (err) {		/* swp entry is obsolete ? */
+			radix_tree_preload_end();
 			break;
+		}
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -308,8 +328,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 */
 		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
-		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
+		err = __add_to_swap_cache(new_page, entry);
 		if (likely(!err)) {
+			radix_tree_preload_end();
 			/*
 			 * Initiate read into locked page and return.
 			 */
@@ -317,6 +338,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(new_page);
 			return new_page;
 		}
+		radix_tree_preload_end();
 		ClearPageSwapBacked(new_page);
 		__clear_page_locked(new_page);
 		swapcache_free(entry, NULL);