author	Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>	2009-09-21 20:02:50 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:35 -0400
commit	31a5639623a487d6db996c8138c9e53fef2e2d91 (patch)
tree	4fff22b4a0333df084a6cd89e8ecbc332dbfa299
parent	8fbb398f5c78832ee61e0d5ed0793fa8857bd853 (diff)
mm: add_to_swap_cache() must not sleep
After commit 355cfa73 ("mm: modify swap_map and add SWAP_HAS_CACHE flag"), read_swap_cache_async() will busy-wait while an entry does not yet exist in the swap cache but has the SWAP_HAS_CACHE flag set. Such entries can exist on the add/delete paths of the swap cache. On the add path, add_to_swap_cache() is called soon after the SWAP_HAS_CACHE flag is set; on the delete path, swapcache_free() (which clears the SWAP_HAS_CACHE flag) is called soon after __delete_from_swap_cache(). So the busy-wait works well in most cases.

But this mechanism can cause a soft lockup if add_to_swap_cache() sleeps while read_swap_cache_async() tries to swap in the same entry on the same CPU.

This patch calls radix_tree_preload() before swapcache_prepare() and splits add_to_swap_cache() into two parts: the radix_tree_preload() part and the radix_tree_insert() part (defined as __add_to_swap_cache()).

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 mm/swap_state.c | 70
 1 file changed, 46 insertions(+), 24 deletions(-)
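The ordering the patch establishes inside read_swap_cache_async()'s retry loop can be condensed as follows. This is a minimal sketch, not the kernel source verbatim: page locking, flag setup and the -EEXIST retry are omitted, and it only reuses the functions and locals already named in the patch (radix_tree_preload(), swapcache_prepare(), __add_to_swap_cache(), radix_tree_preload_end(), err, entry, new_page, gfp_mask).

	/*
	 * 1. Do the only allocation that may sleep *before* publishing
	 *    SWAP_HAS_CACHE, so no other task can busy-wait on this entry
	 *    while we sleep.
	 */
	err = radix_tree_preload(gfp_mask & GFP_KERNEL);	/* may sleep */
	if (err)
		break;

	/* 2. Claim the entry; from here on, other CPUs may busy-wait on it. */
	err = swapcache_prepare(entry);
	if (err) {
		radix_tree_preload_end();	/* drop the preload on failure */
		break;
	}

	/*
	 * 3. Insert without sleeping: the radix tree node was preallocated
	 *    and __add_to_swap_cache() only takes a spinlock.
	 */
	err = __add_to_swap_cache(new_page, entry);
	radix_tree_preload_end();

With this ordering, the window in which readers busy-wait on SWAP_HAS_CACHE contains no sleeping allocation, which is what removes the soft-lockup scenario described above.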
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ae6b8b78c80..b076a1a5a0aa 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,10 +67,10 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
 	int error;
 
@@ -78,28 +78,37 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 	VM_BUG_ON(PageSwapCache(page));
 	VM_BUG_ON(!PageSwapBacked(page));
 
+	page_cache_get(page);
+	SetPageSwapCache(page);
+	set_page_private(page, entry.val);
+
+	spin_lock_irq(&swapper_space.tree_lock);
+	error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+	if (likely(!error)) {
+		total_swapcache_pages++;
+		__inc_zone_page_state(page, NR_FILE_PAGES);
+		INC_CACHE_INFO(add_total);
+	}
+	spin_unlock_irq(&swapper_space.tree_lock);
+
+	if (unlikely(error)) {
+		set_page_private(page, 0UL);
+		ClearPageSwapCache(page);
+		page_cache_release(page);
+	}
+
+	return error;
+}
+
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+{
+	int error;
+
 	error = radix_tree_preload(gfp_mask);
 	if (!error) {
-		page_cache_get(page);
-		SetPageSwapCache(page);
-		set_page_private(page, entry.val);
-
-		spin_lock_irq(&swapper_space.tree_lock);
-		error = radix_tree_insert(&swapper_space.page_tree,
-						entry.val, page);
-		if (likely(!error)) {
-			total_swapcache_pages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-			INC_CACHE_INFO(add_total);
-		}
-		spin_unlock_irq(&swapper_space.tree_lock);
+		error = __add_to_swap_cache(page, entry);
 		radix_tree_preload_end();
-
-		if (unlikely(error)) {
-			set_page_private(page, 0UL);
-			ClearPageSwapCache(page);
-			page_cache_release(page);
-		}
 	}
 	return error;
 }
@@ -290,13 +299,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		}
 
 		/*
+		 * call radix_tree_preload() while we can wait.
+		 */
+		err = radix_tree_preload(gfp_mask & GFP_KERNEL);
+		if (err)
+			break;
+
+		/*
 		 * Swap entry may have been freed since our caller observed it.
 		 */
 		err = swapcache_prepare(entry);
-		if (err == -EEXIST)	/* seems racy */
+		if (err == -EEXIST) {	/* seems racy */
+			radix_tree_preload_end();
 			continue;
-		if (err)		/* swp entry is obsolete ? */
+		}
+		if (err) {		/* swp entry is obsolete ? */
+			radix_tree_preload_end();
 			break;
+		}
 
 		/*
 		 * Associate the page with swap entry in the swap cache.
@@ -308,8 +328,9 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 */
 		__set_page_locked(new_page);
 		SetPageSwapBacked(new_page);
-		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
+		err = __add_to_swap_cache(new_page, entry);
 		if (likely(!err)) {
+			radix_tree_preload_end();
 			/*
 			 * Initiate read into locked page and return.
 			 */
@@ -317,6 +338,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			swap_readpage(new_page);
 			return new_page;
 		}
+		radix_tree_preload_end();
 		ClearPageSwapBacked(new_page);
 		__clear_page_locked(new_page);
 		swapcache_free(entry, NULL);