Diffstat (limited to 'mm/swap_state.c')

-rw-r--r--	mm/swap_state.c	143
 1 file changed, 82 insertions(+), 61 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5ae6b8b78c80..6d1daeb1cb4a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,10 +67,10 @@ void show_swap_cache_info(void)
 }
 
 /*
- * add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
+ * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 {
        int error;
 
@@ -78,28 +78,43 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
        VM_BUG_ON(PageSwapCache(page));
        VM_BUG_ON(!PageSwapBacked(page));
 
+       page_cache_get(page);
+       SetPageSwapCache(page);
+       set_page_private(page, entry.val);
+
+       spin_lock_irq(&swapper_space.tree_lock);
+       error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
+       if (likely(!error)) {
+               total_swapcache_pages++;
+               __inc_zone_page_state(page, NR_FILE_PAGES);
+               INC_CACHE_INFO(add_total);
+       }
+       spin_unlock_irq(&swapper_space.tree_lock);
+
+       if (unlikely(error)) {
+               /*
+                * Only the context which have set SWAP_HAS_CACHE flag
+                * would call add_to_swap_cache().
+                * So add_to_swap_cache() doesn't returns -EEXIST.
+                */
+               VM_BUG_ON(error == -EEXIST);
+               set_page_private(page, 0UL);
+               ClearPageSwapCache(page);
+               page_cache_release(page);
+       }
+
+       return error;
+}
+
+
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+{
+       int error;
+
        error = radix_tree_preload(gfp_mask);
        if (!error) {
-               page_cache_get(page);
-               SetPageSwapCache(page);
-               set_page_private(page, entry.val);
-
-               spin_lock_irq(&swapper_space.tree_lock);
-               error = radix_tree_insert(&swapper_space.page_tree,
-                                               entry.val, page);
-               if (likely(!error)) {
-                       total_swapcache_pages++;
-                       __inc_zone_page_state(page, NR_FILE_PAGES);
-                       INC_CACHE_INFO(add_total);
-               }
-               spin_unlock_irq(&swapper_space.tree_lock);
+               error = __add_to_swap_cache(page, entry);
                radix_tree_preload_end();
-
-               if (unlikely(error)) {
-                       set_page_private(page, 0UL);
-                       ClearPageSwapCache(page);
-                       page_cache_release(page);
-               }
        }
        return error;
 }
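The hunk above splits the old function in two: __add_to_swap_cache() performs only the locked radix-tree insertion, while the add_to_swap_cache() wrapper keeps the preload/insert/preload-end sequence. The point of the split is that radix_tree_preload() may sleep, so a caller that needs to do other preparation between the preload and the insert (see read_swap_cache_async() below) can now preload first and call __add_to_swap_cache() directly. A minimal sketch of the contract follows; cache_insert() is a hypothetical caller for illustration, not part of the patch:

/* Hypothetical caller showing the preload contract assumed by the patch. */
static int cache_insert(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
{
        int error;

        /* May sleep (depending on gfp_mask); call before taking any spinlock. */
        error = radix_tree_preload(gfp_mask);
        if (error)
                return error;           /* -ENOMEM: nothing to undo yet */

        /*
         * Runs under swapper_space.tree_lock with preemption disabled;
         * the preloaded per-CPU nodes guarantee the insert cannot sleep.
         */
        error = __add_to_swap_cache(page, entry);

        /* Re-enables preemption; required on every path after a preload. */
        radix_tree_preload_end();
        return error;
}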
@@ -137,38 +152,34 @@ int add_to_swap(struct page *page)
        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageUptodate(page));
 
-       for (;;) {
-               entry = get_swap_page();
-               if (!entry.val)
-                       return 0;
+       entry = get_swap_page();
+       if (!entry.val)
+               return 0;
 
+       /*
+        * Radix-tree node allocations from PF_MEMALLOC contexts could
+        * completely exhaust the page allocator.  __GFP_NOMEMALLOC
+        * stops emergency reserves from being allocated.
+        *
+        * TODO: this could cause a theoretical memory reclaim
+        * deadlock in the swap out path.
+        */
+       /*
+        * Add it to the swap cache and mark it dirty
+        */
+       err = add_to_swap_cache(page, entry,
+                       __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+
+       if (!err) {     /* Success */
+               SetPageDirty(page);
+               return 1;
+       } else {        /* -ENOMEM radix-tree allocation failure */
                /*
-                * Radix-tree node allocations from PF_MEMALLOC contexts could
-                * completely exhaust the page allocator.  __GFP_NOMEMALLOC
-                * stops emergency reserves from being allocated.
-                *
-                * TODO: this could cause a theoretical memory reclaim
-                * deadlock in the swap out path.
-                */
-               /*
-                * Add it to the swap cache and mark it dirty
+                * add_to_swap_cache() doesn't return -EEXIST, so we can safely
+                * clear SWAP_HAS_CACHE flag.
                 */
-               err = add_to_swap_cache(page, entry,
-                               __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
-
-               switch (err) {
-               case 0:                         /* Success */
-                       SetPageDirty(page);
-                       return 1;
-               case -EEXIST:
-                       /* Raced with "speculative" read_swap_cache_async */
-                       swapcache_free(entry, NULL);
-                       continue;
-               default:
-                       /* -ENOMEM radix-tree allocation failure */
-                       swapcache_free(entry, NULL);
-                       return 0;
-               }
+               swapcache_free(entry, NULL);
+               return 0;
        }
 }
 
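Because the context that holds SWAP_HAS_CACHE on the entry is the only one allowed to insert it, add_to_swap_cache() can no longer return -EEXIST, and the old retry loop becomes dead code: add_to_swap() collapses to a single attempt. The resulting function, reconstructed from the hunk above (the local declarations are assumed, since the diff does not show them):

int add_to_swap(struct page *page)
{
        swp_entry_t entry;
        int err;

        VM_BUG_ON(!PageLocked(page));
        VM_BUG_ON(!PageUptodate(page));

        entry = get_swap_page();        /* also marks SWAP_HAS_CACHE */
        if (!entry.val)
                return 0;

        err = add_to_swap_cache(page, entry,
                        __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
        if (!err) {                     /* success */
                SetPageDirty(page);
                return 1;
        }
        /*
         * Only -ENOMEM is possible here; drop the entry and its
         * SWAP_HAS_CACHE flag in one go.
         */
        swapcache_free(entry, NULL);
        return 0;
}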
@@ -290,26 +301,31 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                }
 
                /*
+                * call radix_tree_preload() while we can wait.
+                */
+               err = radix_tree_preload(gfp_mask & GFP_KERNEL);
+               if (err)
+                       break;
+
+               /*
                 * Swap entry may have been freed since our caller observed it.
                 */
                err = swapcache_prepare(entry);
-               if (err == -EEXIST)     /* seems racy */
+               if (err == -EEXIST) {   /* seems racy */
+                       radix_tree_preload_end();
                        continue;
-               if (err)                /* swp entry is obsolete ? */
+               }
+               if (err) {              /* swp entry is obsolete ? */
+                       radix_tree_preload_end();
                        break;
+               }
 
-               /*
-                * Associate the page with swap entry in the swap cache.
-                * May fail (-EEXIST) if there is already a page associated
-                * with this entry in the swap cache: added by a racing
-                * read_swap_cache_async, or add_to_swap or shmem_writepage
-                * re-using the just freed swap entry for an existing page.
-                * May fail (-ENOMEM) if radix-tree node allocation failed.
-                */
+               /* May fail (-ENOMEM) if radix-tree node allocation failed. */
                __set_page_locked(new_page);
                SetPageSwapBacked(new_page);
-               err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
+               err = __add_to_swap_cache(new_page, entry);
                if (likely(!err)) {
+                       radix_tree_preload_end();
                        /*
                         * Initiate read into locked page and return.
                         */
@@ -317,8 +333,13 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                        swap_readpage(new_page);
                        return new_page;
                }
+               radix_tree_preload_end();
                ClearPageSwapBacked(new_page);
                __clear_page_locked(new_page);
+               /*
+                * add_to_swap_cache() doesn't return -EEXIST, so we can safely
+                * clear SWAP_HAS_CACHE flag.
+                */
                swapcache_free(entry, NULL);
        } while (err != -ENOMEM);
 
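Taken together, the last two hunks reorder read_swap_cache_async() so that the sleeping radix_tree_preload() happens before swapcache_prepare() claims SWAP_HAS_CACHE, and __add_to_swap_cache() then runs with radix-tree nodes already reserved; every exit path has to balance the preload with radix_tree_preload_end(). A condensed skeleton of the loop, assembled from the hunks above (the cache-lookup and page-allocation steps, elided here, are unchanged by the patch):

do {
        /* ... look up the swap cache and allocate new_page ... */

        err = radix_tree_preload(gfp_mask & GFP_KERNEL);        /* may sleep */
        if (err)
                break;

        err = swapcache_prepare(entry);         /* claim SWAP_HAS_CACHE */
        if (err == -EEXIST) {                   /* racing claimant: retry */
                radix_tree_preload_end();
                continue;
        }
        if (err) {                              /* entry already freed */
                radix_tree_preload_end();
                break;
        }

        __set_page_locked(new_page);
        SetPageSwapBacked(new_page);
        err = __add_to_swap_cache(new_page, entry);
        if (likely(!err)) {
                radix_tree_preload_end();
                /* ... add to LRU, swap_readpage(new_page) ... */
                return new_page;
        }
        radix_tree_preload_end();
        ClearPageSwapBacked(new_page);
        __clear_page_locked(new_page);
        swapcache_free(entry, NULL);            /* only -ENOMEM possible here */
} while (err != -ENOMEM);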