Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--	mm/swap_state.c	382
1 files changed, 382 insertions, 0 deletions

diff --git a/mm/swap_state.c b/mm/swap_state.c
new file mode 100644
index 000000000000..a063a902ed03
--- /dev/null
+++ b/mm/swap_state.c
@@ -0,0 +1,382 @@
/*
 *  linux/mm/swap_state.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *  Swap reorganised 29.12.95, Stephen Tweedie
 *
 *  Rewritten to use page cache, (C) 1998 Stephen Tweedie
 */
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>

#include <asm/pgtable.h>

/*
 * swapper_space is a fiction, retained to simplify the path through
 * vmscan's shrink_list, to make sync_page look nicer, and to allow
 * future use of radix_tree tags in the swap cache.
 */
static struct address_space_operations swap_aops = {
	.writepage	= swap_writepage,
	.sync_page	= block_sync_page,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

static struct backing_dev_info swap_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.unplug_io_fn	= swap_unplug_io_fn,
};

struct address_space swapper_space = {
	.page_tree	= RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
	.tree_lock	= RW_LOCK_UNLOCKED,
	.a_ops		= &swap_aops,
	.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
	.backing_dev_info = &swap_backing_dev_info,
};
EXPORT_SYMBOL(swapper_space);
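/*
 * Illustrative sketch, not part of the original file: because swapper_space
 * has no inode behind it, pages are indexed in its page_tree directly by the
 * raw swap entry value.  The helper below is hypothetical and assumes
 * <linux/swapops.h> for swp_entry(); it performs the same lookup that
 * lookup_swap_cache() does further down, minus the statistics.
 */
static struct page *example_peek_swap_slot(unsigned long type, pgoff_t offset)
{
	swp_entry_t entry = swp_entry(type, offset);	/* encode (type, offset) */

	return find_get_page(&swapper_space, entry.val);
}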
44 | |||
45 | #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) | ||
46 | |||
47 | static struct { | ||
48 | unsigned long add_total; | ||
49 | unsigned long del_total; | ||
50 | unsigned long find_success; | ||
51 | unsigned long find_total; | ||
52 | unsigned long noent_race; | ||
53 | unsigned long exist_race; | ||
54 | } swap_cache_info; | ||
55 | |||
56 | void show_swap_cache_info(void) | ||
57 | { | ||
58 | printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", | ||
59 | swap_cache_info.add_total, swap_cache_info.del_total, | ||
60 | swap_cache_info.find_success, swap_cache_info.find_total, | ||
61 | swap_cache_info.noent_race, swap_cache_info.exist_race); | ||
62 | printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); | ||
63 | printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * __add_to_swap_cache resembles add_to_page_cache on swapper_space, | ||
68 | * but sets SwapCache flag and private instead of mapping and index. | ||
69 | */ | ||
70 | static int __add_to_swap_cache(struct page *page, | ||
71 | swp_entry_t entry, int gfp_mask) | ||
72 | { | ||
73 | int error; | ||
74 | |||
75 | BUG_ON(PageSwapCache(page)); | ||
76 | BUG_ON(PagePrivate(page)); | ||
77 | error = radix_tree_preload(gfp_mask); | ||
78 | if (!error) { | ||
79 | write_lock_irq(&swapper_space.tree_lock); | ||
80 | error = radix_tree_insert(&swapper_space.page_tree, | ||
81 | entry.val, page); | ||
82 | if (!error) { | ||
83 | page_cache_get(page); | ||
84 | SetPageLocked(page); | ||
85 | SetPageSwapCache(page); | ||
86 | page->private = entry.val; | ||
87 | total_swapcache_pages++; | ||
88 | pagecache_acct(1); | ||
89 | } | ||
90 | write_unlock_irq(&swapper_space.tree_lock); | ||
91 | radix_tree_preload_end(); | ||
92 | } | ||
93 | return error; | ||
94 | } | ||
95 | |||
96 | static int add_to_swap_cache(struct page *page, swp_entry_t entry) | ||
97 | { | ||
98 | int error; | ||
99 | |||
100 | if (!swap_duplicate(entry)) { | ||
101 | INC_CACHE_INFO(noent_race); | ||
102 | return -ENOENT; | ||
103 | } | ||
104 | error = __add_to_swap_cache(page, entry, GFP_KERNEL); | ||
105 | /* | ||
106 | * Anon pages are already on the LRU, we don't run lru_cache_add here. | ||
107 | */ | ||
108 | if (error) { | ||
109 | swap_free(entry); | ||
110 | if (error == -EEXIST) | ||
111 | INC_CACHE_INFO(exist_race); | ||
112 | return error; | ||
113 | } | ||
114 | INC_CACHE_INFO(add_total); | ||
115 | return 0; | ||
116 | } | ||
117 | |||
118 | /* | ||
119 | * This must be called only on pages that have | ||
120 | * been verified to be in the swap cache. | ||
121 | */ | ||
122 | void __delete_from_swap_cache(struct page *page) | ||
123 | { | ||
124 | BUG_ON(!PageLocked(page)); | ||
125 | BUG_ON(!PageSwapCache(page)); | ||
126 | BUG_ON(PageWriteback(page)); | ||
127 | |||
128 | radix_tree_delete(&swapper_space.page_tree, page->private); | ||
129 | page->private = 0; | ||
130 | ClearPageSwapCache(page); | ||
131 | total_swapcache_pages--; | ||
132 | pagecache_acct(-1); | ||
133 | INC_CACHE_INFO(del_total); | ||
134 | } | ||
135 | |||
136 | /** | ||
137 | * add_to_swap - allocate swap space for a page | ||
138 | * @page: page we want to move to swap | ||
139 | * | ||
140 | * Allocate swap space for the page and add the page to the | ||
141 | * swap cache. Caller needs to hold the page lock. | ||
142 | */ | ||
143 | int add_to_swap(struct page * page) | ||
144 | { | ||
145 | swp_entry_t entry; | ||
146 | int pf_flags; | ||
147 | int err; | ||
148 | |||
149 | if (!PageLocked(page)) | ||
150 | BUG(); | ||
151 | |||
152 | for (;;) { | ||
153 | entry = get_swap_page(); | ||
154 | if (!entry.val) | ||
155 | return 0; | ||
156 | |||
157 | /* Radix-tree node allocations are performing | ||
158 | * GFP_ATOMIC allocations under PF_MEMALLOC. | ||
159 | * They can completely exhaust the page allocator. | ||
160 | * | ||
161 | * So PF_MEMALLOC is dropped here. This causes the slab | ||
162 | * allocations to fail earlier, so radix-tree nodes will | ||
163 | * then be allocated from the mempool reserves. | ||
164 | * | ||
165 | * We're still using __GFP_HIGH for radix-tree node | ||
166 | * allocations, so some of the emergency pools are available, | ||
167 | * just not all of them. | ||
168 | */ | ||
169 | |||
170 | pf_flags = current->flags; | ||
171 | current->flags &= ~PF_MEMALLOC; | ||
172 | |||
173 | /* | ||
174 | * Add it to the swap cache and mark it dirty | ||
175 | */ | ||
176 | err = __add_to_swap_cache(page, entry, GFP_ATOMIC|__GFP_NOWARN); | ||
177 | |||
178 | if (pf_flags & PF_MEMALLOC) | ||
179 | current->flags |= PF_MEMALLOC; | ||
180 | |||
181 | switch (err) { | ||
182 | case 0: /* Success */ | ||
183 | SetPageUptodate(page); | ||
184 | SetPageDirty(page); | ||
185 | INC_CACHE_INFO(add_total); | ||
186 | return 1; | ||
187 | case -EEXIST: | ||
188 | /* Raced with "speculative" read_swap_cache_async */ | ||
189 | INC_CACHE_INFO(exist_race); | ||
190 | swap_free(entry); | ||
191 | continue; | ||
192 | default: | ||
193 | /* -ENOMEM radix-tree allocation failure */ | ||
194 | swap_free(entry); | ||
195 | return 0; | ||
196 | } | ||
197 | } | ||
198 | } | ||
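/*
 * Illustrative sketch, not part of the original file: roughly how a caller
 * such as vmscan's shrink_list() is expected to use add_to_swap() on a
 * locked anonymous page.  The helper name is hypothetical and error
 * handling is simplified.
 */
static int example_start_swap_out(struct page *page)
{
	BUG_ON(!PageLocked(page));

	if (PageAnon(page) && !PageSwapCache(page)) {
		if (!add_to_swap(page))
			return 0;	/* out of swap space or memory */
	}
	/* on success the page is dirty and in the swap cache, ready to be
	 * written out via swap_writepage() */
	return 1;
}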
199 | |||
200 | /* | ||
201 | * This must be called only on pages that have | ||
202 | * been verified to be in the swap cache and locked. | ||
203 | * It will never put the page into the free list, | ||
204 | * the caller has a reference on the page. | ||
205 | */ | ||
206 | void delete_from_swap_cache(struct page *page) | ||
207 | { | ||
208 | swp_entry_t entry; | ||
209 | |||
210 | BUG_ON(!PageSwapCache(page)); | ||
211 | BUG_ON(!PageLocked(page)); | ||
212 | BUG_ON(PageWriteback(page)); | ||
213 | BUG_ON(PagePrivate(page)); | ||
214 | |||
215 | entry.val = page->private; | ||
216 | |||
217 | write_lock_irq(&swapper_space.tree_lock); | ||
218 | __delete_from_swap_cache(page); | ||
219 | write_unlock_irq(&swapper_space.tree_lock); | ||
220 | |||
221 | swap_free(entry); | ||
222 | page_cache_release(page); | ||
223 | } | ||
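/*
 * Illustrative sketch, not part of the original file: the preconditions a
 * caller must establish before delete_from_swap_cache(), mirroring the
 * BUG_ON()s above.  The helper is hypothetical.
 */
static void example_drop_from_swap_cache(struct page *page)
{
	lock_page(page);
	wait_on_page_writeback(page);
	if (PageSwapCache(page) && !PagePrivate(page))
		delete_from_swap_cache(page);
	unlock_page(page);
}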
224 | |||
225 | /* | ||
226 | * Strange swizzling function only for use by shmem_writepage | ||
227 | */ | ||
228 | int move_to_swap_cache(struct page *page, swp_entry_t entry) | ||
229 | { | ||
230 | int err = __add_to_swap_cache(page, entry, GFP_ATOMIC); | ||
231 | if (!err) { | ||
232 | remove_from_page_cache(page); | ||
233 | page_cache_release(page); /* pagecache ref */ | ||
234 | if (!swap_duplicate(entry)) | ||
235 | BUG(); | ||
236 | SetPageDirty(page); | ||
237 | INC_CACHE_INFO(add_total); | ||
238 | } else if (err == -EEXIST) | ||
239 | INC_CACHE_INFO(exist_race); | ||
240 | return err; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Strange swizzling function for shmem_getpage (and shmem_unuse) | ||
245 | */ | ||
246 | int move_from_swap_cache(struct page *page, unsigned long index, | ||
247 | struct address_space *mapping) | ||
248 | { | ||
249 | int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC); | ||
250 | if (!err) { | ||
251 | delete_from_swap_cache(page); | ||
252 | /* shift page from clean_pages to dirty_pages list */ | ||
253 | ClearPageDirty(page); | ||
254 | set_page_dirty(page); | ||
255 | } | ||
256 | return err; | ||
257 | } | ||
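/*
 * Illustrative sketch, not part of the original file: roughly how
 * shmem_getpage()/shmem_unuse() move a swapped-out shmem page back into the
 * file's own mapping at 'index'.  The helper is hypothetical; on success the
 * page ends up dirty in 'mapping' and is gone from the swap cache.
 */
static int example_reinstate_shmem_page(struct page *page,
		struct address_space *mapping, unsigned long index)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(!PageSwapCache(page));

	return move_from_swap_cache(page, index, mapping);
}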
258 | |||
259 | /* | ||
260 | * If we are the only user, then try to free up the swap cache. | ||
261 | * | ||
262 | * Its ok to check for PageSwapCache without the page lock | ||
263 | * here because we are going to recheck again inside | ||
264 | * exclusive_swap_page() _with_ the lock. | ||
265 | * - Marcelo | ||
266 | */ | ||
267 | static inline void free_swap_cache(struct page *page) | ||
268 | { | ||
269 | if (PageSwapCache(page) && !TestSetPageLocked(page)) { | ||
270 | remove_exclusive_swap_page(page); | ||
271 | unlock_page(page); | ||
272 | } | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Perform a free_page(), also freeing any swap cache associated with | ||
277 | * this page if it is the last user of the page. Can not do a lock_page, | ||
278 | * as we are holding the page_table_lock spinlock. | ||
279 | */ | ||
280 | void free_page_and_swap_cache(struct page *page) | ||
281 | { | ||
282 | free_swap_cache(page); | ||
283 | page_cache_release(page); | ||
284 | } | ||
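/*
 * Illustrative sketch, not part of the original file: the page-table
 * teardown path calls this with mm->page_table_lock held, which is why
 * free_swap_cache() can only trylock the page.  'mm' and the helper are
 * hypothetical; the pte clearing is elided.
 */
static void example_release_mapped_page(struct mm_struct *mm, struct page *page)
{
	spin_lock(&mm->page_table_lock);
	/* ... pte has been cleared and flushed ... */
	free_page_and_swap_cache(page);	/* must not sleep on the page lock */
	spin_unlock(&mm->page_table_lock);
}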
285 | |||
286 | /* | ||
287 | * Passed an array of pages, drop them all from swapcache and then release | ||
288 | * them. They are removed from the LRU and freed if this is their last use. | ||
289 | */ | ||
290 | void free_pages_and_swap_cache(struct page **pages, int nr) | ||
291 | { | ||
292 | int chunk = 16; | ||
293 | struct page **pagep = pages; | ||
294 | |||
295 | lru_add_drain(); | ||
296 | while (nr) { | ||
297 | int todo = min(chunk, nr); | ||
298 | int i; | ||
299 | |||
300 | for (i = 0; i < todo; i++) | ||
301 | free_swap_cache(pagep[i]); | ||
302 | release_pages(pagep, todo, 0); | ||
303 | pagep += todo; | ||
304 | nr -= todo; | ||
305 | } | ||
306 | } | ||
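/*
 * Illustrative sketch, not part of the original file: a batching unmapper
 * (the mmu_gather code behaves along these lines) collects pages into an
 * array while tearing down mappings and hands them over in one go.  The
 * struct and helper are hypothetical.
 */
struct example_gather {
	int nr;
	struct page *pages[64];
};

static void example_flush_gather(struct example_gather *eg)
{
	if (eg->nr) {
		free_pages_and_swap_cache(eg->pages, eg->nr);
		eg->nr = 0;
	}
}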
307 | |||
308 | /* | ||
309 | * Lookup a swap entry in the swap cache. A found page will be returned | ||
310 | * unlocked and with its refcount incremented - we rely on the kernel | ||
311 | * lock getting page table operations atomic even if we drop the page | ||
312 | * lock before returning. | ||
313 | */ | ||
314 | struct page * lookup_swap_cache(swp_entry_t entry) | ||
315 | { | ||
316 | struct page *page; | ||
317 | |||
318 | page = find_get_page(&swapper_space, entry.val); | ||
319 | |||
320 | if (page) | ||
321 | INC_CACHE_INFO(find_success); | ||
322 | |||
323 | INC_CACHE_INFO(find_total); | ||
324 | return page; | ||
325 | } | ||
326 | |||
327 | /* | ||
328 | * Locate a page of swap in physical memory, reserving swap cache space | ||
329 | * and reading the disk if it is not already cached. | ||
330 | * A failure return means that either the page allocation failed or that | ||
331 | * the swap entry is no longer in use. | ||
332 | */ | ||
333 | struct page *read_swap_cache_async(swp_entry_t entry, | ||
334 | struct vm_area_struct *vma, unsigned long addr) | ||
335 | { | ||
336 | struct page *found_page, *new_page = NULL; | ||
337 | int err; | ||
338 | |||
339 | do { | ||
340 | /* | ||
341 | * First check the swap cache. Since this is normally | ||
342 | * called after lookup_swap_cache() failed, re-calling | ||
343 | * that would confuse statistics. | ||
344 | */ | ||
345 | found_page = find_get_page(&swapper_space, entry.val); | ||
346 | if (found_page) | ||
347 | break; | ||
348 | |||
349 | /* | ||
350 | * Get a new page to read into from swap. | ||
351 | */ | ||
352 | if (!new_page) { | ||
353 | new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); | ||
354 | if (!new_page) | ||
355 | break; /* Out of memory */ | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * Associate the page with swap entry in the swap cache. | ||
360 | * May fail (-ENOENT) if swap entry has been freed since | ||
361 | * our caller observed it. May fail (-EEXIST) if there | ||
362 | * is already a page associated with this entry in the | ||
363 | * swap cache: added by a racing read_swap_cache_async, | ||
364 | * or by try_to_swap_out (or shmem_writepage) re-using | ||
365 | * the just freed swap entry for an existing page. | ||
366 | * May fail (-ENOMEM) if radix-tree node allocation failed. | ||
367 | */ | ||
368 | err = add_to_swap_cache(new_page, entry); | ||
369 | if (!err) { | ||
370 | /* | ||
371 | * Initiate read into locked page and return. | ||
372 | */ | ||
373 | lru_cache_add_active(new_page); | ||
374 | swap_readpage(NULL, new_page); | ||
375 | return new_page; | ||
376 | } | ||
377 | } while (err != -ENOENT && err != -ENOMEM); | ||
378 | |||
379 | if (new_page) | ||
380 | page_cache_release(new_page); | ||
381 | return found_page; | ||
382 | } | ||
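/*
 * Illustrative sketch, not part of the original file: the overall shape of a
 * swap-in path such as do_swap_page(), combining lookup_swap_cache() with the
 * speculative read above.  The helper is hypothetical and the waiting policy
 * is simplified.
 */
static struct page *example_swapin(swp_entry_t entry,
		struct vm_area_struct *vma, unsigned long addr)
{
	struct page *page;

	page = lookup_swap_cache(entry);
	if (!page) {
		/* miss: allocate a page, add it to the swap cache and
		 * start the read ourselves */
		page = read_swap_cache_async(entry, vma, addr);
		if (!page)
			return NULL;	/* OOM, or the entry was freed */
	}

	lock_page(page);	/* the read unlocks the page on completion */
	if (!PageUptodate(page)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;		/* I/O error */
	}
	unlock_page(page);
	return page;
}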