aboutsummaryrefslogtreecommitdiffstats
path: root/mm/swap_state.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--mm/swap_state.c153
1 files changed, 71 insertions, 82 deletions
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b52635601dfe..ec42f01a8d02 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -10,6 +10,7 @@
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/kernel_stat.h> 11#include <linux/kernel_stat.h>
12#include <linux/swap.h> 12#include <linux/swap.h>
13#include <linux/swapops.h>
13#include <linux/init.h> 14#include <linux/init.h>
14#include <linux/pagemap.h> 15#include <linux/pagemap.h>
15#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
@@ -51,26 +52,22 @@ static struct {
51 unsigned long del_total; 52 unsigned long del_total;
52 unsigned long find_success; 53 unsigned long find_success;
53 unsigned long find_total; 54 unsigned long find_total;
54 unsigned long noent_race;
55 unsigned long exist_race;
56} swap_cache_info; 55} swap_cache_info;
57 56
58void show_swap_cache_info(void) 57void show_swap_cache_info(void)
59{ 58{
60 printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n", 59 printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n",
61 swap_cache_info.add_total, swap_cache_info.del_total, 60 swap_cache_info.add_total, swap_cache_info.del_total,
62 swap_cache_info.find_success, swap_cache_info.find_total, 61 swap_cache_info.find_success, swap_cache_info.find_total);
63 swap_cache_info.noent_race, swap_cache_info.exist_race);
64 printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10)); 62 printk("Free swap = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
65 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10)); 63 printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
66} 64}
67 65
68/* 66/*
69 * __add_to_swap_cache resembles add_to_page_cache on swapper_space, 67 * add_to_swap_cache resembles add_to_page_cache on swapper_space,
70 * but sets SwapCache flag and private instead of mapping and index. 68 * but sets SwapCache flag and private instead of mapping and index.
71 */ 69 */
72static int __add_to_swap_cache(struct page *page, swp_entry_t entry, 70int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
73 gfp_t gfp_mask)
74{ 71{
75 int error; 72 int error;
76 73
@@ -88,6 +85,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
88 set_page_private(page, entry.val); 85 set_page_private(page, entry.val);
89 total_swapcache_pages++; 86 total_swapcache_pages++;
90 __inc_zone_page_state(page, NR_FILE_PAGES); 87 __inc_zone_page_state(page, NR_FILE_PAGES);
88 INC_CACHE_INFO(add_total);
91 } 89 }
92 write_unlock_irq(&swapper_space.tree_lock); 90 write_unlock_irq(&swapper_space.tree_lock);
93 radix_tree_preload_end(); 91 radix_tree_preload_end();
@@ -95,31 +93,6 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
95 return error; 93 return error;
96} 94}
97 95
98static int add_to_swap_cache(struct page *page, swp_entry_t entry)
99{
100 int error;
101
102 BUG_ON(PageLocked(page));
103 if (!swap_duplicate(entry)) {
104 INC_CACHE_INFO(noent_race);
105 return -ENOENT;
106 }
107 SetPageLocked(page);
108 error = __add_to_swap_cache(page, entry, GFP_KERNEL);
109 /*
110 * Anon pages are already on the LRU, we don't run lru_cache_add here.
111 */
112 if (error) {
113 ClearPageLocked(page);
114 swap_free(entry);
115 if (error == -EEXIST)
116 INC_CACHE_INFO(exist_race);
117 return error;
118 }
119 INC_CACHE_INFO(add_total);
120 return 0;
121}
122
123/* 96/*
124 * This must be called only on pages that have 97 * This must be called only on pages that have
125 * been verified to be in the swap cache. 98 * been verified to be in the swap cache.
@@ -152,6 +125,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
152 int err; 125 int err;
153 126
154 BUG_ON(!PageLocked(page)); 127 BUG_ON(!PageLocked(page));
128 BUG_ON(!PageUptodate(page));
155 129
156 for (;;) { 130 for (;;) {
157 entry = get_swap_page(); 131 entry = get_swap_page();
@@ -169,18 +143,15 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
169 /* 143 /*
170 * Add it to the swap cache and mark it dirty 144 * Add it to the swap cache and mark it dirty
171 */ 145 */
172 err = __add_to_swap_cache(page, entry, 146 err = add_to_swap_cache(page, entry,
173 gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN); 147 gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
174 148
175 switch (err) { 149 switch (err) {
176 case 0: /* Success */ 150 case 0: /* Success */
177 SetPageUptodate(page);
178 SetPageDirty(page); 151 SetPageDirty(page);
179 INC_CACHE_INFO(add_total);
180 return 1; 152 return 1;
181 case -EEXIST: 153 case -EEXIST:
182 /* Raced with "speculative" read_swap_cache_async */ 154 /* Raced with "speculative" read_swap_cache_async */
183 INC_CACHE_INFO(exist_race);
184 swap_free(entry); 155 swap_free(entry);
185 continue; 156 continue;
186 default: 157 default:
@@ -211,40 +182,6 @@ void delete_from_swap_cache(struct page *page)
211 page_cache_release(page); 182 page_cache_release(page);
212} 183}
213 184
214/*
215 * Strange swizzling function only for use by shmem_writepage
216 */
217int move_to_swap_cache(struct page *page, swp_entry_t entry)
218{
219 int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
220 if (!err) {
221 remove_from_page_cache(page);
222 page_cache_release(page); /* pagecache ref */
223 if (!swap_duplicate(entry))
224 BUG();
225 SetPageDirty(page);
226 INC_CACHE_INFO(add_total);
227 } else if (err == -EEXIST)
228 INC_CACHE_INFO(exist_race);
229 return err;
230}
231
232/*
233 * Strange swizzling function for shmem_getpage (and shmem_unuse)
234 */
235int move_from_swap_cache(struct page *page, unsigned long index,
236 struct address_space *mapping)
237{
238 int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
239 if (!err) {
240 delete_from_swap_cache(page);
241 /* shift page from clean_pages to dirty_pages list */
242 ClearPageDirty(page);
243 set_page_dirty(page);
244 }
245 return err;
246}
247
248/* 185/*
249 * If we are the only user, then try to free up the swap cache. 186 * If we are the only user, then try to free up the swap cache.
250 * 187 *
@@ -317,7 +254,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
317 * A failure return means that either the page allocation failed or that 254 * A failure return means that either the page allocation failed or that
318 * the swap entry is no longer in use. 255 * the swap entry is no longer in use.
319 */ 256 */
320struct page *read_swap_cache_async(swp_entry_t entry, 257struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
321 struct vm_area_struct *vma, unsigned long addr) 258 struct vm_area_struct *vma, unsigned long addr)
322{ 259{
323 struct page *found_page, *new_page = NULL; 260 struct page *found_page, *new_page = NULL;
@@ -337,23 +274,27 @@ struct page *read_swap_cache_async(swp_entry_t entry,
337 * Get a new page to read into from swap. 274 * Get a new page to read into from swap.
338 */ 275 */
339 if (!new_page) { 276 if (!new_page) {
340 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, 277 new_page = alloc_page_vma(gfp_mask, vma, addr);
341 vma, addr);
342 if (!new_page) 278 if (!new_page)
343 break; /* Out of memory */ 279 break; /* Out of memory */
344 } 280 }
345 281
346 /* 282 /*
283 * Swap entry may have been freed since our caller observed it.
284 */
285 if (!swap_duplicate(entry))
286 break;
287
288 /*
347 * Associate the page with swap entry in the swap cache. 289 * Associate the page with swap entry in the swap cache.
348 * May fail (-ENOENT) if swap entry has been freed since 290 * May fail (-EEXIST) if there is already a page associated
349 * our caller observed it. May fail (-EEXIST) if there 291 * with this entry in the swap cache: added by a racing
350 * is already a page associated with this entry in the 292 * read_swap_cache_async, or add_to_swap or shmem_writepage
351 * swap cache: added by a racing read_swap_cache_async, 293 * re-using the just freed swap entry for an existing page.
352 * or by try_to_swap_out (or shmem_writepage) re-using
353 * the just freed swap entry for an existing page.
354 * May fail (-ENOMEM) if radix-tree node allocation failed. 294 * May fail (-ENOMEM) if radix-tree node allocation failed.
355 */ 295 */
356 err = add_to_swap_cache(new_page, entry); 296 SetPageLocked(new_page);
297 err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
357 if (!err) { 298 if (!err) {
358 /* 299 /*
359 * Initiate read into locked page and return. 300 * Initiate read into locked page and return.
@@ -362,9 +303,57 @@ struct page *read_swap_cache_async(swp_entry_t entry,
362 swap_readpage(NULL, new_page); 303 swap_readpage(NULL, new_page);
363 return new_page; 304 return new_page;
364 } 305 }
365 } while (err != -ENOENT && err != -ENOMEM); 306 ClearPageLocked(new_page);
307 swap_free(entry);
308 } while (err != -ENOMEM);
366 309
367 if (new_page) 310 if (new_page)
368 page_cache_release(new_page); 311 page_cache_release(new_page);
369 return found_page; 312 return found_page;
370} 313}
314
315/**
316 * swapin_readahead - swap in pages in hope we need them soon
317 * @entry: swap entry of this memory
318 * @vma: user vma this address belongs to
319 * @addr: target address for mempolicy
320 *
321 * Returns the struct page for entry and addr, after queueing swapin.
322 *
323 * Primitive swap readahead code. We simply read an aligned block of
324 * (1 << page_cluster) entries in the swap area. This method is chosen
325 * because it doesn't cost us any seek time. We also make sure to queue
326 * the 'original' request together with the readahead ones...
327 *
328 * This has been extended to use the NUMA policies from the mm triggering
329 * the readahead.
330 *
331 * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
332 */
333struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
334 struct vm_area_struct *vma, unsigned long addr)
335{
336 int nr_pages;
337 struct page *page;
338 unsigned long offset;
339 unsigned long end_offset;
340
341 /*
342 * Get starting offset for readaround, and number of pages to read.
343 * Adjust starting address by readbehind (for NUMA interleave case)?
344 * No, it's very unlikely that swap layout would follow vma layout,
345 * more likely that neighbouring swap pages came from the same node:
346 * so use the same "addr" to choose the same node for each swap read.
347 */
348 nr_pages = valid_swaphandles(entry, &offset);
349 for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
350 /* Ok, do the async read-ahead now */
351 page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
352 gfp_mask, vma, addr);
353 if (!page)
354 break;
355 page_cache_release(page);
356 }
357 lru_add_drain(); /* Push any new pages onto the LRU now */
358 return read_swap_cache_async(entry, gfp_mask, vma, addr);
359}