author     Steve French <sfrench@us.ibm.com>  2008-02-06 11:04:00 -0500
committer  Steve French <sfrench@us.ibm.com>  2008-02-06 11:04:00 -0500
commit     f315ccb3e679f271583f2a4f463ad9b65665b751
tree       44eb52102587d7b0bb592464cef6ec04bcac8b90 /mm/swap_state.c
parent     ead03e30b050d6dda769e7e9b071c5fa720bf8d2
parent     551e4fb2465b87de9d4aa1669b27d624435443bb

Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'mm/swap_state.c')

-rw-r--r--  mm/swap_state.c  153
1 file changed, 71 insertions, 82 deletions
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b52635601dfe..ec42f01a8d02 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -10,6 +10,7 @@
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
@@ -51,26 +52,22 @@ static struct {
 	unsigned long del_total;
 	unsigned long find_success;
 	unsigned long find_total;
-	unsigned long noent_race;
-	unsigned long exist_race;
 } swap_cache_info;
 
 void show_swap_cache_info(void)
 {
-	printk("Swap cache: add %lu, delete %lu, find %lu/%lu, race %lu+%lu\n",
+	printk("Swap cache: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
-		swap_cache_info.find_success, swap_cache_info.find_total,
-		swap_cache_info.noent_race, swap_cache_info.exist_race);
+		swap_cache_info.find_success, swap_cache_info.find_total);
 	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
 /*
- * __add_to_swap_cache resembles add_to_page_cache on swapper_space,
+ * add_to_swap_cache resembles add_to_page_cache on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
-			       gfp_t gfp_mask)
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
 	int error;
 
@@ -88,6 +85,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
 		set_page_private(page, entry.val);
 		total_swapcache_pages++;
 		__inc_zone_page_state(page, NR_FILE_PAGES);
+		INC_CACHE_INFO(add_total);
 	}
 	write_unlock_irq(&swapper_space.tree_lock);
 	radix_tree_preload_end();
@@ -95,31 +93,6 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry,
 	return error;
 }
 
-static int add_to_swap_cache(struct page *page, swp_entry_t entry)
-{
-	int error;
-
-	BUG_ON(PageLocked(page));
-	if (!swap_duplicate(entry)) {
-		INC_CACHE_INFO(noent_race);
-		return -ENOENT;
-	}
-	SetPageLocked(page);
-	error = __add_to_swap_cache(page, entry, GFP_KERNEL);
-	/*
-	 * Anon pages are already on the LRU, we don't run lru_cache_add here.
-	 */
-	if (error) {
-		ClearPageLocked(page);
-		swap_free(entry);
-		if (error == -EEXIST)
-			INC_CACHE_INFO(exist_race);
-		return error;
-	}
-	INC_CACHE_INFO(add_total);
-	return 0;
-}
-
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
@@ -152,6 +125,7 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
 	int err;
 
 	BUG_ON(!PageLocked(page));
+	BUG_ON(!PageUptodate(page));
 
 	for (;;) {
 		entry = get_swap_page();
@@ -169,18 +143,15 @@ int add_to_swap(struct page * page, gfp_t gfp_mask)
 		/*
 		 * Add it to the swap cache and mark it dirty
 		 */
-		err = __add_to_swap_cache(page, entry,
+		err = add_to_swap_cache(page, entry,
 				gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
 		switch (err) {
 		case 0:				/* Success */
-			SetPageUptodate(page);
 			SetPageDirty(page);
-			INC_CACHE_INFO(add_total);
 			return 1;
 		case -EEXIST:
 			/* Raced with "speculative" read_swap_cache_async */
-			INC_CACHE_INFO(exist_race);
 			swap_free(entry);
 			continue;
 		default:
@@ -211,40 +182,6 @@ void delete_from_swap_cache(struct page *page)
 	page_cache_release(page);
 }
 
-/*
- * Strange swizzling function only for use by shmem_writepage
- */
-int move_to_swap_cache(struct page *page, swp_entry_t entry)
-{
-	int err = __add_to_swap_cache(page, entry, GFP_ATOMIC);
-	if (!err) {
-		remove_from_page_cache(page);
-		page_cache_release(page);	/* pagecache ref */
-		if (!swap_duplicate(entry))
-			BUG();
-		SetPageDirty(page);
-		INC_CACHE_INFO(add_total);
-	} else if (err == -EEXIST)
-		INC_CACHE_INFO(exist_race);
-	return err;
-}
-
-/*
- * Strange swizzling function for shmem_getpage (and shmem_unuse)
- */
-int move_from_swap_cache(struct page *page, unsigned long index,
-		struct address_space *mapping)
-{
-	int err = add_to_page_cache(page, mapping, index, GFP_ATOMIC);
-	if (!err) {
-		delete_from_swap_cache(page);
-		/* shift page from clean_pages to dirty_pages list */
-		ClearPageDirty(page);
-		set_page_dirty(page);
-	}
-	return err;
-}
-
 /*
  * If we are the only user, then try to free up the swap cache.
  *
@@ -317,7 +254,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
  * A failure return means that either the page allocation failed or that
  * the swap entry is no longer in use.
  */
-struct page *read_swap_cache_async(swp_entry_t entry,
+struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *found_page, *new_page = NULL;
@@ -337,23 +274,27 @@ struct page *read_swap_cache_async(swp_entry_t entry,
 		 * Get a new page to read into from swap.
 		 */
 		if (!new_page) {
-			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-								vma, addr);
+			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
 		}
 
 		/*
+		 * Swap entry may have been freed since our caller observed it.
+		 */
+		if (!swap_duplicate(entry))
+			break;
+
+		/*
 		 * Associate the page with swap entry in the swap cache.
-		 * May fail (-ENOENT) if swap entry has been freed since
-		 * our caller observed it.  May fail (-EEXIST) if there
-		 * is already a page associated with this entry in the
-		 * swap cache: added by a racing read_swap_cache_async,
-		 * or by try_to_swap_out (or shmem_writepage) re-using
-		 * the just freed swap entry for an existing page.
+		 * May fail (-EEXIST) if there is already a page associated
+		 * with this entry in the swap cache: added by a racing
+		 * read_swap_cache_async, or add_to_swap or shmem_writepage
+		 * re-using the just freed swap entry for an existing page.
 		 * May fail (-ENOMEM) if radix-tree node allocation failed.
 		 */
-		err = add_to_swap_cache(new_page, entry);
+		SetPageLocked(new_page);
+		err = add_to_swap_cache(new_page, entry, gfp_mask & GFP_KERNEL);
 		if (!err) {
 			/*
 			 * Initiate read into locked page and return.
@@ -362,9 +303,57 @@ struct page *read_swap_cache_async(swp_entry_t entry,
 			swap_readpage(NULL, new_page);
 			return new_page;
 		}
-	} while (err != -ENOENT && err != -ENOMEM);
+		ClearPageLocked(new_page);
+		swap_free(entry);
+	} while (err != -ENOMEM);
 
 	if (new_page)
 		page_cache_release(new_page);
 	return found_page;
 }
+
+/**
+ * swapin_readahead - swap in pages in hope we need them soon
+ * @entry: swap entry of this memory
+ * @vma: user vma this address belongs to
+ * @addr: target address for mempolicy
+ *
+ * Returns the struct page for entry and addr, after queueing swapin.
+ *
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time.  We also make sure to queue
+ * the 'original' request together with the readahead ones...
+ *
+ * This has been extended to use the NUMA policies from the mm triggering
+ * the readahead.
+ *
+ * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
+ */
+struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	int nr_pages;
+	struct page *page;
+	unsigned long offset;
+	unsigned long end_offset;
+
+	/*
+	 * Get starting offset for readaround, and number of pages to read.
+	 * Adjust starting address by readbehind (for NUMA interleave case)?
+	 * No, it's very unlikely that swap layout would follow vma layout,
+	 * more likely that neighbouring swap pages came from the same node:
+	 * so use the same "addr" to choose the same node for each swap read.
+	 */
+	nr_pages = valid_swaphandles(entry, &offset);
+	for (end_offset = offset + nr_pages; offset < end_offset; offset++) {
+		/* Ok, do the async read-ahead now */
+		page = read_swap_cache_async(swp_entry(swp_type(entry), offset),
+						gfp_mask, vma, addr);
+		if (!page)
+			break;
+		page_cache_release(page);
+	}
+	lru_add_drain();	/* Push any new pages onto the LRU now */
+	return read_swap_cache_async(entry, gfp_mask, vma, addr);
+}