diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 17 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/frontswap.c | 314 | ||||
-rw-r--r-- | mm/page_io.c | 12 | ||||
-rw-r--r-- | mm/swapfile.c | 54 |
5 files changed, 385 insertions, 13 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index b2176374b98..82fed4eb2b6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -389,3 +389,20 @@ config CLEANCACHE | |||
389 | in a negligible performance hit. | 389 | in a negligible performance hit. |
390 | 390 | ||
391 | If unsure, say Y to enable cleancache | 391 | If unsure, say Y to enable cleancache |
392 | |||
393 | config FRONTSWAP | ||
394 | bool "Enable frontswap to cache swap pages if tmem is present" | ||
395 | depends on SWAP | ||
396 | default n | ||
397 | help | ||
398 | Frontswap is so named because it can be thought of as the opposite | ||
399 | of a "backing" store for a swap device. The data is stored into | ||
400 | "transcendent memory", memory that is not directly accessible or | ||
401 | addressable by the kernel and is of unknown and possibly | ||
402 | time-varying size. When space in transcendent memory is available, | ||
403 | a significant swap I/O reduction may be achieved. When none is | ||
404 | available, all frontswap calls are reduced to a single pointer- | ||
405 | compare-against-NULL resulting in a negligible performance hit | ||
406 | and swap data is stored as normal on the matching swap device. | ||
407 | |||
408 | If unsure, say Y to enable frontswap. | ||
diff --git a/mm/Makefile b/mm/Makefile index a156285ce88..2e2fbbefb99 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | |||
29 | 29 | ||
30 | obj-$(CONFIG_BOUNCE) += bounce.o | 30 | obj-$(CONFIG_BOUNCE) += bounce.o |
31 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o | 31 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o |
32 | obj-$(CONFIG_FRONTSWAP) += frontswap.o | ||
32 | obj-$(CONFIG_HAS_DMA) += dmapool.o | 33 | obj-$(CONFIG_HAS_DMA) += dmapool.o |
33 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o | 34 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o |
34 | obj-$(CONFIG_NUMA) += mempolicy.o | 35 | obj-$(CONFIG_NUMA) += mempolicy.o |
diff --git a/mm/frontswap.c b/mm/frontswap.c new file mode 100644 index 00000000000..e25025574a0 --- /dev/null +++ b/mm/frontswap.c | |||
@@ -0,0 +1,314 @@ | |||
1 | /* | ||
2 | * Frontswap frontend | ||
3 | * | ||
4 | * This code provides the generic "frontend" layer to call a matching | ||
5 | * "backend" driver implementation of frontswap. See | ||
6 | * Documentation/vm/frontswap.txt for more information. | ||
7 | * | ||
8 | * Copyright (C) 2009-2012 Oracle Corp. All rights reserved. | ||
9 | * Author: Dan Magenheimer | ||
10 | * | ||
11 | * This work is licensed under the terms of the GNU GPL, version 2. | ||
12 | */ | ||
13 | |||
14 | #include <linux/mm.h> | ||
15 | #include <linux/mman.h> | ||
16 | #include <linux/swap.h> | ||
17 | #include <linux/swapops.h> | ||
18 | #include <linux/proc_fs.h> | ||
19 | #include <linux/security.h> | ||
20 | #include <linux/capability.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/debugfs.h> | ||
24 | #include <linux/frontswap.h> | ||
25 | #include <linux/swapfile.h> | ||
26 | |||
27 | /* | ||
28 | * frontswap_ops is set by frontswap_register_ops to contain the pointers | ||
29 | * to the frontswap "backend" implementation functions. | ||
30 | */ | ||
31 | static struct frontswap_ops frontswap_ops __read_mostly; | ||
32 | |||
33 | /* | ||
34 | * This global enablement flag reduces overhead on systems where frontswap_ops | ||
35 | * has not been registered, so is preferred to the slower alternative: a | ||
36 | * function call that checks a non-global. | ||
37 | */ | ||
38 | bool frontswap_enabled __read_mostly; | ||
39 | EXPORT_SYMBOL(frontswap_enabled); | ||
40 | |||
41 | /* | ||
42 | * If enabled, frontswap_store will return failure even on success. As | ||
43 | * a result, the swap subsystem will always write the page to swap, in | ||
44 | * effect converting frontswap into a writethrough cache. In this mode, | ||
45 | * there is no direct reduction in swap writes, but a frontswap backend | ||
46 | * can unilaterally "reclaim" any pages in use with no data loss, thus | ||
47 | * providing increases control over maximum memory usage due to frontswap. | ||
48 | */ | ||
49 | static bool frontswap_writethrough_enabled __read_mostly; | ||
50 | |||
51 | #ifdef CONFIG_DEBUG_FS | ||
52 | /* | ||
53 | * Counters available via /sys/kernel/debug/frontswap (if debugfs is | ||
54 | * properly configured). These are for information only so are not protected | ||
55 | * against increment races. | ||
56 | */ | ||
57 | static u64 frontswap_loads; | ||
58 | static u64 frontswap_succ_stores; | ||
59 | static u64 frontswap_failed_stores; | ||
60 | static u64 frontswap_invalidates; | ||
61 | |||
62 | static inline void inc_frontswap_loads(void) { | ||
63 | frontswap_loads++; | ||
64 | } | ||
65 | static inline void inc_frontswap_succ_stores(void) { | ||
66 | frontswap_succ_stores++; | ||
67 | } | ||
68 | static inline void inc_frontswap_failed_stores(void) { | ||
69 | frontswap_failed_stores++; | ||
70 | } | ||
71 | static inline void inc_frontswap_invalidates(void) { | ||
72 | frontswap_invalidates++; | ||
73 | } | ||
74 | #else | ||
75 | static inline void inc_frontswap_loads(void) { } | ||
76 | static inline void inc_frontswap_succ_stores(void) { } | ||
77 | static inline void inc_frontswap_failed_stores(void) { } | ||
78 | static inline void inc_frontswap_invalidates(void) { } | ||
79 | #endif | ||
80 | /* | ||
81 | * Register operations for frontswap, returning previous thus allowing | ||
82 | * detection of multiple backends and possible nesting. | ||
83 | */ | ||
84 | struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) | ||
85 | { | ||
86 | struct frontswap_ops old = frontswap_ops; | ||
87 | |||
88 | frontswap_ops = *ops; | ||
89 | frontswap_enabled = true; | ||
90 | return old; | ||
91 | } | ||
92 | EXPORT_SYMBOL(frontswap_register_ops); | ||
93 | |||
94 | /* | ||
95 | * Enable/disable frontswap writethrough (see above). | ||
96 | */ | ||
97 | void frontswap_writethrough(bool enable) | ||
98 | { | ||
99 | frontswap_writethrough_enabled = enable; | ||
100 | } | ||
101 | EXPORT_SYMBOL(frontswap_writethrough); | ||
102 | |||
103 | /* | ||
104 | * Called when a swap device is swapon'd. | ||
105 | */ | ||
106 | void __frontswap_init(unsigned type) | ||
107 | { | ||
108 | struct swap_info_struct *sis = swap_info[type]; | ||
109 | |||
110 | BUG_ON(sis == NULL); | ||
111 | if (sis->frontswap_map == NULL) | ||
112 | return; | ||
113 | if (frontswap_enabled) | ||
114 | (*frontswap_ops.init)(type); | ||
115 | } | ||
116 | EXPORT_SYMBOL(__frontswap_init); | ||
117 | |||
118 | /* | ||
119 | * "Store" data from a page to frontswap and associate it with the page's | ||
120 | * swaptype and offset. Page must be locked and in the swap cache. | ||
121 | * If frontswap already contains a page with matching swaptype and | ||
122 | * offset, the frontswap implmentation may either overwrite the data and | ||
123 | * return success or invalidate the page from frontswap and return failure. | ||
124 | */ | ||
125 | int __frontswap_store(struct page *page) | ||
126 | { | ||
127 | int ret = -1, dup = 0; | ||
128 | swp_entry_t entry = { .val = page_private(page), }; | ||
129 | int type = swp_type(entry); | ||
130 | struct swap_info_struct *sis = swap_info[type]; | ||
131 | pgoff_t offset = swp_offset(entry); | ||
132 | |||
133 | BUG_ON(!PageLocked(page)); | ||
134 | BUG_ON(sis == NULL); | ||
135 | if (frontswap_test(sis, offset)) | ||
136 | dup = 1; | ||
137 | ret = (*frontswap_ops.store)(type, offset, page); | ||
138 | if (ret == 0) { | ||
139 | frontswap_set(sis, offset); | ||
140 | inc_frontswap_succ_stores(); | ||
141 | if (!dup) | ||
142 | atomic_inc(&sis->frontswap_pages); | ||
143 | } else if (dup) { | ||
144 | /* | ||
145 | failed dup always results in automatic invalidate of | ||
146 | the (older) page from frontswap | ||
147 | */ | ||
148 | frontswap_clear(sis, offset); | ||
149 | atomic_dec(&sis->frontswap_pages); | ||
150 | inc_frontswap_failed_stores(); | ||
151 | } else | ||
152 | inc_frontswap_failed_stores(); | ||
153 | if (frontswap_writethrough_enabled) | ||
154 | /* report failure so swap also writes to swap device */ | ||
155 | ret = -1; | ||
156 | return ret; | ||
157 | } | ||
158 | EXPORT_SYMBOL(__frontswap_store); | ||
159 | |||
160 | /* | ||
161 | * "Get" data from frontswap associated with swaptype and offset that were | ||
162 | * specified when the data was put to frontswap and use it to fill the | ||
163 | * specified page with data. Page must be locked and in the swap cache. | ||
164 | */ | ||
165 | int __frontswap_load(struct page *page) | ||
166 | { | ||
167 | int ret = -1; | ||
168 | swp_entry_t entry = { .val = page_private(page), }; | ||
169 | int type = swp_type(entry); | ||
170 | struct swap_info_struct *sis = swap_info[type]; | ||
171 | pgoff_t offset = swp_offset(entry); | ||
172 | |||
173 | BUG_ON(!PageLocked(page)); | ||
174 | BUG_ON(sis == NULL); | ||
175 | if (frontswap_test(sis, offset)) | ||
176 | ret = (*frontswap_ops.load)(type, offset, page); | ||
177 | if (ret == 0) | ||
178 | inc_frontswap_loads(); | ||
179 | return ret; | ||
180 | } | ||
181 | EXPORT_SYMBOL(__frontswap_load); | ||
182 | |||
183 | /* | ||
184 | * Invalidate any data from frontswap associated with the specified swaptype | ||
185 | * and offset so that a subsequent "get" will fail. | ||
186 | */ | ||
187 | void __frontswap_invalidate_page(unsigned type, pgoff_t offset) | ||
188 | { | ||
189 | struct swap_info_struct *sis = swap_info[type]; | ||
190 | |||
191 | BUG_ON(sis == NULL); | ||
192 | if (frontswap_test(sis, offset)) { | ||
193 | (*frontswap_ops.invalidate_page)(type, offset); | ||
194 | atomic_dec(&sis->frontswap_pages); | ||
195 | frontswap_clear(sis, offset); | ||
196 | inc_frontswap_invalidates(); | ||
197 | } | ||
198 | } | ||
199 | EXPORT_SYMBOL(__frontswap_invalidate_page); | ||
200 | |||
201 | /* | ||
202 | * Invalidate all data from frontswap associated with all offsets for the | ||
203 | * specified swaptype. | ||
204 | */ | ||
205 | void __frontswap_invalidate_area(unsigned type) | ||
206 | { | ||
207 | struct swap_info_struct *sis = swap_info[type]; | ||
208 | |||
209 | BUG_ON(sis == NULL); | ||
210 | if (sis->frontswap_map == NULL) | ||
211 | return; | ||
212 | (*frontswap_ops.invalidate_area)(type); | ||
213 | atomic_set(&sis->frontswap_pages, 0); | ||
214 | memset(sis->frontswap_map, 0, sis->max / sizeof(long)); | ||
215 | } | ||
216 | EXPORT_SYMBOL(__frontswap_invalidate_area); | ||
217 | |||
218 | /* | ||
219 | * Frontswap, like a true swap device, may unnecessarily retain pages | ||
220 | * under certain circumstances; "shrink" frontswap is essentially a | ||
221 | * "partial swapoff" and works by calling try_to_unuse to attempt to | ||
222 | * unuse enough frontswap pages to attempt to -- subject to memory | ||
223 | * constraints -- reduce the number of pages in frontswap to the | ||
224 | * number given in the parameter target_pages. | ||
225 | */ | ||
226 | void frontswap_shrink(unsigned long target_pages) | ||
227 | { | ||
228 | struct swap_info_struct *si = NULL; | ||
229 | int si_frontswap_pages; | ||
230 | unsigned long total_pages = 0, total_pages_to_unuse; | ||
231 | unsigned long pages = 0, pages_to_unuse = 0; | ||
232 | int type; | ||
233 | bool locked = false; | ||
234 | |||
235 | /* | ||
236 | * we don't want to hold swap_lock while doing a very | ||
237 | * lengthy try_to_unuse, but swap_list may change | ||
238 | * so restart scan from swap_list.head each time | ||
239 | */ | ||
240 | spin_lock(&swap_lock); | ||
241 | locked = true; | ||
242 | total_pages = 0; | ||
243 | for (type = swap_list.head; type >= 0; type = si->next) { | ||
244 | si = swap_info[type]; | ||
245 | total_pages += atomic_read(&si->frontswap_pages); | ||
246 | } | ||
247 | if (total_pages <= target_pages) | ||
248 | goto out; | ||
249 | total_pages_to_unuse = total_pages - target_pages; | ||
250 | for (type = swap_list.head; type >= 0; type = si->next) { | ||
251 | si = swap_info[type]; | ||
252 | si_frontswap_pages = atomic_read(&si->frontswap_pages); | ||
253 | if (total_pages_to_unuse < si_frontswap_pages) | ||
254 | pages = pages_to_unuse = total_pages_to_unuse; | ||
255 | else { | ||
256 | pages = si_frontswap_pages; | ||
257 | pages_to_unuse = 0; /* unuse all */ | ||
258 | } | ||
259 | /* ensure there is enough RAM to fetch pages from frontswap */ | ||
260 | if (security_vm_enough_memory_mm(current->mm, pages)) | ||
261 | continue; | ||
262 | vm_unacct_memory(pages); | ||
263 | break; | ||
264 | } | ||
265 | if (type < 0) | ||
266 | goto out; | ||
267 | locked = false; | ||
268 | spin_unlock(&swap_lock); | ||
269 | try_to_unuse(type, true, pages_to_unuse); | ||
270 | out: | ||
271 | if (locked) | ||
272 | spin_unlock(&swap_lock); | ||
273 | return; | ||
274 | } | ||
275 | EXPORT_SYMBOL(frontswap_shrink); | ||
276 | |||
277 | /* | ||
278 | * Count and return the number of frontswap pages across all | ||
279 | * swap devices. This is exported so that backend drivers can | ||
280 | * determine current usage without reading debugfs. | ||
281 | */ | ||
282 | unsigned long frontswap_curr_pages(void) | ||
283 | { | ||
284 | int type; | ||
285 | unsigned long totalpages = 0; | ||
286 | struct swap_info_struct *si = NULL; | ||
287 | |||
288 | spin_lock(&swap_lock); | ||
289 | for (type = swap_list.head; type >= 0; type = si->next) { | ||
290 | si = swap_info[type]; | ||
291 | totalpages += atomic_read(&si->frontswap_pages); | ||
292 | } | ||
293 | spin_unlock(&swap_lock); | ||
294 | return totalpages; | ||
295 | } | ||
296 | EXPORT_SYMBOL(frontswap_curr_pages); | ||
297 | |||
298 | static int __init init_frontswap(void) | ||
299 | { | ||
300 | #ifdef CONFIG_DEBUG_FS | ||
301 | struct dentry *root = debugfs_create_dir("frontswap", NULL); | ||
302 | if (root == NULL) | ||
303 | return -ENXIO; | ||
304 | debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); | ||
305 | debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); | ||
306 | debugfs_create_u64("failed_stores", S_IRUGO, root, | ||
307 | &frontswap_failed_stores); | ||
308 | debugfs_create_u64("invalidates", S_IRUGO, | ||
309 | root, &frontswap_invalidates); | ||
310 | #endif | ||
311 | return 0; | ||
312 | } | ||
313 | |||
314 | module_init(init_frontswap); | ||
diff --git a/mm/page_io.c b/mm/page_io.c index dc76b4d0611..34f02923744 100644 --- a/mm/page_io.c +++ b/mm/page_io.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/bio.h> | 18 | #include <linux/bio.h> |
19 | #include <linux/swapops.h> | 19 | #include <linux/swapops.h> |
20 | #include <linux/writeback.h> | 20 | #include <linux/writeback.h> |
21 | #include <linux/frontswap.h> | ||
21 | #include <asm/pgtable.h> | 22 | #include <asm/pgtable.h> |
22 | 23 | ||
23 | static struct bio *get_swap_bio(gfp_t gfp_flags, | 24 | static struct bio *get_swap_bio(gfp_t gfp_flags, |
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) | |||
98 | unlock_page(page); | 99 | unlock_page(page); |
99 | goto out; | 100 | goto out; |
100 | } | 101 | } |
102 | if (frontswap_store(page) == 0) { | ||
103 | set_page_writeback(page); | ||
104 | unlock_page(page); | ||
105 | end_page_writeback(page); | ||
106 | goto out; | ||
107 | } | ||
101 | bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); | 108 | bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); |
102 | if (bio == NULL) { | 109 | if (bio == NULL) { |
103 | set_page_dirty(page); | 110 | set_page_dirty(page); |
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page) | |||
122 | 129 | ||
123 | VM_BUG_ON(!PageLocked(page)); | 130 | VM_BUG_ON(!PageLocked(page)); |
124 | VM_BUG_ON(PageUptodate(page)); | 131 | VM_BUG_ON(PageUptodate(page)); |
132 | if (frontswap_load(page) == 0) { | ||
133 | SetPageUptodate(page); | ||
134 | unlock_page(page); | ||
135 | goto out; | ||
136 | } | ||
125 | bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); | 137 | bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); |
126 | if (bio == NULL) { | 138 | if (bio == NULL) { |
127 | unlock_page(page); | 139 | unlock_page(page); |
diff --git a/mm/swapfile.c b/mm/swapfile.c index 457b10baef5..de5bc51c4a6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -31,6 +31,8 @@ | |||
31 | #include <linux/memcontrol.h> | 31 | #include <linux/memcontrol.h> |
32 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
33 | #include <linux/oom.h> | 33 | #include <linux/oom.h> |
34 | #include <linux/frontswap.h> | ||
35 | #include <linux/swapfile.h> | ||
34 | 36 | ||
35 | #include <asm/pgtable.h> | 37 | #include <asm/pgtable.h> |
36 | #include <asm/tlbflush.h> | 38 | #include <asm/tlbflush.h> |
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, | |||
42 | static void free_swap_count_continuations(struct swap_info_struct *); | 44 | static void free_swap_count_continuations(struct swap_info_struct *); |
43 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); | 45 | static sector_t map_swap_entry(swp_entry_t, struct block_device**); |
44 | 46 | ||
45 | static DEFINE_SPINLOCK(swap_lock); | 47 | DEFINE_SPINLOCK(swap_lock); |
46 | static unsigned int nr_swapfiles; | 48 | static unsigned int nr_swapfiles; |
47 | long nr_swap_pages; | 49 | long nr_swap_pages; |
48 | long total_swap_pages; | 50 | long total_swap_pages; |
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; | |||
53 | static const char Bad_offset[] = "Bad swap offset entry "; | 55 | static const char Bad_offset[] = "Bad swap offset entry "; |
54 | static const char Unused_offset[] = "Unused swap offset entry "; | 56 | static const char Unused_offset[] = "Unused swap offset entry "; |
55 | 57 | ||
56 | static struct swap_list_t swap_list = {-1, -1}; | 58 | struct swap_list_t swap_list = {-1, -1}; |
57 | 59 | ||
58 | static struct swap_info_struct *swap_info[MAX_SWAPFILES]; | 60 | struct swap_info_struct *swap_info[MAX_SWAPFILES]; |
59 | 61 | ||
60 | static DEFINE_MUTEX(swapon_mutex); | 62 | static DEFINE_MUTEX(swapon_mutex); |
61 | 63 | ||
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, | |||
556 | swap_list.next = p->type; | 558 | swap_list.next = p->type; |
557 | nr_swap_pages++; | 559 | nr_swap_pages++; |
558 | p->inuse_pages--; | 560 | p->inuse_pages--; |
561 | frontswap_invalidate_page(p->type, offset); | ||
559 | if ((p->flags & SWP_BLKDEV) && | 562 | if ((p->flags & SWP_BLKDEV) && |
560 | disk->fops->swap_slot_free_notify) | 563 | disk->fops->swap_slot_free_notify) |
561 | disk->fops->swap_slot_free_notify(p->bdev, offset); | 564 | disk->fops->swap_slot_free_notify(p->bdev, offset); |
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm, | |||
985 | } | 988 | } |
986 | 989 | ||
987 | /* | 990 | /* |
988 | * Scan swap_map from current position to next entry still in use. | 991 | * Scan swap_map (or frontswap_map if frontswap parameter is true) |
992 | * from current position to next entry still in use. | ||
989 | * Recycle to start on reaching the end, returning 0 when empty. | 993 | * Recycle to start on reaching the end, returning 0 when empty. |
990 | */ | 994 | */ |
991 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, | 995 | static unsigned int find_next_to_unuse(struct swap_info_struct *si, |
992 | unsigned int prev) | 996 | unsigned int prev, bool frontswap) |
993 | { | 997 | { |
994 | unsigned int max = si->max; | 998 | unsigned int max = si->max; |
995 | unsigned int i = prev; | 999 | unsigned int i = prev; |
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1015 | prev = 0; | 1019 | prev = 0; |
1016 | i = 1; | 1020 | i = 1; |
1017 | } | 1021 | } |
1022 | if (frontswap) { | ||
1023 | if (frontswap_test(si, i)) | ||
1024 | break; | ||
1025 | else | ||
1026 | continue; | ||
1027 | } | ||
1018 | count = si->swap_map[i]; | 1028 | count = si->swap_map[i]; |
1019 | if (count && swap_count(count) != SWAP_MAP_BAD) | 1029 | if (count && swap_count(count) != SWAP_MAP_BAD) |
1020 | break; | 1030 | break; |
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, | |||
1026 | * We completely avoid races by reading each swap page in advance, | 1036 | * We completely avoid races by reading each swap page in advance, |
1027 | * and then search for the process using it. All the necessary | 1037 | * and then search for the process using it. All the necessary |
1028 | * page table adjustments can then be made atomically. | 1038 | * page table adjustments can then be made atomically. |
1039 | * | ||
1040 | * if the boolean frontswap is true, only unuse pages_to_unuse pages; | ||
1041 | * pages_to_unuse==0 means all pages; ignored if frontswap is false | ||
1029 | */ | 1042 | */ |
1030 | static int try_to_unuse(unsigned int type) | 1043 | int try_to_unuse(unsigned int type, bool frontswap, |
1044 | unsigned long pages_to_unuse) | ||
1031 | { | 1045 | { |
1032 | struct swap_info_struct *si = swap_info[type]; | 1046 | struct swap_info_struct *si = swap_info[type]; |
1033 | struct mm_struct *start_mm; | 1047 | struct mm_struct *start_mm; |
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type) | |||
1060 | * one pass through swap_map is enough, but not necessarily: | 1074 | * one pass through swap_map is enough, but not necessarily: |
1061 | * there are races when an instance of an entry might be missed. | 1075 | * there are races when an instance of an entry might be missed. |
1062 | */ | 1076 | */ |
1063 | while ((i = find_next_to_unuse(si, i)) != 0) { | 1077 | while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { |
1064 | if (signal_pending(current)) { | 1078 | if (signal_pending(current)) { |
1065 | retval = -EINTR; | 1079 | retval = -EINTR; |
1066 | break; | 1080 | break; |
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type) | |||
1227 | * interactive performance. | 1241 | * interactive performance. |
1228 | */ | 1242 | */ |
1229 | cond_resched(); | 1243 | cond_resched(); |
1244 | if (frontswap && pages_to_unuse > 0) { | ||
1245 | if (!--pages_to_unuse) | ||
1246 | break; | ||
1247 | } | ||
1230 | } | 1248 | } |
1231 | 1249 | ||
1232 | mmput(start_mm); | 1250 | mmput(start_mm); |
@@ -1486,7 +1504,8 @@ bad_bmap: | |||
1486 | } | 1504 | } |
1487 | 1505 | ||
1488 | static void enable_swap_info(struct swap_info_struct *p, int prio, | 1506 | static void enable_swap_info(struct swap_info_struct *p, int prio, |
1489 | unsigned char *swap_map) | 1507 | unsigned char *swap_map, |
1508 | unsigned long *frontswap_map) | ||
1490 | { | 1509 | { |
1491 | int i, prev; | 1510 | int i, prev; |
1492 | 1511 | ||
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1496 | else | 1515 | else |
1497 | p->prio = --least_priority; | 1516 | p->prio = --least_priority; |
1498 | p->swap_map = swap_map; | 1517 | p->swap_map = swap_map; |
1518 | frontswap_map_set(p, frontswap_map); | ||
1499 | p->flags |= SWP_WRITEOK; | 1519 | p->flags |= SWP_WRITEOK; |
1500 | nr_swap_pages += p->pages; | 1520 | nr_swap_pages += p->pages; |
1501 | total_swap_pages += p->pages; | 1521 | total_swap_pages += p->pages; |
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, | |||
1512 | swap_list.head = swap_list.next = p->type; | 1532 | swap_list.head = swap_list.next = p->type; |
1513 | else | 1533 | else |
1514 | swap_info[prev]->next = p->type; | 1534 | swap_info[prev]->next = p->type; |
1535 | frontswap_init(p->type); | ||
1515 | spin_unlock(&swap_lock); | 1536 | spin_unlock(&swap_lock); |
1516 | } | 1537 | } |
1517 | 1538 | ||
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1585 | spin_unlock(&swap_lock); | 1606 | spin_unlock(&swap_lock); |
1586 | 1607 | ||
1587 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); | 1608 | oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); |
1588 | err = try_to_unuse(type); | 1609 | err = try_to_unuse(type, false, 0); /* force all pages to be unused */ |
1589 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); | 1610 | compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); |
1590 | 1611 | ||
1591 | if (err) { | 1612 | if (err) { |
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1596 | * sys_swapoff for this swap_info_struct at this point. | 1617 | * sys_swapoff for this swap_info_struct at this point. |
1597 | */ | 1618 | */ |
1598 | /* re-insert swap space back into swap_list */ | 1619 | /* re-insert swap space back into swap_list */ |
1599 | enable_swap_info(p, p->prio, p->swap_map); | 1620 | enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); |
1600 | goto out_dput; | 1621 | goto out_dput; |
1601 | } | 1622 | } |
1602 | 1623 | ||
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) | |||
1622 | swap_map = p->swap_map; | 1643 | swap_map = p->swap_map; |
1623 | p->swap_map = NULL; | 1644 | p->swap_map = NULL; |
1624 | p->flags = 0; | 1645 | p->flags = 0; |
1646 | frontswap_invalidate_area(type); | ||
1625 | spin_unlock(&swap_lock); | 1647 | spin_unlock(&swap_lock); |
1626 | mutex_unlock(&swapon_mutex); | 1648 | mutex_unlock(&swapon_mutex); |
1627 | vfree(swap_map); | 1649 | vfree(swap_map); |
1650 | vfree(frontswap_map_get(p)); | ||
1628 | /* Destroy swap account informatin */ | 1651 | /* Destroy swap account informatin */ |
1629 | swap_cgroup_swapoff(type); | 1652 | swap_cgroup_swapoff(type); |
1630 | 1653 | ||
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1988 | sector_t span; | 2011 | sector_t span; |
1989 | unsigned long maxpages; | 2012 | unsigned long maxpages; |
1990 | unsigned char *swap_map = NULL; | 2013 | unsigned char *swap_map = NULL; |
2014 | unsigned long *frontswap_map = NULL; | ||
1991 | struct page *page = NULL; | 2015 | struct page *page = NULL; |
1992 | struct inode *inode = NULL; | 2016 | struct inode *inode = NULL; |
1993 | 2017 | ||
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2071 | error = nr_extents; | 2095 | error = nr_extents; |
2072 | goto bad_swap; | 2096 | goto bad_swap; |
2073 | } | 2097 | } |
2098 | /* frontswap enabled? set up bit-per-page map for frontswap */ | ||
2099 | if (frontswap_enabled) | ||
2100 | frontswap_map = vzalloc(maxpages / sizeof(long)); | ||
2074 | 2101 | ||
2075 | if (p->bdev) { | 2102 | if (p->bdev) { |
2076 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { | 2103 | if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { |
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
2086 | if (swap_flags & SWAP_FLAG_PREFER) | 2113 | if (swap_flags & SWAP_FLAG_PREFER) |
2087 | prio = | 2114 | prio = |
2088 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; | 2115 | (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; |
2089 | enable_swap_info(p, prio, swap_map); | 2116 | enable_swap_info(p, prio, swap_map, frontswap_map); |
2090 | 2117 | ||
2091 | printk(KERN_INFO "Adding %uk swap on %s. " | 2118 | printk(KERN_INFO "Adding %uk swap on %s. " |
2092 | "Priority:%d extents:%d across:%lluk %s%s\n", | 2119 | "Priority:%d extents:%d across:%lluk %s%s%s\n", |
2093 | p->pages<<(PAGE_SHIFT-10), name, p->prio, | 2120 | p->pages<<(PAGE_SHIFT-10), name, p->prio, |
2094 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), | 2121 | nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), |
2095 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", | 2122 | (p->flags & SWP_SOLIDSTATE) ? "SS" : "", |
2096 | (p->flags & SWP_DISCARDABLE) ? "D" : ""); | 2123 | (p->flags & SWP_DISCARDABLE) ? "D" : "", |
2124 | (frontswap_map) ? "FS" : ""); | ||
2097 | 2125 | ||
2098 | mutex_unlock(&swapon_mutex); | 2126 | mutex_unlock(&swapon_mutex); |
2099 | atomic_inc(&proc_poll_event); | 2127 | atomic_inc(&proc_poll_event); |