aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-06-04 15:28:45 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-06-04 15:28:45 -0400
commita3fe778c7895cd847d23c25ad566d83346282a77 (patch)
tree53ba1b736dac08680a08753f7163a1890d7a0d03 /mm
parent9171c670b4915e30360c2aed530b8377fbbcc852 (diff)
parent165c8aed5bbc6bdddbccae0ba9db451732558ff9 (diff)
Merge tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm
Pull frontswap feature from Konrad Rzeszutek Wilk: "Frontswap provides a "transcendent memory" interface for swap pages. In some environments, dramatic performance savings may be obtained because swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. This tag provides the basic infrastructure along with some changes to the existing backends." Fix up trivial conflict in mm/Makefile due to removal of swap token code changing a line next to the new frontswap entry. This pull request came in before the merge window even opened, it got delayed to after the merge window by me just wanting to make sure it had actual users. Apparently IBM is using this on their embedded side, and Jan Beulich says that it's already made available for SLES and OpenSUSE users. Also acked by Rik van Riel, and Konrad points to other people liking it too. So in it goes. By Dan Magenheimer (4) and Konrad Rzeszutek Wilk (2) via Konrad Rzeszutek Wilk * tag 'stable/frontswap.v16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/mm: frontswap: s/put_page/store/g s/get_page/load MAINTAINER: Add myself for the frontswap API mm: frontswap: config and doc files mm: frontswap: core frontswap functionality mm: frontswap: core swap subsystem hooks and headers mm: frontswap: add frontswap header file
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig17
-rw-r--r--mm/Makefile1
-rw-r--r--mm/frontswap.c314
-rw-r--r--mm/page_io.c12
-rw-r--r--mm/swapfile.c54
5 files changed, 385 insertions, 13 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index b2176374b98e..82fed4eb2b6f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -389,3 +389,20 @@ config CLEANCACHE
389 in a negligible performance hit. 389 in a negligible performance hit.
390 390
391 If unsure, say Y to enable cleancache 391 If unsure, say Y to enable cleancache
392
393config FRONTSWAP
394 bool "Enable frontswap to cache swap pages if tmem is present"
395 depends on SWAP
396 default n
397 help
398 Frontswap is so named because it can be thought of as the opposite
399 of a "backing" store for a swap device. The data is stored into
400 "transcendent memory", memory that is not directly accessible or
401 addressable by the kernel and is of unknown and possibly
402 time-varying size. When space in transcendent memory is available,
403 a significant swap I/O reduction may be achieved. When none is
404 available, all frontswap calls are reduced to a single pointer-
405 compare-against-NULL resulting in a negligible performance hit
406 and swap data is stored as normal on the matching swap device.
407
408 If unsure, say Y to enable frontswap.
diff --git a/mm/Makefile b/mm/Makefile
index a156285ce88d..2e2fbbefb99f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
29 29
30obj-$(CONFIG_BOUNCE) += bounce.o 30obj-$(CONFIG_BOUNCE) += bounce.o
31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o 31obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
32obj-$(CONFIG_FRONTSWAP) += frontswap.o
32obj-$(CONFIG_HAS_DMA) += dmapool.o 33obj-$(CONFIG_HAS_DMA) += dmapool.o
33obj-$(CONFIG_HUGETLBFS) += hugetlb.o 34obj-$(CONFIG_HUGETLBFS) += hugetlb.o
34obj-$(CONFIG_NUMA) += mempolicy.o 35obj-$(CONFIG_NUMA) += mempolicy.o
diff --git a/mm/frontswap.c b/mm/frontswap.c
new file mode 100644
index 000000000000..e25025574a02
--- /dev/null
+++ b/mm/frontswap.c
@@ -0,0 +1,314 @@
1/*
2 * Frontswap frontend
3 *
4 * This code provides the generic "frontend" layer to call a matching
5 * "backend" driver implementation of frontswap. See
6 * Documentation/vm/frontswap.txt for more information.
7 *
8 * Copyright (C) 2009-2012 Oracle Corp. All rights reserved.
9 * Author: Dan Magenheimer
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2.
12 */
13
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/swap.h>
17#include <linux/swapops.h>
18#include <linux/proc_fs.h>
19#include <linux/security.h>
20#include <linux/capability.h>
21#include <linux/module.h>
22#include <linux/uaccess.h>
23#include <linux/debugfs.h>
24#include <linux/frontswap.h>
25#include <linux/swapfile.h>
26
27/*
28 * frontswap_ops is set by frontswap_register_ops to contain the pointers
29 * to the frontswap "backend" implementation functions.
30 */
31static struct frontswap_ops frontswap_ops __read_mostly;
32
33/*
34 * This global enablement flag reduces overhead on systems where frontswap_ops
35 * has not been registered, so is preferred to the slower alternative: a
36 * function call that checks a non-global.
37 */
38bool frontswap_enabled __read_mostly;
39EXPORT_SYMBOL(frontswap_enabled);
40
41/*
42 * If enabled, frontswap_store will return failure even on success. As
43 * a result, the swap subsystem will always write the page to swap, in
44 * effect converting frontswap into a writethrough cache. In this mode,
45 * there is no direct reduction in swap writes, but a frontswap backend
46 * can unilaterally "reclaim" any pages in use with no data loss, thus
47 * providing increases control over maximum memory usage due to frontswap.
48 */
49static bool frontswap_writethrough_enabled __read_mostly;
50
51#ifdef CONFIG_DEBUG_FS
52/*
53 * Counters available via /sys/kernel/debug/frontswap (if debugfs is
54 * properly configured). These are for information only so are not protected
55 * against increment races.
56 */
57static u64 frontswap_loads;
58static u64 frontswap_succ_stores;
59static u64 frontswap_failed_stores;
60static u64 frontswap_invalidates;
61
62static inline void inc_frontswap_loads(void) {
63 frontswap_loads++;
64}
65static inline void inc_frontswap_succ_stores(void) {
66 frontswap_succ_stores++;
67}
68static inline void inc_frontswap_failed_stores(void) {
69 frontswap_failed_stores++;
70}
71static inline void inc_frontswap_invalidates(void) {
72 frontswap_invalidates++;
73}
74#else
75static inline void inc_frontswap_loads(void) { }
76static inline void inc_frontswap_succ_stores(void) { }
77static inline void inc_frontswap_failed_stores(void) { }
78static inline void inc_frontswap_invalidates(void) { }
79#endif
80/*
81 * Register operations for frontswap, returning previous thus allowing
82 * detection of multiple backends and possible nesting.
83 */
84struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
85{
86 struct frontswap_ops old = frontswap_ops;
87
88 frontswap_ops = *ops;
89 frontswap_enabled = true;
90 return old;
91}
92EXPORT_SYMBOL(frontswap_register_ops);
93
94/*
95 * Enable/disable frontswap writethrough (see above).
96 */
97void frontswap_writethrough(bool enable)
98{
99 frontswap_writethrough_enabled = enable;
100}
101EXPORT_SYMBOL(frontswap_writethrough);
102
103/*
104 * Called when a swap device is swapon'd.
105 */
106void __frontswap_init(unsigned type)
107{
108 struct swap_info_struct *sis = swap_info[type];
109
110 BUG_ON(sis == NULL);
111 if (sis->frontswap_map == NULL)
112 return;
113 if (frontswap_enabled)
114 (*frontswap_ops.init)(type);
115}
116EXPORT_SYMBOL(__frontswap_init);
117
118/*
119 * "Store" data from a page to frontswap and associate it with the page's
120 * swaptype and offset. Page must be locked and in the swap cache.
121 * If frontswap already contains a page with matching swaptype and
122 * offset, the frontswap implmentation may either overwrite the data and
123 * return success or invalidate the page from frontswap and return failure.
124 */
125int __frontswap_store(struct page *page)
126{
127 int ret = -1, dup = 0;
128 swp_entry_t entry = { .val = page_private(page), };
129 int type = swp_type(entry);
130 struct swap_info_struct *sis = swap_info[type];
131 pgoff_t offset = swp_offset(entry);
132
133 BUG_ON(!PageLocked(page));
134 BUG_ON(sis == NULL);
135 if (frontswap_test(sis, offset))
136 dup = 1;
137 ret = (*frontswap_ops.store)(type, offset, page);
138 if (ret == 0) {
139 frontswap_set(sis, offset);
140 inc_frontswap_succ_stores();
141 if (!dup)
142 atomic_inc(&sis->frontswap_pages);
143 } else if (dup) {
144 /*
145 failed dup always results in automatic invalidate of
146 the (older) page from frontswap
147 */
148 frontswap_clear(sis, offset);
149 atomic_dec(&sis->frontswap_pages);
150 inc_frontswap_failed_stores();
151 } else
152 inc_frontswap_failed_stores();
153 if (frontswap_writethrough_enabled)
154 /* report failure so swap also writes to swap device */
155 ret = -1;
156 return ret;
157}
158EXPORT_SYMBOL(__frontswap_store);
159
160/*
161 * "Get" data from frontswap associated with swaptype and offset that were
162 * specified when the data was put to frontswap and use it to fill the
163 * specified page with data. Page must be locked and in the swap cache.
164 */
165int __frontswap_load(struct page *page)
166{
167 int ret = -1;
168 swp_entry_t entry = { .val = page_private(page), };
169 int type = swp_type(entry);
170 struct swap_info_struct *sis = swap_info[type];
171 pgoff_t offset = swp_offset(entry);
172
173 BUG_ON(!PageLocked(page));
174 BUG_ON(sis == NULL);
175 if (frontswap_test(sis, offset))
176 ret = (*frontswap_ops.load)(type, offset, page);
177 if (ret == 0)
178 inc_frontswap_loads();
179 return ret;
180}
181EXPORT_SYMBOL(__frontswap_load);
182
183/*
184 * Invalidate any data from frontswap associated with the specified swaptype
185 * and offset so that a subsequent "get" will fail.
186 */
187void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
188{
189 struct swap_info_struct *sis = swap_info[type];
190
191 BUG_ON(sis == NULL);
192 if (frontswap_test(sis, offset)) {
193 (*frontswap_ops.invalidate_page)(type, offset);
194 atomic_dec(&sis->frontswap_pages);
195 frontswap_clear(sis, offset);
196 inc_frontswap_invalidates();
197 }
198}
199EXPORT_SYMBOL(__frontswap_invalidate_page);
200
201/*
202 * Invalidate all data from frontswap associated with all offsets for the
203 * specified swaptype.
204 */
205void __frontswap_invalidate_area(unsigned type)
206{
207 struct swap_info_struct *sis = swap_info[type];
208
209 BUG_ON(sis == NULL);
210 if (sis->frontswap_map == NULL)
211 return;
212 (*frontswap_ops.invalidate_area)(type);
213 atomic_set(&sis->frontswap_pages, 0);
214 memset(sis->frontswap_map, 0, sis->max / sizeof(long));
215}
216EXPORT_SYMBOL(__frontswap_invalidate_area);
217
218/*
219 * Frontswap, like a true swap device, may unnecessarily retain pages
220 * under certain circumstances; "shrink" frontswap is essentially a
221 * "partial swapoff" and works by calling try_to_unuse to attempt to
222 * unuse enough frontswap pages to attempt to -- subject to memory
223 * constraints -- reduce the number of pages in frontswap to the
224 * number given in the parameter target_pages.
225 */
226void frontswap_shrink(unsigned long target_pages)
227{
228 struct swap_info_struct *si = NULL;
229 int si_frontswap_pages;
230 unsigned long total_pages = 0, total_pages_to_unuse;
231 unsigned long pages = 0, pages_to_unuse = 0;
232 int type;
233 bool locked = false;
234
235 /*
236 * we don't want to hold swap_lock while doing a very
237 * lengthy try_to_unuse, but swap_list may change
238 * so restart scan from swap_list.head each time
239 */
240 spin_lock(&swap_lock);
241 locked = true;
242 total_pages = 0;
243 for (type = swap_list.head; type >= 0; type = si->next) {
244 si = swap_info[type];
245 total_pages += atomic_read(&si->frontswap_pages);
246 }
247 if (total_pages <= target_pages)
248 goto out;
249 total_pages_to_unuse = total_pages - target_pages;
250 for (type = swap_list.head; type >= 0; type = si->next) {
251 si = swap_info[type];
252 si_frontswap_pages = atomic_read(&si->frontswap_pages);
253 if (total_pages_to_unuse < si_frontswap_pages)
254 pages = pages_to_unuse = total_pages_to_unuse;
255 else {
256 pages = si_frontswap_pages;
257 pages_to_unuse = 0; /* unuse all */
258 }
259 /* ensure there is enough RAM to fetch pages from frontswap */
260 if (security_vm_enough_memory_mm(current->mm, pages))
261 continue;
262 vm_unacct_memory(pages);
263 break;
264 }
265 if (type < 0)
266 goto out;
267 locked = false;
268 spin_unlock(&swap_lock);
269 try_to_unuse(type, true, pages_to_unuse);
270out:
271 if (locked)
272 spin_unlock(&swap_lock);
273 return;
274}
275EXPORT_SYMBOL(frontswap_shrink);
276
277/*
278 * Count and return the number of frontswap pages across all
279 * swap devices. This is exported so that backend drivers can
280 * determine current usage without reading debugfs.
281 */
282unsigned long frontswap_curr_pages(void)
283{
284 int type;
285 unsigned long totalpages = 0;
286 struct swap_info_struct *si = NULL;
287
288 spin_lock(&swap_lock);
289 for (type = swap_list.head; type >= 0; type = si->next) {
290 si = swap_info[type];
291 totalpages += atomic_read(&si->frontswap_pages);
292 }
293 spin_unlock(&swap_lock);
294 return totalpages;
295}
296EXPORT_SYMBOL(frontswap_curr_pages);
297
298static int __init init_frontswap(void)
299{
300#ifdef CONFIG_DEBUG_FS
301 struct dentry *root = debugfs_create_dir("frontswap", NULL);
302 if (root == NULL)
303 return -ENXIO;
304 debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
305 debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
306 debugfs_create_u64("failed_stores", S_IRUGO, root,
307 &frontswap_failed_stores);
308 debugfs_create_u64("invalidates", S_IRUGO,
309 root, &frontswap_invalidates);
310#endif
311 return 0;
312}
313
314module_init(init_frontswap);
diff --git a/mm/page_io.c b/mm/page_io.c
index dc76b4d0611e..34f02923744c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -18,6 +18,7 @@
18#include <linux/bio.h> 18#include <linux/bio.h>
19#include <linux/swapops.h> 19#include <linux/swapops.h>
20#include <linux/writeback.h> 20#include <linux/writeback.h>
21#include <linux/frontswap.h>
21#include <asm/pgtable.h> 22#include <asm/pgtable.h>
22 23
23static struct bio *get_swap_bio(gfp_t gfp_flags, 24static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
98 unlock_page(page); 99 unlock_page(page);
99 goto out; 100 goto out;
100 } 101 }
102 if (frontswap_store(page) == 0) {
103 set_page_writeback(page);
104 unlock_page(page);
105 end_page_writeback(page);
106 goto out;
107 }
101 bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); 108 bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
102 if (bio == NULL) { 109 if (bio == NULL) {
103 set_page_dirty(page); 110 set_page_dirty(page);
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page)
122 129
123 VM_BUG_ON(!PageLocked(page)); 130 VM_BUG_ON(!PageLocked(page));
124 VM_BUG_ON(PageUptodate(page)); 131 VM_BUG_ON(PageUptodate(page));
132 if (frontswap_load(page) == 0) {
133 SetPageUptodate(page);
134 unlock_page(page);
135 goto out;
136 }
125 bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); 137 bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
126 if (bio == NULL) { 138 if (bio == NULL) {
127 unlock_page(page); 139 unlock_page(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 457b10baef59..de5bc51c4a66 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/poll.h> 32#include <linux/poll.h>
33#include <linux/oom.h> 33#include <linux/oom.h>
34#include <linux/frontswap.h>
35#include <linux/swapfile.h>
34 36
35#include <asm/pgtable.h> 37#include <asm/pgtable.h>
36#include <asm/tlbflush.h> 38#include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
42static void free_swap_count_continuations(struct swap_info_struct *); 44static void free_swap_count_continuations(struct swap_info_struct *);
43static sector_t map_swap_entry(swp_entry_t, struct block_device**); 45static sector_t map_swap_entry(swp_entry_t, struct block_device**);
44 46
45static DEFINE_SPINLOCK(swap_lock); 47DEFINE_SPINLOCK(swap_lock);
46static unsigned int nr_swapfiles; 48static unsigned int nr_swapfiles;
47long nr_swap_pages; 49long nr_swap_pages;
48long total_swap_pages; 50long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
53static const char Bad_offset[] = "Bad swap offset entry "; 55static const char Bad_offset[] = "Bad swap offset entry ";
54static const char Unused_offset[] = "Unused swap offset entry "; 56static const char Unused_offset[] = "Unused swap offset entry ";
55 57
56static struct swap_list_t swap_list = {-1, -1}; 58struct swap_list_t swap_list = {-1, -1};
57 59
58static struct swap_info_struct *swap_info[MAX_SWAPFILES]; 60struct swap_info_struct *swap_info[MAX_SWAPFILES];
59 61
60static DEFINE_MUTEX(swapon_mutex); 62static DEFINE_MUTEX(swapon_mutex);
61 63
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
556 swap_list.next = p->type; 558 swap_list.next = p->type;
557 nr_swap_pages++; 559 nr_swap_pages++;
558 p->inuse_pages--; 560 p->inuse_pages--;
561 frontswap_invalidate_page(p->type, offset);
559 if ((p->flags & SWP_BLKDEV) && 562 if ((p->flags & SWP_BLKDEV) &&
560 disk->fops->swap_slot_free_notify) 563 disk->fops->swap_slot_free_notify)
561 disk->fops->swap_slot_free_notify(p->bdev, offset); 564 disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm,
985} 988}
986 989
987/* 990/*
988 * Scan swap_map from current position to next entry still in use. 991 * Scan swap_map (or frontswap_map if frontswap parameter is true)
992 * from current position to next entry still in use.
989 * Recycle to start on reaching the end, returning 0 when empty. 993 * Recycle to start on reaching the end, returning 0 when empty.
990 */ 994 */
991static unsigned int find_next_to_unuse(struct swap_info_struct *si, 995static unsigned int find_next_to_unuse(struct swap_info_struct *si,
992 unsigned int prev) 996 unsigned int prev, bool frontswap)
993{ 997{
994 unsigned int max = si->max; 998 unsigned int max = si->max;
995 unsigned int i = prev; 999 unsigned int i = prev;
@@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1015 prev = 0; 1019 prev = 0;
1016 i = 1; 1020 i = 1;
1017 } 1021 }
1022 if (frontswap) {
1023 if (frontswap_test(si, i))
1024 break;
1025 else
1026 continue;
1027 }
1018 count = si->swap_map[i]; 1028 count = si->swap_map[i];
1019 if (count && swap_count(count) != SWAP_MAP_BAD) 1029 if (count && swap_count(count) != SWAP_MAP_BAD)
1020 break; 1030 break;
@@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
1026 * We completely avoid races by reading each swap page in advance, 1036 * We completely avoid races by reading each swap page in advance,
1027 * and then search for the process using it. All the necessary 1037 * and then search for the process using it. All the necessary
1028 * page table adjustments can then be made atomically. 1038 * page table adjustments can then be made atomically.
1039 *
1040 * if the boolean frontswap is true, only unuse pages_to_unuse pages;
1041 * pages_to_unuse==0 means all pages; ignored if frontswap is false
1029 */ 1042 */
1030static int try_to_unuse(unsigned int type) 1043int try_to_unuse(unsigned int type, bool frontswap,
1044 unsigned long pages_to_unuse)
1031{ 1045{
1032 struct swap_info_struct *si = swap_info[type]; 1046 struct swap_info_struct *si = swap_info[type];
1033 struct mm_struct *start_mm; 1047 struct mm_struct *start_mm;
@@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type)
1060 * one pass through swap_map is enough, but not necessarily: 1074 * one pass through swap_map is enough, but not necessarily:
1061 * there are races when an instance of an entry might be missed. 1075 * there are races when an instance of an entry might be missed.
1062 */ 1076 */
1063 while ((i = find_next_to_unuse(si, i)) != 0) { 1077 while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
1064 if (signal_pending(current)) { 1078 if (signal_pending(current)) {
1065 retval = -EINTR; 1079 retval = -EINTR;
1066 break; 1080 break;
@@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type)
1227 * interactive performance. 1241 * interactive performance.
1228 */ 1242 */
1229 cond_resched(); 1243 cond_resched();
1244 if (frontswap && pages_to_unuse > 0) {
1245 if (!--pages_to_unuse)
1246 break;
1247 }
1230 } 1248 }
1231 1249
1232 mmput(start_mm); 1250 mmput(start_mm);
@@ -1486,7 +1504,8 @@ bad_bmap:
1486} 1504}
1487 1505
1488static void enable_swap_info(struct swap_info_struct *p, int prio, 1506static void enable_swap_info(struct swap_info_struct *p, int prio,
1489 unsigned char *swap_map) 1507 unsigned char *swap_map,
1508 unsigned long *frontswap_map)
1490{ 1509{
1491 int i, prev; 1510 int i, prev;
1492 1511
@@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1496 else 1515 else
1497 p->prio = --least_priority; 1516 p->prio = --least_priority;
1498 p->swap_map = swap_map; 1517 p->swap_map = swap_map;
1518 frontswap_map_set(p, frontswap_map);
1499 p->flags |= SWP_WRITEOK; 1519 p->flags |= SWP_WRITEOK;
1500 nr_swap_pages += p->pages; 1520 nr_swap_pages += p->pages;
1501 total_swap_pages += p->pages; 1521 total_swap_pages += p->pages;
@@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
1512 swap_list.head = swap_list.next = p->type; 1532 swap_list.head = swap_list.next = p->type;
1513 else 1533 else
1514 swap_info[prev]->next = p->type; 1534 swap_info[prev]->next = p->type;
1535 frontswap_init(p->type);
1515 spin_unlock(&swap_lock); 1536 spin_unlock(&swap_lock);
1516} 1537}
1517 1538
@@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1585 spin_unlock(&swap_lock); 1606 spin_unlock(&swap_lock);
1586 1607
1587 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); 1608 oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
1588 err = try_to_unuse(type); 1609 err = try_to_unuse(type, false, 0); /* force all pages to be unused */
1589 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); 1610 compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
1590 1611
1591 if (err) { 1612 if (err) {
@@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1596 * sys_swapoff for this swap_info_struct at this point. 1617 * sys_swapoff for this swap_info_struct at this point.
1597 */ 1618 */
1598 /* re-insert swap space back into swap_list */ 1619 /* re-insert swap space back into swap_list */
1599 enable_swap_info(p, p->prio, p->swap_map); 1620 enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
1600 goto out_dput; 1621 goto out_dput;
1601 } 1622 }
1602 1623
@@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
1622 swap_map = p->swap_map; 1643 swap_map = p->swap_map;
1623 p->swap_map = NULL; 1644 p->swap_map = NULL;
1624 p->flags = 0; 1645 p->flags = 0;
1646 frontswap_invalidate_area(type);
1625 spin_unlock(&swap_lock); 1647 spin_unlock(&swap_lock);
1626 mutex_unlock(&swapon_mutex); 1648 mutex_unlock(&swapon_mutex);
1627 vfree(swap_map); 1649 vfree(swap_map);
1650 vfree(frontswap_map_get(p));
1628 /* Destroy swap account informatin */ 1651 /* Destroy swap account informatin */
1629 swap_cgroup_swapoff(type); 1652 swap_cgroup_swapoff(type);
1630 1653
@@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
1988 sector_t span; 2011 sector_t span;
1989 unsigned long maxpages; 2012 unsigned long maxpages;
1990 unsigned char *swap_map = NULL; 2013 unsigned char *swap_map = NULL;
2014 unsigned long *frontswap_map = NULL;
1991 struct page *page = NULL; 2015 struct page *page = NULL;
1992 struct inode *inode = NULL; 2016 struct inode *inode = NULL;
1993 2017
@@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2071 error = nr_extents; 2095 error = nr_extents;
2072 goto bad_swap; 2096 goto bad_swap;
2073 } 2097 }
2098 /* frontswap enabled? set up bit-per-page map for frontswap */
2099 if (frontswap_enabled)
2100 frontswap_map = vzalloc(maxpages / sizeof(long));
2074 2101
2075 if (p->bdev) { 2102 if (p->bdev) {
2076 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { 2103 if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
2086 if (swap_flags & SWAP_FLAG_PREFER) 2113 if (swap_flags & SWAP_FLAG_PREFER)
2087 prio = 2114 prio =
2088 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; 2115 (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
2089 enable_swap_info(p, prio, swap_map); 2116 enable_swap_info(p, prio, swap_map, frontswap_map);
2090 2117
2091 printk(KERN_INFO "Adding %uk swap on %s. " 2118 printk(KERN_INFO "Adding %uk swap on %s. "
2092 "Priority:%d extents:%d across:%lluk %s%s\n", 2119 "Priority:%d extents:%d across:%lluk %s%s%s\n",
2093 p->pages<<(PAGE_SHIFT-10), name, p->prio, 2120 p->pages<<(PAGE_SHIFT-10), name, p->prio,
2094 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), 2121 nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
2095 (p->flags & SWP_SOLIDSTATE) ? "SS" : "", 2122 (p->flags & SWP_SOLIDSTATE) ? "SS" : "",
2096 (p->flags & SWP_DISCARDABLE) ? "D" : ""); 2123 (p->flags & SWP_DISCARDABLE) ? "D" : "",
2124 (frontswap_map) ? "FS" : "");
2097 2125
2098 mutex_unlock(&swapon_mutex); 2126 mutex_unlock(&swapon_mutex);
2099 atomic_inc(&proc_poll_event); 2127 atomic_inc(&proc_poll_event);