aboutsummaryrefslogtreecommitdiffstats
path: root/mm/frontswap.c
diff options
context:
space:
mode:
authorDan Streetman <ddstreet@ieee.org>2015-06-24 19:58:18 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 20:49:45 -0400
commitd1dc6f1bcf1e998e7ce65fc120da371ab047a999 (patch)
tree62626a5b4404a960bb2e12992033283ad244734b /mm/frontswap.c
parentb05b9f5f9dcf593a0e9327676b78e6c17b4218e8 (diff)
frontswap: allow multiple backends
Change frontswap single pointer to a singly linked list of frontswap implementations. Update Xen tmem implementation as register no longer returns anything. Frontswap only keeps track of a single implementation; any implementation that registers second (or later) will replace the previously registered implementation, and gets a pointer to the previous implementation that the new implementation is expected to pass all frontswap functions to if it can't handle the function itself. However that method doesn't really make much sense, as passing that work on to every implementation adds unnecessary work to implementations; instead, frontswap should simply keep a list of all registered implementations and try each implementation for any function. Most importantly, neither of the two currently existing frontswap implementations in the kernel actually do anything with any previous frontswap implementation that they replace when registering. This allows frontswap to successfully manage multiple implementations by keeping a list of them all. Signed-off-by: Dan Streetman <ddstreet@ieee.org> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: David Vrabel <david.vrabel@citrix.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/frontswap.c')
-rw-r--r--mm/frontswap.c215
1 files changed, 131 insertions, 84 deletions
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 8d82809eb085..27a9924caf61 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -21,11 +21,16 @@
21#include <linux/swapfile.h> 21#include <linux/swapfile.h>
22 22
23/* 23/*
24 * frontswap_ops is set by frontswap_register_ops to contain the pointers 24 * frontswap_ops are added by frontswap_register_ops, and provide the
25 * to the frontswap "backend" implementation functions. 25 * frontswap "backend" implementation functions. Multiple implementations
26 * may be registered, but implementations can never deregister. This
27 * is a simple singly-linked list of all registered implementations.
26 */ 28 */
27static struct frontswap_ops *frontswap_ops __read_mostly; 29static struct frontswap_ops *frontswap_ops __read_mostly;
28 30
31#define for_each_frontswap_ops(ops) \
32 for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next)
33
29/* 34/*
30 * If enabled, frontswap_store will return failure even on success. As 35 * If enabled, frontswap_store will return failure even on success. As
31 * a result, the swap subsystem will always write the page to swap, in 36 * a result, the swap subsystem will always write the page to swap, in
@@ -79,15 +84,6 @@ static inline void inc_frontswap_invalidates(void) { }
79 * on all frontswap functions to not call the backend until the backend 84 * on all frontswap functions to not call the backend until the backend
80 * has registered. 85 * has registered.
81 * 86 *
82 * Specifically when no backend is registered (nobody called
83 * frontswap_register_ops) all calls to frontswap_init (which is done via
84 * swapon -> enable_swap_info -> frontswap_init) are registered and remembered
85 * (via the setting of need_init bitmap) but fail to create tmem_pools. When a
86 * backend registers with frontswap at some later point the previous
87 * calls to frontswap_init are executed (by iterating over the need_init
88 * bitmap) to create tmem_pools and set the respective poolids. All of that is
89 * guarded by us using atomic bit operations on the 'need_init' bitmap.
90 *
91 * This would not guard us against the user deciding to call swapoff right as 87 * This would not guard us against the user deciding to call swapoff right as
92 * we are calling the backend to initialize (so swapon is in action). 88 * we are calling the backend to initialize (so swapon is in action).
93 * Fortunately for us, the swapon_mutex has been taken by the callee so we are 89 * Fortunately for us, the swapon_mutex has been taken by the callee so we are
@@ -106,37 +102,64 @@ static inline void inc_frontswap_invalidates(void) { }
106 * 102 *
107 * Obviously the opposite (unloading the backend) must be done after all 103 * Obviously the opposite (unloading the backend) must be done after all
108 * the frontswap_[store|load|invalidate_area|invalidate_page] start 104 * the frontswap_[store|load|invalidate_area|invalidate_page] start
109 * ignoring or failing the requests - at which point frontswap_ops 105 * ignoring or failing the requests. However, there is currently no way
110 * would have to be made in some fashion atomic. 106 * to unload a backend once it is registered.
111 */ 107 */
112static DECLARE_BITMAP(need_init, MAX_SWAPFILES);
113 108
114/* 109/*
115 * Register operations for frontswap, returning previous thus allowing 110 * Register operations for frontswap
116 * detection of multiple backends and possible nesting.
117 */ 111 */
118struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops) 112void frontswap_register_ops(struct frontswap_ops *ops)
119{ 113{
120 struct frontswap_ops *old = frontswap_ops; 114 DECLARE_BITMAP(a, MAX_SWAPFILES);
121 int i; 115 DECLARE_BITMAP(b, MAX_SWAPFILES);
122 116 struct swap_info_struct *si;
123 for (i = 0; i < MAX_SWAPFILES; i++) { 117 unsigned int i;
124 if (test_and_clear_bit(i, need_init)) { 118
125 struct swap_info_struct *sis = swap_info[i]; 119 bitmap_zero(a, MAX_SWAPFILES);
126 /* __frontswap_init _should_ have set it! */ 120 bitmap_zero(b, MAX_SWAPFILES);
127 if (!sis->frontswap_map) 121
128 return ERR_PTR(-EINVAL); 122 spin_lock(&swap_lock);
129 ops->init(i); 123 plist_for_each_entry(si, &swap_active_head, list) {
130 } 124 if (!WARN_ON(!si->frontswap_map))
125 set_bit(si->type, a);
131 } 126 }
127 spin_unlock(&swap_lock);
128
129 /* the new ops needs to know the currently active swap devices */
130 for_each_set_bit(i, a, MAX_SWAPFILES)
131 ops->init(i);
132
132 /* 133 /*
133 * We MUST have frontswap_ops set _after_ the frontswap_init's 134 * Setting frontswap_ops must happen after the ops->init() calls
134 * have been called. Otherwise __frontswap_store might fail. Hence 135 * above; cmpxchg implies smp_mb() which will ensure the init is
135 * the barrier to make sure compiler does not re-order us. 136 * complete at this point.
136 */ 137 */
137 barrier(); 138 do {
138 frontswap_ops = ops; 139 ops->next = frontswap_ops;
139 return old; 140 } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next);
141
142 spin_lock(&swap_lock);
143 plist_for_each_entry(si, &swap_active_head, list) {
144 if (si->frontswap_map)
145 set_bit(si->type, b);
146 }
147 spin_unlock(&swap_lock);
148
149 /*
150 * On the very unlikely chance that a swap device was added or
151 * removed between setting the "a" list bits and the ops init
152 * calls, we re-check and do init or invalidate for any changed
153 * bits.
154 */
155 if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) {
156 for (i = 0; i < MAX_SWAPFILES; i++) {
157 if (!test_bit(i, a) && test_bit(i, b))
158 ops->init(i);
159 else if (test_bit(i, a) && !test_bit(i, b))
160 ops->invalidate_area(i);
161 }
162 }
140} 163}
141EXPORT_SYMBOL(frontswap_register_ops); 164EXPORT_SYMBOL(frontswap_register_ops);
142 165
@@ -164,6 +187,7 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
164void __frontswap_init(unsigned type, unsigned long *map) 187void __frontswap_init(unsigned type, unsigned long *map)
165{ 188{
166 struct swap_info_struct *sis = swap_info[type]; 189 struct swap_info_struct *sis = swap_info[type];
190 struct frontswap_ops *ops;
167 191
168 BUG_ON(sis == NULL); 192 BUG_ON(sis == NULL);
169 193
@@ -179,28 +203,30 @@ void __frontswap_init(unsigned type, unsigned long *map)
179 * p->frontswap set to something valid to work properly. 203 * p->frontswap set to something valid to work properly.
180 */ 204 */
181 frontswap_map_set(sis, map); 205 frontswap_map_set(sis, map);
182 if (frontswap_ops) 206
183 frontswap_ops->init(type); 207 for_each_frontswap_ops(ops)
184 else { 208 ops->init(type);
185 BUG_ON(type >= MAX_SWAPFILES);
186 set_bit(type, need_init);
187 }
188} 209}
189EXPORT_SYMBOL(__frontswap_init); 210EXPORT_SYMBOL(__frontswap_init);
190 211
191bool __frontswap_test(struct swap_info_struct *sis, 212bool __frontswap_test(struct swap_info_struct *sis,
192 pgoff_t offset) 213 pgoff_t offset)
193{ 214{
194 bool ret = false; 215 if (sis->frontswap_map)
195 216 return test_bit(offset, sis->frontswap_map);
196 if (frontswap_ops && sis->frontswap_map) 217 return false;
197 ret = test_bit(offset, sis->frontswap_map);
198 return ret;
199} 218}
200EXPORT_SYMBOL(__frontswap_test); 219EXPORT_SYMBOL(__frontswap_test);
201 220
221static inline void __frontswap_set(struct swap_info_struct *sis,
222 pgoff_t offset)
223{
224 set_bit(offset, sis->frontswap_map);
225 atomic_inc(&sis->frontswap_pages);
226}
227
202static inline void __frontswap_clear(struct swap_info_struct *sis, 228static inline void __frontswap_clear(struct swap_info_struct *sis,
203 pgoff_t offset) 229 pgoff_t offset)
204{ 230{
205 clear_bit(offset, sis->frontswap_map); 231 clear_bit(offset, sis->frontswap_map);
206 atomic_dec(&sis->frontswap_pages); 232 atomic_dec(&sis->frontswap_pages);
@@ -215,39 +241,46 @@ static inline void __frontswap_clear(struct swap_info_struct *sis,
215 */ 241 */
216int __frontswap_store(struct page *page) 242int __frontswap_store(struct page *page)
217{ 243{
218 int ret = -1, dup = 0; 244 int ret = -1;
219 swp_entry_t entry = { .val = page_private(page), }; 245 swp_entry_t entry = { .val = page_private(page), };
220 int type = swp_type(entry); 246 int type = swp_type(entry);
221 struct swap_info_struct *sis = swap_info[type]; 247 struct swap_info_struct *sis = swap_info[type];
222 pgoff_t offset = swp_offset(entry); 248 pgoff_t offset = swp_offset(entry);
249 struct frontswap_ops *ops;
223 250
224 /* 251 /*
225 * Return if no backend registered. 252 * Return if no backend registered.
226 * Don't need to inc frontswap_failed_stores here. 253 * Don't need to inc frontswap_failed_stores here.
227 */ 254 */
228 if (!frontswap_ops) 255 if (!frontswap_ops)
229 return ret; 256 return -1;
230 257
231 BUG_ON(!PageLocked(page)); 258 BUG_ON(!PageLocked(page));
232 BUG_ON(sis == NULL); 259 BUG_ON(sis == NULL);
233 if (__frontswap_test(sis, offset)) 260
234 dup = 1; 261 /*
235 ret = frontswap_ops->store(type, offset, page); 262 * If a dup, we must remove the old page first; we can't leave the
263 * old page no matter if the store of the new page succeeds or fails,
264 * and we can't rely on the new page replacing the old page as we may
265 * not store to the same implementation that contains the old page.
266 */
267 if (__frontswap_test(sis, offset)) {
268 __frontswap_clear(sis, offset);
269 for_each_frontswap_ops(ops)
270 ops->invalidate_page(type, offset);
271 }
272
273 /* Try to store in each implementation, until one succeeds. */
274 for_each_frontswap_ops(ops) {
275 ret = ops->store(type, offset, page);
276 if (!ret) /* successful store */
277 break;
278 }
236 if (ret == 0) { 279 if (ret == 0) {
237 set_bit(offset, sis->frontswap_map); 280 __frontswap_set(sis, offset);
238 inc_frontswap_succ_stores(); 281 inc_frontswap_succ_stores();
239 if (!dup)
240 atomic_inc(&sis->frontswap_pages);
241 } else { 282 } else {
242 /*
243 failed dup always results in automatic invalidate of
244 the (older) page from frontswap
245 */
246 inc_frontswap_failed_stores(); 283 inc_frontswap_failed_stores();
247 if (dup) {
248 __frontswap_clear(sis, offset);
249 frontswap_ops->invalidate_page(type, offset);
250 }
251 } 284 }
252 if (frontswap_writethrough_enabled) 285 if (frontswap_writethrough_enabled)
253 /* report failure so swap also writes to swap device */ 286 /* report failure so swap also writes to swap device */
@@ -268,14 +301,22 @@ int __frontswap_load(struct page *page)
268 int type = swp_type(entry); 301 int type = swp_type(entry);
269 struct swap_info_struct *sis = swap_info[type]; 302 struct swap_info_struct *sis = swap_info[type];
270 pgoff_t offset = swp_offset(entry); 303 pgoff_t offset = swp_offset(entry);
304 struct frontswap_ops *ops;
305
306 if (!frontswap_ops)
307 return -1;
271 308
272 BUG_ON(!PageLocked(page)); 309 BUG_ON(!PageLocked(page));
273 BUG_ON(sis == NULL); 310 BUG_ON(sis == NULL);
274 /* 311 if (!__frontswap_test(sis, offset))
275 * __frontswap_test() will check whether there is backend registered 312 return -1;
276 */ 313
277 if (__frontswap_test(sis, offset)) 314 /* Try loading from each implementation, until one succeeds. */
278 ret = frontswap_ops->load(type, offset, page); 315 for_each_frontswap_ops(ops) {
316 ret = ops->load(type, offset, page);
317 if (!ret) /* successful load */
318 break;
319 }
279 if (ret == 0) { 320 if (ret == 0) {
280 inc_frontswap_loads(); 321 inc_frontswap_loads();
281 if (frontswap_tmem_exclusive_gets_enabled) { 322 if (frontswap_tmem_exclusive_gets_enabled) {
@@ -294,16 +335,19 @@ EXPORT_SYMBOL(__frontswap_load);
294void __frontswap_invalidate_page(unsigned type, pgoff_t offset) 335void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
295{ 336{
296 struct swap_info_struct *sis = swap_info[type]; 337 struct swap_info_struct *sis = swap_info[type];
338 struct frontswap_ops *ops;
339
340 if (!frontswap_ops)
341 return;
297 342
298 BUG_ON(sis == NULL); 343 BUG_ON(sis == NULL);
299 /* 344 if (!__frontswap_test(sis, offset))
300 * __frontswap_test() will check whether there is backend registered 345 return;
301 */ 346
302 if (__frontswap_test(sis, offset)) { 347 for_each_frontswap_ops(ops)
303 frontswap_ops->invalidate_page(type, offset); 348 ops->invalidate_page(type, offset);
304 __frontswap_clear(sis, offset); 349 __frontswap_clear(sis, offset);
305 inc_frontswap_invalidates(); 350 inc_frontswap_invalidates();
306 }
307} 351}
308EXPORT_SYMBOL(__frontswap_invalidate_page); 352EXPORT_SYMBOL(__frontswap_invalidate_page);
309 353
@@ -314,16 +358,19 @@ EXPORT_SYMBOL(__frontswap_invalidate_page);
314void __frontswap_invalidate_area(unsigned type) 358void __frontswap_invalidate_area(unsigned type)
315{ 359{
316 struct swap_info_struct *sis = swap_info[type]; 360 struct swap_info_struct *sis = swap_info[type];
361 struct frontswap_ops *ops;
317 362
318 if (frontswap_ops) { 363 if (!frontswap_ops)
319 BUG_ON(sis == NULL); 364 return;
320 if (sis->frontswap_map == NULL) 365
321 return; 366 BUG_ON(sis == NULL);
322 frontswap_ops->invalidate_area(type); 367 if (sis->frontswap_map == NULL)
323 atomic_set(&sis->frontswap_pages, 0); 368 return;
324 bitmap_zero(sis->frontswap_map, sis->max); 369
325 } 370 for_each_frontswap_ops(ops)
326 clear_bit(type, need_init); 371 ops->invalidate_area(type);
372 atomic_set(&sis->frontswap_pages, 0);
373 bitmap_zero(sis->frontswap_map, sis->max);
327} 374}
328EXPORT_SYMBOL(__frontswap_invalidate_area); 375EXPORT_SYMBOL(__frontswap_invalidate_area);
329 376