author     Linus Torvalds <torvalds@linux-foundation.org>  2013-04-30 20:37:43 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-04-30 20:37:43 -0400
commit     5f56886521d6ddd3648777fae44d82382dd8c87f (patch)
tree       aa0db6331cdb01c23f1884439840aadd31bbcca4 /mm
parent     f1e9a236e5ddab6c349611ee86f54291916f226c (diff)
parent     e2a8b0a779787314eca1061308a8182e6c5bfabd (diff)
Merge branch 'akpm' (incoming from Andrew)
Merge third batch of fixes from Andrew Morton:
 "Most of the rest.  I still have two large patchsets against AIO and
  IPC, but they're a bit stuck behind other trees and I'm about to
  vanish for six days.

   - random fixlets
   - inotify
   - more of the MM queue
   - show_stack() cleanups
   - DMI update
   - kthread/workqueue things
   - compat cleanups
   - epoll udpates
   - binfmt updates
   - nilfs2
   - hfs
   - hfsplus
   - ptrace
   - kmod
   - coredump
   - kexec
   - rbtree
   - pids
   - pidns
   - pps
   - semaphore tweaks
   - some w1 patches
   - relay updates
   - core Kconfig changes
   - sysrq tweaks"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (109 commits)
  Documentation/sysrq: fix inconstistent help message of sysrq key
  ethernet/emac/sysrq: fix inconstistent help message of sysrq key
  sparc/sysrq: fix inconstistent help message of sysrq key
  powerpc/xmon/sysrq: fix inconstistent help message of sysrq key
  ARM/etm/sysrq: fix inconstistent help message of sysrq key
  power/sysrq: fix inconstistent help message of sysrq key
  kgdb/sysrq: fix inconstistent help message of sysrq key
  lib/decompress.c: fix initconst
  notifier-error-inject: fix module names in Kconfig
  kernel/sys.c: make prctl(PR_SET_MM) generally available
  UAPI: remove empty Kbuild files
  menuconfig: print more info for symbol without prompts
  init/Kconfig: re-order CONFIG_EXPERT options to fix menuconfig display
  kconfig menu: move Virtualization drivers near other virtualization options
  Kconfig: consolidate CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
  relay: use macro PAGE_ALIGN instead of FIX_SIZE
  kernel/relay.c: move FIX_SIZE macro into relay.c
  kernel/relay.c: remove unused function argument actor
  drivers/w1/slaves/w1_ds2760.c: fix the error handling in w1_ds2760_add_slave()
  drivers/w1/slaves/w1_ds2781.c: fix the error handling in w1_ds2781_add_slave()
  ...
Diffstat (limited to 'mm')
-rw-r--r--  mm/cleancache.c  265
-rw-r--r--  mm/frontswap.c   156
-rw-r--r--  mm/swapfile.c     17
3 files changed, 359 insertions(+), 79 deletions(-)
diff --git a/mm/cleancache.c b/mm/cleancache.c
index d76ba74be2d0..5875f48ce279 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -19,20 +19,10 @@
 #include <linux/cleancache.h>
 
 /*
- * This global enablement flag may be read thousands of times per second
- * by cleancache_get/put/invalidate even on systems where cleancache_ops
- * is not claimed (e.g. cleancache is config'ed on but remains
- * disabled), so is preferred to the slower alternative: a function
- * call that checks a non-global.
- */
-int cleancache_enabled __read_mostly;
-EXPORT_SYMBOL(cleancache_enabled);
-
-/*
  * cleancache_ops is set by cleancache_ops_register to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
-static struct cleancache_ops cleancache_ops __read_mostly;
+static struct cleancache_ops *cleancache_ops __read_mostly;
 
 /*
  * Counters available via /sys/kernel/debug/frontswap (if debugfs is
@@ -45,15 +35,101 @@ static u64 cleancache_puts;
 static u64 cleancache_invalidates;
 
 /*
- * register operations for cleancache, returning previous thus allowing
- * detection of multiple backends and possible nesting
+ * When no backend is registered all calls to init_fs and init_shared_fs
+ * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or
+ * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array
+ * [shared_|]fs_poolid_map) are given to the respective super block
+ * (sb->cleancache_poolid) and no tmem_pools are created. When a backend
+ * registers with cleancache the previous calls to init_fs and init_shared_fs
+ * are executed to create tmem_pools and set the respective poolids. While no
+ * backend is registered all "puts", "gets" and "flushes" are ignored or failed.
+ */
+#define MAX_INITIALIZABLE_FS 32
+#define FAKE_FS_POOLID_OFFSET 1000
+#define FAKE_SHARED_FS_POOLID_OFFSET 2000
+
+#define FS_NO_BACKEND (-1)
+#define FS_UNKNOWN (-2)
+static int fs_poolid_map[MAX_INITIALIZABLE_FS];
+static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS];
+static char *uuids[MAX_INITIALIZABLE_FS];
+/*
+ * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads
+ * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple
+ * threads calling mount (and ending up in __cleancache_init_[shared|]fs).
+ */
+static DEFINE_MUTEX(poolid_mutex);
+/*
+ * When set to false (default) all calls to the cleancache functions, except
+ * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded
+ * by the if (!cleancache_ops) return. This means multiple threads (from
+ * different filesystems) will be checking cleancache_ops. The usage of a
+ * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are
+ * OK if the time between the backend's have been initialized (and
+ * cleancache_ops has been set to not NULL) and when the filesystems start
+ * actually calling the backends. The inverse (when unloading) is obviously
+ * not good - but this shim does not do that (yet).
+ */
+
+/*
+ * The backends and filesystems work all asynchronously. This is b/c the
+ * backends can be built as modules.
+ * The usual sequence of events is:
+ *      a) mount /      -> __cleancache_init_fs is called. We set the
+ *              [shared_|]fs_poolid_map and uuids for.
+ *
+ *      b). user does I/Os -> we call the rest of __cleancache_* functions
+ *              which return immediately as cleancache_ops is false.
+ *
+ *      c). modprobe zcache -> cleancache_register_ops. We init the backend
+ *              and set cleancache_ops to true, and for any fs_poolid_map
+ *              (which is set by __cleancache_init_fs) we initialize the poolid.
+ *
+ *      d). user does I/Os -> now that cleancache_ops is true all the
+ *              __cleancache_* functions can call the backend. They all check
+ *              that fs_poolid_map is valid and if so invoke the backend.
+ *
+ *      e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is
+ *              reset (which is the second check in the __cleancache_* ops
+ *              to call the backend).
+ *
+ * The sequence of event could also be c), followed by a), and d). and e). The
+ * c) would not happen anymore. There is also the chance of c), and one thread
+ * doing a) + d), and another doing e). For that case we depend on the
+ * filesystem calling __cleancache_invalidate_fs in the proper sequence (so
+ * that it handles all I/Os before it invalidates the fs (which is last part
+ * of unmounting process).
+ *
+ * Note: The acute reader will notice that there is no "rmmod zcache" case.
+ * This is b/c the functionality for that is not yet implemented and when
+ * done, will require some extra locking not yet devised.
+ */
+
+/*
+ * Register operations for cleancache, returning previous thus allowing
+ * detection of multiple backends and possible nesting.
  */
-struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
+struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops)
 {
-        struct cleancache_ops old = cleancache_ops;
+        struct cleancache_ops *old = cleancache_ops;
+        int i;
 
-        cleancache_ops = *ops;
-        cleancache_enabled = 1;
+        mutex_lock(&poolid_mutex);
+        for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+                if (fs_poolid_map[i] == FS_NO_BACKEND)
+                        fs_poolid_map[i] = ops->init_fs(PAGE_SIZE);
+                if (shared_fs_poolid_map[i] == FS_NO_BACKEND)
+                        shared_fs_poolid_map[i] = ops->init_shared_fs
+                                        (uuids[i], PAGE_SIZE);
+        }
+        /*
+         * We MUST set cleancache_ops _after_ we have called the backends
+         * init_fs or init_shared_fs functions. Otherwise the compiler might
+         * re-order where cleancache_ops is set in this function.
+         */
+        barrier();
+        cleancache_ops = ops;
+        mutex_unlock(&poolid_mutex);
         return old;
 }
 EXPORT_SYMBOL(cleancache_register_ops);
@@ -61,15 +137,42 @@ EXPORT_SYMBOL(cleancache_register_ops);
 /* Called by a cleancache-enabled filesystem at time of mount */
 void __cleancache_init_fs(struct super_block *sb)
 {
-        sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
+        int i;
+
+        mutex_lock(&poolid_mutex);
+        for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+                if (fs_poolid_map[i] == FS_UNKNOWN) {
+                        sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET;
+                        if (cleancache_ops)
+                                fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE);
+                        else
+                                fs_poolid_map[i] = FS_NO_BACKEND;
+                        break;
+                }
+        }
+        mutex_unlock(&poolid_mutex);
 }
 EXPORT_SYMBOL(__cleancache_init_fs);
 
 /* Called by a cleancache-enabled clustered filesystem at time of mount */
 void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
 {
-        sb->cleancache_poolid =
-                (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
+        int i;
+
+        mutex_lock(&poolid_mutex);
+        for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+                if (shared_fs_poolid_map[i] == FS_UNKNOWN) {
+                        sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET;
+                        uuids[i] = uuid;
+                        if (cleancache_ops)
+                                shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs
+                                                (uuid, PAGE_SIZE);
+                        else
+                                shared_fs_poolid_map[i] = FS_NO_BACKEND;
+                        break;
+                }
+        }
+        mutex_unlock(&poolid_mutex);
 }
 EXPORT_SYMBOL(__cleancache_init_shared_fs);
 
@@ -99,27 +202,53 @@ static int cleancache_get_key(struct inode *inode,
 }
 
 /*
+ * Returns a pool_id that is associated with a given fake poolid.
+ */
+static int get_poolid_from_fake(int fake_pool_id)
+{
+        if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET)
+                return shared_fs_poolid_map[fake_pool_id -
+                        FAKE_SHARED_FS_POOLID_OFFSET];
+        else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET)
+                return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
+        return FS_NO_BACKEND;
+}
+
+/*
  * "Get" data from cleancache associated with the poolid/inode/index
  * that were specified when the data was put to cleanache and, if
  * successful, use it to fill the specified page with data and return 0.
  * The pageframe is unchanged and returns -1 if the get fails.
  * Page must be locked by caller.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
  */
 int __cleancache_get_page(struct page *page)
 {
         int ret = -1;
         int pool_id;
+        int fake_pool_id;
         struct cleancache_filekey key = { .u.key = { 0 } };
 
+        if (!cleancache_ops) {
+                cleancache_failed_gets++;
+                goto out;
+        }
+
         VM_BUG_ON(!PageLocked(page));
-        pool_id = page->mapping->host->i_sb->cleancache_poolid;
-        if (pool_id < 0)
+        fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
+        if (fake_pool_id < 0)
                 goto out;
+        pool_id = get_poolid_from_fake(fake_pool_id);
 
         if (cleancache_get_key(page->mapping->host, &key) < 0)
                 goto out;
 
-        ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
+        if (pool_id >= 0)
+                ret = cleancache_ops->get_page(pool_id,
+                                key, page->index, page);
         if (ret == 0)
                 cleancache_succ_gets++;
         else
@@ -134,17 +263,32 @@ EXPORT_SYMBOL(__cleancache_get_page);
  * (previously-obtained per-filesystem) poolid and the page's,
  * inode and page index. Page must be locked. Note that a put_page
  * always "succeeds", though a subsequent get_page may succeed or fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
  */
 void __cleancache_put_page(struct page *page)
 {
         int pool_id;
+        int fake_pool_id;
         struct cleancache_filekey key = { .u.key = { 0 } };
 
+        if (!cleancache_ops) {
+                cleancache_puts++;
+                return;
+        }
+
         VM_BUG_ON(!PageLocked(page));
-        pool_id = page->mapping->host->i_sb->cleancache_poolid;
+        fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
+        if (fake_pool_id < 0)
+                return;
+
+        pool_id = get_poolid_from_fake(fake_pool_id);
+
         if (pool_id >= 0 &&
                 cleancache_get_key(page->mapping->host, &key) >= 0) {
-                (*cleancache_ops.put_page)(pool_id, key, page->index, page);
+                cleancache_ops->put_page(pool_id, key, page->index, page);
                 cleancache_puts++;
         }
 }
@@ -153,19 +297,31 @@ EXPORT_SYMBOL(__cleancache_put_page);
 /*
  * Invalidate any data from cleancache associated with the poolid and the
  * page's inode and page index so that a subsequent "get" will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
  */
 void __cleancache_invalidate_page(struct address_space *mapping,
                                         struct page *page)
 {
         /* careful... page->mapping is NULL sometimes when this is called */
-        int pool_id = mapping->host->i_sb->cleancache_poolid;
+        int pool_id;
+        int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
         struct cleancache_filekey key = { .u.key = { 0 } };
 
-        if (pool_id >= 0) {
+        if (!cleancache_ops)
+                return;
+
+        if (fake_pool_id >= 0) {
+                pool_id = get_poolid_from_fake(fake_pool_id);
+                if (pool_id < 0)
+                        return;
+
                 VM_BUG_ON(!PageLocked(page));
                 if (cleancache_get_key(mapping->host, &key) >= 0) {
-                        (*cleancache_ops.invalidate_page)(pool_id,
+                        cleancache_ops->invalidate_page(pool_id,
                                         key, page->index);
                         cleancache_invalidates++;
                 }
         }
@@ -176,34 +332,63 @@ EXPORT_SYMBOL(__cleancache_invalidate_page);
  * Invalidate all data from cleancache associated with the poolid and the
  * mappings's inode so that all subsequent gets to this poolid/inode
  * will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
  */
 void __cleancache_invalidate_inode(struct address_space *mapping)
 {
-        int pool_id = mapping->host->i_sb->cleancache_poolid;
+        int pool_id;
+        int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
         struct cleancache_filekey key = { .u.key = { 0 } };
 
+        if (!cleancache_ops)
+                return;
+
+        if (fake_pool_id < 0)
+                return;
+
+        pool_id = get_poolid_from_fake(fake_pool_id);
+
         if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
-                (*cleancache_ops.invalidate_inode)(pool_id, key);
+                cleancache_ops->invalidate_inode(pool_id, key);
 }
 EXPORT_SYMBOL(__cleancache_invalidate_inode);
 
 /*
  * Called by any cleancache-enabled filesystem at time of unmount;
- * note that pool_id is surrendered and may be reutrned by a subsequent
- * cleancache_init_fs or cleancache_init_shared_fs
+ * note that pool_id is surrendered and may be returned by a subsequent
+ * cleancache_init_fs or cleancache_init_shared_fs.
  */
 void __cleancache_invalidate_fs(struct super_block *sb)
 {
-        if (sb->cleancache_poolid >= 0) {
-                int old_poolid = sb->cleancache_poolid;
-                sb->cleancache_poolid = -1;
-                (*cleancache_ops.invalidate_fs)(old_poolid);
+        int index;
+        int fake_pool_id = sb->cleancache_poolid;
+        int old_poolid = fake_pool_id;
+
+        mutex_lock(&poolid_mutex);
+        if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
+                index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
+                old_poolid = shared_fs_poolid_map[index];
+                shared_fs_poolid_map[index] = FS_UNKNOWN;
+                uuids[index] = NULL;
+        } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
+                index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
+                old_poolid = fs_poolid_map[index];
+                fs_poolid_map[index] = FS_UNKNOWN;
         }
+        sb->cleancache_poolid = -1;
+        if (cleancache_ops)
+                cleancache_ops->invalidate_fs(old_poolid);
+        mutex_unlock(&poolid_mutex);
 }
 EXPORT_SYMBOL(__cleancache_invalidate_fs);
 
 static int __init init_cleancache(void)
 {
+        int i;
+
 #ifdef CONFIG_DEBUG_FS
         struct dentry *root = debugfs_create_dir("cleancache", NULL);
         if (root == NULL)
@@ -215,6 +400,10 @@ static int __init init_cleancache(void)
         debugfs_create_u64("invalidates", S_IRUGO,
                                 root, &cleancache_invalidates);
 #endif
+        for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+                fs_poolid_map[i] = FS_UNKNOWN;
+                shared_fs_poolid_map[i] = FS_UNKNOWN;
+        }
         return 0;
 }
 module_init(init_cleancache)
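For context on the interface the cleancache.c hunks above introduce: a backend now hands the core a pointer to its struct cleancache_ops instead of having the structure copied by value, and cleancache_register_ops() replays any init_fs/init_shared_fs calls recorded while no backend was loaded before publishing that pointer. A hypothetical out-of-tree backend module might register itself roughly as sketched below; only cleancache_register_ops() and the ops members visible in the diff are taken from the commit, while the names and the trivial pool-id handling are illustrative assumptions.

/* Hypothetical backend sketch -- not part of this commit. */
#include <linux/module.h>
#include <linux/cleancache.h>

static int dummy_init_fs(size_t pagesize)
{
        return 0;       /* pretend the backend created pool id 0 */
}

static struct cleancache_ops dummy_cleancache_ops = {
        .init_fs = dummy_init_fs,
        /*
         * A real backend must also provide .init_shared_fs, .get_page,
         * .put_page, .invalidate_page, .invalidate_inode and
         * .invalidate_fs; the shim above calls all of them.
         */
};

static struct cleancache_ops *old_cleancache_ops;

static int __init dummy_cleancache_init(void)
{
        /*
         * Replays any __cleancache_init_fs() calls recorded while no
         * backend was loaded, then publishes the new ops pointer.
         */
        old_cleancache_ops = cleancache_register_ops(&dummy_cleancache_ops);
        return 0;
}
module_init(dummy_cleancache_init);
MODULE_LICENSE("GPL");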
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 2890e67d6026..538367ef1372 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -24,15 +24,7 @@
  * frontswap_ops is set by frontswap_register_ops to contain the pointers
  * to the frontswap "backend" implementation functions.
  */
-static struct frontswap_ops frontswap_ops __read_mostly;
-
-/*
- * This global enablement flag reduces overhead on systems where frontswap_ops
- * has not been registered, so is preferred to the slower alternative: a
- * function call that checks a non-global.
- */
-bool frontswap_enabled __read_mostly;
-EXPORT_SYMBOL(frontswap_enabled);
+static struct frontswap_ops *frontswap_ops __read_mostly;
 
 /*
  * If enabled, frontswap_store will return failure even on success. As
@@ -80,16 +72,70 @@ static inline void inc_frontswap_succ_stores(void) { }
 static inline void inc_frontswap_failed_stores(void) { }
 static inline void inc_frontswap_invalidates(void) { }
 #endif
+
+/*
+ * Due to the asynchronous nature of the backends loading potentially
+ * _after_ the swap system has been activated, we have chokepoints
+ * on all frontswap functions to not call the backend until the backend
+ * has registered.
+ *
+ * Specifically when no backend is registered (nobody called
+ * frontswap_register_ops) all calls to frontswap_init (which is done via
+ * swapon -> enable_swap_info -> frontswap_init) are registered and remembered
+ * (via the setting of need_init bitmap) but fail to create tmem_pools. When a
+ * backend registers with frontswap at some later point the previous
+ * calls to frontswap_init are executed (by iterating over the need_init
+ * bitmap) to create tmem_pools and set the respective poolids. All of that is
+ * guarded by us using atomic bit operations on the 'need_init' bitmap.
+ *
+ * This would not guards us against the user deciding to call swapoff right as
+ * we are calling the backend to initialize (so swapon is in action).
+ * Fortunatly for us, the swapon_mutex has been taked by the callee so we are
+ * OK. The other scenario where calls to frontswap_store (called via
+ * swap_writepage) is racing with frontswap_invalidate_area (called via
+ * swapoff) is again guarded by the swap subsystem.
+ *
+ * While no backend is registered all calls to frontswap_[store|load|
+ * invalidate_area|invalidate_page] are ignored or fail.
+ *
+ * The time between the backend being registered and the swap file system
+ * calling the backend (via the frontswap_* functions) is indeterminate as
+ * frontswap_ops is not atomic_t (or a value guarded by a spinlock).
+ * That is OK as we are comfortable missing some of these calls to the newly
+ * registered backend.
+ *
+ * Obviously the opposite (unloading the backend) must be done after all
+ * the frontswap_[store|load|invalidate_area|invalidate_page] start
+ * ignorning or failing the requests - at which point frontswap_ops
+ * would have to be made in some fashion atomic.
+ */
+static DECLARE_BITMAP(need_init, MAX_SWAPFILES);
+
 /*
  * Register operations for frontswap, returning previous thus allowing
  * detection of multiple backends and possible nesting.
  */
-struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
+struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops)
 {
-        struct frontswap_ops old = frontswap_ops;
-
-        frontswap_ops = *ops;
-        frontswap_enabled = true;
+        struct frontswap_ops *old = frontswap_ops;
+        int i;
+
+        for (i = 0; i < MAX_SWAPFILES; i++) {
+                if (test_and_clear_bit(i, need_init)) {
+                        struct swap_info_struct *sis = swap_info[i];
+                        /* __frontswap_init _should_ have set it! */
+                        if (!sis->frontswap_map)
+                                return ERR_PTR(-EINVAL);
+                        ops->init(i);
+                }
+        }
+        /*
+         * We MUST have frontswap_ops set _after_ the frontswap_init's
+         * have been called. Otherwise __frontswap_store might fail. Hence
+         * the barrier to make sure compiler does not re-order us.
+         */
+        barrier();
+        frontswap_ops = ops;
        return old;
 }
 EXPORT_SYMBOL(frontswap_register_ops);
@@ -115,20 +161,48 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
 /*
  * Called when a swap device is swapon'd.
  */
-void __frontswap_init(unsigned type)
+void __frontswap_init(unsigned type, unsigned long *map)
 {
         struct swap_info_struct *sis = swap_info[type];
 
         BUG_ON(sis == NULL);
-        if (sis->frontswap_map == NULL)
+
+        /*
+         * p->frontswap is a bitmap that we MUST have to figure out which page
+         * has gone in frontswap. Without it there is no point of continuing.
+         */
+        if (WARN_ON(!map))
                 return;
-        frontswap_ops.init(type);
+        /*
+         * Irregardless of whether the frontswap backend has been loaded
+         * before this function or it will be later, we _MUST_ have the
+         * p->frontswap set to something valid to work properly.
+         */
+        frontswap_map_set(sis, map);
+        if (frontswap_ops)
+                frontswap_ops->init(type);
+        else {
+                BUG_ON(type > MAX_SWAPFILES);
+                set_bit(type, need_init);
+        }
 }
 EXPORT_SYMBOL(__frontswap_init);
 
-static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+bool __frontswap_test(struct swap_info_struct *sis,
+                                pgoff_t offset)
+{
+        bool ret = false;
+
+        if (frontswap_ops && sis->frontswap_map)
+                ret = test_bit(offset, sis->frontswap_map);
+        return ret;
+}
+EXPORT_SYMBOL(__frontswap_test);
+
+static inline void __frontswap_clear(struct swap_info_struct *sis,
+                                pgoff_t offset)
 {
-        frontswap_clear(sis, offset);
+        clear_bit(offset, sis->frontswap_map);
         atomic_dec(&sis->frontswap_pages);
 }
 
@@ -147,13 +221,20 @@ int __frontswap_store(struct page *page)
         struct swap_info_struct *sis = swap_info[type];
         pgoff_t offset = swp_offset(entry);
 
+        /*
+         * Return if no backend registed.
+         * Don't need to inc frontswap_failed_stores here.
+         */
+        if (!frontswap_ops)
+                return ret;
+
         BUG_ON(!PageLocked(page));
         BUG_ON(sis == NULL);
-        if (frontswap_test(sis, offset))
+        if (__frontswap_test(sis, offset))
                 dup = 1;
-        ret = frontswap_ops.store(type, offset, page);
+        ret = frontswap_ops->store(type, offset, page);
         if (ret == 0) {
-                frontswap_set(sis, offset);
+                set_bit(offset, sis->frontswap_map);
                 inc_frontswap_succ_stores();
                 if (!dup)
                         atomic_inc(&sis->frontswap_pages);
@@ -188,13 +269,16 @@ int __frontswap_load(struct page *page)
 
         BUG_ON(!PageLocked(page));
         BUG_ON(sis == NULL);
-        if (frontswap_test(sis, offset))
-                ret = frontswap_ops.load(type, offset, page);
+        /*
+         * __frontswap_test() will check whether there is backend registered
+         */
+        if (__frontswap_test(sis, offset))
+                ret = frontswap_ops->load(type, offset, page);
         if (ret == 0) {
                 inc_frontswap_loads();
                 if (frontswap_tmem_exclusive_gets_enabled) {
                         SetPageDirty(page);
-                        frontswap_clear(sis, offset);
+                        __frontswap_clear(sis, offset);
                 }
         }
         return ret;
@@ -210,8 +294,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
         struct swap_info_struct *sis = swap_info[type];
 
         BUG_ON(sis == NULL);
-        if (frontswap_test(sis, offset)) {
-                frontswap_ops.invalidate_page(type, offset);
+        /*
+         * __frontswap_test() will check whether there is backend registered
+         */
+        if (__frontswap_test(sis, offset)) {
+                frontswap_ops->invalidate_page(type, offset);
                 __frontswap_clear(sis, offset);
                 inc_frontswap_invalidates();
         }
@@ -226,12 +313,15 @@ void __frontswap_invalidate_area(unsigned type)
 {
         struct swap_info_struct *sis = swap_info[type];
 
-        BUG_ON(sis == NULL);
-        if (sis->frontswap_map == NULL)
-                return;
-        frontswap_ops.invalidate_area(type);
-        atomic_set(&sis->frontswap_pages, 0);
-        memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+        if (frontswap_ops) {
+                BUG_ON(sis == NULL);
+                if (sis->frontswap_map == NULL)
+                        return;
+                frontswap_ops->invalidate_area(type);
+                atomic_set(&sis->frontswap_pages, 0);
+                memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+        }
+        clear_bit(type, need_init);
 }
 EXPORT_SYMBOL(__frontswap_invalidate_area);
 
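The frontswap.c hunks follow the same lazy-registration idea: __frontswap_init() either calls the backend's init hook directly or records the swap device in the need_init bitmap, and frontswap_register_ops() later replays those pending inits, returning ERR_PTR(-EINVAL) if a recorded device somehow has no frontswap_map. A hypothetical backend module exercising that interface might look roughly like the sketch below; the struct frontswap_ops member signatures are inferred from the calls in the diff and are assumptions rather than guarantees.

/* Hypothetical frontswap backend sketch -- not from this commit.
 * Every store is refused, so pages still go to the real swap device. */
#include <linux/err.h>
#include <linux/module.h>
#include <linux/frontswap.h>

static void dummy_init(unsigned type)
{
        /* Called per swap device; replayed from need_init for devices
         * that were swapon'd before this module loaded. */
}

static int dummy_store(unsigned type, pgoff_t offset, struct page *page)
{
        return -1;      /* refuse the page */
}

static int dummy_load(unsigned type, pgoff_t offset, struct page *page)
{
        return -1;      /* nothing was ever stored */
}

static void dummy_invalidate_page(unsigned type, pgoff_t offset) { }
static void dummy_invalidate_area(unsigned type) { }

static struct frontswap_ops dummy_frontswap_ops = {
        .init            = dummy_init,
        .store           = dummy_store,
        .load            = dummy_load,
        .invalidate_page = dummy_invalidate_page,
        .invalidate_area = dummy_invalidate_area,
};

static int __init dummy_frontswap_register(void)
{
        struct frontswap_ops *old = frontswap_register_ops(&dummy_frontswap_ops);

        return IS_ERR(old) ? PTR_ERR(old) : 0;
}
module_init(dummy_frontswap_register);
MODULE_LICENSE("GPL");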
diff --git a/mm/swapfile.c b/mm/swapfile.c
index d417efddfe74..6c340d908b27 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 }
 
 static void _enable_swap_info(struct swap_info_struct *p, int prio,
-                                unsigned char *swap_map,
-                                unsigned long *frontswap_map)
+                                unsigned char *swap_map)
 {
         int i, prev;
 
@@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
         else
                 p->prio = --least_priority;
         p->swap_map = swap_map;
-        frontswap_map_set(p, frontswap_map);
         p->flags |= SWP_WRITEOK;
         atomic_long_add(p->pages, &nr_swap_pages);
         total_swap_pages += p->pages;
@@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
                                 unsigned char *swap_map,
                                 unsigned long *frontswap_map)
 {
+        frontswap_init(p->type, frontswap_map);
         spin_lock(&swap_lock);
         spin_lock(&p->lock);
-        _enable_swap_info(p, prio, swap_map, frontswap_map);
-        frontswap_init(p->type);
+        _enable_swap_info(p, prio, swap_map);
         spin_unlock(&p->lock);
         spin_unlock(&swap_lock);
 }
@@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p)
 {
         spin_lock(&swap_lock);
         spin_lock(&p->lock);
-        _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
+        _enable_swap_info(p, p->prio, p->swap_map);
         spin_unlock(&p->lock);
         spin_unlock(&swap_lock);
 }
@@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
         struct swap_info_struct *p = NULL;
         unsigned char *swap_map;
+        unsigned long *frontswap_map;
         struct file *swap_file, *victim;
         struct address_space *mapping;
         struct inode *inode;
@@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
         swap_map = p->swap_map;
         p->swap_map = NULL;
         p->flags = 0;
-        frontswap_invalidate_area(type);
+        frontswap_map = frontswap_map_get(p);
+        frontswap_map_set(p, NULL);
         spin_unlock(&p->lock);
         spin_unlock(&swap_lock);
+        frontswap_invalidate_area(type);
         mutex_unlock(&swapon_mutex);
         vfree(swap_map);
-        vfree(frontswap_map_get(p));
+        vfree(frontswap_map);
         /* Destroy swap account informatin */
         swap_cgroup_swapoff(type);
 