path: root/mm/cleancache.c
author     Linus Torvalds <torvalds@linux-foundation.org>  2015-04-14 19:49:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-04-14 19:49:17 -0400
commit     1dcf58d6e6e6eb7ec10e9abc56887b040205b06f (patch)
tree       c03e7a25ef13eea62f1547914a76e5c68f3f4c28 /mm/cleancache.c
parent     80dcc31fbe55932ac9204daee5f2ebc0c49b6da3 (diff)
parent     e4b0db72be2487bae0e3251c22f82c104f7c1cfd (diff)
Merge branch 'akpm' (patches from Andrew)
Merge first patchbomb from Andrew Morton:

 - arch/sh updates
 - ocfs2 updates
 - kernel/watchdog feature
 - about half of mm/

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (122 commits)
  Documentation: update arch list in the 'memtest' entry
  Kconfig: memtest: update number of test patterns up to 17
  arm: add support for memtest
  arm64: add support for memtest
  memtest: use phys_addr_t for physical addresses
  mm: move memtest under mm
  mm, hugetlb: abort __get_user_pages if current has been oom killed
  mm, mempool: do not allow atomic resizing
  memcg: print cgroup information when system panics due to panic_on_oom
  mm: numa: remove migrate_ratelimited
  mm: fold arch_randomize_brk into ARCH_HAS_ELF_RANDOMIZE
  mm: split ET_DYN ASLR from mmap ASLR
  s390: redefine randomize_et_dyn for ELF_ET_DYN_BASE
  mm: expose arch_mmap_rnd when available
  s390: standardize mmap_rnd() usage
  powerpc: standardize mmap_rnd() usage
  mips: extract logic for mmap_rnd()
  arm64: standardize mmap_rnd() usage
  x86: standardize mmap_rnd() usage
  arm: factor out mmap ASLR into mmap_rnd
  ...
Diffstat (limited to 'mm/cleancache.c')
-rw-r--r--  mm/cleancache.c  276
1 files changed, 93 insertions, 183 deletions
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 053bcd8f12fb..8fc50811119b 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -19,7 +19,7 @@
 #include <linux/cleancache.h>
 
 /*
- * cleancache_ops is set by cleancache_ops_register to contain the pointers
+ * cleancache_ops is set by cleancache_register_ops to contain the pointers
  * to the cleancache "backend" implementation functions.
  */
 static struct cleancache_ops *cleancache_ops __read_mostly;
@@ -34,145 +34,107 @@ static u64 cleancache_failed_gets;
 static u64 cleancache_puts;
 static u64 cleancache_invalidates;
 
-/*
- * When no backend is registered all calls to init_fs and init_shared_fs
- * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or
- * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array
- * [shared_|]fs_poolid_map) are given to the respective super block
- * (sb->cleancache_poolid) and no tmem_pools are created. When a backend
- * registers with cleancache the previous calls to init_fs and init_shared_fs
- * are executed to create tmem_pools and set the respective poolids. While no
- * backend is registered all "puts", "gets" and "flushes" are ignored or failed.
- */
-#define MAX_INITIALIZABLE_FS 32
-#define FAKE_FS_POOLID_OFFSET 1000
-#define FAKE_SHARED_FS_POOLID_OFFSET 2000
-
-#define FS_NO_BACKEND (-1)
-#define FS_UNKNOWN (-2)
-static int fs_poolid_map[MAX_INITIALIZABLE_FS];
-static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS];
-static char *uuids[MAX_INITIALIZABLE_FS];
-/*
- * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads
- * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple
- * threads calling mount (and ending up in __cleancache_init_[shared|]fs).
- */
-static DEFINE_MUTEX(poolid_mutex);
-/*
- * When set to false (default) all calls to the cleancache functions, except
- * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded
- * by the if (!cleancache_ops) return. This means multiple threads (from
- * different filesystems) will be checking cleancache_ops. The usage of a
- * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are
- * OK if the time between the backend's have been initialized (and
- * cleancache_ops has been set to not NULL) and when the filesystems start
- * actually calling the backends. The inverse (when unloading) is obviously
- * not good - but this shim does not do that (yet).
- */
-
-/*
- * The backends and filesystems work all asynchronously. This is b/c the
- * backends can be built as modules.
- * The usual sequence of events is:
- *	a) mount /	-> __cleancache_init_fs is called. We set the
- *		[shared_|]fs_poolid_map and uuids for.
- *
- * b). user does I/Os -> we call the rest of __cleancache_* functions
- *	which return immediately as cleancache_ops is false.
- *
- * c). modprobe zcache -> cleancache_register_ops. We init the backend
- *	and set cleancache_ops to true, and for any fs_poolid_map
- *	(which is set by __cleancache_init_fs) we initialize the poolid.
- *
- * d). user does I/Os -> now that cleancache_ops is true all the
- *	__cleancache_* functions can call the backend. They all check
- *	that fs_poolid_map is valid and if so invoke the backend.
- *
- * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is
- *	reset (which is the second check in the __cleancache_* ops
- *	to call the backend).
- *
- * The sequence of event could also be c), followed by a), and d). and e). The
- * c) would not happen anymore. There is also the chance of c), and one thread
- * doing a) + d), and another doing e). For that case we depend on the
- * filesystem calling __cleancache_invalidate_fs in the proper sequence (so
- * that it handles all I/Os before it invalidates the fs (which is last part
- * of unmounting process).
- *
- * Note: The acute reader will notice that there is no "rmmod zcache" case.
- * This is b/c the functionality for that is not yet implemented and when
- * done, will require some extra locking not yet devised.
- */
+static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
+{
+	switch (sb->cleancache_poolid) {
+	case CLEANCACHE_NO_BACKEND:
+		__cleancache_init_fs(sb);
+		break;
+	case CLEANCACHE_NO_BACKEND_SHARED:
+		__cleancache_init_shared_fs(sb);
+		break;
+	}
+}
 
 /*
- * Register operations for cleancache, returning previous thus allowing
- * detection of multiple backends and possible nesting.
+ * Register operations for cleancache. Returns 0 on success.
  */
-struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops)
+int cleancache_register_ops(struct cleancache_ops *ops)
 {
-	struct cleancache_ops *old = cleancache_ops;
-	int i;
+	if (cmpxchg(&cleancache_ops, NULL, ops))
+		return -EBUSY;
 
-	mutex_lock(&poolid_mutex);
-	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
-		if (fs_poolid_map[i] == FS_NO_BACKEND)
-			fs_poolid_map[i] = ops->init_fs(PAGE_SIZE);
-		if (shared_fs_poolid_map[i] == FS_NO_BACKEND)
-			shared_fs_poolid_map[i] = ops->init_shared_fs
-					(uuids[i], PAGE_SIZE);
-	}
 	/*
-	 * We MUST set cleancache_ops _after_ we have called the backends
-	 * init_fs or init_shared_fs functions. Otherwise the compiler might
-	 * re-order where cleancache_ops is set in this function.
+	 * A cleancache backend can be built as a module and hence loaded after
+	 * a cleancache enabled filesystem has called cleancache_init_fs. To
+	 * handle such a scenario, here we call ->init_fs or ->init_shared_fs
+	 * for each active super block. To differentiate between local and
+	 * shared filesystems, we temporarily initialize sb->cleancache_poolid
+	 * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
+	 * respectively in case there is no backend registered at the time
+	 * cleancache_init_fs or cleancache_init_shared_fs is called.
+	 *
+	 * Since filesystems can be mounted concurrently with cleancache
+	 * backend registration, we have to be careful to guarantee that all
+	 * cleancache enabled filesystems that has been mounted by the time
+	 * cleancache_register_ops is called has got and all mounted later will
+	 * get cleancache_poolid. This is assured by the following statements
+	 * tied together:
+	 *
+	 * a) iterate_supers skips only those super blocks that has started
+	 *    ->kill_sb
+	 *
+	 * b) if iterate_supers encounters a super block that has not finished
+	 *    ->mount yet, it waits until it is finished
+	 *
+	 * c) cleancache_init_fs is called from ->mount and
+	 *    cleancache_invalidate_fs is called from ->kill_sb
+	 *
+	 * d) we call iterate_supers after cleancache_ops has been set
+	 *
+	 * From a) it follows that if iterate_supers skips a super block, then
+	 * either the super block is already dead, in which case we do not need
+	 * to bother initializing cleancache for it, or it was mounted after we
+	 * initiated iterate_supers. In the latter case, it must have seen
+	 * cleancache_ops set according to d) and initialized cleancache from
+	 * ->mount by itself according to c). This proves that we call
+	 * ->init_fs at least once for each active super block.
+	 *
+	 * From b) and c) it follows that if iterate_supers encounters a super
+	 * block that has already started ->init_fs, it will wait until ->mount
+	 * and hence ->init_fs has finished, then check cleancache_poolid, see
+	 * that it has already been set and therefore do nothing. This proves
+	 * that we call ->init_fs no more than once for each super block.
+	 *
+	 * Combined together, the last two paragraphs prove the function
+	 * correctness.
+	 *
+	 * Note that various cleancache callbacks may proceed before this
+	 * function is called or even concurrently with it, but since
+	 * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
+	 * until the corresponding ->init_fs has been actually called and
+	 * cleancache_ops has been set.
 	 */
-	barrier();
-	cleancache_ops = ops;
-	mutex_unlock(&poolid_mutex);
-	return old;
+	iterate_supers(cleancache_register_ops_sb, NULL);
+	return 0;
 }
 EXPORT_SYMBOL(cleancache_register_ops);
 
 /* Called by a cleancache-enabled filesystem at time of mount */
 void __cleancache_init_fs(struct super_block *sb)
 {
-	int i;
+	int pool_id = CLEANCACHE_NO_BACKEND;
 
-	mutex_lock(&poolid_mutex);
-	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
-		if (fs_poolid_map[i] == FS_UNKNOWN) {
-			sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET;
-			if (cleancache_ops)
-				fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE);
-			else
-				fs_poolid_map[i] = FS_NO_BACKEND;
-			break;
-		}
+	if (cleancache_ops) {
+		pool_id = cleancache_ops->init_fs(PAGE_SIZE);
+		if (pool_id < 0)
+			pool_id = CLEANCACHE_NO_POOL;
 	}
-	mutex_unlock(&poolid_mutex);
+	sb->cleancache_poolid = pool_id;
 }
 EXPORT_SYMBOL(__cleancache_init_fs);
 
 /* Called by a cleancache-enabled clustered filesystem at time of mount */
-void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
+void __cleancache_init_shared_fs(struct super_block *sb)
 {
-	int i;
+	int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
 
-	mutex_lock(&poolid_mutex);
-	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
-		if (shared_fs_poolid_map[i] == FS_UNKNOWN) {
-			sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET;
-			uuids[i] = uuid;
-			if (cleancache_ops)
-				shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs
-						(uuid, PAGE_SIZE);
-			else
-				shared_fs_poolid_map[i] = FS_NO_BACKEND;
-			break;
-		}
+	if (cleancache_ops) {
+		pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
+		if (pool_id < 0)
+			pool_id = CLEANCACHE_NO_POOL;
 	}
-	mutex_unlock(&poolid_mutex);
+	sb->cleancache_poolid = pool_id;
 }
 EXPORT_SYMBOL(__cleancache_init_shared_fs);
 
@@ -202,19 +164,6 @@ static int cleancache_get_key(struct inode *inode,
 }
 
 /*
- * Returns a pool_id that is associated with a given fake poolid.
- */
-static int get_poolid_from_fake(int fake_pool_id)
-{
-	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET)
-		return shared_fs_poolid_map[fake_pool_id -
-			FAKE_SHARED_FS_POOLID_OFFSET];
-	else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET)
-		return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
-	return FS_NO_BACKEND;
-}
-
-/*
  * "Get" data from cleancache associated with the poolid/inode/index
  * that were specified when the data was put to cleanache and, if
  * successful, use it to fill the specified page with data and return 0.
@@ -229,7 +178,6 @@ int __cleancache_get_page(struct page *page)
 {
 	int ret = -1;
 	int pool_id;
-	int fake_pool_id;
 	struct cleancache_filekey key = { .u.key = { 0 } };
 
 	if (!cleancache_ops) {
@@ -238,17 +186,14 @@ int __cleancache_get_page(struct page *page)
 	}
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
-	if (fake_pool_id < 0)
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
+	if (pool_id < 0)
 		goto out;
-	pool_id = get_poolid_from_fake(fake_pool_id);
 
 	if (cleancache_get_key(page->mapping->host, &key) < 0)
 		goto out;
 
-	if (pool_id >= 0)
-		ret = cleancache_ops->get_page(pool_id,
-				key, page->index, page);
+	ret = cleancache_ops->get_page(pool_id, key, page->index, page);
 	if (ret == 0)
 		cleancache_succ_gets++;
 	else
@@ -271,7 +216,6 @@ EXPORT_SYMBOL(__cleancache_get_page);
 void __cleancache_put_page(struct page *page)
 {
 	int pool_id;
-	int fake_pool_id;
 	struct cleancache_filekey key = { .u.key = { 0 } };
 
 	if (!cleancache_ops) {
@@ -280,12 +224,7 @@ void __cleancache_put_page(struct page *page)
 	}
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
-	if (fake_pool_id < 0)
-		return;
-
-	pool_id = get_poolid_from_fake(fake_pool_id);
-
+	pool_id = page->mapping->host->i_sb->cleancache_poolid;
 	if (pool_id >= 0 &&
 		cleancache_get_key(page->mapping->host, &key) >= 0) {
 		cleancache_ops->put_page(pool_id, key, page->index, page);
@@ -306,18 +245,13 @@ void __cleancache_invalidate_page(struct address_space *mapping,
 					struct page *page)
 {
 	/* careful... page->mapping is NULL sometimes when this is called */
-	int pool_id;
-	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
 	struct cleancache_filekey key = { .u.key = { 0 } };
 
 	if (!cleancache_ops)
 		return;
 
-	if (fake_pool_id >= 0) {
-		pool_id = get_poolid_from_fake(fake_pool_id);
-		if (pool_id < 0)
-			return;
-
+	if (pool_id >= 0) {
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 		if (cleancache_get_key(mapping->host, &key) >= 0) {
 			cleancache_ops->invalidate_page(pool_id,
@@ -339,18 +273,12 @@ EXPORT_SYMBOL(__cleancache_invalidate_page);
  */
 void __cleancache_invalidate_inode(struct address_space *mapping)
 {
-	int pool_id;
-	int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
+	int pool_id = mapping->host->i_sb->cleancache_poolid;
 	struct cleancache_filekey key = { .u.key = { 0 } };
 
 	if (!cleancache_ops)
 		return;
 
-	if (fake_pool_id < 0)
-		return;
-
-	pool_id = get_poolid_from_fake(fake_pool_id);
-
 	if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
 		cleancache_ops->invalidate_inode(pool_id, key);
 }
@@ -363,32 +291,18 @@ EXPORT_SYMBOL(__cleancache_invalidate_inode);
  */
 void __cleancache_invalidate_fs(struct super_block *sb)
 {
-	int index;
-	int fake_pool_id = sb->cleancache_poolid;
-	int old_poolid = fake_pool_id;
+	int pool_id;
 
-	mutex_lock(&poolid_mutex);
-	if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
-		index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
-		old_poolid = shared_fs_poolid_map[index];
-		shared_fs_poolid_map[index] = FS_UNKNOWN;
-		uuids[index] = NULL;
-	} else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
-		index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
-		old_poolid = fs_poolid_map[index];
-		fs_poolid_map[index] = FS_UNKNOWN;
-	}
-	sb->cleancache_poolid = -1;
-	if (cleancache_ops)
-		cleancache_ops->invalidate_fs(old_poolid);
-	mutex_unlock(&poolid_mutex);
+	pool_id = sb->cleancache_poolid;
+	sb->cleancache_poolid = CLEANCACHE_NO_POOL;
+
+	if (cleancache_ops && pool_id >= 0)
+		cleancache_ops->invalidate_fs(pool_id);
 }
 EXPORT_SYMBOL(__cleancache_invalidate_fs);
 
 static int __init init_cleancache(void)
 {
-	int i;
-
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *root = debugfs_create_dir("cleancache", NULL);
 	if (root == NULL)
@@ -400,10 +314,6 @@ static int __init init_cleancache(void)
 	debugfs_create_u64("invalidates", S_IRUGO,
 				root, &cleancache_invalidates);
 #endif
-	for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
-		fs_poolid_map[i] = FS_UNKNOWN;
-		shared_fs_poolid_map[i] = FS_UNKNOWN;
-	}
 	return 0;
 }
 module_init(init_cleancache)
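
For context, below is a rough sketch (not part of this commit) of how a backend module would register against the reworked interface above. The callback names and argument lists are inferred from the call sites visible in this diff (init_fs/init_shared_fs take a page size and return a pool id; get_page/put_page/invalidate_* take a pool id, file key and page index); the example_* identifiers are made up for illustration, and the exact prototypes in include/linux/cleancache.h for this kernel series should be checked before relying on them.

/* Hypothetical backend skeleton -- illustrative only, not from this commit. */
#include <linux/module.h>
#include <linux/cleancache.h>

static int example_init_fs(size_t pagesize)
{
	/* Create a pool for a locally mounted filesystem; return pool id or < 0. */
	return 0;
}

static int example_init_shared_fs(char *uuid, size_t pagesize)
{
	/* Same, for a clustered filesystem identified by uuid (sb->s_uuid). */
	return 0;
}

static int example_get_page(int pool_id, struct cleancache_filekey key,
			    pgoff_t index, struct page *page)
{
	return -1;	/* -1 = miss; 0 = page data copied into @page */
}

static void example_put_page(int pool_id, struct cleancache_filekey key,
			     pgoff_t index, struct page *page)
{
	/* Cache a copy of the clean page; the backend may drop it at any time. */
}

static void example_invalidate_page(int pool_id, struct cleancache_filekey key,
				    pgoff_t index)
{
}

static void example_invalidate_inode(int pool_id, struct cleancache_filekey key)
{
}

static void example_invalidate_fs(int pool_id)
{
	/* Called from __cleancache_invalidate_fs at umount; drop the whole pool. */
}

static struct cleancache_ops example_cleancache_ops = {
	.init_fs		= example_init_fs,
	.init_shared_fs		= example_init_shared_fs,
	.get_page		= example_get_page,
	.put_page		= example_put_page,
	.invalidate_page	= example_invalidate_page,
	.invalidate_inode	= example_invalidate_inode,
	.invalidate_fs		= example_invalidate_fs,
};

static int __init example_backend_init(void)
{
	/*
	 * After this merge, cleancache_register_ops() returns 0 on success or
	 * -EBUSY if another backend is already registered, and it walks the
	 * currently mounted super blocks (iterate_supers) so filesystems
	 * mounted before the module loaded still get a pool id.  There is no
	 * unregister path, so no module_exit handler is provided.
	 */
	return cleancache_register_ops(&example_cleancache_ops);
}
module_init(example_backend_init);
MODULE_LICENSE("GPL");

The in-tree Xen tmem driver (drivers/xen/tmem.c) follows the same registration pattern against this interface.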