aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/caching/backend-api.txt658
-rw-r--r--Documentation/filesystems/caching/cachefiles.txt501
-rw-r--r--Documentation/filesystems/caching/fscache.txt333
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt778
-rw-r--r--Documentation/filesystems/caching/object.txt313
-rw-r--r--Documentation/filesystems/caching/operations.txt213
-rw-r--r--Documentation/slow-work.txt174
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/Kconfig8
-rw-r--r--fs/afs/Makefile3
-rw-r--r--fs/afs/cache.c503
-rw-r--r--fs/afs/cache.h15
-rw-r--r--fs/afs/cell.c16
-rw-r--r--fs/afs/file.c220
-rw-r--r--fs/afs/inode.c31
-rw-r--r--fs/afs/internal.h53
-rw-r--r--fs/afs/main.c27
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/vlocation.c25
-rw-r--r--fs/afs/volume.c14
-rw-r--r--fs/afs/write.c21
-rw-r--r--fs/cachefiles/Kconfig39
-rw-r--r--fs/cachefiles/Makefile18
-rw-r--r--fs/cachefiles/bind.c286
-rw-r--r--fs/cachefiles/daemon.c755
-rw-r--r--fs/cachefiles/interface.c449
-rw-r--r--fs/cachefiles/internal.h360
-rw-r--r--fs/cachefiles/key.c159
-rw-r--r--fs/cachefiles/main.c106
-rw-r--r--fs/cachefiles/namei.c771
-rw-r--r--fs/cachefiles/proc.c134
-rw-r--r--fs/cachefiles/rdwr.c879
-rw-r--r--fs/cachefiles/security.c116
-rw-r--r--fs/cachefiles/xattr.c291
-rw-r--r--fs/fscache/Kconfig56
-rw-r--r--fs/fscache/Makefile19
-rw-r--r--fs/fscache/cache.c415
-rw-r--r--fs/fscache/cookie.c500
-rw-r--r--fs/fscache/fsdef.c144
-rw-r--r--fs/fscache/histogram.c109
-rw-r--r--fs/fscache/internal.h380
-rw-r--r--fs/fscache/main.c124
-rw-r--r--fs/fscache/netfs.c103
-rw-r--r--fs/fscache/object.c810
-rw-r--r--fs/fscache/operation.c459
-rw-r--r--fs/fscache/page.c816
-rw-r--r--fs/fscache/proc.c68
-rw-r--r--fs/fscache/stats.c212
-rw-r--r--fs/nfs/Kconfig8
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/client.c14
-rw-r--r--fs/nfs/file.c38
-rw-r--r--fs/nfs/fscache-index.c337
-rw-r--r--fs/nfs/fscache.c523
-rw-r--r--fs/nfs/fscache.h220
-rw-r--r--fs/nfs/inode.c14
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/iostat.h18
-rw-r--r--fs/nfs/read.c27
-rw-r--r--fs/nfs/super.c45
-rw-r--r--fs/splice.c3
-rw-r--r--fs/super.c1
-rw-r--r--include/linux/fscache-cache.h505
-rw-r--r--include/linux/fscache.h618
-rw-r--r--include/linux/nfs_fs.h13
-rw-r--r--include/linux/nfs_fs_sb.h11
-rw-r--r--include/linux/nfs_iostat.h12
-rw-r--r--include/linux/page-flags.h40
-rw-r--r--include/linux/pagemap.h5
-rw-r--r--include/linux/slow-work.h95
-rw-r--r--init/Kconfig12
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/slow-work.c640
-rw-r--r--kernel/sysctl.c9
-rw-r--r--mm/filemap.c21
-rw-r--r--mm/migrate.c10
-rw-r--r--mm/readahead.c40
-rw-r--r--mm/swap.c4
-rw-r--r--mm/truncate.c10
-rw-r--r--mm/vmscan.c6
-rw-r--r--security/security.c2
82 files changed, 15414 insertions, 390 deletions
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
new file mode 100644
index 000000000000..382d52cdaf2d
--- /dev/null
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -0,0 +1,658 @@
1 ==========================
2 FS-CACHE CACHE BACKEND API
3 ==========================
4
5The FS-Cache system provides an API by which actual caches can be supplied to
6FS-Cache for it to then serve out to network filesystems and other interested
7parties.
8
9This API is declared in <linux/fscache-cache.h>.
10
11
12====================================
13INITIALISING AND REGISTERING A CACHE
14====================================
15
16To start off, a cache definition must be initialised and registered for each
17cache the backend wants to make available. For instance, CacheFS does this in
18the fill_super() operation on mounting.
19
20The cache definition (struct fscache_cache) should be initialised by calling:
21
22 void fscache_init_cache(struct fscache_cache *cache,
23 struct fscache_cache_ops *ops,
24 const char *idfmt,
25 ...);
26
27Where:
28
29 (*) "cache" is a pointer to the cache definition;
30
31 (*) "ops" is a pointer to the table of operations that the backend supports on
32 this cache; and
33
34 (*) "idfmt" is a format and printf-style arguments for constructing a label
35 for the cache.
36
37
38The cache should then be registered with FS-Cache by passing a pointer to the
39previously initialised cache definition to:
40
41 int fscache_add_cache(struct fscache_cache *cache,
42 struct fscache_object *fsdef,
43 const char *tagname);
44
45Two extra arguments should also be supplied:
46
47 (*) "fsdef" which should point to the object representation for the FS-Cache
48 master index in this cache. Netfs primary index entries will be created
49 here. FS-Cache keeps the caller's reference to the index object if
50 successful and will release it upon withdrawal of the cache.
51
52 (*) "tagname" which, if given, should be a text string naming this cache. If
53 this is NULL, the identifier will be used instead. For CacheFS, the
54 identifier is set to name the underlying block device and the tag can be
55 supplied by mount.
56
57This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag
58is already in use. 0 will be returned on success.
59
60
61=====================
62UNREGISTERING A CACHE
63=====================
64
65A cache can be withdrawn from the system by calling this function with a
66pointer to the cache definition:
67
68 void fscache_withdraw_cache(struct fscache_cache *cache);
69
70In CacheFS's case, this is called by put_super().
71
72
73========
74SECURITY
75========
76
77The cache methods are executed one of two contexts:
78
79 (1) that of the userspace process that issued the netfs operation that caused
80 the cache method to be invoked, or
81
82 (2) that of one of the processes in the FS-Cache thread pool.
83
84In either case, this may not be an appropriate context in which to access the
85cache.
86
87The calling process's fsuid, fsgid and SELinux security identities may need to
88be masqueraded for the duration of the cache driver's access to the cache.
89This is left to the cache to handle; FS-Cache makes no effort in this regard.
90
91
92===================================
93CONTROL AND STATISTICS PRESENTATION
94===================================
95
96The cache may present data to the outside world through FS-Cache's interfaces
97in sysfs and procfs - the former for control and the latter for statistics.
98
99A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS
100is enabled. This is accessible through the kobject struct fscache_cache::kobj
101and is for use by the cache as it sees fit.
102
103
104========================
105RELEVANT DATA STRUCTURES
106========================
107
108 (*) Index/Data file FS-Cache representation cookie:
109
110 struct fscache_cookie {
111 struct fscache_object_def *def;
112 struct fscache_netfs *netfs;
113 void *netfs_data;
114 ...
115 };
116
117 The fields that might be of use to the backend describe the object
118 definition, the netfs definition and the netfs's data for this cookie.
119 The object definition contain functions supplied by the netfs for loading
120 and matching index entries; these are required to provide some of the
121 cache operations.
122
123
124 (*) In-cache object representation:
125
126 struct fscache_object {
127 int debug_id;
128 enum {
129 FSCACHE_OBJECT_RECYCLING,
130 ...
131 } state;
132 spinlock_t lock
133 struct fscache_cache *cache;
134 struct fscache_cookie *cookie;
135 ...
136 };
137
138 Structures of this type should be allocated by the cache backend and
139 passed to FS-Cache when requested by the appropriate cache operation. In
140 the case of CacheFS, they're embedded in CacheFS's internal object
141 structures.
142
143 The debug_id is a simple integer that can be used in debugging messages
144 that refer to a particular object. In such a case it should be printed
145 using "OBJ%x" to be consistent with FS-Cache.
146
147 Each object contains a pointer to the cookie that represents the object it
148 is backing. An object should retired when put_object() is called if it is
149 in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be
150 initialised by calling fscache_object_init(object).
151
152
153 (*) FS-Cache operation record:
154
155 struct fscache_operation {
156 atomic_t usage;
157 struct fscache_object *object;
158 unsigned long flags;
159 #define FSCACHE_OP_EXCLUSIVE
160 void (*processor)(struct fscache_operation *op);
161 void (*release)(struct fscache_operation *op);
162 ...
163 };
164
165 FS-Cache has a pool of threads that it uses to give CPU time to the
166 various asynchronous operations that need to be done as part of driving
167 the cache. These are represented by the above structure. The processor
168 method is called to give the op CPU time, and the release method to get
169 rid of it when its usage count reaches 0.
170
171 An operation can be made exclusive upon an object by setting the
172 appropriate flag before enqueuing it with fscache_enqueue_operation(). If
173 an operation needs more processing time, it should be enqueued again.
174
175
176 (*) FS-Cache retrieval operation record:
177
178 struct fscache_retrieval {
179 struct fscache_operation op;
180 struct address_space *mapping;
181 struct list_head *to_do;
182 ...
183 };
184
185 A structure of this type is allocated by FS-Cache to record retrieval and
186 allocation requests made by the netfs. This struct is then passed to the
187 backend to do the operation. The backend may get extra refs to it by
188 calling fscache_get_retrieval() and refs may be discarded by calling
189 fscache_put_retrieval().
190
191 A retrieval operation can be used by the backend to do retrieval work. To
192 do this, the retrieval->op.processor method pointer should be set
193 appropriately by the backend and fscache_enqueue_retrieval() called to
194 submit it to the thread pool. CacheFiles, for example, uses this to queue
195 page examination when it detects PG_lock being cleared.
196
197 The to_do field is an empty list available for the cache backend to use as
198 it sees fit.
199
200
201 (*) FS-Cache storage operation record:
202
203 struct fscache_storage {
204 struct fscache_operation op;
205 pgoff_t store_limit;
206 ...
207 };
208
209 A structure of this type is allocated by FS-Cache to record outstanding
210 writes to be made. FS-Cache itself enqueues this operation and invokes
211 the write_page() method on the object at appropriate times to effect
212 storage.
213
214
215================
216CACHE OPERATIONS
217================
218
219The cache backend provides FS-Cache with a table of operations that can be
220performed on the denizens of the cache. These are held in a structure of type:
221
222 struct fscache_cache_ops
223
224 (*) Name of cache provider [mandatory]:
225
226 const char *name
227
228 This isn't strictly an operation, but should be pointed at a string naming
229 the backend.
230
231
232 (*) Allocate a new object [mandatory]:
233
234 struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
235 struct fscache_cookie *cookie)
236
237 This method is used to allocate a cache object representation to back a
238 cookie in a particular cache. fscache_object_init() should be called on
239 the object to initialise it prior to returning.
240
241 This function may also be used to parse the index key to be used for
242 multiple lookup calls to turn it into a more convenient form. FS-Cache
243 will call the lookup_complete() method to allow the cache to release the
244 form once lookup is complete or aborted.
245
246
247 (*) Look up and create object [mandatory]:
248
249 void (*lookup_object)(struct fscache_object *object)
250
251 This method is used to look up an object, given that the object is already
252 allocated and attached to the cookie. This should instantiate that object
253 in the cache if it can.
254
255 The method should call fscache_object_lookup_negative() as soon as
256 possible if it determines the object doesn't exist in the cache. If the
257 object is found to exist and the netfs indicates that it is valid then
258 fscache_obtained_object() should be called once the object is in a
259 position to have data stored in it. Similarly, fscache_obtained_object()
260 should also be called once a non-present object has been created.
261
262 If a lookup error occurs, fscache_object_lookup_error() should be called
263 to abort the lookup of that object.
264
265
266 (*) Release lookup data [mandatory]:
267
268 void (*lookup_complete)(struct fscache_object *object)
269
270 This method is called to ask the cache to release any resources it was
271 using to perform a lookup.
272
273
274 (*) Increment object refcount [mandatory]:
275
276 struct fscache_object *(*grab_object)(struct fscache_object *object)
277
278 This method is called to increment the reference count on an object. It
279 may fail (for instance if the cache is being withdrawn) by returning NULL.
280 It should return the object pointer if successful.
281
282
283 (*) Lock/Unlock object [mandatory]:
284
285 void (*lock_object)(struct fscache_object *object)
286 void (*unlock_object)(struct fscache_object *object)
287
288 These methods are used to exclusively lock an object. It must be possible
289 to schedule with the lock held, so a spinlock isn't sufficient.
290
291
292 (*) Pin/Unpin object [optional]:
293
294 int (*pin_object)(struct fscache_object *object)
295 void (*unpin_object)(struct fscache_object *object)
296
297 These methods are used to pin an object into the cache. Once pinned an
298 object cannot be reclaimed to make space. Return -ENOSPC if there's not
299 enough space in the cache to permit this.
300
301
302 (*) Update object [mandatory]:
303
304 int (*update_object)(struct fscache_object *object)
305
306 This is called to update the index entry for the specified object. The
307 new information should be in object->cookie->netfs_data. This can be
308 obtained by calling object->cookie->def->get_aux()/get_attr().
309
310
311 (*) Discard object [mandatory]:
312
313 void (*drop_object)(struct fscache_object *object)
314
315 This method is called to indicate that an object has been unbound from its
316 cookie, and that the cache should release the object's resources and
317 retire it if it's in state FSCACHE_OBJECT_RECYCLING.
318
319 This method should not attempt to release any references held by the
320 caller. The caller will invoke the put_object() method as appropriate.
321
322
323 (*) Release object reference [mandatory]:
324
325 void (*put_object)(struct fscache_object *object)
326
327 This method is used to discard a reference to an object. The object may
328 be freed when all the references to it are released.
329
330
331 (*) Synchronise a cache [mandatory]:
332
333 void (*sync)(struct fscache_cache *cache)
334
335 This is called to ask the backend to synchronise a cache with its backing
336 device.
337
338
339 (*) Dissociate a cache [mandatory]:
340
341 void (*dissociate_pages)(struct fscache_cache *cache)
342
343 This is called to ask a cache to perform any page dissociations as part of
344 cache withdrawal.
345
346
347 (*) Notification that the attributes on a netfs file changed [mandatory]:
348
349 int (*attr_changed)(struct fscache_object *object);
350
351 This is called to indicate to the cache that certain attributes on a netfs
352 file have changed (for example the maximum size a file may reach). The
353 cache can read these from the netfs by calling the cookie's get_attr()
354 method.
355
356 The cache may use the file size information to reserve space on the cache.
357 It should also call fscache_set_store_limit() to indicate to FS-Cache the
358 highest byte it's willing to store for an object.
359
360 This method may return -ve if an error occurred or the cache object cannot
361 be expanded. In such a case, the object will be withdrawn from service.
362
363 This operation is run asynchronously from FS-Cache's thread pool, and
364 storage and retrieval operations from the netfs are excluded during the
365 execution of this operation.
366
367
368 (*) Reserve cache space for an object's data [optional]:
369
370 int (*reserve_space)(struct fscache_object *object, loff_t size);
371
372 This is called to request that cache space be reserved to hold the data
373 for an object and the metadata used to track it. Zero size should be
374 taken as request to cancel a reservation.
375
376 This should return 0 if successful, -ENOSPC if there isn't enough space
377 available, or -ENOMEM or -EIO on other errors.
378
379 The reservation may exceed the current size of the object, thus permitting
380 future expansion. If the amount of space consumed by an object would
381 exceed the reservation, it's permitted to refuse requests to allocate
382 pages, but not required. An object may be pruned down to its reservation
383 size if larger than that already.
384
385
386 (*) Request page be read from cache [mandatory]:
387
388 int (*read_or_alloc_page)(struct fscache_retrieval *op,
389 struct page *page,
390 gfp_t gfp)
391
392 This is called to attempt to read a netfs page from the cache, or to
393 reserve a backing block if not. FS-Cache will have done as much checking
394 as it can before calling, but most of the work belongs to the backend.
395
396 If there's no page in the cache, then -ENODATA should be returned if the
397 backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it
398 didn't.
399
400 If there is suitable data in the cache, then a read operation should be
401 queued and 0 returned. When the read finishes, fscache_end_io() should be
402 called.
403
404 The fscache_mark_pages_cached() should be called for the page if any cache
405 metadata is retained. This will indicate to the netfs that the page needs
406 explicit uncaching. This operation takes a pagevec, thus allowing several
407 pages to be marked at once.
408
409 The retrieval record pointed to by op should be retained for each page
410 queued and released when I/O on the page has been formally ended.
411 fscache_get/put_retrieval() are available for this purpose.
412
413 The retrieval record may be used to get CPU time via the FS-Cache thread
414 pool. If this is desired, the op->op.processor should be set to point to
415 the appropriate processing routine, and fscache_enqueue_retrieval() should
416 be called at an appropriate point to request CPU time. For instance, the
417 retrieval routine could be enqueued upon the completion of a disk read.
418 The to_do field in the retrieval record is provided to aid in this.
419
420 If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
421 returned if possible or fscache_end_io() called with a suitable error
422 code..
423
424
425 (*) Request pages be read from cache [mandatory]:
426
427 int (*read_or_alloc_pages)(struct fscache_retrieval *op,
428 struct list_head *pages,
429 unsigned *nr_pages,
430 gfp_t gfp)
431
432 This is like the read_or_alloc_page() method, except it is handed a list
433 of pages instead of one page. Any pages on which a read operation is
434 started must be added to the page cache for the specified mapping and also
435 to the LRU. Such pages must also be removed from the pages list and
436 *nr_pages decremented per page.
437
438 If there was an error such as -ENOMEM, then that should be returned; else
439 if one or more pages couldn't be read or allocated, then -ENOBUFS should
440 be returned; else if one or more pages couldn't be read, then -ENODATA
441 should be returned. If all the pages are dispatched then 0 should be
442 returned.
443
444
445 (*) Request page be allocated in the cache [mandatory]:
446
447 int (*allocate_page)(struct fscache_retrieval *op,
448 struct page *page,
449 gfp_t gfp)
450
451 This is like the read_or_alloc_page() method, except that it shouldn't
452 read from the cache, even if there's data there that could be retrieved.
453 It should, however, set up any internal metadata required such that
454 the write_page() method can write to the cache.
455
456 If there's no backing block available, then -ENOBUFS should be returned
457 (or -ENOMEM if there were other problems). If a block is successfully
458 allocated, then the netfs page should be marked and 0 returned.
459
460
461 (*) Request pages be allocated in the cache [mandatory]:
462
463 int (*allocate_pages)(struct fscache_retrieval *op,
464 struct list_head *pages,
465 unsigned *nr_pages,
466 gfp_t gfp)
467
468 This is an multiple page version of the allocate_page() method. pages and
469 nr_pages should be treated as for the read_or_alloc_pages() method.
470
471
472 (*) Request page be written to cache [mandatory]:
473
474 int (*write_page)(struct fscache_storage *op,
475 struct page *page);
476
477 This is called to write from a page on which there was a previously
478 successful read_or_alloc_page() call or similar. FS-Cache filters out
479 pages that don't have mappings.
480
481 This method is called asynchronously from the FS-Cache thread pool. It is
482 not required to actually store anything, provided -ENODATA is then
483 returned to the next read of this page.
484
485 If an error occurred, then a negative error code should be returned,
486 otherwise zero should be returned. FS-Cache will take appropriate action
487 in response to an error, such as withdrawing this object.
488
489 If this method returns success then FS-Cache will inform the netfs
490 appropriately.
491
492
493 (*) Discard retained per-page metadata [mandatory]:
494
495 void (*uncache_page)(struct fscache_object *object, struct page *page)
496
497 This is called when a netfs page is being evicted from the pagecache. The
498 cache backend should tear down any internal representation or tracking it
499 maintains for this page.
500
501
502==================
503FS-CACHE UTILITIES
504==================
505
506FS-Cache provides some utilities that a cache backend may make use of:
507
508 (*) Note occurrence of an I/O error in a cache:
509
510 void fscache_io_error(struct fscache_cache *cache)
511
512 This tells FS-Cache that an I/O error occurred in the cache. After this
513 has been called, only resource dissociation operations (object and page
514 release) will be passed from the netfs to the cache backend for the
515 specified cache.
516
517 This does not actually withdraw the cache. That must be done separately.
518
519
520 (*) Invoke the retrieval I/O completion function:
521
522 void fscache_end_io(struct fscache_retrieval *op, struct page *page,
523 int error);
524
525 This is called to note the end of an attempt to retrieve a page. The
526 error value should be 0 if successful and an error otherwise.
527
528
529 (*) Set highest store limit:
530
531 void fscache_set_store_limit(struct fscache_object *object,
532 loff_t i_size);
533
534 This sets the limit FS-Cache imposes on the highest byte it's willing to
535 try and store for a netfs. Any page over this limit is automatically
536 rejected by fscache_read_alloc_page() and co with -ENOBUFS.
537
538
539 (*) Mark pages as being cached:
540
541 void fscache_mark_pages_cached(struct fscache_retrieval *op,
542 struct pagevec *pagevec);
543
544 This marks a set of pages as being cached. After this has been called,
545 the netfs must call fscache_uncache_page() to unmark the pages.
546
547
548 (*) Perform coherency check on an object:
549
550 enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
551 const void *data,
552 uint16_t datalen);
553
554 This asks the netfs to perform a coherency check on an object that has
555 just been looked up. The cookie attached to the object will determine the
556 netfs to use. data and datalen should specify where the auxiliary data
557 retrieved from the cache can be found.
558
559 One of three values will be returned:
560
561 (*) FSCACHE_CHECKAUX_OKAY
562
563 The coherency data indicates the object is valid as is.
564
565 (*) FSCACHE_CHECKAUX_NEEDS_UPDATE
566
567 The coherency data needs updating, but otherwise the object is
568 valid.
569
570 (*) FSCACHE_CHECKAUX_OBSOLETE
571
572 The coherency data indicates that the object is obsolete and should
573 be discarded.
574
575
576 (*) Initialise a freshly allocated object:
577
578 void fscache_object_init(struct fscache_object *object);
579
580 This initialises all the fields in an object representation.
581
582
583 (*) Indicate the destruction of an object:
584
585 void fscache_object_destroyed(struct fscache_cache *cache);
586
587 This must be called to inform FS-Cache that an object that belonged to a
588 cache has been destroyed and deallocated. This will allow continuation
589 of the cache withdrawal process when it is stopped pending destruction of
590 all the objects.
591
592
593 (*) Indicate negative lookup on an object:
594
595 void fscache_object_lookup_negative(struct fscache_object *object);
596
597 This is called to indicate to FS-Cache that a lookup process for an object
598 found a negative result.
599
600 This changes the state of an object to permit reads pending on lookup
601 completion to go off and start fetching data from the netfs server as it's
602 known at this point that there can't be any data in the cache.
603
604 This may be called multiple times on an object. Only the first call is
605 significant - all subsequent calls are ignored.
606
607
608 (*) Indicate an object has been obtained:
609
610 void fscache_obtained_object(struct fscache_object *object);
611
612 This is called to indicate to FS-Cache that a lookup process for an object
613 produced a positive result, or that an object was created. This should
614 only be called once for any particular object.
615
616 This changes the state of an object to indicate:
617
618 (1) if no call to fscache_object_lookup_negative() has been made on
619 this object, that there may be data available, and that reads can
620 now go and look for it; and
621
622 (2) that writes may now proceed against this object.
623
624
625 (*) Indicate that object lookup failed:
626
627 void fscache_object_lookup_error(struct fscache_object *object);
628
629 This marks an object as having encountered a fatal error (usually EIO)
630 and causes it to move into a state whereby it will be withdrawn as soon
631 as possible.
632
633
634 (*) Get and release references on a retrieval record:
635
636 void fscache_get_retrieval(struct fscache_retrieval *op);
637 void fscache_put_retrieval(struct fscache_retrieval *op);
638
639 These two functions are used to retain a retrieval record whilst doing
640 asynchronous data retrieval and block allocation.
641
642
643 (*) Enqueue a retrieval record for processing.
644
645 void fscache_enqueue_retrieval(struct fscache_retrieval *op);
646
647 This enqueues a retrieval record for processing by the FS-Cache thread
648 pool. One of the threads in the pool will invoke the retrieval record's
649 op->op.processor callback function. This function may be called from
650 within the callback function.
651
652
653 (*) List of object state names:
654
655 const char *fscache_object_states[];
656
657 For debugging purposes, this may be used to turn the state that an object
658 is in into a text string for display purposes.
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt
new file mode 100644
index 000000000000..c78a49b7bba6
--- /dev/null
+++ b/Documentation/filesystems/caching/cachefiles.txt
@@ -0,0 +1,501 @@
1 ===============================================
2 CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
3 ===============================================
4
5Contents:
6
7 (*) Overview.
8
9 (*) Requirements.
10
11 (*) Configuration.
12
13 (*) Starting the cache.
14
15 (*) Things to avoid.
16
17 (*) Cache culling.
18
19 (*) Cache structure.
20
21 (*) Security model and SELinux.
22
23 (*) A note on security.
24
25 (*) Statistical information.
26
27 (*) Debugging.
28
29
30========
31OVERVIEW
32========
33
34CacheFiles is a caching backend that's meant to use as a cache a directory on
35an already mounted filesystem of a local type (such as Ext3).
36
37CacheFiles uses a userspace daemon to do some of the cache management - such as
38reaping stale nodes and culling. This is called cachefilesd and lives in
39/sbin.
40
41The filesystem and data integrity of the cache are only as good as those of the
42filesystem providing the backing services. Note that CacheFiles does not
43attempt to journal anything since the journalling interfaces of the various
44filesystems are very specific in nature.
45
46CacheFiles creates a misc character device - "/dev/cachefiles" - that is used
47to communication with the daemon. Only one thing may have this open at once,
48and whilst it is open, a cache is at least partially in existence. The daemon
49opens this and sends commands down it to control the cache.
50
51CacheFiles is currently limited to a single cache.
52
53CacheFiles attempts to maintain at least a certain percentage of free space on
54the filesystem, shrinking the cache by culling the objects it contains to make
55space if necessary - see the "Cache Culling" section. This means it can be
56placed on the same medium as a live set of data, and will expand to make use of
57spare space and automatically contract when the set of data requires more
58space.
59
60
61============
62REQUIREMENTS
63============
64
65The use of CacheFiles and its daemon requires the following features to be
66available in the system and in the cache filesystem:
67
68 - dnotify.
69
70 - extended attributes (xattrs).
71
72 - openat() and friends.
73
74 - bmap() support on files in the filesystem (FIBMAP ioctl).
75
76 - The use of bmap() to detect a partial page at the end of the file.
77
78It is strongly recommended that the "dir_index" option is enabled on Ext3
79filesystems being used as a cache.
80
81
82=============
83CONFIGURATION
84=============
85
86The cache is configured by a script in /etc/cachefilesd.conf. These commands
87set up cache ready for use. The following script commands are available:
88
89 (*) brun <N>%
90 (*) bcull <N>%
91 (*) bstop <N>%
92 (*) frun <N>%
93 (*) fcull <N>%
94 (*) fstop <N>%
95
96 Configure the culling limits. Optional. See the section on culling
97 The defaults are 7% (run), 5% (cull) and 1% (stop) respectively.
98
99 The commands beginning with a 'b' are file space (block) limits, those
100 beginning with an 'f' are file count limits.
101
102 (*) dir <path>
103
104 Specify the directory containing the root of the cache. Mandatory.
105
106 (*) tag <name>
107
108 Specify a tag to FS-Cache to use in distinguishing multiple caches.
109 Optional. The default is "CacheFiles".
110
111 (*) debug <mask>
112
113 Specify a numeric bitmask to control debugging in the kernel module.
114 Optional. The default is zero (all off). The following values can be
115 OR'd into the mask to collect various information:
116
117 1 Turn on trace of function entry (_enter() macros)
118 2 Turn on trace of function exit (_leave() macros)
119 4 Turn on trace of internal debug points (_debug())
120
121 This mask can also be set through sysfs, eg:
122
123 echo 5 >/sys/modules/cachefiles/parameters/debug
124
125
126==================
127STARTING THE CACHE
128==================
129
130The cache is started by running the daemon. The daemon opens the cache device,
131configures the cache and tells it to begin caching. At that point the cache
132binds to fscache and the cache becomes live.
133
134The daemon is run as follows:
135
136 /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
137
138The flags are:
139
140 (*) -d
141
142 Increase the debugging level. This can be specified multiple times and
143 is cumulative with itself.
144
145 (*) -s
146
147 Send messages to stderr instead of syslog.
148
149 (*) -n
150
151 Don't daemonise and go into background.
152
153 (*) -f <configfile>
154
155 Use an alternative configuration file rather than the default one.
156
157
158===============
159THINGS TO AVOID
160===============
161
162Do not mount other things within the cache as this will cause problems. The
163kernel module contains its own very cut-down path walking facility that ignores
164mountpoints, but the daemon can't avoid them.
165
166Do not create, rename or unlink files and directories in the cache whilst the
167cache is active, as this may cause the state to become uncertain.
168
169Renaming files in the cache might make objects appear to be other objects (the
170filename is part of the lookup key).
171
172Do not change or remove the extended attributes attached to cache files by the
173cache as this will cause the cache state management to get confused.
174
175Do not create files or directories in the cache, lest the cache get confused or
176serve incorrect data.
177
178Do not chmod files in the cache. The module creates things with minimal
179permissions to prevent random users being able to access them directly.
180
181
182=============
183CACHE CULLING
184=============
185
186The cache may need culling occasionally to make space. This involves
187discarding objects from the cache that have been used less recently than
188anything else. Culling is based on the access time of data objects. Empty
189directories are culled if not in use.
190
191Cache culling is done on the basis of the percentage of blocks and the
192percentage of files available in the underlying filesystem. There are six
193"limits":
194
195 (*) brun
196 (*) frun
197
198 If the amount of free space and the number of available files in the cache
199 rises above both these limits, then culling is turned off.
200
201 (*) bcull
202 (*) fcull
203
204 If the amount of available space or the number of available files in the
205 cache falls below either of these limits, then culling is started.
206
207 (*) bstop
208 (*) fstop
209
210 If the amount of available space or the number of available files in the
211 cache falls below either of these limits, then no further allocation of
212 disk space or files is permitted until culling has raised things above
213 these limits again.
214
215These must be configured thusly:
216
217 0 <= bstop < bcull < brun < 100
218 0 <= fstop < fcull < frun < 100
219
220Note that these are percentages of available space and available files, and do
221_not_ appear as 100 minus the percentage displayed by the "df" program.
222
223The userspace daemon scans the cache to build up a table of cullable objects.
224These are then culled in least recently used order. A new scan of the cache is
225started as soon as space is made in the table. Objects will be skipped if
226their atimes have changed or if the kernel module says it is still using them.
227
228
229===============
230CACHE STRUCTURE
231===============
232
233The CacheFiles module will create two directories in the directory it was
234given:
235
236 (*) cache/
237
238 (*) graveyard/
239
240The active cache objects all reside in the first directory. The CacheFiles
241kernel module moves any retired or culled objects that it can't simply unlink
242to the graveyard from which the daemon will actually delete them.
243
244The daemon uses dnotify to monitor the graveyard directory, and will delete
245anything that appears therein.
246
247
248The module represents index objects as directories with the filename "I..." or
249"J...". Note that the "cache/" directory is itself a special index.
250
251Data objects are represented as files if they have no children, or directories
252if they do. Their filenames all begin "D..." or "E...". If represented as a
253directory, data objects will have a file in the directory called "data" that
254actually holds the data.
255
256Special objects are similar to data objects, except their filenames begin
257"S..." or "T...".
258
259
260If an object has children, then it will be represented as a directory.
261Immediately in the representative directory are a collection of directories
262named for hash values of the child object keys with an '@' prepended. Into
263this directory, if possible, will be placed the representations of the child
264objects:
265
266 INDEX INDEX INDEX DATA FILES
267 ========= ========== ================================= ================
268 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
269 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
270 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
271 cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
272
273
274If the key is so long that it exceeds NAME_MAX with the decorations added on to
275it, then it will be cut into pieces, the first few of which will be used to
276make a nest of directories, and the last one of which will be the objects
277inside the last directory. The names of the intermediate directories will have
278'+' prepended:
279
280 J1223/@23/+xy...z/+kl...m/Epqr
281
282
283Note that keys are raw data, and not only may they exceed NAME_MAX in size,
284they may also contain things like '/' and NUL characters, and so they may not
285be suitable for turning directly into a filename.
286
287To handle this, CacheFiles will use a suitably printable filename directly and
288"base-64" encode ones that aren't directly suitable. The two versions of
289object filenames indicate the encoding:
290
291 OBJECT TYPE PRINTABLE ENCODED
292 =============== =============== ===============
293 Index "I..." "J..."
294 Data "D..." "E..."
295 Special "S..." "T..."
296
297Intermediate directories are always "@" or "+" as appropriate.
298
299
300Each object in the cache has an extended attribute label that holds the object
301type ID (required to distinguish special objects) and the auxiliary data from
302the netfs. The latter is used to detect stale objects in the cache and update
303or retire them.
304
305
306Note that CacheFiles will erase from the cache any file it doesn't recognise or
307any file of an incorrect type (such as a FIFO file or a device file).
308
309
310==========================
311SECURITY MODEL AND SELINUX
312==========================
313
314CacheFiles is implemented to deal properly with the LSM security features of
315the Linux kernel and the SELinux facility.
316
317One of the problems that CacheFiles faces is that it is generally acting on
318behalf of a process, and running in that process's context, and that includes a
319security context that is not appropriate for accessing the cache - either
320because the files in the cache are inaccessible to that process, or because if
321the process creates a file in the cache, that file may be inaccessible to other
322processes.
323
324The way CacheFiles works is to temporarily change the security context (fsuid,
325fsgid and actor security label) that the process acts as - without changing the
326security context of the process when it the target of an operation performed by
327some other process (so signalling and suchlike still work correctly).
328
329
330When the CacheFiles module is asked to bind to its cache, it:
331
332 (1) Finds the security label attached to the root cache directory and uses
333 that as the security label with which it will create files. By default,
334 this is:
335
336 cachefiles_var_t
337
338 (2) Finds the security label of the process which issued the bind request
339 (presumed to be the cachefilesd daemon), which by default will be:
340
341 cachefilesd_t
342
343 and asks LSM to supply a security ID as which it should act given the
344 daemon's label. By default, this will be:
345
346 cachefiles_kernel_t
347
348 SELinux transitions the daemon's security ID to the module's security ID
349 based on a rule of this form in the policy.
350
351 type_transition <daemon's-ID> kernel_t : process <module's-ID>;
352
353 For instance:
354
355 type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t;
356
357
358The module's security ID gives it permission to create, move and remove files
359and directories in the cache, to find and access directories and files in the
360cache, to set and access extended attributes on cache objects, and to read and
361write files in the cache.
362
363The daemon's security ID gives it only a very restricted set of permissions: it
364may scan directories, stat files and erase files and directories. It may
365not read or write files in the cache, and so it is precluded from accessing the
366data cached therein; nor is it permitted to create new files in the cache.
367
368
369There are policy source files available in:
370
371 http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2
372
373and later versions. In that tarball, see the files:
374
375 cachefilesd.te
376 cachefilesd.fc
377 cachefilesd.if
378
379They are built and installed directly by the RPM.
380
381If a non-RPM based system is being used, then copy the above files to their own
382directory and run:
383
384 make -f /usr/share/selinux/devel/Makefile
385 semodule -i cachefilesd.pp
386
387You will need checkpolicy and selinux-policy-devel installed prior to the
388build.
389
390
391By default, the cache is located in /var/fscache, but if it is desirable that
392it should be elsewhere, than either the above policy files must be altered, or
393an auxiliary policy must be installed to label the alternate location of the
394cache.
395
396For instructions on how to add an auxiliary policy to enable the cache to be
397located elsewhere when SELinux is in enforcing mode, please see:
398
399 /usr/share/doc/cachefilesd-*/move-cache.txt
400
401When the cachefilesd rpm is installed; alternatively, the document can be found
402in the sources.
403
404
405==================
406A NOTE ON SECURITY
407==================
408
409CacheFiles makes use of the split security in the task_struct. It allocates
410its own task_security structure, and redirects current->act_as to point to it
411when it acts on behalf of another process, in that process's context.
412
413The reason it does this is that it calls vfs_mkdir() and suchlike rather than
414bypassing security and calling inode ops directly. Therefore the VFS and LSM
415may deny the CacheFiles access to the cache data because under some
416circumstances the caching code is running in the security context of whatever
417process issued the original syscall on the netfs.
418
419Furthermore, should CacheFiles create a file or directory, the security
420parameters with that object is created (UID, GID, security label) would be
421derived from that process that issued the system call, thus potentially
422preventing other processes from accessing the cache - including CacheFiles's
423cache management daemon (cachefilesd).
424
425What is required is to temporarily override the security of the process that
426issued the system call. We can't, however, just do an in-place change of the
427security data as that affects the process as an object, not just as a subject.
428This means it may lose signals or ptrace events for example, and affects what
429the process looks like in /proc.
430
431So CacheFiles makes use of a logical split in the security between the
432objective security (task->sec) and the subjective security (task->act_as). The
433objective security holds the intrinsic security properties of a process and is
434never overridden. This is what appears in /proc, and is what is used when a
435process is the target of an operation by some other process (SIGKILL for
436example).
437
438The subjective security holds the active security properties of a process, and
439may be overridden. This is not seen externally, and is used whan a process
440acts upon another object, for example SIGKILLing another process or opening a
441file.
442
443LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request
444for CacheFiles to run in a context of a specific security label, or to create
445files and directories with another security label.
446
447
448=======================
449STATISTICAL INFORMATION
450=======================
451
452If FS-Cache is compiled with the following option enabled:
453
454 CONFIG_CACHEFILES_HISTOGRAM=y
455
456then it will gather certain statistics and display them through a proc file.
457
458 (*) /proc/fs/cachefiles/histogram
459
460 cat /proc/fs/cachefiles/histogram
461 JIFS SECS LOOKUPS MKDIRS CREATES
462 ===== ===== ========= ========= =========
463
464 This shows the breakdown of the number of times each amount of time
465 between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
466 columns are as follows:
467
468 COLUMN TIME MEASUREMENT
469 ======= =======================================================
470 LOOKUPS Length of time to perform a lookup on the backing fs
471 MKDIRS Length of time to perform a mkdir on the backing fs
472 CREATES Length of time to perform a create on the backing fs
473
474 Each row shows the number of events that took a particular range of times.
475 Each step is 1 jiffy in size. The JIFS column indicates the particular
476 jiffy range covered, and the SECS field the equivalent number of seconds.
477
478
479=========
480DEBUGGING
481=========
482
483If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime
484debugging enabled by adjusting the value in:
485
486 /sys/module/cachefiles/parameters/debug
487
488This is a bitmask of debugging streams to enable:
489
490 BIT VALUE STREAM POINT
491 ======= ======= =============================== =======================
492 0 1 General Function entry trace
493 1 2 Function exit trace
494 2 4 General
495
496The appropriate set of values should be OR'd together and the result written to
497the control file. For example:
498
499 echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
500
501will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
new file mode 100644
index 000000000000..9e94b9491d89
--- /dev/null
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -0,0 +1,333 @@
1 ==========================
2 General Filesystem Caching
3 ==========================
4
5========
6OVERVIEW
7========
8
9This facility is a general purpose cache for network filesystems, though it
10could be used for caching other things such as ISO9660 filesystems too.
11
12FS-Cache mediates between cache backends (such as CacheFS) and network
13filesystems:
14
15 +---------+
16 | | +--------------+
17 | NFS |--+ | |
18 | | | +-->| CacheFS |
19 +---------+ | +----------+ | | /dev/hda5 |
20 | | | | +--------------+
21 +---------+ +-->| | |
22 | | | |--+
23 | AFS |----->| FS-Cache |
24 | | | |--+
25 +---------+ +-->| | |
26 | | | | +--------------+
27 +---------+ | +----------+ | | |
28 | | | +-->| CacheFiles |
29 | ISOFS |--+ | /var/cache |
30 | | +--------------+
31 +---------+
32
33Or to look at it another way, FS-Cache is a module that provides a caching
34facility to a network filesystem such that the cache is transparent to the
35user:
36
37 +---------+
38 | |
39 | Server |
40 | |
41 +---------+
42 | NETWORK
43 ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
44 |
45 | +----------+
46 V | |
47 +---------+ | |
48 | | | |
49 | NFS |----->| FS-Cache |
50 | | | |--+
51 +---------+ | | | +--------------+ +--------------+
52 | | | | | | | |
53 V +----------+ +-->| CacheFiles |-->| Ext3 |
54 +---------+ | /var/cache | | /dev/sda6 |
55 | | +--------------+ +--------------+
56 | VFS | ^ ^
57 | | | |
58 +---------+ +--------------+ |
59 | KERNEL SPACE | |
60 ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
61 | USER SPACE | |
62 V | |
63 +---------+ +--------------+
64 | | | |
65 | Process | | cachefilesd |
66 | | | |
67 +---------+ +--------------+
68
69
70FS-Cache does not follow the idea of completely loading every netfs file
71opened in its entirety into a cache before permitting it to be accessed and
72then serving the pages out of that cache rather than the netfs inode because:
73
74 (1) It must be practical to operate without a cache.
75
76 (2) The size of any accessible file must not be limited to the size of the
77 cache.
78
79 (3) The combined size of all opened files (this includes mapped libraries)
80 must not be limited to the size of the cache.
81
82 (4) The user should not be forced to download an entire file just to do a
83 one-off access of a small portion of it (such as might be done with the
84 "file" program).
85
86It instead serves the cache out in PAGE_SIZE chunks as and when requested by
87the netfs('s) using it.
88
89
90FS-Cache provides the following facilities:
91
92 (1) More than one cache can be used at once. Caches can be selected
93 explicitly by use of tags.
94
95 (2) Caches can be added / removed at any time.
96
97 (3) The netfs is provided with an interface that allows either party to
98 withdraw caching facilities from a file (required for (2)).
99
100 (4) The interface to the netfs returns as few errors as possible, preferring
101 rather to let the netfs remain oblivious.
102
103 (5) Cookies are used to represent indices, files and other objects to the
104 netfs. The simplest cookie is just a NULL pointer - indicating nothing
105 cached there.
106
107 (6) The netfs is allowed to propose - dynamically - any index hierarchy it
108 desires, though it must be aware that the index search function is
109 recursive, stack space is limited, and indices can only be children of
110 indices.
111
112 (7) Data I/O is done direct to and from the netfs's pages. The netfs
113 indicates that page A is at index B of the data-file represented by cookie
114 C, and that it should be read or written. The cache backend may or may
115 not start I/O on that page, but if it does, a netfs callback will be
116 invoked to indicate completion. The I/O may be either synchronous or
117 asynchronous.
118
119 (8) Cookies can be "retired" upon release. At this point FS-Cache will mark
120 them as obsolete and the index hierarchy rooted at that point will get
121 recycled.
122
123 (9) The netfs provides a "match" function for index searches. In addition to
124 saying whether a match was made or not, this can also specify that an
125 entry should be updated or deleted.
126
127(10) As much as possible is done asynchronously.
128
129
130FS-Cache maintains a virtual indexing tree in which all indices, files, objects
131and pages are kept. Bits of this tree may actually reside in one or more
132caches.
133
134 FSDEF
135 |
136 +------------------------------------+
137 | |
138 NFS AFS
139 | |
140 +--------------------------+ +-----------+
141 | | | |
142 homedir mirror afs.org redhat.com
143 | | |
144 +------------+ +---------------+ +----------+
145 | | | | | |
146 00001 00002 00007 00125 vol00001 vol00002
147 | | | | |
148 +---+---+ +-----+ +---+ +------+------+ +-----+----+
149 | | | | | | | | | | | | |
150PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
151 | |
152 PG0 +-------+
153 | |
154 00001 00003
155 |
156 +---+---+
157 | | |
158 PG0 PG1 PG2
159
160In the example above, you can see two netfs's being backed: NFS and AFS. These
161have different index hierarchies:
162
163 (*) The NFS primary index contains per-server indices. Each server index is
164 indexed by NFS file handles to get data file objects. Each data file
165 objects can have an array of pages, but may also have further child
166 objects, such as extended attributes and directory entries. Extended
167 attribute objects themselves have page-array contents.
168
169 (*) The AFS primary index contains per-cell indices. Each cell index contains
170 per-logical-volume indices. Each of volume index contains up to three
171 indices for the read-write, read-only and backup mirrors of those volumes.
172 Each of these contains vnode data file objects, each of which contains an
173 array of pages.
174
175The very top index is the FS-Cache master index in which individual netfs's
176have entries.
177
178Any index object may reside in more than one cache, provided it only has index
179children. Any index with non-index object children will be assumed to only
180reside in one cache.
181
182
183The netfs API to FS-Cache can be found in:
184
185 Documentation/filesystems/caching/netfs-api.txt
186
187The cache backend API to FS-Cache can be found in:
188
189 Documentation/filesystems/caching/backend-api.txt
190
191A description of the internal representations and object state machine can be
192found in:
193
194 Documentation/filesystems/caching/object.txt
195
196
197=======================
198STATISTICAL INFORMATION
199=======================
200
201If FS-Cache is compiled with the following options enabled:
202
203 CONFIG_FSCACHE_STATS=y
204 CONFIG_FSCACHE_HISTOGRAM=y
205
206then it will gather certain statistics and display them through a number of
207proc files.
208
209 (*) /proc/fs/fscache/stats
210
211 This shows counts of a number of events that can happen in FS-Cache:
212
213 CLASS EVENT MEANING
214 ======= ======= =======================================================
215 Cookies idx=N Number of index cookies allocated
216 dat=N Number of data storage cookies allocated
217 spc=N Number of special cookies allocated
218 Objects alc=N Number of objects allocated
219 nal=N Number of object allocation failures
220 avl=N Number of objects that reached the available state
221 ded=N Number of objects that reached the dead state
222 ChkAux non=N Number of objects that didn't have a coherency check
223 ok=N Number of objects that passed a coherency check
224 upd=N Number of objects that needed a coherency data update
225 obs=N Number of objects that were declared obsolete
226 Pages mrk=N Number of pages marked as being cached
227 unc=N Number of uncache page requests seen
228 Acquire n=N Number of acquire cookie requests seen
229 nul=N Number of acq reqs given a NULL parent
230 noc=N Number of acq reqs rejected due to no cache available
231 ok=N Number of acq reqs succeeded
232 nbf=N Number of acq reqs rejected due to error
233 oom=N Number of acq reqs failed on ENOMEM
234 Lookups n=N Number of lookup calls made on cache backends
235 neg=N Number of negative lookups made
236 pos=N Number of positive lookups made
237 crt=N Number of objects created by lookup
238 Updates n=N Number of update cookie requests seen
239 nul=N Number of upd reqs given a NULL parent
240 run=N Number of upd reqs granted CPU time
241 Relinqs n=N Number of relinquish cookie requests seen
242 nul=N Number of rlq reqs given a NULL parent
243 wcr=N Number of rlq reqs waited on completion of creation
244 AttrChg n=N Number of attribute changed requests seen
245 ok=N Number of attr changed requests queued
246 nbf=N Number of attr changed rejected -ENOBUFS
247 oom=N Number of attr changed failed -ENOMEM
248 run=N Number of attr changed ops given CPU time
249 Allocs n=N Number of allocation requests seen
250 ok=N Number of successful alloc reqs
251 wt=N Number of alloc reqs that waited on lookup completion
252 nbf=N Number of alloc reqs rejected -ENOBUFS
253 ops=N Number of alloc reqs submitted
254 owt=N Number of alloc reqs waited for CPU time
255 Retrvls n=N Number of retrieval (read) requests seen
256 ok=N Number of successful retr reqs
257 wt=N Number of retr reqs that waited on lookup completion
258 nod=N Number of retr reqs returned -ENODATA
259 nbf=N Number of retr reqs rejected -ENOBUFS
260 int=N Number of retr reqs aborted -ERESTARTSYS
261 oom=N Number of retr reqs failed -ENOMEM
262 ops=N Number of retr reqs submitted
263 owt=N Number of retr reqs waited for CPU time
264 Stores n=N Number of storage (write) requests seen
265 ok=N Number of successful store reqs
266 agn=N Number of store reqs on a page already pending storage
267 nbf=N Number of store reqs rejected -ENOBUFS
268 oom=N Number of store reqs failed -ENOMEM
269 ops=N Number of store reqs submitted
270 run=N Number of store reqs granted CPU time
271 Ops pend=N Number of times async ops added to pending queues
272 run=N Number of times async ops given CPU time
273 enq=N Number of times async ops queued for processing
274 dfr=N Number of async ops queued for deferred release
275 rel=N Number of async ops released
276 gc=N Number of deferred-release async ops garbage collected
277
278
279 (*) /proc/fs/fscache/histogram
280
281 cat /proc/fs/fscache/histogram
282 JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
283 ===== ===== ========= ========= ========= ========= =========
284
285 This shows the breakdown of the number of times each amount of time
286 between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
287 columns are as follows:
288
289 COLUMN TIME MEASUREMENT
290 ======= =======================================================
291 OBJ INST Length of time to instantiate an object
292 OP RUNS Length of time a call to process an operation took
293 OBJ RUNS Length of time a call to process an object event took
294 RETRV DLY Time between an requesting a read and lookup completing
295 RETRIEVLS Time between beginning and end of a retrieval
296
297 Each row shows the number of events that took a particular range of times.
298 Each step is 1 jiffy in size. The JIFS column indicates the particular
299 jiffy range covered, and the SECS field the equivalent number of seconds.
300
301
302=========
303DEBUGGING
304=========
305
306If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
307debugging enabled by adjusting the value in:
308
309 /sys/module/fscache/parameters/debug
310
311This is a bitmask of debugging streams to enable:
312
313 BIT VALUE STREAM POINT
314 ======= ======= =============================== =======================
315 0 1 Cache management Function entry trace
316 1 2 Function exit trace
317 2 4 General
318 3 8 Cookie management Function entry trace
319 4 16 Function exit trace
320 5 32 General
321 6 64 Page handling Function entry trace
322 7 128 Function exit trace
323 8 256 General
324 9 512 Operation management Function entry trace
325 10 1024 Function exit trace
326 11 2048 General
327
328The appropriate set of values should be OR'd together and the result written to
329the control file. For example:
330
331 echo $((1|8|64)) >/sys/module/fscache/parameters/debug
332
333will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
new file mode 100644
index 000000000000..4db125b3a5c6
--- /dev/null
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -0,0 +1,778 @@
1 ===============================
2 FS-CACHE NETWORK FILESYSTEM API
3 ===============================
4
5There's an API by which a network filesystem can make use of the FS-Cache
6facilities. This is based around a number of principles:
7
8 (1) Caches can store a number of different object types. There are two main
9 object types: indices and files. The first is a special type used by
10 FS-Cache to make finding objects faster and to make retiring of groups of
11 objects easier.
12
13 (2) Every index, file or other object is represented by a cookie. This cookie
14 may or may not have anything associated with it, but the netfs doesn't
15 need to care.
16
17 (3) Barring the top-level index (one entry per cached netfs), the index
18 hierarchy for each netfs is structured according the whim of the netfs.
19
20This API is declared in <linux/fscache.h>.
21
22This document contains the following sections:
23
24 (1) Network filesystem definition
25 (2) Index definition
26 (3) Object definition
27 (4) Network filesystem (un)registration
28 (5) Cache tag lookup
29 (6) Index registration
30 (7) Data file registration
31 (8) Miscellaneous object registration
32 (9) Setting the data file size
33 (10) Page alloc/read/write
34 (11) Page uncaching
35 (12) Index and data file update
36 (13) Miscellaneous cookie operations
37 (14) Cookie unregistration
38 (15) Index and data file invalidation
39 (16) FS-Cache specific page flags.
40
41
42=============================
43NETWORK FILESYSTEM DEFINITION
44=============================
45
46FS-Cache needs a description of the network filesystem. This is specified
47using a record of the following structure:
48
49 struct fscache_netfs {
50 uint32_t version;
51 const char *name;
52 struct fscache_cookie *primary_index;
53 ...
54 };
55
56This first two fields should be filled in before registration, and the third
57will be filled in by the registration function; any other fields should just be
58ignored and are for internal use only.
59
60The fields are:
61
62 (1) The name of the netfs (used as the key in the toplevel index).
63
64 (2) The version of the netfs (if the name matches but the version doesn't, the
65 entire in-cache hierarchy for this netfs will be scrapped and begun
66 afresh).
67
68 (3) The cookie representing the primary index will be allocated according to
69 another parameter passed into the registration function.
70
71For example, kAFS (linux/fs/afs/) uses the following definitions to describe
72itself:
73
74 struct fscache_netfs afs_cache_netfs = {
75 .version = 0,
76 .name = "afs",
77 };
78
79
80================
81INDEX DEFINITION
82================
83
84Indices are used for two purposes:
85
86 (1) To aid the finding of a file based on a series of keys (such as AFS's
87 "cell", "volume ID", "vnode ID").
88
89 (2) To make it easier to discard a subset of all the files cached based around
90 a particular key - for instance to mirror the removal of an AFS volume.
91
92However, since it's unlikely that any two netfs's are going to want to define
93their index hierarchies in quite the same way, FS-Cache tries to impose as few
94restraints as possible on how an index is structured and where it is placed in
95the tree. The netfs can even mix indices and data files at the same level, but
96it's not recommended.
97
98Each index entry consists of a key of indeterminate length plus some auxilliary
99data, also of indeterminate length.
100
101There are some limits on indices:
102
103 (1) Any index containing non-index objects should be restricted to a single
104 cache. Any such objects created within an index will be created in the
105 first cache only. The cache in which an index is created can be
106 controlled by cache tags (see below).
107
108 (2) The entry data must be atomically journallable, so it is limited to about
109 400 bytes at present. At least 400 bytes will be available.
110
111 (3) The depth of the index tree should be judged with care as the search
112 function is recursive. Too many layers will run the kernel out of stack.
113
114
115=================
116OBJECT DEFINITION
117=================
118
119To define an object, a structure of the following type should be filled out:
120
121 struct fscache_cookie_def
122 {
123 uint8_t name[16];
124 uint8_t type;
125
126 struct fscache_cache_tag *(*select_cache)(
127 const void *parent_netfs_data,
128 const void *cookie_netfs_data);
129
130 uint16_t (*get_key)(const void *cookie_netfs_data,
131 void *buffer,
132 uint16_t bufmax);
133
134 void (*get_attr)(const void *cookie_netfs_data,
135 uint64_t *size);
136
137 uint16_t (*get_aux)(const void *cookie_netfs_data,
138 void *buffer,
139 uint16_t bufmax);
140
141 enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
142 const void *data,
143 uint16_t datalen);
144
145 void (*get_context)(void *cookie_netfs_data, void *context);
146
147 void (*put_context)(void *cookie_netfs_data, void *context);
148
149 void (*mark_pages_cached)(void *cookie_netfs_data,
150 struct address_space *mapping,
151 struct pagevec *cached_pvec);
152
153 void (*now_uncached)(void *cookie_netfs_data);
154 };
155
156This has the following fields:
157
158 (1) The type of the object [mandatory].
159
160 This is one of the following values:
161
162 (*) FSCACHE_COOKIE_TYPE_INDEX
163
164 This defines an index, which is a special FS-Cache type.
165
166 (*) FSCACHE_COOKIE_TYPE_DATAFILE
167
168 This defines an ordinary data file.
169
170 (*) Any other value between 2 and 255
171
172 This defines an extraordinary object such as an XATTR.
173
174 (2) The name of the object type (NUL terminated unless all 16 chars are used)
175 [optional].
176
177 (3) A function to select the cache in which to store an index [optional].
178
179 This function is invoked when an index needs to be instantiated in a cache
180 during the instantiation of a non-index object. Only the immediate index
181 parent for the non-index object will be queried. Any indices above that
182 in the hierarchy may be stored in multiple caches. This function does not
183 need to be supplied for any non-index object or any index that will only
184 have index children.
185
186 If this function is not supplied or if it returns NULL then the first
187 cache in the parent's list will be chosed, or failing that, the first
188 cache in the master list.
189
190 (4) A function to retrieve an object's key from the netfs [mandatory].
191
192 This function will be called with the netfs data that was passed to the
193 cookie acquisition function and the maximum length of key data that it may
194 provide. It should write the required key data into the given buffer and
195 return the quantity it wrote.
196
197 (5) A function to retrieve attribute data from the netfs [optional].
198
199 This function will be called with the netfs data that was passed to the
200 cookie acquisition function. It should return the size of the file if
201 this is a data file. The size may be used to govern how much cache must
202 be reserved for this file in the cache.
203
204 If the function is absent, a file size of 0 is assumed.
205
206 (6) A function to retrieve auxilliary data from the netfs [optional].
207
208 This function will be called with the netfs data that was passed to the
209 cookie acquisition function and the maximum length of auxilliary data that
210 it may provide. It should write the auxilliary data into the given buffer
211 and return the quantity it wrote.
212
213 If this function is absent, the auxilliary data length will be set to 0.
214
215 The length of the auxilliary data buffer may be dependent on the key
216 length. A netfs mustn't rely on being able to provide more than 400 bytes
217 for both.
218
219 (7) A function to check the auxilliary data [optional].
220
221 This function will be called to check that a match found in the cache for
222 this object is valid. For instance with AFS it could check the auxilliary
223 data against the data version number returned by the server to determine
224 whether the index entry in a cache is still valid.
225
226 If this function is absent, it will be assumed that matching objects in a
227 cache are always valid.
228
229 If present, the function should return one of the following values:
230
231 (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is
232 (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update
233 (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted
234
235 This function can also be used to extract data from the auxilliary data in
236 the cache and copy it into the netfs's structures.
237
238 (8) A pair of functions to manage contexts for the completion callback
239 [optional].
240
241 The cache read/write functions are passed a context which is then passed
242 to the I/O completion callback function. To ensure this context remains
243 valid until after the I/O completion is called, two functions may be
244 provided: one to get an extra reference on the context, and one to drop a
245 reference to it.
246
247 If the context is not used or is a type of object that won't go out of
248 scope, then these functions are not required. These functions are not
249 required for indices as indices may not contain data. These functions may
250 be called in interrupt context and so may not sleep.
251
252 (9) A function to mark a page as retaining cache metadata [optional].
253
254 This is called by the cache to indicate that it is retaining in-memory
255 information for this page and that the netfs should uncache the page when
256 it has finished. This does not indicate whether there's data on the disk
257 or not. Note that several pages at once may be presented for marking.
258
259 The PG_fscache bit is set on the pages before this function would be
260 called, so the function need not be provided if this is sufficient.
261
262 This function is not required for indices as they're not permitted data.
263
264(10) A function to unmark all the pages retaining cache metadata [mandatory].
265
266 This is called by FS-Cache to indicate that a backing store is being
267 unbound from a cookie and that all the marks on the pages should be
268 cleared to prevent confusion. Note that the cache will have torn down all
269 its tracking information so that the pages don't need to be explicitly
270 uncached.
271
272 This function is not required for indices as they're not permitted data.
273
274
275===================================
276NETWORK FILESYSTEM (UN)REGISTRATION
277===================================
278
279The first step is to declare the network filesystem to the cache. This also
280involves specifying the layout of the primary index (for AFS, this would be the
281"cell" level).
282
283The registration function is:
284
285 int fscache_register_netfs(struct fscache_netfs *netfs);
286
287It just takes a pointer to the netfs definition. It returns 0 or an error as
288appropriate.
289
290For kAFS, registration is done as follows:
291
292 ret = fscache_register_netfs(&afs_cache_netfs);
293
294The last step is, of course, unregistration:
295
296 void fscache_unregister_netfs(struct fscache_netfs *netfs);
297
298
299================
300CACHE TAG LOOKUP
301================
302
303FS-Cache permits the use of more than one cache. To permit particular index
304subtrees to be bound to particular caches, the second step is to look up cache
305representation tags. This step is optional; it can be left entirely up to
306FS-Cache as to which cache should be used. The problem with doing that is that
307FS-Cache will always pick the first cache that was registered.
308
309To get the representation for a named tag:
310
311 struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name);
312
313This takes a text string as the name and returns a representation of a tag. It
314will never return an error. It may return a dummy tag, however, if it runs out
315of memory; this will inhibit caching with this tag.
316
317Any representation so obtained must be released by passing it to this function:
318
319 void fscache_release_cache_tag(struct fscache_cache_tag *tag);
320
321The tag will be retrieved by FS-Cache when it calls the object definition
322operation select_cache().
323
324
325==================
326INDEX REGISTRATION
327==================
328
329The third step is to inform FS-Cache about part of an index hierarchy that can
330be used to locate files. This is done by requesting a cookie for each index in
331the path to the file:
332
333 struct fscache_cookie *
334 fscache_acquire_cookie(struct fscache_cookie *parent,
335 const struct fscache_object_def *def,
336 void *netfs_data);
337
338This function creates an index entry in the index represented by parent,
339filling in the index entry by calling the operations pointed to by def.
340
341Note that this function never returns an error - all errors are handled
342internally. It may, however, return NULL to indicate no cookie. It is quite
343acceptable to pass this token back to this function as the parent to another
344acquisition (or even to the relinquish cookie, read page and write page
345functions - see below).
346
347Note also that no indices are actually created in a cache until a non-index
348object needs to be created somewhere down the hierarchy. Furthermore, an index
349may be created in several different caches independently at different times.
350This is all handled transparently, and the netfs doesn't see any of it.
351
352For example, with AFS, a cell would be added to the primary index. This index
353entry would have a dependent inode containing a volume location index for the
354volume mappings within this cell:
355
356 cell->cache =
357 fscache_acquire_cookie(afs_cache_netfs.primary_index,
358 &afs_cell_cache_index_def,
359 cell);
360
361Then when a volume location was accessed, it would be entered into the cell's
362index and an inode would be allocated that acts as a volume type and hash chain
363combination:
364
365 vlocation->cache =
366 fscache_acquire_cookie(cell->cache,
367 &afs_vlocation_cache_index_def,
368 vlocation);
369
370And then a particular flavour of volume (R/O for example) could be added to
371that index, creating another index for vnodes (AFS inode equivalents):
372
373 volume->cache =
374 fscache_acquire_cookie(vlocation->cache,
375 &afs_volume_cache_index_def,
376 volume);
377
378
379======================
380DATA FILE REGISTRATION
381======================
382
383The fourth step is to request a data file be created in the cache. This is
384identical to index cookie acquisition. The only difference is that the type in
385the object definition should be something other than index type.
386
387 vnode->cache =
388 fscache_acquire_cookie(volume->cache,
389 &afs_vnode_cache_object_def,
390 vnode);
391
392
393=================================
394MISCELLANEOUS OBJECT REGISTRATION
395=================================
396
397An optional step is to request an object of miscellaneous type be created in
398the cache. This is almost identical to index cookie acquisition. The only
399difference is that the type in the object definition should be something other
400than index type. Whilst the parent object could be an index, it's more likely
401it would be some other type of object such as a data file.
402
403 xattr->cache =
404 fscache_acquire_cookie(vnode->cache,
405 &afs_xattr_cache_object_def,
406 xattr);
407
408Miscellaneous objects might be used to store extended attributes or directory
409entries for example.
410
411
412==========================
413SETTING THE DATA FILE SIZE
414==========================
415
416The fifth step is to set the physical attributes of the file, such as its size.
417This doesn't automatically reserve any space in the cache, but permits the
418cache to adjust its metadata for data tracking appropriately:
419
420 int fscache_attr_changed(struct fscache_cookie *cookie);
421
422The cache will return -ENOBUFS if there is no backing cache or if there is no
423space to allocate any extra metadata required in the cache. The attributes
424will be accessed with the get_attr() cookie definition operation.
425
426Note that attempts to read or write data pages in the cache over this size may
427be rebuffed with -ENOBUFS.
428
429This operation schedules an attribute adjustment to happen asynchronously at
430some point in the future, and as such, it may happen after the function returns
431to the caller. The attribute adjustment excludes read and write operations.
432
433
434=====================
435PAGE READ/ALLOC/WRITE
436=====================
437
438And the sixth step is to store and retrieve pages in the cache. There are
439three functions that are used to do this.
440
441Note:
442
443 (1) A page should not be re-read or re-allocated without uncaching it first.
444
445 (2) A read or allocated page must be uncached when the netfs page is released
446 from the pagecache.
447
448 (3) A page should only be written to the cache if previous read or allocated.
449
450This permits the cache to maintain its page tracking in proper order.
451
452
453PAGE READ
454---------
455
456Firstly, the netfs should ask FS-Cache to examine the caches and read the
457contents cached for a particular page of a particular file if present, or else
458allocate space to store the contents if not:
459
460 typedef
461 void (*fscache_rw_complete_t)(struct page *page,
462 void *context,
463 int error);
464
465 int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
466 struct page *page,
467 fscache_rw_complete_t end_io_func,
468 void *context,
469 gfp_t gfp);
470
471The cookie argument must specify a cookie for an object that isn't an index,
472the page specified will have the data loaded into it (and is also used to
473specify the page number), and the gfp argument is used to control how any
474memory allocations made are satisfied.
475
476If the cookie indicates the inode is not cached:
477
478 (1) The function will return -ENOBUFS.
479
480Else if there's a copy of the page resident in the cache:
481
482 (1) The mark_pages_cached() cookie operation will be called on that page.
483
484 (2) The function will submit a request to read the data from the cache's
485 backing device directly into the page specified.
486
487 (3) The function will return 0.
488
489 (4) When the read is complete, end_io_func() will be invoked with:
490
491 (*) The netfs data supplied when the cookie was created.
492
493 (*) The page descriptor.
494
495 (*) The context argument passed to the above function. This will be
496 maintained with the get_context/put_context functions mentioned above.
497
498 (*) An argument that's 0 on success or negative for an error code.
499
500 If an error occurs, it should be assumed that the page contains no usable
501 data.
502
503 end_io_func() will be called in process context if the read is results in
504 an error, but it might be called in interrupt context if the read is
505 successful.
506
507Otherwise, if there's not a copy available in cache, but the cache may be able
508to store the page:
509
510 (1) The mark_pages_cached() cookie operation will be called on that page.
511
512 (2) A block may be reserved in the cache and attached to the object at the
513 appropriate place.
514
515 (3) The function will return -ENODATA.
516
517This function may also return -ENOMEM or -EINTR, in which case it won't have
518read any data from the cache.
519
520
521PAGE ALLOCATE
522-------------
523
524Alternatively, if there's not expected to be any data in the cache for a page
525because the file has been extended, a block can simply be allocated instead:
526
527 int fscache_alloc_page(struct fscache_cookie *cookie,
528 struct page *page,
529 gfp_t gfp);
530
531This is similar to the fscache_read_or_alloc_page() function, except that it
532never reads from the cache. It will return 0 if a block has been allocated,
533rather than -ENODATA as the other would. One or the other must be performed
534before writing to the cache.
535
536The mark_pages_cached() cookie operation will be called on the page if
537successful.
538
539
540PAGE WRITE
541----------
542
543Secondly, if the netfs changes the contents of the page (either due to an
544initial download or if a user performs a write), then the page should be
545written back to the cache:
546
547 int fscache_write_page(struct fscache_cookie *cookie,
548 struct page *page,
549 gfp_t gfp);
550
551The cookie argument must specify a data file cookie, the page specified should
552contain the data to be written (and is also used to specify the page number),
553and the gfp argument is used to control how any memory allocations made are
554satisfied.
555
556The page must have first been read or allocated successfully and must not have
557been uncached before writing is performed.
558
559If the cookie indicates the inode is not cached then:
560
561 (1) The function will return -ENOBUFS.
562
563Else if space can be allocated in the cache to hold this page:
564
565 (1) PG_fscache_write will be set on the page.
566
567 (2) The function will submit a request to write the data to cache's backing
568 device directly from the page specified.
569
570 (3) The function will return 0.
571
572 (4) When the write is complete PG_fscache_write is cleared on the page and
573 anyone waiting for that bit will be woken up.
574
575Else if there's no space available in the cache, -ENOBUFS will be returned. It
576is also possible for the PG_fscache_write bit to be cleared when no write took
577place if unforeseen circumstances arose (such as a disk error).
578
579Writing takes place asynchronously.
580
581
582MULTIPLE PAGE READ
583------------------
584
585A facility is provided to read several pages at once, as requested by the
586readpages() address space operation:
587
588 int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
589 struct address_space *mapping,
590 struct list_head *pages,
591 int *nr_pages,
592 fscache_rw_complete_t end_io_func,
593 void *context,
594 gfp_t gfp);
595
596This works in a similar way to fscache_read_or_alloc_page(), except:
597
598 (1) Any page it can retrieve data for is removed from pages and nr_pages and
599 dispatched for reading to the disk. Reads of adjacent pages on disk may
600 be merged for greater efficiency.
601
602 (2) The mark_pages_cached() cookie operation will be called on several pages
603 at once if they're being read or allocated.
604
605 (3) If there was an general error, then that error will be returned.
606
607 Else if some pages couldn't be allocated or read, then -ENOBUFS will be
608 returned.
609
610 Else if some pages couldn't be read but were allocated, then -ENODATA will
611 be returned.
612
613 Otherwise, if all pages had reads dispatched, then 0 will be returned, the
614 list will be empty and *nr_pages will be 0.
615
616 (4) end_io_func will be called once for each page being read as the reads
617 complete. It will be called in process context if error != 0, but it may
618 be called in interrupt context if there is no error.
619
620Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude
621some of the pages being read and some being allocated. Those pages will have
622been marked appropriately and will need uncaching.
623
624
625==============
626PAGE UNCACHING
627==============
628
629To uncache a page, this function should be called:
630
631 void fscache_uncache_page(struct fscache_cookie *cookie,
632 struct page *page);
633
634This function permits the cache to release any in-memory representation it
635might be holding for this netfs page. This function must be called once for
636each page on which the read or write page functions above have been called to
637make sure the cache's in-memory tracking information gets torn down.
638
639Note that pages can't be explicitly deleted from the a data file. The whole
640data file must be retired (see the relinquish cookie function below).
641
642Furthermore, note that this does not cancel the asynchronous read or write
643operation started by the read/alloc and write functions, so the page
644invalidation and release functions must use:
645
646 bool fscache_check_page_write(struct fscache_cookie *cookie,
647 struct page *page);
648
649to see if a page is being written to the cache, and:
650
651 void fscache_wait_on_page_write(struct fscache_cookie *cookie,
652 struct page *page);
653
654to wait for it to finish if it is.
655
656
657==========================
658INDEX AND DATA FILE UPDATE
659==========================
660
661To request an update of the index data for an index or other object, the
662following function should be called:
663
664 void fscache_update_cookie(struct fscache_cookie *cookie);
665
666This function will refer back to the netfs_data pointer stored in the cookie by
667the acquisition function to obtain the data to write into each revised index
668entry. The update method in the parent index definition will be called to
669transfer the data.
670
671Note that partial updates may happen automatically at other times, such as when
672data blocks are added to a data file object.
673
674
675===============================
676MISCELLANEOUS COOKIE OPERATIONS
677===============================
678
679There are a number of operations that can be used to control cookies:
680
681 (*) Cookie pinning:
682
683 int fscache_pin_cookie(struct fscache_cookie *cookie);
684 void fscache_unpin_cookie(struct fscache_cookie *cookie);
685
686 These operations permit data cookies to be pinned into the cache and to
687 have the pinning removed. They are not permitted on index cookies.
688
689 The pinning function will return 0 if successful, -ENOBUFS in the cookie
690 isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning,
691 -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
692 -EIO if there's any other problem.
693
694 (*) Data space reservation:
695
696 int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size);
697
698 This permits a netfs to request cache space be reserved to store up to the
699 given amount of a file. It is permitted to ask for more than the current
700 size of the file to allow for future file expansion.
701
702 If size is given as zero then the reservation will be cancelled.
703
704 The function will return 0 if successful, -ENOBUFS in the cookie isn't
705 backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations,
706 -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
707 -EIO if there's any other problem.
708
709 Note that this doesn't pin an object in a cache; it can still be culled to
710 make space if it's not in use.
711
712
713=====================
714COOKIE UNREGISTRATION
715=====================
716
717To get rid of a cookie, this function should be called.
718
719 void fscache_relinquish_cookie(struct fscache_cookie *cookie,
720 int retire);
721
722If retire is non-zero, then the object will be marked for recycling, and all
723copies of it will be removed from all active caches in which it is present.
724Not only that but all child objects will also be retired.
725
726If retire is zero, then the object may be available again when next the
727acquisition function is called. Retirement here will overrule the pinning on a
728cookie.
729
730One very important note - relinquish must NOT be called for a cookie unless all
731the cookies for "child" indices, objects and pages have been relinquished
732first.
733
734
735================================
736INDEX AND DATA FILE INVALIDATION
737================================
738
739There is no direct way to invalidate an index subtree or a data file. To do
740this, the caller should relinquish and retire the cookie they have, and then
741acquire a new one.
742
743
744===========================
745FS-CACHE SPECIFIC PAGE FLAG
746===========================
747
748FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is
749given the alternative name PG_fscache.
750
751PG_fscache is used to indicate that the page is known by the cache, and that
752the cache must be informed if the page is going to go away. It's an indication
753to the netfs that the cache has an interest in this page, where an interest may
754be a pointer to it, resources allocated or reserved for it, or I/O in progress
755upon it.
756
757The netfs can use this information in methods such as releasepage() to
758determine whether it needs to uncache a page or update it.
759
760Furthermore, if this bit is set, releasepage() and invalidatepage() operations
761will be called on a page to get rid of it, even if PG_private is not set. This
762allows caching to attempted on a page before read_cache_pages() to be called
763after fscache_read_or_alloc_pages() as the former will try and release pages it
764was given under certain circumstances.
765
766This bit does not overlap with such as PG_private. This means that FS-Cache
767can be used with a filesystem that uses the block buffering code.
768
769There are a number of operations defined on this flag:
770
771 int PageFsCache(struct page *page);
772 void SetPageFsCache(struct page *page)
773 void ClearPageFsCache(struct page *page)
774 int TestSetPageFsCache(struct page *page)
775 int TestClearPageFsCache(struct page *page)
776
777These functions are bit test, bit set, bit clear, bit test and set and bit
778test and clear operations on PG_fscache.
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
new file mode 100644
index 000000000000..e8b0a35d8fe5
--- /dev/null
+++ b/Documentation/filesystems/caching/object.txt
@@ -0,0 +1,313 @@
1 ====================================================
2 IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT
3 ====================================================
4
5By: David Howells <dhowells@redhat.com>
6
7Contents:
8
9 (*) Representation
10
11 (*) Object management state machine.
12
13 - Provision of cpu time.
14 - Locking simplification.
15
16 (*) The set of states.
17
18 (*) The set of events.
19
20
21==============
22REPRESENTATION
23==============
24
25FS-Cache maintains an in-kernel representation of each object that a netfs is
26currently interested in. Such objects are represented by the fscache_cookie
27struct and are referred to as cookies.
28
29FS-Cache also maintains a separate in-kernel representation of the objects that
30a cache backend is currently actively caching. Such objects are represented by
31the fscache_object struct. The cache backends allocate these upon request, and
32are expected to embed them in their own representations. These are referred to
33as objects.
34
35There is a 1:N relationship between cookies and objects. A cookie may be
36represented by multiple objects - an index may exist in more than one cache -
37or even by no objects (it may not be cached).
38
39Furthermore, both cookies and objects are hierarchical. The two hierarchies
40correspond, but the cookies tree is a superset of the union of the object trees
41of multiple caches:
42
43 NETFS INDEX TREE : CACHE 1 : CACHE 2
44 : :
45 : +-----------+ :
46 +----------->| IObject | :
47 +-----------+ | : +-----------+ :
48 | ICookie |-------+ : | :
49 +-----------+ | : | : +-----------+
50 | +------------------------------>| IObject |
51 | : | : +-----------+
52 | : V : |
53 | : +-----------+ : |
54 V +----------->| IObject | : |
55 +-----------+ | : +-----------+ : |
56 | ICookie |-------+ : | : V
57 +-----------+ | : | : +-----------+
58 | +------------------------------>| IObject |
59 +-----+-----+ : | : +-----------+
60 | | : | : |
61 V | : V : |
62 +-----------+ | : +-----------+ : |
63 | ICookie |------------------------->| IObject | : |
64 +-----------+ | : +-----------+ : |
65 | V : | : V
66 | +-----------+ : | : +-----------+
67 | | ICookie |-------------------------------->| IObject |
68 | +-----------+ : | : +-----------+
69 V | : V : |
70 +-----------+ | : +-----------+ : |
71 | DCookie |------------------------->| DObject | : |
72 +-----------+ | : +-----------+ : |
73 | : : |
74 +-------+-------+ : : |
75 | | : : |
76 V V : : V
77 +-----------+ +-----------+ : : +-----------+
78 | DCookie | | DCookie |------------------------>| DObject |
79 +-----------+ +-----------+ : : +-----------+
80 : :
81
82In the above illustration, ICookie and IObject represent indices and DCookie
83and DObject represent data storage objects. Indices may have representation in
84multiple caches, but currently, non-index objects may not. Objects of any type
85may also be entirely unrepresented.
86
87As far as the netfs API goes, the netfs is only actually permitted to see
88pointers to the cookies. The cookies themselves and any objects attached to
89those cookies are hidden from it.
90
91
92===============================
93OBJECT MANAGEMENT STATE MACHINE
94===============================
95
96Within FS-Cache, each active object is managed by its own individual state
97machine. The state for an object is kept in the fscache_object struct, in
98object->state. A cookie may point to a set of objects that are in different
99states.
100
101Each state has an action associated with it that is invoked when the machine
102wakes up in that state. There are four logical sets of states:
103
104 (1) Preparation: states that wait for the parent objects to become ready. The
105 representations are hierarchical, and it is expected that an object must
106 be created or accessed with respect to its parent object.
107
108 (2) Initialisation: states that perform lookups in the cache and validate
109 what's found and that create on disk any missing metadata.
110
111 (3) Normal running: states that allow netfs operations on objects to proceed
112 and that update the state of objects.
113
114 (4) Termination: states that detach objects from their netfs cookies, that
115 delete objects from disk, that handle disk and system errors and that free
116 up in-memory resources.
117
118
119In most cases, transitioning between states is in response to signalled events.
120When a state has finished processing, it will usually set the mask of events in
121which it is interested (object->event_mask) and relinquish the worker thread.
122Then when an event is raised (by calling fscache_raise_event()), if the event
123is not masked, the object will be queued for processing (by calling
124fscache_enqueue_object()).
125
126
127PROVISION OF CPU TIME
128---------------------
129
130The work to be done by the various states is given CPU time by the threads of
131the slow work facility (see Documentation/slow-work.txt). This is used in
132preference to the workqueue facility because:
133
134 (1) Threads may be completely occupied for very long periods of time by a
135 particular work item. These state actions may be doing sequences of
136 synchronous, journalled disk accesses (lookup, mkdir, create, setxattr,
137 getxattr, truncate, unlink, rmdir, rename).
138
139 (2) Threads may do little actual work, but may rather spend a lot of time
140 sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded
141 workqueues don't necessarily have the right numbers of threads.
142
143
144LOCKING SIMPLIFICATION
145----------------------
146
147Because only one worker thread may be operating on any particular object's
148state machine at once, this simplifies the locking, particularly with respect
149to disconnecting the netfs's representation of a cache object (fscache_cookie)
150from the cache backend's representation (fscache_object) - which may be
151requested from either end.
152
153
154=================
155THE SET OF STATES
156=================
157
158The object state machine has a set of states that it can be in. There are
159preparation states in which the object sets itself up and waits for its parent
160object to transit to a state that allows access to its children:
161
162 (1) State FSCACHE_OBJECT_INIT.
163
164 Initialise the object and wait for the parent object to become active. In
165 the cache, it is expected that it will not be possible to look an object
166 up from the parent object, until that parent object itself has been looked
167 up.
168
169There are initialisation states in which the object sets itself up and accesses
170disk for the object metadata:
171
172 (2) State FSCACHE_OBJECT_LOOKING_UP.
173
174 Look up the object on disk, using the parent as a starting point.
175 FS-Cache expects the cache backend to probe the cache to see whether this
176 object is represented there, and if it is, to see if it's valid (coherency
177 management).
178
179 The cache should call fscache_object_lookup_negative() to indicate lookup
180 failure for whatever reason, and should call fscache_obtained_object() to
181 indicate success.
182
183 At the completion of lookup, FS-Cache will let the netfs go ahead with
184 read operations, no matter whether the file is yet cached. If not yet
185 cached, read operations will be immediately rejected with ENODATA until
186 the first known page is uncached - as to that point there can be no data
187 to be read out of the cache for that file that isn't currently also held
188 in the pagecache.
189
190 (3) State FSCACHE_OBJECT_CREATING.
191
192 Create an object on disk, using the parent as a starting point. This
193 happens if the lookup failed to find the object, or if the object's
194 coherency data indicated what's on disk is out of date. In this state,
195 FS-Cache expects the cache to create
196
197 The cache should call fscache_obtained_object() if creation completes
198 successfully, fscache_object_lookup_negative() otherwise.
199
200 At the completion of creation, FS-Cache will start processing write
201 operations the netfs has queued for an object. If creation failed, the
202 write ops will be transparently discarded, and nothing recorded in the
203 cache.
204
205There are some normal running states in which the object spends its time
206servicing netfs requests:
207
208 (4) State FSCACHE_OBJECT_AVAILABLE.
209
210 A transient state in which pending operations are started, child objects
211 are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary
212 lookup data is freed.
213
214 (5) State FSCACHE_OBJECT_ACTIVE.
215
216 The normal running state. In this state, requests the netfs makes will be
217 passed on to the cache.
218
219 (6) State FSCACHE_OBJECT_UPDATING.
220
221 The state machine comes here to update the object in the cache from the
222 netfs's records. This involves updating the auxiliary data that is used
223 to maintain coherency.
224
225And there are terminal states in which an object cleans itself up, deallocates
226memory and potentially deletes stuff from disk:
227
228 (7) State FSCACHE_OBJECT_LC_DYING.
229
230 The object comes here if it is dying because of a lookup or creation
231 error. This would be due to a disk error or system error of some sort.
232 Temporary data is cleaned up, and the parent is released.
233
234 (8) State FSCACHE_OBJECT_DYING.
235
236 The object comes here if it is dying due to an error, because its parent
237 cookie has been relinquished by the netfs or because the cache is being
238 withdrawn.
239
240 Any child objects waiting on this one are given CPU time so that they too
241 can destroy themselves. This object waits for all its children to go away
242 before advancing to the next state.
243
244 (9) State FSCACHE_OBJECT_ABORT_INIT.
245
246 The object comes to this state if it was waiting on its parent in
247 FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself
248 so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
249
250(10) State FSCACHE_OBJECT_RELEASING.
251(11) State FSCACHE_OBJECT_RECYCLING.
252
253 The object comes to one of these two states when dying once it is rid of
254 all its children, if it is dying because the netfs relinquished its
255 cookie. In the first state, the cached data is expected to persist, and
256 in the second it will be deleted.
257
258(12) State FSCACHE_OBJECT_WITHDRAWING.
259
260 The object transits to this state if the cache decides it wants to
261 withdraw the object from service, perhaps to make space, but also due to
262 error or just because the whole cache is being withdrawn.
263
264(13) State FSCACHE_OBJECT_DEAD.
265
266 The object transits to this state when the in-memory object record is
267 ready to be deleted. The object processor shouldn't ever see an object in
268 this state.
269
270
271THE SET OF EVENTS
272-----------------
273
274There are a number of events that can be raised to an object state machine:
275
276 (*) FSCACHE_OBJECT_EV_UPDATE
277
278 The netfs requested that an object be updated. The state machine will ask
279 the cache backend to update the object, and the cache backend will ask the
280 netfs for details of the change through its cookie definition ops.
281
282 (*) FSCACHE_OBJECT_EV_CLEARED
283
284 This is signalled in two circumstances:
285
286 (a) when an object's last child object is dropped and
287
288 (b) when the last operation outstanding on an object is completed.
289
290 This is used to proceed from the dying state.
291
292 (*) FSCACHE_OBJECT_EV_ERROR
293
294 This is signalled when an I/O error occurs during the processing of some
295 object.
296
297 (*) FSCACHE_OBJECT_EV_RELEASE
298 (*) FSCACHE_OBJECT_EV_RETIRE
299
300 These are signalled when the netfs relinquishes a cookie it was using.
301 The event selected depends on whether the netfs asks for the backing
302 object to be retired (deleted) or retained.
303
304 (*) FSCACHE_OBJECT_EV_WITHDRAW
305
306 This is signalled when the cache backend wants to withdraw an object.
307 This means that the object will have to be detached from the netfs's
308 cookie.
309
310Because the withdrawing releasing/retiring events are all handled by the object
311state machine, it doesn't matter if there's a collision with both ends trying
312to sever the connection at the same time. The state machine can just pick
313which one it wants to honour, and that effects the other.
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
new file mode 100644
index 000000000000..b6b070c57cbf
--- /dev/null
+++ b/Documentation/filesystems/caching/operations.txt
@@ -0,0 +1,213 @@
1 ================================
2 ASYNCHRONOUS OPERATIONS HANDLING
3 ================================
4
5By: David Howells <dhowells@redhat.com>
6
7Contents:
8
9 (*) Overview.
10
11 (*) Operation record initialisation.
12
13 (*) Parameters.
14
15 (*) Procedure.
16
17 (*) Asynchronous callback.
18
19
20========
21OVERVIEW
22========
23
24FS-Cache has an asynchronous operations handling facility that it uses for its
25data storage and retrieval routines. Its operations are represented by
26fscache_operation structs, though these are usually embedded into some other
27structure.
28
29This facility is available to and expected to be be used by the cache backends,
30and FS-Cache will create operations and pass them off to the appropriate cache
31backend for completion.
32
33To make use of this facility, <linux/fscache-cache.h> should be #included.
34
35
36===============================
37OPERATION RECORD INITIALISATION
38===============================
39
40An operation is recorded in an fscache_operation struct:
41
42 struct fscache_operation {
43 union {
44 struct work_struct fast_work;
45 struct slow_work slow_work;
46 };
47 unsigned long flags;
48 fscache_operation_processor_t processor;
49 ...
50 };
51
52Someone wanting to issue an operation should allocate something with this
53struct embedded in it. They should initialise it by calling:
54
55 void fscache_operation_init(struct fscache_operation *op,
56 fscache_operation_release_t release);
57
58with the operation to be initialised and the release function to use.
59
60The op->flags parameter should be set to indicate the CPU time provision and
61the exclusivity (see the Parameters section).
62
63The op->fast_work, op->slow_work and op->processor flags should be set as
64appropriate for the CPU time provision (see the Parameters section).
65
66FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the
67operation and waited for afterwards.
68
69
70==========
71PARAMETERS
72==========
73
74There are a number of parameters that can be set in the operation record's flag
75parameter. There are three options for the provision of CPU time in these
76operations:
77
78 (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread
79 may decide it wants to handle an operation itself without deferring it to
80 another thread.
81
82 This is, for example, used in read operations for calling readpages() on
83 the backing filesystem in CacheFiles. Although readpages() does an
84 asynchronous data fetch, the determination of whether pages exist is done
85 synchronously - and the netfs does not proceed until this has been
86 determined.
87
88 If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags
89 before submitting the operation, and the operating thread must wait for it
90 to be cleared before proceeding:
91
92 wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
93 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
94
95
96 (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it
97 will be given to keventd to process. Such an operation is not permitted
98 to sleep on I/O.
99
100 This is, for example, used by CacheFiles to copy data from a backing fs
101 page to a netfs page after the backing fs has read the page in.
102
103 If this option is used, op->fast_work and op->processor must be
104 initialised before submitting the operation:
105
106 INIT_WORK(&op->fast_work, do_some_work);
107
108
109 (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it
110 will be given to the slow work facility to process. Such an operation is
111 permitted to sleep on I/O.
112
113 This is, for example, used by FS-Cache to handle background writes of
114 pages that have just been fetched from a remote server.
115
116 If this option is used, op->slow_work and op->processor must be
117 initialised before submitting the operation:
118
119 fscache_operation_init_slow(op, processor)
120
121
122Furthermore, operations may be one of two types:
123
124 (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in
125 conjunction with any other operation on the object being operated upon.
126
127 An example of this is the attribute change operation, in which the file
128 being written to may need truncation.
129
130 (2) Shareable. Operations of this type may be running simultaneously. It's
131 up to the operation implementation to prevent interference between other
132 operations running at the same time.
133
134
135=========
136PROCEDURE
137=========
138
139Operations are used through the following procedure:
140
141 (1) The submitting thread must allocate the operation and initialise it
142 itself. Normally this would be part of a more specific structure with the
143 generic op embedded within.
144
145 (2) The submitting thread must then submit the operation for processing using
146 one of the following two functions:
147
148 int fscache_submit_op(struct fscache_object *object,
149 struct fscache_operation *op);
150
151 int fscache_submit_exclusive_op(struct fscache_object *object,
152 struct fscache_operation *op);
153
154 The first function should be used to submit non-exclusive ops and the
155 second to submit exclusive ones. The caller must still set the
156 FSCACHE_OP_EXCLUSIVE flag.
157
158 If successful, both functions will assign the operation to the specified
159 object and return 0. -ENOBUFS will be returned if the object specified is
160 permanently unavailable.
161
162 The operation manager will defer operations on an object that is still
163 undergoing lookup or creation. The operation will also be deferred if an
164 operation of conflicting exclusivity is in progress on the object.
165
166 If the operation is asynchronous, the manager will retain a reference to
167 it, so the caller should put their reference to it by passing it to:
168
169 void fscache_put_operation(struct fscache_operation *op);
170
171 (3) If the submitting thread wants to do the work itself, and has marked the
172 operation with FSCACHE_OP_MYTHREAD, then it should monitor
173 FSCACHE_OP_WAITING as described above and check the state of the object if
174 necessary (the object might have died whilst the thread was waiting).
175
176 When it has finished doing its processing, it should call
177 fscache_put_operation() on it.
178
179 (4) The operation holds an effective lock upon the object, preventing other
180 exclusive ops conflicting until it is released. The operation can be
181 enqueued for further immediate asynchronous processing by adjusting the
182 CPU time provisioning option if necessary, eg:
183
184 op->flags &= ~FSCACHE_OP_TYPE;
185 op->flags |= ~FSCACHE_OP_FAST;
186
187 and calling:
188
189 void fscache_enqueue_operation(struct fscache_operation *op)
190
191 This can be used to allow other things to have use of the worker thread
192 pools.
193
194
195=====================
196ASYNCHRONOUS CALLBACK
197=====================
198
199When used in asynchronous mode, the worker thread pool will invoke the
200processor method with a pointer to the operation. This should then get at the
201container struct by using container_of():
202
203 static void fscache_write_op(struct fscache_operation *_op)
204 {
205 struct fscache_storage *op =
206 container_of(_op, struct fscache_storage, op);
207 ...
208 }
209
210The caller holds a reference on the operation, and will invoke
211fscache_put_operation() when the processor function returns. The processor
212function is at liberty to call fscache_enqueue_operation() or to take extra
213references.
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
new file mode 100644
index 000000000000..ebc50f808ea4
--- /dev/null
+++ b/Documentation/slow-work.txt
@@ -0,0 +1,174 @@
1 ====================================
2 SLOW WORK ITEM EXECUTION THREAD POOL
3 ====================================
4
5By: David Howells <dhowells@redhat.com>
6
7The slow work item execution thread pool is a pool of threads for performing
8things that take a relatively long time, such as making mkdir calls.
9Typically, when processing something, these items will spend a lot of time
10blocking a thread on I/O, thus making that thread unavailable for doing other
11work.
12
13The standard workqueue model is unsuitable for this class of work item as that
14limits the owner to a single thread or a single thread per CPU. For some
15tasks, however, more threads - or fewer - are required.
16
17There is just one pool per system. It contains no threads unless something
18wants to use it - and that something must register its interest first. When
19the pool is active, the number of threads it contains is dynamic, varying
20between a maximum and minimum setting, depending on the load.
21
22
23====================
24CLASSES OF WORK ITEM
25====================
26
27This pool support two classes of work items:
28
29 (*) Slow work items.
30
31 (*) Very slow work items.
32
33The former are expected to finish much quicker than the latter.
34
35An operation of the very slow class may do a batch combination of several
36lookups, mkdirs, and a create for instance.
37
38An operation of the ordinarily slow class may, for example, write stuff or
39expand files, provided the time taken to do so isn't too long.
40
41Operations of both types may sleep during execution, thus tying up the thread
42loaned to it.
43
44
45THREAD-TO-CLASS ALLOCATION
46--------------------------
47
48Not all the threads in the pool are available to work on very slow work items.
49The number will be between one and one fewer than the number of active threads.
50This is configurable (see the "Pool Configuration" section).
51
52All the threads are available to work on ordinarily slow work items, but a
53percentage of the threads will prefer to work on very slow work items.
54
55The configuration ensures that at least one thread will be available to work on
56very slow work items, and at least one thread will be available that won't work
57on very slow work items at all.
58
59
60=====================
61USING SLOW WORK ITEMS
62=====================
63
64Firstly, a module or subsystem wanting to make use of slow work items must
65register its interest:
66
67 int ret = slow_work_register_user();
68
69This will return 0 if successful, or a -ve error upon failure.
70
71
72Slow work items may then be set up by:
73
74 (1) Declaring a slow_work struct type variable:
75
76 #include <linux/slow-work.h>
77
78 struct slow_work myitem;
79
80 (2) Declaring the operations to be used for this item:
81
82 struct slow_work_ops myitem_ops = {
83 .get_ref = myitem_get_ref,
84 .put_ref = myitem_put_ref,
85 .execute = myitem_execute,
86 };
87
88 [*] For a description of the ops, see section "Item Operations".
89
90 (3) Initialising the item:
91
92 slow_work_init(&myitem, &myitem_ops);
93
94 or:
95
96 vslow_work_init(&myitem, &myitem_ops);
97
98 depending on its class.
99
100A suitably set up work item can then be enqueued for processing:
101
102 int ret = slow_work_enqueue(&myitem);
103
104This will return a -ve error if the thread pool is unable to gain a reference
105on the item, 0 otherwise.
106
107
108The items are reference counted, so there ought to be no need for a flush
109operation. When all a module's slow work items have been processed, and the
110module has no further interest in the facility, it should unregister its
111interest:
112
113 slow_work_unregister_user();
114
115
116===============
117ITEM OPERATIONS
118===============
119
120Each work item requires a table of operations of type struct slow_work_ops.
121All members are required:
122
123 (*) Get a reference on an item:
124
125 int (*get_ref)(struct slow_work *work);
126
127 This allows the thread pool to attempt to pin an item by getting a
128 reference on it. This function should return 0 if the reference was
129 granted, or a -ve error otherwise. If an error is returned,
130 slow_work_enqueue() will fail.
131
132 The reference is held whilst the item is queued and whilst it is being
133 executed. The item may then be requeued with the same reference held, or
134 the reference will be released.
135
136 (*) Release a reference on an item:
137
138 void (*put_ref)(struct slow_work *work);
139
140 This allows the thread pool to unpin an item by releasing the reference on
141 it. The thread pool will not touch the item again once this has been
142 called.
143
144 (*) Execute an item:
145
146 void (*execute)(struct slow_work *work);
147
148 This should perform the work required of the item. It may sleep, it may
149 perform disk I/O and it may wait for locks.
150
151
152==================
153POOL CONFIGURATION
154==================
155
156The slow-work thread pool has a number of configurables:
157
158 (*) /proc/sys/kernel/slow-work/min-threads
159
160 The minimum number of threads that should be in the pool whilst it is in
161 use. This may be anywhere between 2 and max-threads.
162
163 (*) /proc/sys/kernel/slow-work/max-threads
164
165 The maximum number of threads that should in the pool. This may be
166 anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater.
167
168 (*) /proc/sys/kernel/slow-work/vslow-percentage
169
170 The percentage of active threads in the pool that may be used to execute
171 very slow work items. This may be between 1 and 99. The resultant number
172 is bounded to between 1 and one fewer than the number of active threads.
173 This ensures there is always at least one thread that can process very
174 slow work items, and always at least one thread that won't.
diff --git a/fs/Kconfig b/fs/Kconfig
index ae3b34a2ea69..86b203fc3c56 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -66,6 +66,13 @@ config GENERIC_ACL
66 bool 66 bool
67 select FS_POSIX_ACL 67 select FS_POSIX_ACL
68 68
69menu "Caches"
70
71source "fs/fscache/Kconfig"
72source "fs/cachefiles/Kconfig"
73
74endmenu
75
69if BLOCK 76if BLOCK
70menu "CD-ROM/DVD Filesystems" 77menu "CD-ROM/DVD Filesystems"
71 78
diff --git a/fs/Makefile b/fs/Makefile
index 15f73014a208..70b2aed87133 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o
63obj-$(CONFIG_DLM) += dlm/ 63obj-$(CONFIG_DLM) += dlm/
64 64
65# Do not add any filesystems before this line 65# Do not add any filesystems before this line
66obj-$(CONFIG_FSCACHE) += fscache/
66obj-$(CONFIG_REISERFS_FS) += reiserfs/ 67obj-$(CONFIG_REISERFS_FS) += reiserfs/
67obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 68obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
68obj-$(CONFIG_EXT2_FS) += ext2/ 69obj-$(CONFIG_EXT2_FS) += ext2/
@@ -116,6 +117,7 @@ obj-$(CONFIG_AFS_FS) += afs/
116obj-$(CONFIG_BEFS_FS) += befs/ 117obj-$(CONFIG_BEFS_FS) += befs/
117obj-$(CONFIG_HOSTFS) += hostfs/ 118obj-$(CONFIG_HOSTFS) += hostfs/
118obj-$(CONFIG_HPPFS) += hppfs/ 119obj-$(CONFIG_HPPFS) += hppfs/
120obj-$(CONFIG_CACHEFILES) += cachefiles/
119obj-$(CONFIG_DEBUG_FS) += debugfs/ 121obj-$(CONFIG_DEBUG_FS) += debugfs/
120obj-$(CONFIG_OCFS2_FS) += ocfs2/ 122obj-$(CONFIG_OCFS2_FS) += ocfs2/
121obj-$(CONFIG_BTRFS_FS) += btrfs/ 123obj-$(CONFIG_BTRFS_FS) += btrfs/
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index e7b522fe15e1..5c4e61d3c772 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -19,3 +19,11 @@ config AFS_DEBUG
19 See <file:Documentation/filesystems/afs.txt> for more information. 19 See <file:Documentation/filesystems/afs.txt> for more information.
20 20
21 If unsure, say N. 21 If unsure, say N.
22
23config AFS_FSCACHE
24 bool "Provide AFS client caching support (EXPERIMENTAL)"
25 depends on EXPERIMENTAL
26 depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y
27 help
28 Say Y here if you want AFS data to be cached locally on disk through
29 the generic filesystem cache manager
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index a66671082cfb..4f64b95d57bd 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -2,7 +2,10 @@
2# Makefile for Red Hat Linux AFS client. 2# Makefile for Red Hat Linux AFS client.
3# 3#
4 4
5afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o
6
5kafs-objs := \ 7kafs-objs := \
8 $(afs-cache-y) \
6 callback.o \ 9 callback.o \
7 cell.o \ 10 cell.o \
8 cmservice.o \ 11 cmservice.o \
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index de0d7de69edc..e2b1d3f16519 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -1,6 +1,6 @@
1/* AFS caching stuff 1/* AFS caching stuff
2 * 2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,248 +9,395 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#ifdef AFS_CACHING_SUPPORT 12#include <linux/slab.h>
13static cachefs_match_val_t afs_cell_cache_match(void *target, 13#include <linux/sched.h>
14 const void *entry); 14#include "internal.h"
15static void afs_cell_cache_update(void *source, void *entry); 15
16 16static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
17struct cachefs_index_def afs_cache_cell_index_def = { 17 void *buffer, uint16_t buflen);
18 .name = "cell_ix", 18static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
19 .data_size = sizeof(struct afs_cache_cell), 19 void *buffer, uint16_t buflen);
20 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, 20static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
21 .match = afs_cell_cache_match, 21 const void *buffer,
22 .update = afs_cell_cache_update, 22 uint16_t buflen);
23
24static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
25 void *buffer, uint16_t buflen);
26static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
27 void *buffer, uint16_t buflen);
28static enum fscache_checkaux afs_vlocation_cache_check_aux(
29 void *cookie_netfs_data, const void *buffer, uint16_t buflen);
30
31static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
32 void *buffer, uint16_t buflen);
33
34static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
35 void *buffer, uint16_t buflen);
36static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
37 uint64_t *size);
38static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
39 void *buffer, uint16_t buflen);
40static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
41 const void *buffer,
42 uint16_t buflen);
43static void afs_vnode_cache_now_uncached(void *cookie_netfs_data);
44
45struct fscache_netfs afs_cache_netfs = {
46 .name = "afs",
47 .version = 0,
48};
49
50struct fscache_cookie_def afs_cell_cache_index_def = {
51 .name = "AFS.cell",
52 .type = FSCACHE_COOKIE_TYPE_INDEX,
53 .get_key = afs_cell_cache_get_key,
54 .get_aux = afs_cell_cache_get_aux,
55 .check_aux = afs_cell_cache_check_aux,
56};
57
58struct fscache_cookie_def afs_vlocation_cache_index_def = {
59 .name = "AFS.vldb",
60 .type = FSCACHE_COOKIE_TYPE_INDEX,
61 .get_key = afs_vlocation_cache_get_key,
62 .get_aux = afs_vlocation_cache_get_aux,
63 .check_aux = afs_vlocation_cache_check_aux,
64};
65
66struct fscache_cookie_def afs_volume_cache_index_def = {
67 .name = "AFS.volume",
68 .type = FSCACHE_COOKIE_TYPE_INDEX,
69 .get_key = afs_volume_cache_get_key,
70};
71
72struct fscache_cookie_def afs_vnode_cache_index_def = {
73 .name = "AFS.vnode",
74 .type = FSCACHE_COOKIE_TYPE_DATAFILE,
75 .get_key = afs_vnode_cache_get_key,
76 .get_attr = afs_vnode_cache_get_attr,
77 .get_aux = afs_vnode_cache_get_aux,
78 .check_aux = afs_vnode_cache_check_aux,
79 .now_uncached = afs_vnode_cache_now_uncached,
23}; 80};
24#endif
25 81
26/* 82/*
27 * match a cell record obtained from the cache 83 * set the key for the index entry
28 */ 84 */
29#ifdef AFS_CACHING_SUPPORT 85static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data,
30static cachefs_match_val_t afs_cell_cache_match(void *target, 86 void *buffer, uint16_t bufmax)
31 const void *entry)
32{ 87{
33 const struct afs_cache_cell *ccell = entry; 88 const struct afs_cell *cell = cookie_netfs_data;
34 struct afs_cell *cell = target; 89 uint16_t klen;
35 90
36 _enter("{%s},{%s}", ccell->name, cell->name); 91 _enter("%p,%p,%u", cell, buffer, bufmax);
37 92
38 if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { 93 klen = strlen(cell->name);
39 _leave(" = SUCCESS"); 94 if (klen > bufmax)
40 return CACHEFS_MATCH_SUCCESS; 95 return 0;
41 }
42 96
43 _leave(" = FAILED"); 97 memcpy(buffer, cell->name, klen);
44 return CACHEFS_MATCH_FAILED; 98 return klen;
45} 99}
46#endif
47 100
48/* 101/*
49 * update a cell record in the cache 102 * provide new auxilliary cache data
50 */ 103 */
51#ifdef AFS_CACHING_SUPPORT 104static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data,
52static void afs_cell_cache_update(void *source, void *entry) 105 void *buffer, uint16_t bufmax)
53{ 106{
54 struct afs_cache_cell *ccell = entry; 107 const struct afs_cell *cell = cookie_netfs_data;
55 struct afs_cell *cell = source; 108 uint16_t dlen;
56 109
57 _enter("%p,%p", source, entry); 110 _enter("%p,%p,%u", cell, buffer, bufmax);
58 111
59 strncpy(ccell->name, cell->name, sizeof(ccell->name)); 112 dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]);
113 dlen = min(dlen, bufmax);
114 dlen &= ~(sizeof(cell->vl_addrs[0]) - 1);
60 115
61 memcpy(ccell->vl_servers, 116 memcpy(buffer, cell->vl_addrs, dlen);
62 cell->vl_addrs, 117 return dlen;
63 min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); 118}
64 119
120/*
121 * check that the auxilliary data indicates that the entry is still valid
122 */
123static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data,
124 const void *buffer,
125 uint16_t buflen)
126{
127 _leave(" = OKAY");
128 return FSCACHE_CHECKAUX_OKAY;
65} 129}
66#endif
67
68#ifdef AFS_CACHING_SUPPORT
69static cachefs_match_val_t afs_vlocation_cache_match(void *target,
70 const void *entry);
71static void afs_vlocation_cache_update(void *source, void *entry);
72
73struct cachefs_index_def afs_vlocation_cache_index_def = {
74 .name = "vldb",
75 .data_size = sizeof(struct afs_cache_vlocation),
76 .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 },
77 .match = afs_vlocation_cache_match,
78 .update = afs_vlocation_cache_update,
79};
80#endif
81 130
131/*****************************************************************************/
82/* 132/*
83 * match a VLDB record stored in the cache 133 * set the key for the index entry
84 * - may also load target from entry
85 */ 134 */
86#ifdef AFS_CACHING_SUPPORT 135static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data,
87static cachefs_match_val_t afs_vlocation_cache_match(void *target, 136 void *buffer, uint16_t bufmax)
88 const void *entry)
89{ 137{
90 const struct afs_cache_vlocation *vldb = entry; 138 const struct afs_vlocation *vlocation = cookie_netfs_data;
91 struct afs_vlocation *vlocation = target; 139 uint16_t klen;
140
141 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
142
143 klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name));
144 if (klen > bufmax)
145 return 0;
92 146
93 _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); 147 memcpy(buffer, vlocation->vldb.name, klen);
94 148
95 if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 149 _leave(" = %u", klen);
96 ) { 150 return klen;
97 if (!vlocation->valid || 151}
98 vlocation->vldb.rtime == vldb->rtime 152
153/*
154 * provide new auxilliary cache data
155 */
156static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data,
157 void *buffer, uint16_t bufmax)
158{
159 const struct afs_vlocation *vlocation = cookie_netfs_data;
160 uint16_t dlen;
161
162 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax);
163
164 dlen = sizeof(struct afs_cache_vlocation);
165 dlen -= offsetof(struct afs_cache_vlocation, nservers);
166 if (dlen > bufmax)
167 return 0;
168
169 memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen);
170
171 _leave(" = %u", dlen);
172 return dlen;
173}
174
175/*
176 * check that the auxilliary data indicates that the entry is still valid
177 */
178static
179enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data,
180 const void *buffer,
181 uint16_t buflen)
182{
183 const struct afs_cache_vlocation *cvldb;
184 struct afs_vlocation *vlocation = cookie_netfs_data;
185 uint16_t dlen;
186
187 _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen);
188
189 /* check the size of the data is what we're expecting */
190 dlen = sizeof(struct afs_cache_vlocation);
191 dlen -= offsetof(struct afs_cache_vlocation, nservers);
192 if (dlen != buflen)
193 return FSCACHE_CHECKAUX_OBSOLETE;
194
195 cvldb = container_of(buffer, struct afs_cache_vlocation, nservers);
196
197 /* if what's on disk is more valid than what's in memory, then use the
198 * VL record from the cache */
199 if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) {
200 memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen);
201 vlocation->valid = 1;
202 _leave(" = SUCCESS [c->m]");
203 return FSCACHE_CHECKAUX_OKAY;
204 }
205
206 /* need to update the cache if the cached info differs */
207 if (memcmp(&vlocation->vldb, buffer, dlen) != 0) {
208 /* delete if the volume IDs for this name differ */
209 if (memcmp(&vlocation->vldb.vid, &cvldb->vid,
210 sizeof(cvldb->vid)) != 0
99 ) { 211 ) {
100 vlocation->vldb = *vldb; 212 _leave(" = OBSOLETE");
101 vlocation->valid = 1; 213 return FSCACHE_CHECKAUX_OBSOLETE;
102 _leave(" = SUCCESS [c->m]");
103 return CACHEFS_MATCH_SUCCESS;
104 } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) {
105 /* delete if VIDs for this name differ */
106 if (memcmp(&vlocation->vldb.vid,
107 &vldb->vid,
108 sizeof(vldb->vid)) != 0) {
109 _leave(" = DELETE");
110 return CACHEFS_MATCH_SUCCESS_DELETE;
111 }
112
113 _leave(" = UPDATE");
114 return CACHEFS_MATCH_SUCCESS_UPDATE;
115 } else {
116 _leave(" = SUCCESS");
117 return CACHEFS_MATCH_SUCCESS;
118 } 214 }
215
216 _leave(" = UPDATE");
217 return FSCACHE_CHECKAUX_NEEDS_UPDATE;
119 } 218 }
120 219
121 _leave(" = FAILED"); 220 _leave(" = OKAY");
122 return CACHEFS_MATCH_FAILED; 221 return FSCACHE_CHECKAUX_OKAY;
123} 222}
124#endif
125 223
224/*****************************************************************************/
126/* 225/*
127 * update a VLDB record stored in the cache 226 * set the key for the volume index entry
128 */ 227 */
129#ifdef AFS_CACHING_SUPPORT 228static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data,
130static void afs_vlocation_cache_update(void *source, void *entry) 229 void *buffer, uint16_t bufmax)
131{ 230{
132 struct afs_cache_vlocation *vldb = entry; 231 const struct afs_volume *volume = cookie_netfs_data;
133 struct afs_vlocation *vlocation = source; 232 uint16_t klen;
233
234 _enter("{%u},%p,%u", volume->type, buffer, bufmax);
235
236 klen = sizeof(volume->type);
237 if (klen > bufmax)
238 return 0;
134 239
135 _enter(""); 240 memcpy(buffer, &volume->type, sizeof(volume->type));
241
242 _leave(" = %u", klen);
243 return klen;
136 244
137 *vldb = vlocation->vldb;
138} 245}
139#endif
140
141#ifdef AFS_CACHING_SUPPORT
142static cachefs_match_val_t afs_volume_cache_match(void *target,
143 const void *entry);
144static void afs_volume_cache_update(void *source, void *entry);
145
146struct cachefs_index_def afs_volume_cache_index_def = {
147 .name = "volume",
148 .data_size = sizeof(struct afs_cache_vhash),
149 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 },
150 .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 },
151 .match = afs_volume_cache_match,
152 .update = afs_volume_cache_update,
153};
154#endif
155 246
247/*****************************************************************************/
156/* 248/*
157 * match a volume hash record stored in the cache 249 * set the key for the index entry
158 */ 250 */
159#ifdef AFS_CACHING_SUPPORT 251static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data,
160static cachefs_match_val_t afs_volume_cache_match(void *target, 252 void *buffer, uint16_t bufmax)
161 const void *entry)
162{ 253{
163 const struct afs_cache_vhash *vhash = entry; 254 const struct afs_vnode *vnode = cookie_netfs_data;
164 struct afs_volume *volume = target; 255 uint16_t klen;
165 256
166 _enter("{%u},{%u}", volume->type, vhash->vtype); 257 _enter("{%x,%x,%llx},%p,%u",
258 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
259 buffer, bufmax);
167 260
168 if (volume->type == vhash->vtype) { 261 klen = sizeof(vnode->fid.vnode);
169 _leave(" = SUCCESS"); 262 if (klen > bufmax)
170 return CACHEFS_MATCH_SUCCESS; 263 return 0;
171 } 264
265 memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode));
172 266
173 _leave(" = FAILED"); 267 _leave(" = %u", klen);
174 return CACHEFS_MATCH_FAILED; 268 return klen;
175} 269}
176#endif
177 270
178/* 271/*
179 * update a volume hash record stored in the cache 272 * provide updated file attributes
180 */ 273 */
181#ifdef AFS_CACHING_SUPPORT 274static void afs_vnode_cache_get_attr(const void *cookie_netfs_data,
182static void afs_volume_cache_update(void *source, void *entry) 275 uint64_t *size)
183{ 276{
184 struct afs_cache_vhash *vhash = entry; 277 const struct afs_vnode *vnode = cookie_netfs_data;
185 struct afs_volume *volume = source;
186 278
187 _enter(""); 279 _enter("{%x,%x,%llx},",
280 vnode->fid.vnode, vnode->fid.unique,
281 vnode->status.data_version);
188 282
189 vhash->vtype = volume->type; 283 *size = vnode->status.size;
190} 284}
191#endif
192
193#ifdef AFS_CACHING_SUPPORT
194static cachefs_match_val_t afs_vnode_cache_match(void *target,
195 const void *entry);
196static void afs_vnode_cache_update(void *source, void *entry);
197
198struct cachefs_index_def afs_vnode_cache_index_def = {
199 .name = "vnode",
200 .data_size = sizeof(struct afs_cache_vnode),
201 .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 },
202 .match = afs_vnode_cache_match,
203 .update = afs_vnode_cache_update,
204};
205#endif
206 285
207/* 286/*
208 * match a vnode record stored in the cache 287 * provide new auxilliary cache data
288 */
289static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data,
290 void *buffer, uint16_t bufmax)
291{
292 const struct afs_vnode *vnode = cookie_netfs_data;
293 uint16_t dlen;
294
295 _enter("{%x,%x,%Lx},%p,%u",
296 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
297 buffer, bufmax);
298
299 dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
300 if (dlen > bufmax)
301 return 0;
302
303 memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique));
304 buffer += sizeof(vnode->fid.unique);
305 memcpy(buffer, &vnode->status.data_version,
306 sizeof(vnode->status.data_version));
307
308 _leave(" = %u", dlen);
309 return dlen;
310}
311
312/*
313 * check that the auxilliary data indicates that the entry is still valid
209 */ 314 */
210#ifdef AFS_CACHING_SUPPORT 315static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
211static cachefs_match_val_t afs_vnode_cache_match(void *target, 316 const void *buffer,
212 const void *entry) 317 uint16_t buflen)
213{ 318{
214 const struct afs_cache_vnode *cvnode = entry; 319 struct afs_vnode *vnode = cookie_netfs_data;
215 struct afs_vnode *vnode = target; 320 uint16_t dlen;
216 321
217 _enter("{%x,%x,%Lx},{%x,%x,%Lx}", 322 _enter("{%x,%x,%llx},%p,%u",
218 vnode->fid.vnode, 323 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
219 vnode->fid.unique, 324 buffer, buflen);
220 vnode->status.version, 325
221 cvnode->vnode_id, 326 /* check the size of the data is what we're expecting */
222 cvnode->vnode_unique, 327 dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version);
223 cvnode->data_version); 328 if (dlen != buflen) {
224 329 _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen);
225 if (vnode->fid.vnode != cvnode->vnode_id) { 330 return FSCACHE_CHECKAUX_OBSOLETE;
226 _leave(" = FAILED");
227 return CACHEFS_MATCH_FAILED;
228 } 331 }
229 332
230 if (vnode->fid.unique != cvnode->vnode_unique || 333 if (memcmp(buffer,
231 vnode->status.version != cvnode->data_version) { 334 &vnode->fid.unique,
232 _leave(" = DELETE"); 335 sizeof(vnode->fid.unique)
233 return CACHEFS_MATCH_SUCCESS_DELETE; 336 ) != 0) {
337 unsigned unique;
338
339 memcpy(&unique, buffer, sizeof(unique));
340
341 _leave(" = OBSOLETE [uniq %x != %x]",
342 unique, vnode->fid.unique);
343 return FSCACHE_CHECKAUX_OBSOLETE;
344 }
345
346 if (memcmp(buffer + sizeof(vnode->fid.unique),
347 &vnode->status.data_version,
348 sizeof(vnode->status.data_version)
349 ) != 0) {
350 afs_dataversion_t version;
351
352 memcpy(&version, buffer + sizeof(vnode->fid.unique),
353 sizeof(version));
354
355 _leave(" = OBSOLETE [vers %llx != %llx]",
356 version, vnode->status.data_version);
357 return FSCACHE_CHECKAUX_OBSOLETE;
234 } 358 }
235 359
236 _leave(" = SUCCESS"); 360 _leave(" = SUCCESS");
237 return CACHEFS_MATCH_SUCCESS; 361 return FSCACHE_CHECKAUX_OKAY;
238} 362}
239#endif
240 363
241/* 364/*
242 * update a vnode record stored in the cache 365 * indication the cookie is no longer uncached
366 * - this function is called when the backing store currently caching a cookie
367 * is removed
368 * - the netfs should use this to clean up any markers indicating cached pages
369 * - this is mandatory for any object that may have data
243 */ 370 */
244#ifdef AFS_CACHING_SUPPORT 371static void afs_vnode_cache_now_uncached(void *cookie_netfs_data)
245static void afs_vnode_cache_update(void *source, void *entry)
246{ 372{
247 struct afs_cache_vnode *cvnode = entry; 373 struct afs_vnode *vnode = cookie_netfs_data;
248 struct afs_vnode *vnode = source; 374 struct pagevec pvec;
375 pgoff_t first;
376 int loop, nr_pages;
377
378 _enter("{%x,%x,%Lx}",
379 vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version);
380
381 pagevec_init(&pvec, 0);
382 first = 0;
383
384 for (;;) {
385 /* grab a bunch of pages to clean */
386 nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping,
387 first,
388 PAGEVEC_SIZE - pagevec_count(&pvec));
389 if (!nr_pages)
390 break;
249 391
250 _enter(""); 392 for (loop = 0; loop < nr_pages; loop++)
393 ClearPageFsCache(pvec.pages[loop]);
394
395 first = pvec.pages[nr_pages - 1]->index + 1;
396
397 pvec.nr = nr_pages;
398 pagevec_release(&pvec);
399 cond_resched();
400 }
251 401
252 cvnode->vnode_id = vnode->fid.vnode; 402 _leave("");
253 cvnode->vnode_unique = vnode->fid.unique;
254 cvnode->data_version = vnode->status.version;
255} 403}
256#endif
diff --git a/fs/afs/cache.h b/fs/afs/cache.h
index 36a3642cf90e..5c4f6b499e90 100644
--- a/fs/afs/cache.h
+++ b/fs/afs/cache.h
@@ -1,6 +1,6 @@
1/* AFS local cache management interface 1/* AFS local cache management interface
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -9,15 +9,4 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#ifndef AFS_CACHE_H 12#include <linux/fscache.h>
13#define AFS_CACHE_H
14
15#undef AFS_CACHING_SUPPORT
16
17#include <linux/mm.h>
18#ifdef AFS_CACHING_SUPPORT
19#include <linux/cachefs.h>
20#endif
21#include "types.h"
22
23#endif /* AFS_CACHE_H */
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 5e1df14e16b1..e19c13f059ed 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -147,12 +147,11 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist)
147 if (ret < 0) 147 if (ret < 0)
148 goto error; 148 goto error;
149 149
150#ifdef AFS_CACHING_SUPPORT 150#ifdef CONFIG_AFS_FSCACHE
151 /* put it up for caching */ 151 /* put it up for caching (this never returns an error) */
152 cachefs_acquire_cookie(afs_cache_netfs.primary_index, 152 cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index,
153 &afs_vlocation_cache_index_def, 153 &afs_cell_cache_index_def,
154 cell, 154 cell);
155 &cell->cache);
156#endif 155#endif
157 156
158 /* add to the cell lists */ 157 /* add to the cell lists */
@@ -362,10 +361,9 @@ static void afs_cell_destroy(struct afs_cell *cell)
362 list_del_init(&cell->proc_link); 361 list_del_init(&cell->proc_link);
363 up_write(&afs_proc_cells_sem); 362 up_write(&afs_proc_cells_sem);
364 363
365#ifdef AFS_CACHING_SUPPORT 364#ifdef CONFIG_AFS_FSCACHE
366 cachefs_relinquish_cookie(cell->cache, 0); 365 fscache_relinquish_cookie(cell->cache, 0);
367#endif 366#endif
368
369 key_put(cell->anonymous_key); 367 key_put(cell->anonymous_key);
370 kfree(cell); 368 kfree(cell);
371 369
diff --git a/fs/afs/file.c b/fs/afs/file.c
index a3901769a96c..7a1d942ef68d 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -23,6 +23,9 @@ static void afs_invalidatepage(struct page *page, unsigned long offset);
23static int afs_releasepage(struct page *page, gfp_t gfp_flags); 23static int afs_releasepage(struct page *page, gfp_t gfp_flags);
24static int afs_launder_page(struct page *page); 24static int afs_launder_page(struct page *page);
25 25
26static int afs_readpages(struct file *filp, struct address_space *mapping,
27 struct list_head *pages, unsigned nr_pages);
28
26const struct file_operations afs_file_operations = { 29const struct file_operations afs_file_operations = {
27 .open = afs_open, 30 .open = afs_open,
28 .release = afs_release, 31 .release = afs_release,
@@ -46,6 +49,7 @@ const struct inode_operations afs_file_inode_operations = {
46 49
47const struct address_space_operations afs_fs_aops = { 50const struct address_space_operations afs_fs_aops = {
48 .readpage = afs_readpage, 51 .readpage = afs_readpage,
52 .readpages = afs_readpages,
49 .set_page_dirty = afs_set_page_dirty, 53 .set_page_dirty = afs_set_page_dirty,
50 .launder_page = afs_launder_page, 54 .launder_page = afs_launder_page,
51 .releasepage = afs_releasepage, 55 .releasepage = afs_releasepage,
@@ -101,37 +105,18 @@ int afs_release(struct inode *inode, struct file *file)
101/* 105/*
102 * deal with notification that a page was read from the cache 106 * deal with notification that a page was read from the cache
103 */ 107 */
104#ifdef AFS_CACHING_SUPPORT 108static void afs_file_readpage_read_complete(struct page *page,
105static void afs_readpage_read_complete(void *cookie_data, 109 void *data,
106 struct page *page, 110 int error)
107 void *data,
108 int error)
109{ 111{
110 _enter("%p,%p,%p,%d", cookie_data, page, data, error); 112 _enter("%p,%p,%d", page, data, error);
111 113
112 if (error) 114 /* if the read completes with an error, we just unlock the page and let
113 SetPageError(page); 115 * the VM reissue the readpage */
114 else 116 if (!error)
115 SetPageUptodate(page); 117 SetPageUptodate(page);
116 unlock_page(page); 118 unlock_page(page);
117
118} 119}
119#endif
120
121/*
122 * deal with notification that a page was written to the cache
123 */
124#ifdef AFS_CACHING_SUPPORT
125static void afs_readpage_write_complete(void *cookie_data,
126 struct page *page,
127 void *data,
128 int error)
129{
130 _enter("%p,%p,%p,%d", cookie_data, page, data, error);
131
132 unlock_page(page);
133}
134#endif
135 120
136/* 121/*
137 * AFS read page from file, directory or symlink 122 * AFS read page from file, directory or symlink
@@ -161,9 +146,9 @@ static int afs_readpage(struct file *file, struct page *page)
161 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) 146 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
162 goto error; 147 goto error;
163 148
164#ifdef AFS_CACHING_SUPPORT
165 /* is it cached? */ 149 /* is it cached? */
166 ret = cachefs_read_or_alloc_page(vnode->cache, 150#ifdef CONFIG_AFS_FSCACHE
151 ret = fscache_read_or_alloc_page(vnode->cache,
167 page, 152 page,
168 afs_file_readpage_read_complete, 153 afs_file_readpage_read_complete,
169 NULL, 154 NULL,
@@ -171,20 +156,21 @@ static int afs_readpage(struct file *file, struct page *page)
171#else 156#else
172 ret = -ENOBUFS; 157 ret = -ENOBUFS;
173#endif 158#endif
174
175 switch (ret) { 159 switch (ret) {
176 /* read BIO submitted and wb-journal entry found */
177 case 1:
178 BUG(); // TODO - handle wb-journal match
179
180 /* read BIO submitted (page in cache) */ 160 /* read BIO submitted (page in cache) */
181 case 0: 161 case 0:
182 break; 162 break;
183 163
184 /* no page available in cache */ 164 /* page not yet cached */
185 case -ENOBUFS:
186 case -ENODATA: 165 case -ENODATA:
166 _debug("cache said ENODATA");
167 goto go_on;
168
169 /* page will not be cached */
170 case -ENOBUFS:
171 _debug("cache said ENOBUFS");
187 default: 172 default:
173 go_on:
188 offset = page->index << PAGE_CACHE_SHIFT; 174 offset = page->index << PAGE_CACHE_SHIFT;
189 len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); 175 len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
190 176
@@ -198,27 +184,25 @@ static int afs_readpage(struct file *file, struct page *page)
198 set_bit(AFS_VNODE_DELETED, &vnode->flags); 184 set_bit(AFS_VNODE_DELETED, &vnode->flags);
199 ret = -ESTALE; 185 ret = -ESTALE;
200 } 186 }
201#ifdef AFS_CACHING_SUPPORT 187
202 cachefs_uncache_page(vnode->cache, page); 188#ifdef CONFIG_AFS_FSCACHE
189 fscache_uncache_page(vnode->cache, page);
203#endif 190#endif
191 BUG_ON(PageFsCache(page));
204 goto error; 192 goto error;
205 } 193 }
206 194
207 SetPageUptodate(page); 195 SetPageUptodate(page);
208 196
209#ifdef AFS_CACHING_SUPPORT 197 /* send the page to the cache */
210 if (cachefs_write_page(vnode->cache, 198#ifdef CONFIG_AFS_FSCACHE
211 page, 199 if (PageFsCache(page) &&
212 afs_file_readpage_write_complete, 200 fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) {
213 NULL, 201 fscache_uncache_page(vnode->cache, page);
214 GFP_KERNEL) != 0 202 BUG_ON(PageFsCache(page));
215 ) {
216 cachefs_uncache_page(vnode->cache, page);
217 unlock_page(page);
218 } 203 }
219#else
220 unlock_page(page);
221#endif 204#endif
205 unlock_page(page);
222 } 206 }
223 207
224 _leave(" = 0"); 208 _leave(" = 0");
@@ -232,34 +216,59 @@ error:
232} 216}
233 217
234/* 218/*
235 * invalidate part or all of a page 219 * read a set of pages
236 */ 220 */
237static void afs_invalidatepage(struct page *page, unsigned long offset) 221static int afs_readpages(struct file *file, struct address_space *mapping,
222 struct list_head *pages, unsigned nr_pages)
238{ 223{
239 int ret = 1; 224 struct afs_vnode *vnode;
225 int ret = 0;
240 226
241 _enter("{%lu},%lu", page->index, offset); 227 _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages);
242 228
243 BUG_ON(!PageLocked(page)); 229 vnode = AFS_FS_I(mapping->host);
230 if (vnode->flags & AFS_VNODE_DELETED) {
231 _leave(" = -ESTALE");
232 return -ESTALE;
233 }
244 234
245 if (PagePrivate(page)) { 235 /* attempt to read as many of the pages as possible */
246 /* We release buffers only if the entire page is being 236#ifdef CONFIG_AFS_FSCACHE
247 * invalidated. 237 ret = fscache_read_or_alloc_pages(vnode->cache,
248 * The get_block cached value has been unconditionally 238 mapping,
249 * invalidated, so real IO is not possible anymore. 239 pages,
250 */ 240 &nr_pages,
251 if (offset == 0) { 241 afs_file_readpage_read_complete,
252 BUG_ON(!PageLocked(page)); 242 NULL,
253 243 mapping_gfp_mask(mapping));
254 ret = 0; 244#else
255 if (!PageWriteback(page)) 245 ret = -ENOBUFS;
256 ret = page->mapping->a_ops->releasepage(page, 246#endif
257 0); 247
258 /* possibly should BUG_ON(!ret); - neilb */ 248 switch (ret) {
259 } 249 /* all pages are being read from the cache */
250 case 0:
251 BUG_ON(!list_empty(pages));
252 BUG_ON(nr_pages != 0);
253 _leave(" = 0 [reading all]");
254 return 0;
255
256 /* there were pages that couldn't be read from the cache */
257 case -ENODATA:
258 case -ENOBUFS:
259 break;
260
261 /* other error */
262 default:
263 _leave(" = %d", ret);
264 return ret;
260 } 265 }
261 266
262 _leave(" = %d", ret); 267 /* load the missing pages from the network */
268 ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file);
269
270 _leave(" = %d [netting]", ret);
271 return ret;
263} 272}
264 273
265/* 274/*
@@ -273,25 +282,82 @@ static int afs_launder_page(struct page *page)
273} 282}
274 283
275/* 284/*
276 * release a page and cleanup its private data 285 * invalidate part or all of a page
286 * - release a page and clean up its private data if offset is 0 (indicating
287 * the entire page)
288 */
289static void afs_invalidatepage(struct page *page, unsigned long offset)
290{
291 struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
292
293 _enter("{%lu},%lu", page->index, offset);
294
295 BUG_ON(!PageLocked(page));
296
297 /* we clean up only if the entire page is being invalidated */
298 if (offset == 0) {
299#ifdef CONFIG_AFS_FSCACHE
300 if (PageFsCache(page)) {
301 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
302 fscache_wait_on_page_write(vnode->cache, page);
303 fscache_uncache_page(vnode->cache, page);
304 ClearPageFsCache(page);
305 }
306#endif
307
308 if (PagePrivate(page)) {
309 if (wb && !PageWriteback(page)) {
310 set_page_private(page, 0);
311 afs_put_writeback(wb);
312 }
313
314 if (!page_private(page))
315 ClearPagePrivate(page);
316 }
317 }
318
319 _leave("");
320}
321
322/*
323 * release a page and clean up its private state if it's not busy
324 * - return true if the page can now be released, false if not
277 */ 325 */
278static int afs_releasepage(struct page *page, gfp_t gfp_flags) 326static int afs_releasepage(struct page *page, gfp_t gfp_flags)
279{ 327{
328 struct afs_writeback *wb = (struct afs_writeback *) page_private(page);
280 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); 329 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
281 struct afs_writeback *wb;
282 330
283 _enter("{{%x:%u}[%lu],%lx},%x", 331 _enter("{{%x:%u}[%lu],%lx},%x",
284 vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, 332 vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
285 gfp_flags); 333 gfp_flags);
286 334
335 /* deny if page is being written to the cache and the caller hasn't
336 * elected to wait */
337#ifdef CONFIG_AFS_FSCACHE
338 if (PageFsCache(page)) {
339 if (fscache_check_page_write(vnode->cache, page)) {
340 if (!(gfp_flags & __GFP_WAIT)) {
341 _leave(" = F [cache busy]");
342 return 0;
343 }
344 fscache_wait_on_page_write(vnode->cache, page);
345 }
346
347 fscache_uncache_page(vnode->cache, page);
348 ClearPageFsCache(page);
349 }
350#endif
351
287 if (PagePrivate(page)) { 352 if (PagePrivate(page)) {
288 wb = (struct afs_writeback *) page_private(page); 353 if (wb) {
289 ASSERT(wb != NULL); 354 set_page_private(page, 0);
290 set_page_private(page, 0); 355 afs_put_writeback(wb);
356 }
291 ClearPagePrivate(page); 357 ClearPagePrivate(page);
292 afs_put_writeback(wb);
293 } 358 }
294 359
295 _leave(" = 0"); 360 /* indicate that the page can be released */
296 return 0; 361 _leave(" = T");
362 return 1;
297} 363}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index bb47217f6a18..c048f0658751 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -61,6 +61,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
61 return -EBADMSG; 61 return -EBADMSG;
62 } 62 }
63 63
64#ifdef CONFIG_AFS_FSCACHE
65 if (vnode->status.size != inode->i_size)
66 fscache_attr_changed(vnode->cache);
67#endif
68
64 inode->i_nlink = vnode->status.nlink; 69 inode->i_nlink = vnode->status.nlink;
65 inode->i_uid = vnode->status.owner; 70 inode->i_uid = vnode->status.owner;
66 inode->i_gid = 0; 71 inode->i_gid = 0;
@@ -149,15 +154,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
149 return inode; 154 return inode;
150 } 155 }
151 156
152#ifdef AFS_CACHING_SUPPORT
153 /* set up caching before reading the status, as fetch-status reads the
154 * first page of symlinks to see if they're really mntpts */
155 cachefs_acquire_cookie(vnode->volume->cache,
156 NULL,
157 vnode,
158 &vnode->cache);
159#endif
160
161 if (!status) { 157 if (!status) {
162 /* it's a remotely extant inode */ 158 /* it's a remotely extant inode */
163 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); 159 set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags);
@@ -183,6 +179,15 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
183 } 179 }
184 } 180 }
185 181
182 /* set up caching before mapping the status, as map-status reads the
183 * first page of symlinks to see if they're really mountpoints */
184 inode->i_size = vnode->status.size;
185#ifdef CONFIG_AFS_FSCACHE
186 vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
187 &afs_vnode_cache_index_def,
188 vnode);
189#endif
190
186 ret = afs_inode_map_status(vnode, key); 191 ret = afs_inode_map_status(vnode, key);
187 if (ret < 0) 192 if (ret < 0)
188 goto bad_inode; 193 goto bad_inode;
@@ -196,6 +201,10 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
196 201
197 /* failure */ 202 /* failure */
198bad_inode: 203bad_inode:
204#ifdef CONFIG_AFS_FSCACHE
205 fscache_relinquish_cookie(vnode->cache, 0);
206 vnode->cache = NULL;
207#endif
199 iget_failed(inode); 208 iget_failed(inode);
200 _leave(" = %d [bad]", ret); 209 _leave(" = %d [bad]", ret);
201 return ERR_PTR(ret); 210 return ERR_PTR(ret);
@@ -340,8 +349,8 @@ void afs_clear_inode(struct inode *inode)
340 ASSERT(list_empty(&vnode->writebacks)); 349 ASSERT(list_empty(&vnode->writebacks));
341 ASSERT(!vnode->cb_promised); 350 ASSERT(!vnode->cb_promised);
342 351
343#ifdef AFS_CACHING_SUPPORT 352#ifdef CONFIG_AFS_FSCACHE
344 cachefs_relinquish_cookie(vnode->cache, 0); 353 fscache_relinquish_cookie(vnode->cache, 0);
345 vnode->cache = NULL; 354 vnode->cache = NULL;
346#endif 355#endif
347 356
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 67f259d99cd6..106be66dafd2 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
21 21
22#include "afs.h" 22#include "afs.h"
23#include "afs_vl.h" 23#include "afs_vl.h"
24#include "cache.h"
24 25
25#define AFS_CELL_MAX_ADDRS 15 26#define AFS_CELL_MAX_ADDRS 15
26 27
@@ -193,8 +194,8 @@ struct afs_cell {
193 struct key *anonymous_key; /* anonymous user key for this cell */ 194 struct key *anonymous_key; /* anonymous user key for this cell */
194 struct list_head proc_link; /* /proc cell list link */ 195 struct list_head proc_link; /* /proc cell list link */
195 struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ 196 struct proc_dir_entry *proc_dir; /* /proc dir for this cell */
196#ifdef AFS_CACHING_SUPPORT 197#ifdef CONFIG_AFS_FSCACHE
197 struct cachefs_cookie *cache; /* caching cookie */ 198 struct fscache_cookie *cache; /* caching cookie */
198#endif 199#endif
199 200
200 /* server record management */ 201 /* server record management */
@@ -249,8 +250,8 @@ struct afs_vlocation {
249 struct list_head grave; /* link in master graveyard list */ 250 struct list_head grave; /* link in master graveyard list */
250 struct list_head update; /* link in master update list */ 251 struct list_head update; /* link in master update list */
251 struct afs_cell *cell; /* cell to which volume belongs */ 252 struct afs_cell *cell; /* cell to which volume belongs */
252#ifdef AFS_CACHING_SUPPORT 253#ifdef CONFIG_AFS_FSCACHE
253 struct cachefs_cookie *cache; /* caching cookie */ 254 struct fscache_cookie *cache; /* caching cookie */
254#endif 255#endif
255 struct afs_cache_vlocation vldb; /* volume information DB record */ 256 struct afs_cache_vlocation vldb; /* volume information DB record */
256 struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ 257 struct afs_volume *vols[3]; /* volume access record pointer (index by type) */
@@ -302,8 +303,8 @@ struct afs_volume {
302 atomic_t usage; 303 atomic_t usage;
303 struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ 304 struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */
304 struct afs_vlocation *vlocation; /* volume location */ 305 struct afs_vlocation *vlocation; /* volume location */
305#ifdef AFS_CACHING_SUPPORT 306#ifdef CONFIG_AFS_FSCACHE
306 struct cachefs_cookie *cache; /* caching cookie */ 307 struct fscache_cookie *cache; /* caching cookie */
307#endif 308#endif
308 afs_volid_t vid; /* volume ID */ 309 afs_volid_t vid; /* volume ID */
309 afs_voltype_t type; /* type of volume */ 310 afs_voltype_t type; /* type of volume */
@@ -333,8 +334,8 @@ struct afs_vnode {
333 struct afs_server *server; /* server currently supplying this file */ 334 struct afs_server *server; /* server currently supplying this file */
334 struct afs_fid fid; /* the file identifier for this inode */ 335 struct afs_fid fid; /* the file identifier for this inode */
335 struct afs_file_status status; /* AFS status info for this file */ 336 struct afs_file_status status; /* AFS status info for this file */
336#ifdef AFS_CACHING_SUPPORT 337#ifdef CONFIG_AFS_FSCACHE
337 struct cachefs_cookie *cache; /* caching cookie */ 338 struct fscache_cookie *cache; /* caching cookie */
338#endif 339#endif
339 struct afs_permits *permits; /* cache of permits so far obtained */ 340 struct afs_permits *permits; /* cache of permits so far obtained */
340 struct mutex permits_lock; /* lock for altering permits list */ 341 struct mutex permits_lock; /* lock for altering permits list */
@@ -428,6 +429,22 @@ struct afs_uuid {
428 429
429/*****************************************************************************/ 430/*****************************************************************************/
430/* 431/*
432 * cache.c
433 */
434#ifdef CONFIG_AFS_FSCACHE
435extern struct fscache_netfs afs_cache_netfs;
436extern struct fscache_cookie_def afs_cell_cache_index_def;
437extern struct fscache_cookie_def afs_vlocation_cache_index_def;
438extern struct fscache_cookie_def afs_volume_cache_index_def;
439extern struct fscache_cookie_def afs_vnode_cache_index_def;
440#else
441#define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL)
442#define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL)
443#define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL)
444#define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL)
445#endif
446
447/*
431 * callback.c 448 * callback.c
432 */ 449 */
433extern void afs_init_callback_state(struct afs_server *); 450extern void afs_init_callback_state(struct afs_server *);
@@ -446,9 +463,6 @@ extern void afs_callback_update_kill(void);
446 */ 463 */
447extern struct rw_semaphore afs_proc_cells_sem; 464extern struct rw_semaphore afs_proc_cells_sem;
448extern struct list_head afs_proc_cells; 465extern struct list_head afs_proc_cells;
449#ifdef AFS_CACHING_SUPPORT
450extern struct cachefs_index_def afs_cache_cell_index_def;
451#endif
452 466
453#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) 467#define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0)
454extern int afs_cell_init(char *); 468extern int afs_cell_init(char *);
@@ -554,9 +568,6 @@ extern void afs_clear_inode(struct inode *);
554 * main.c 568 * main.c
555 */ 569 */
556extern struct afs_uuid afs_uuid; 570extern struct afs_uuid afs_uuid;
557#ifdef AFS_CACHING_SUPPORT
558extern struct cachefs_netfs afs_cache_netfs;
559#endif
560 571
561/* 572/*
562 * misc.c 573 * misc.c
@@ -637,10 +648,6 @@ extern int afs_get_MAC_address(u8 *, size_t);
637/* 648/*
638 * vlclient.c 649 * vlclient.c
639 */ 650 */
640#ifdef AFS_CACHING_SUPPORT
641extern struct cachefs_index_def afs_vlocation_cache_index_def;
642#endif
643
644extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, 651extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *,
645 const char *, struct afs_cache_vlocation *, 652 const char *, struct afs_cache_vlocation *,
646 const struct afs_wait_mode *); 653 const struct afs_wait_mode *);
@@ -664,12 +671,6 @@ extern void afs_vlocation_purge(void);
664/* 671/*
665 * vnode.c 672 * vnode.c
666 */ 673 */
667#ifdef AFS_CACHING_SUPPORT
668extern struct cachefs_index_def afs_vnode_cache_index_def;
669#endif
670
671extern struct afs_timer_ops afs_vnode_cb_timed_out_ops;
672
673static inline struct afs_vnode *AFS_FS_I(struct inode *inode) 674static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
674{ 675{
675 return container_of(inode, struct afs_vnode, vfs_inode); 676 return container_of(inode, struct afs_vnode, vfs_inode);
@@ -711,10 +712,6 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *);
711/* 712/*
712 * volume.c 713 * volume.c
713 */ 714 */
714#ifdef AFS_CACHING_SUPPORT
715extern struct cachefs_index_def afs_volume_cache_index_def;
716#endif
717
718#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) 715#define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0)
719 716
720extern void afs_put_volume(struct afs_volume *); 717extern void afs_put_volume(struct afs_volume *);
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 2d3e5d4fb9f7..66d54d348c55 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -1,6 +1,6 @@
1/* AFS client file system 1/* AFS client file system
2 * 2 *
3 * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
5 * 5 *
6 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -29,18 +29,6 @@ static char *rootcell;
29module_param(rootcell, charp, 0); 29module_param(rootcell, charp, 0);
30MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); 30MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list");
31 31
32#ifdef AFS_CACHING_SUPPORT
33static struct cachefs_netfs_operations afs_cache_ops = {
34 .get_page_cookie = afs_cache_get_page_cookie,
35};
36
37struct cachefs_netfs afs_cache_netfs = {
38 .name = "afs",
39 .version = 0,
40 .ops = &afs_cache_ops,
41};
42#endif
43
44struct afs_uuid afs_uuid; 32struct afs_uuid afs_uuid;
45 33
46/* 34/*
@@ -104,10 +92,9 @@ static int __init afs_init(void)
104 if (ret < 0) 92 if (ret < 0)
105 return ret; 93 return ret;
106 94
107#ifdef AFS_CACHING_SUPPORT 95#ifdef CONFIG_AFS_FSCACHE
108 /* we want to be able to cache */ 96 /* we want to be able to cache */
109 ret = cachefs_register_netfs(&afs_cache_netfs, 97 ret = fscache_register_netfs(&afs_cache_netfs);
110 &afs_cache_cell_index_def);
111 if (ret < 0) 98 if (ret < 0)
112 goto error_cache; 99 goto error_cache;
113#endif 100#endif
@@ -142,8 +129,8 @@ error_fs:
142error_open_socket: 129error_open_socket:
143error_vl_update_init: 130error_vl_update_init:
144error_cell_init: 131error_cell_init:
145#ifdef AFS_CACHING_SUPPORT 132#ifdef CONFIG_AFS_FSCACHE
146 cachefs_unregister_netfs(&afs_cache_netfs); 133 fscache_unregister_netfs(&afs_cache_netfs);
147error_cache: 134error_cache:
148#endif 135#endif
149 afs_callback_update_kill(); 136 afs_callback_update_kill();
@@ -175,8 +162,8 @@ static void __exit afs_exit(void)
175 afs_vlocation_purge(); 162 afs_vlocation_purge();
176 flush_scheduled_work(); 163 flush_scheduled_work();
177 afs_cell_purge(); 164 afs_cell_purge();
178#ifdef AFS_CACHING_SUPPORT 165#ifdef CONFIG_AFS_FSCACHE
179 cachefs_unregister_netfs(&afs_cache_netfs); 166 fscache_unregister_netfs(&afs_cache_netfs);
180#endif 167#endif
181 afs_proc_cleanup(); 168 afs_proc_cleanup();
182 rcu_barrier(); 169 rcu_barrier();
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 78db4953a800..2b9e2d03a390 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -173,9 +173,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
173 if (PageError(page)) 173 if (PageError(page))
174 goto error; 174 goto error;
175 175
176 buf = kmap(page); 176 buf = kmap_atomic(page, KM_USER0);
177 memcpy(devname, buf, size); 177 memcpy(devname, buf, size);
178 kunmap(page); 178 kunmap_atomic(buf, KM_USER0);
179 page_cache_release(page); 179 page_cache_release(page);
180 page = NULL; 180 page = NULL;
181 181
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 849fc3160cb5..ec2a7431e458 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -281,9 +281,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl,
281 281
282 vl->vldb = *vldb; 282 vl->vldb = *vldb;
283 283
284#ifdef AFS_CACHING_SUPPORT 284#ifdef CONFIG_AFS_FSCACHE
285 /* update volume entry in local cache */ 285 fscache_update_cookie(vl->cache);
286 cachefs_update_cookie(vl->cache);
287#endif 286#endif
288} 287}
289 288
@@ -304,11 +303,9 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
304 memset(&vldb, 0, sizeof(vldb)); 303 memset(&vldb, 0, sizeof(vldb));
305 304
306 /* see if we have an in-cache copy (will set vl->valid if there is) */ 305 /* see if we have an in-cache copy (will set vl->valid if there is) */
307#ifdef AFS_CACHING_SUPPORT 306#ifdef CONFIG_AFS_FSCACHE
308 cachefs_acquire_cookie(cell->cache, 307 vl->cache = fscache_acquire_cookie(vl->cell->cache,
309 &afs_volume_cache_index_def, 308 &afs_vlocation_cache_index_def, vl);
310 vlocation,
311 &vl->cache);
312#endif 309#endif
313 310
314 if (vl->valid) { 311 if (vl->valid) {
@@ -420,6 +417,11 @@ fill_in_record:
420 spin_unlock(&vl->lock); 417 spin_unlock(&vl->lock);
421 wake_up(&vl->waitq); 418 wake_up(&vl->waitq);
422 419
420 /* update volume entry in local cache */
421#ifdef CONFIG_AFS_FSCACHE
422 fscache_update_cookie(vl->cache);
423#endif
424
423 /* schedule for regular updates */ 425 /* schedule for regular updates */
424 afs_vlocation_queue_for_updates(vl); 426 afs_vlocation_queue_for_updates(vl);
425 goto success; 427 goto success;
@@ -465,7 +467,7 @@ found_in_memory:
465 spin_unlock(&vl->lock); 467 spin_unlock(&vl->lock);
466 468
467success: 469success:
468 _leave(" = %p",vl); 470 _leave(" = %p", vl);
469 return vl; 471 return vl;
470 472
471error_abandon: 473error_abandon:
@@ -523,10 +525,9 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl)
523{ 525{
524 _enter("%p", vl); 526 _enter("%p", vl);
525 527
526#ifdef AFS_CACHING_SUPPORT 528#ifdef CONFIG_AFS_FSCACHE
527 cachefs_relinquish_cookie(vl->cache, 0); 529 fscache_relinquish_cookie(vl->cache, 0);
528#endif 530#endif
529
530 afs_put_cell(vl->cell); 531 afs_put_cell(vl->cell);
531 kfree(vl); 532 kfree(vl);
532} 533}
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 8bab0e3437f9..a353e69e2391 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -124,13 +124,11 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params)
124 } 124 }
125 125
126 /* attach the cache and volume location */ 126 /* attach the cache and volume location */
127#ifdef AFS_CACHING_SUPPORT 127#ifdef CONFIG_AFS_FSCACHE
128 cachefs_acquire_cookie(vlocation->cache, 128 volume->cache = fscache_acquire_cookie(vlocation->cache,
129 &afs_vnode_cache_index_def, 129 &afs_volume_cache_index_def,
130 volume, 130 volume);
131 &volume->cache);
132#endif 131#endif
133
134 afs_get_vlocation(vlocation); 132 afs_get_vlocation(vlocation);
135 volume->vlocation = vlocation; 133 volume->vlocation = vlocation;
136 134
@@ -194,8 +192,8 @@ void afs_put_volume(struct afs_volume *volume)
194 up_write(&vlocation->cell->vl_sem); 192 up_write(&vlocation->cell->vl_sem);
195 193
196 /* finish cleaning up the volume */ 194 /* finish cleaning up the volume */
197#ifdef AFS_CACHING_SUPPORT 195#ifdef CONFIG_AFS_FSCACHE
198 cachefs_relinquish_cookie(volume->cache, 0); 196 fscache_relinquish_cookie(volume->cache, 0);
199#endif 197#endif
200 afs_put_vlocation(vlocation); 198 afs_put_vlocation(vlocation);
201 199
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3fb36d433621..c2e7a7ff0080 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -780,3 +780,24 @@ int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
780 _leave(" = %d", ret); 780 _leave(" = %d", ret);
781 return ret; 781 return ret;
782} 782}
783
784/*
785 * notification that a previously read-only page is about to become writable
786 * - if it returns an error, the caller will deliver a bus error signal
787 */
788int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
789{
790 struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
791
792 _enter("{{%x:%u}},{%lx}",
793 vnode->fid.vid, vnode->fid.vnode, page->index);
794
795 /* wait for the page to be written to the cache before we allow it to
796 * be modified */
797#ifdef CONFIG_AFS_FSCACHE
798 fscache_wait_on_page_write(vnode->cache, page);
799#endif
800
801 _leave(" = 0");
802 return 0;
803}
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig
new file mode 100644
index 000000000000..80e9c6167f0b
--- /dev/null
+++ b/fs/cachefiles/Kconfig
@@ -0,0 +1,39 @@
1
2config CACHEFILES
3 tristate "Filesystem caching on files"
4 depends on FSCACHE && BLOCK
5 help
6 This permits use of a mounted filesystem as a cache for other
7 filesystems - primarily networking filesystems - thus allowing fast
8 local disk to enhance the speed of slower devices.
9
10 See Documentation/filesystems/caching/cachefiles.txt for more
11 information.
12
13config CACHEFILES_DEBUG
14 bool "Debug CacheFiles"
15 depends on CACHEFILES
16 help
17 This permits debugging to be dynamically enabled in the filesystem
18 caching on files module. If this is set, the debugging output may be
19 enabled by setting bits in /sys/modules/cachefiles/parameter/debug or
20 by including a debugging specifier in /etc/cachefilesd.conf.
21
22config CACHEFILES_HISTOGRAM
23 bool "Gather latency information on CacheFiles"
24 depends on CACHEFILES && PROC_FS
25 help
26
27 This option causes latency information to be gathered on CacheFiles
28 operation and exported through file:
29
30 /proc/fs/cachefiles/histogram
31
32 The generation of this histogram adds a certain amount of overhead to
33 execution as there are a number of points at which data is gathered,
34 and on a multi-CPU system these may be on cachelines that keep
35 bouncing between CPUs. On the other hand, the histogram may be
36 useful for debugging purposes. Saying 'N' here is recommended.
37
38 See Documentation/filesystems/caching/cachefiles.txt for more
39 information.
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile
new file mode 100644
index 000000000000..32cbab0ffce3
--- /dev/null
+++ b/fs/cachefiles/Makefile
@@ -0,0 +1,18 @@
1#
2# Makefile for caching in a mounted filesystem
3#
4
5cachefiles-y := \
6 bind.o \
7 daemon.o \
8 interface.o \
9 key.o \
10 main.o \
11 namei.o \
12 rdwr.o \
13 security.o \
14 xattr.o
15
16cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o
17
18obj-$(CONFIG_CACHEFILES) := cachefiles.o
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
new file mode 100644
index 000000000000..3797e0077b35
--- /dev/null
+++ b/fs/cachefiles/bind.c
@@ -0,0 +1,286 @@
1/* Bind and unbind a cache from the filesystem backing it
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/sched.h>
15#include <linux/completion.h>
16#include <linux/slab.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/namei.h>
20#include <linux/mount.h>
21#include <linux/statfs.h>
22#include <linux/ctype.h>
23#include "internal.h"
24
25static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches);
26
27/*
28 * bind a directory as a cache
29 */
30int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
31{
32 _enter("{%u,%u,%u,%u,%u,%u},%s",
33 cache->frun_percent,
34 cache->fcull_percent,
35 cache->fstop_percent,
36 cache->brun_percent,
37 cache->bcull_percent,
38 cache->bstop_percent,
39 args);
40
41 /* start by checking things over */
42 ASSERT(cache->fstop_percent >= 0 &&
43 cache->fstop_percent < cache->fcull_percent &&
44 cache->fcull_percent < cache->frun_percent &&
45 cache->frun_percent < 100);
46
47 ASSERT(cache->bstop_percent >= 0 &&
48 cache->bstop_percent < cache->bcull_percent &&
49 cache->bcull_percent < cache->brun_percent &&
50 cache->brun_percent < 100);
51
52 if (*args) {
53 kerror("'bind' command doesn't take an argument");
54 return -EINVAL;
55 }
56
57 if (!cache->rootdirname) {
58 kerror("No cache directory specified");
59 return -EINVAL;
60 }
61
62 /* don't permit already bound caches to be re-bound */
63 if (test_bit(CACHEFILES_READY, &cache->flags)) {
64 kerror("Cache already bound");
65 return -EBUSY;
66 }
67
68 /* make sure we have copies of the tag and dirname strings */
69 if (!cache->tag) {
70 /* the tag string is released by the fops->release()
71 * function, so we don't release it on error here */
72 cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
73 if (!cache->tag)
74 return -ENOMEM;
75 }
76
77 /* add the cache */
78 return cachefiles_daemon_add_cache(cache);
79}
80
81/*
82 * add a cache
83 */
84static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
85{
86 struct cachefiles_object *fsdef;
87 struct nameidata nd;
88 struct kstatfs stats;
89 struct dentry *graveyard, *cachedir, *root;
90 const struct cred *saved_cred;
91 int ret;
92
93 _enter("");
94
95 /* we want to work under the module's security ID */
96 ret = cachefiles_get_security_ID(cache);
97 if (ret < 0)
98 return ret;
99
100 cachefiles_begin_secure(cache, &saved_cred);
101
102 /* allocate the root index object */
103 ret = -ENOMEM;
104
105 fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
106 if (!fsdef)
107 goto error_root_object;
108
109 ASSERTCMP(fsdef->backer, ==, NULL);
110
111 atomic_set(&fsdef->usage, 1);
112 fsdef->type = FSCACHE_COOKIE_TYPE_INDEX;
113
114 _debug("- fsdef %p", fsdef);
115
116 /* look up the directory at the root of the cache */
117 memset(&nd, 0, sizeof(nd));
118
119 ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
120 if (ret < 0)
121 goto error_open_root;
122
123 cache->mnt = mntget(nd.path.mnt);
124 root = dget(nd.path.dentry);
125 path_put(&nd.path);
126
127 /* check parameters */
128 ret = -EOPNOTSUPP;
129 if (!root->d_inode ||
130 !root->d_inode->i_op ||
131 !root->d_inode->i_op->lookup ||
132 !root->d_inode->i_op->mkdir ||
133 !root->d_inode->i_op->setxattr ||
134 !root->d_inode->i_op->getxattr ||
135 !root->d_sb ||
136 !root->d_sb->s_op ||
137 !root->d_sb->s_op->statfs ||
138 !root->d_sb->s_op->sync_fs)
139 goto error_unsupported;
140
141 ret = -EROFS;
142 if (root->d_sb->s_flags & MS_RDONLY)
143 goto error_unsupported;
144
145 /* determine the security of the on-disk cache as this governs
146 * security ID of files we create */
147 ret = cachefiles_determine_cache_security(cache, root, &saved_cred);
148 if (ret < 0)
149 goto error_unsupported;
150
151 /* get the cache size and blocksize */
152 ret = vfs_statfs(root, &stats);
153 if (ret < 0)
154 goto error_unsupported;
155
156 ret = -ERANGE;
157 if (stats.f_bsize <= 0)
158 goto error_unsupported;
159
160 ret = -EOPNOTSUPP;
161 if (stats.f_bsize > PAGE_SIZE)
162 goto error_unsupported;
163
164 cache->bsize = stats.f_bsize;
165 cache->bshift = 0;
166 if (stats.f_bsize < PAGE_SIZE)
167 cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
168
169 _debug("blksize %u (shift %u)",
170 cache->bsize, cache->bshift);
171
172 _debug("size %llu, avail %llu",
173 (unsigned long long) stats.f_blocks,
174 (unsigned long long) stats.f_bavail);
175
176 /* set up caching limits */
177 do_div(stats.f_files, 100);
178 cache->fstop = stats.f_files * cache->fstop_percent;
179 cache->fcull = stats.f_files * cache->fcull_percent;
180 cache->frun = stats.f_files * cache->frun_percent;
181
182 _debug("limits {%llu,%llu,%llu} files",
183 (unsigned long long) cache->frun,
184 (unsigned long long) cache->fcull,
185 (unsigned long long) cache->fstop);
186
187 stats.f_blocks >>= cache->bshift;
188 do_div(stats.f_blocks, 100);
189 cache->bstop = stats.f_blocks * cache->bstop_percent;
190 cache->bcull = stats.f_blocks * cache->bcull_percent;
191 cache->brun = stats.f_blocks * cache->brun_percent;
192
193 _debug("limits {%llu,%llu,%llu} blocks",
194 (unsigned long long) cache->brun,
195 (unsigned long long) cache->bcull,
196 (unsigned long long) cache->bstop);
197
198 /* get the cache directory and check its type */
199 cachedir = cachefiles_get_directory(cache, root, "cache");
200 if (IS_ERR(cachedir)) {
201 ret = PTR_ERR(cachedir);
202 goto error_unsupported;
203 }
204
205 fsdef->dentry = cachedir;
206 fsdef->fscache.cookie = NULL;
207
208 ret = cachefiles_check_object_type(fsdef);
209 if (ret < 0)
210 goto error_unsupported;
211
212 /* get the graveyard directory */
213 graveyard = cachefiles_get_directory(cache, root, "graveyard");
214 if (IS_ERR(graveyard)) {
215 ret = PTR_ERR(graveyard);
216 goto error_unsupported;
217 }
218
219 cache->graveyard = graveyard;
220
221 /* publish the cache */
222 fscache_init_cache(&cache->cache,
223 &cachefiles_cache_ops,
224 "%s",
225 fsdef->dentry->d_sb->s_id);
226
227 fscache_object_init(&fsdef->fscache, NULL, &cache->cache);
228
229 ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag);
230 if (ret < 0)
231 goto error_add_cache;
232
233 /* done */
234 set_bit(CACHEFILES_READY, &cache->flags);
235 dput(root);
236
237 printk(KERN_INFO "CacheFiles:"
238 " File cache on %s registered\n",
239 cache->cache.identifier);
240
241 /* check how much space the cache has */
242 cachefiles_has_space(cache, 0, 0);
243 cachefiles_end_secure(cache, saved_cred);
244 return 0;
245
246error_add_cache:
247 dput(cache->graveyard);
248 cache->graveyard = NULL;
249error_unsupported:
250 mntput(cache->mnt);
251 cache->mnt = NULL;
252 dput(fsdef->dentry);
253 fsdef->dentry = NULL;
254 dput(root);
255error_open_root:
256 kmem_cache_free(cachefiles_object_jar, fsdef);
257error_root_object:
258 cachefiles_end_secure(cache, saved_cred);
259 kerror("Failed to register: %d", ret);
260 return ret;
261}
262
263/*
264 * unbind a cache on fd release
265 */
266void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
267{
268 _enter("");
269
270 if (test_bit(CACHEFILES_READY, &cache->flags)) {
271 printk(KERN_INFO "CacheFiles:"
272 " File cache on %s unregistering\n",
273 cache->cache.identifier);
274
275 fscache_withdraw_cache(&cache->cache);
276 }
277
278 dput(cache->graveyard);
279 mntput(cache->mnt);
280
281 kfree(cache->rootdirname);
282 kfree(cache->secctx);
283 kfree(cache->tag);
284
285 _leave("");
286}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
new file mode 100644
index 000000000000..4618516dd994
--- /dev/null
+++ b/fs/cachefiles/daemon.c
@@ -0,0 +1,755 @@
1/* Daemon interface
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/sched.h>
15#include <linux/completion.h>
16#include <linux/slab.h>
17#include <linux/fs.h>
18#include <linux/file.h>
19#include <linux/namei.h>
20#include <linux/poll.h>
21#include <linux/mount.h>
22#include <linux/statfs.h>
23#include <linux/ctype.h>
24#include <linux/fs_struct.h>
25#include "internal.h"
26
27static int cachefiles_daemon_open(struct inode *, struct file *);
28static int cachefiles_daemon_release(struct inode *, struct file *);
29static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t,
30 loff_t *);
31static ssize_t cachefiles_daemon_write(struct file *, const char __user *,
32 size_t, loff_t *);
33static unsigned int cachefiles_daemon_poll(struct file *,
34 struct poll_table_struct *);
35static int cachefiles_daemon_frun(struct cachefiles_cache *, char *);
36static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *);
37static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *);
38static int cachefiles_daemon_brun(struct cachefiles_cache *, char *);
39static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *);
40static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *);
41static int cachefiles_daemon_cull(struct cachefiles_cache *, char *);
42static int cachefiles_daemon_debug(struct cachefiles_cache *, char *);
43static int cachefiles_daemon_dir(struct cachefiles_cache *, char *);
44static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *);
45static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *);
46static int cachefiles_daemon_tag(struct cachefiles_cache *, char *);
47
48static unsigned long cachefiles_open;
49
50const struct file_operations cachefiles_daemon_fops = {
51 .owner = THIS_MODULE,
52 .open = cachefiles_daemon_open,
53 .release = cachefiles_daemon_release,
54 .read = cachefiles_daemon_read,
55 .write = cachefiles_daemon_write,
56 .poll = cachefiles_daemon_poll,
57};
58
59struct cachefiles_daemon_cmd {
60 char name[8];
61 int (*handler)(struct cachefiles_cache *cache, char *args);
62};
63
64static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = {
65 { "bind", cachefiles_daemon_bind },
66 { "brun", cachefiles_daemon_brun },
67 { "bcull", cachefiles_daemon_bcull },
68 { "bstop", cachefiles_daemon_bstop },
69 { "cull", cachefiles_daemon_cull },
70 { "debug", cachefiles_daemon_debug },
71 { "dir", cachefiles_daemon_dir },
72 { "frun", cachefiles_daemon_frun },
73 { "fcull", cachefiles_daemon_fcull },
74 { "fstop", cachefiles_daemon_fstop },
75 { "inuse", cachefiles_daemon_inuse },
76 { "secctx", cachefiles_daemon_secctx },
77 { "tag", cachefiles_daemon_tag },
78 { "", NULL }
79};
80
81
82/*
83 * do various checks
84 */
85static int cachefiles_daemon_open(struct inode *inode, struct file *file)
86{
87 struct cachefiles_cache *cache;
88
89 _enter("");
90
91 /* only the superuser may do this */
92 if (!capable(CAP_SYS_ADMIN))
93 return -EPERM;
94
95 /* the cachefiles device may only be open once at a time */
96 if (xchg(&cachefiles_open, 1) == 1)
97 return -EBUSY;
98
99 /* allocate a cache record */
100 cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL);
101 if (!cache) {
102 cachefiles_open = 0;
103 return -ENOMEM;
104 }
105
106 mutex_init(&cache->daemon_mutex);
107 cache->active_nodes = RB_ROOT;
108 rwlock_init(&cache->active_lock);
109 init_waitqueue_head(&cache->daemon_pollwq);
110
111 /* set default caching limits
112 * - limit at 1% free space and/or free files
113 * - cull below 5% free space and/or free files
114 * - cease culling above 7% free space and/or free files
115 */
116 cache->frun_percent = 7;
117 cache->fcull_percent = 5;
118 cache->fstop_percent = 1;
119 cache->brun_percent = 7;
120 cache->bcull_percent = 5;
121 cache->bstop_percent = 1;
122
123 file->private_data = cache;
124 cache->cachefilesd = file;
125 return 0;
126}
127
128/*
129 * release a cache
130 */
131static int cachefiles_daemon_release(struct inode *inode, struct file *file)
132{
133 struct cachefiles_cache *cache = file->private_data;
134
135 _enter("");
136
137 ASSERT(cache);
138
139 set_bit(CACHEFILES_DEAD, &cache->flags);
140
141 cachefiles_daemon_unbind(cache);
142
143 ASSERT(!cache->active_nodes.rb_node);
144
145 /* clean up the control file interface */
146 cache->cachefilesd = NULL;
147 file->private_data = NULL;
148 cachefiles_open = 0;
149
150 kfree(cache);
151
152 _leave("");
153 return 0;
154}
155
156/*
157 * read the cache state
158 */
159static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
160 size_t buflen, loff_t *pos)
161{
162 struct cachefiles_cache *cache = file->private_data;
163 char buffer[256];
164 int n;
165
166 //_enter(",,%zu,", buflen);
167
168 if (!test_bit(CACHEFILES_READY, &cache->flags))
169 return 0;
170
171 /* check how much space the cache has */
172 cachefiles_has_space(cache, 0, 0);
173
174 /* summarise */
175 clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
176
177 n = snprintf(buffer, sizeof(buffer),
178 "cull=%c"
179 " frun=%llx"
180 " fcull=%llx"
181 " fstop=%llx"
182 " brun=%llx"
183 " bcull=%llx"
184 " bstop=%llx",
185 test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
186 (unsigned long long) cache->frun,
187 (unsigned long long) cache->fcull,
188 (unsigned long long) cache->fstop,
189 (unsigned long long) cache->brun,
190 (unsigned long long) cache->bcull,
191 (unsigned long long) cache->bstop
192 );
193
194 if (n > buflen)
195 return -EMSGSIZE;
196
197 if (copy_to_user(_buffer, buffer, n) != 0)
198 return -EFAULT;
199
200 return n;
201}
202
203/*
204 * command the cache
205 */
206static ssize_t cachefiles_daemon_write(struct file *file,
207 const char __user *_data,
208 size_t datalen,
209 loff_t *pos)
210{
211 const struct cachefiles_daemon_cmd *cmd;
212 struct cachefiles_cache *cache = file->private_data;
213 ssize_t ret;
214 char *data, *args, *cp;
215
216 //_enter(",,%zu,", datalen);
217
218 ASSERT(cache);
219
220 if (test_bit(CACHEFILES_DEAD, &cache->flags))
221 return -EIO;
222
223 if (datalen < 0 || datalen > PAGE_SIZE - 1)
224 return -EOPNOTSUPP;
225
226 /* drag the command string into the kernel so we can parse it */
227 data = kmalloc(datalen + 1, GFP_KERNEL);
228 if (!data)
229 return -ENOMEM;
230
231 ret = -EFAULT;
232 if (copy_from_user(data, _data, datalen) != 0)
233 goto error;
234
235 data[datalen] = '\0';
236
237 ret = -EINVAL;
238 if (memchr(data, '\0', datalen))
239 goto error;
240
241 /* strip any newline */
242 cp = memchr(data, '\n', datalen);
243 if (cp) {
244 if (cp == data)
245 goto error;
246
247 *cp = '\0';
248 }
249
250 /* parse the command */
251 ret = -EOPNOTSUPP;
252
253 for (args = data; *args; args++)
254 if (isspace(*args))
255 break;
256 if (*args) {
257 if (args == data)
258 goto error;
259 *args = '\0';
260 for (args++; isspace(*args); args++)
261 continue;
262 }
263
264 /* run the appropriate command handler */
265 for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++)
266 if (strcmp(cmd->name, data) == 0)
267 goto found_command;
268
269error:
270 kfree(data);
271 //_leave(" = %zd", ret);
272 return ret;
273
274found_command:
275 mutex_lock(&cache->daemon_mutex);
276
277 ret = -EIO;
278 if (!test_bit(CACHEFILES_DEAD, &cache->flags))
279 ret = cmd->handler(cache, args);
280
281 mutex_unlock(&cache->daemon_mutex);
282
283 if (ret == 0)
284 ret = datalen;
285 goto error;
286}
287
288/*
289 * poll for culling state
290 * - use POLLOUT to indicate culling state
291 */
292static unsigned int cachefiles_daemon_poll(struct file *file,
293 struct poll_table_struct *poll)
294{
295 struct cachefiles_cache *cache = file->private_data;
296 unsigned int mask;
297
298 poll_wait(file, &cache->daemon_pollwq, poll);
299 mask = 0;
300
301 if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags))
302 mask |= POLLIN;
303
304 if (test_bit(CACHEFILES_CULLING, &cache->flags))
305 mask |= POLLOUT;
306
307 return mask;
308}
309
310/*
311 * give a range error for cache space constraints
312 * - can be tail-called
313 */
314static int cachefiles_daemon_range_error(struct cachefiles_cache *cache,
315 char *args)
316{
317 kerror("Free space limits must be in range"
318 " 0%%<=stop<cull<run<100%%");
319
320 return -EINVAL;
321}
322
323/*
324 * set the percentage of files at which to stop culling
325 * - command: "frun <N>%"
326 */
327static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args)
328{
329 unsigned long frun;
330
331 _enter(",%s", args);
332
333 if (!*args)
334 return -EINVAL;
335
336 frun = simple_strtoul(args, &args, 10);
337 if (args[0] != '%' || args[1] != '\0')
338 return -EINVAL;
339
340 if (frun <= cache->fcull_percent || frun >= 100)
341 return cachefiles_daemon_range_error(cache, args);
342
343 cache->frun_percent = frun;
344 return 0;
345}
346
347/*
348 * set the percentage of files at which to start culling
349 * - command: "fcull <N>%"
350 */
351static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args)
352{
353 unsigned long fcull;
354
355 _enter(",%s", args);
356
357 if (!*args)
358 return -EINVAL;
359
360 fcull = simple_strtoul(args, &args, 10);
361 if (args[0] != '%' || args[1] != '\0')
362 return -EINVAL;
363
364 if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent)
365 return cachefiles_daemon_range_error(cache, args);
366
367 cache->fcull_percent = fcull;
368 return 0;
369}
370
371/*
372 * set the percentage of files at which to stop allocating
373 * - command: "fstop <N>%"
374 */
375static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args)
376{
377 unsigned long fstop;
378
379 _enter(",%s", args);
380
381 if (!*args)
382 return -EINVAL;
383
384 fstop = simple_strtoul(args, &args, 10);
385 if (args[0] != '%' || args[1] != '\0')
386 return -EINVAL;
387
388 if (fstop < 0 || fstop >= cache->fcull_percent)
389 return cachefiles_daemon_range_error(cache, args);
390
391 cache->fstop_percent = fstop;
392 return 0;
393}
394
395/*
396 * set the percentage of blocks at which to stop culling
397 * - command: "brun <N>%"
398 */
399static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args)
400{
401 unsigned long brun;
402
403 _enter(",%s", args);
404
405 if (!*args)
406 return -EINVAL;
407
408 brun = simple_strtoul(args, &args, 10);
409 if (args[0] != '%' || args[1] != '\0')
410 return -EINVAL;
411
412 if (brun <= cache->bcull_percent || brun >= 100)
413 return cachefiles_daemon_range_error(cache, args);
414
415 cache->brun_percent = brun;
416 return 0;
417}
418
419/*
420 * set the percentage of blocks at which to start culling
421 * - command: "bcull <N>%"
422 */
423static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args)
424{
425 unsigned long bcull;
426
427 _enter(",%s", args);
428
429 if (!*args)
430 return -EINVAL;
431
432 bcull = simple_strtoul(args, &args, 10);
433 if (args[0] != '%' || args[1] != '\0')
434 return -EINVAL;
435
436 if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent)
437 return cachefiles_daemon_range_error(cache, args);
438
439 cache->bcull_percent = bcull;
440 return 0;
441}
442
443/*
444 * set the percentage of blocks at which to stop allocating
445 * - command: "bstop <N>%"
446 */
447static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args)
448{
449 unsigned long bstop;
450
451 _enter(",%s", args);
452
453 if (!*args)
454 return -EINVAL;
455
456 bstop = simple_strtoul(args, &args, 10);
457 if (args[0] != '%' || args[1] != '\0')
458 return -EINVAL;
459
460 if (bstop < 0 || bstop >= cache->bcull_percent)
461 return cachefiles_daemon_range_error(cache, args);
462
463 cache->bstop_percent = bstop;
464 return 0;
465}
466
467/*
468 * set the cache directory
469 * - command: "dir <name>"
470 */
471static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
472{
473 char *dir;
474
475 _enter(",%s", args);
476
477 if (!*args) {
478 kerror("Empty directory specified");
479 return -EINVAL;
480 }
481
482 if (cache->rootdirname) {
483 kerror("Second cache directory specified");
484 return -EEXIST;
485 }
486
487 dir = kstrdup(args, GFP_KERNEL);
488 if (!dir)
489 return -ENOMEM;
490
491 cache->rootdirname = dir;
492 return 0;
493}
494
495/*
496 * set the cache security context
497 * - command: "secctx <ctx>"
498 */
499static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
500{
501 char *secctx;
502
503 _enter(",%s", args);
504
505 if (!*args) {
506 kerror("Empty security context specified");
507 return -EINVAL;
508 }
509
510 if (cache->secctx) {
511 kerror("Second security context specified");
512 return -EINVAL;
513 }
514
515 secctx = kstrdup(args, GFP_KERNEL);
516 if (!secctx)
517 return -ENOMEM;
518
519 cache->secctx = secctx;
520 return 0;
521}
522
523/*
524 * set the cache tag
525 * - command: "tag <name>"
526 */
527static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args)
528{
529 char *tag;
530
531 _enter(",%s", args);
532
533 if (!*args) {
534 kerror("Empty tag specified");
535 return -EINVAL;
536 }
537
538 if (cache->tag)
539 return -EEXIST;
540
541 tag = kstrdup(args, GFP_KERNEL);
542 if (!tag)
543 return -ENOMEM;
544
545 cache->tag = tag;
546 return 0;
547}
548
549/*
550 * request a node in the cache be culled from the current working directory
551 * - command: "cull <name>"
552 */
553static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args)
554{
555 struct fs_struct *fs;
556 struct dentry *dir;
557 const struct cred *saved_cred;
558 int ret;
559
560 _enter(",%s", args);
561
562 if (strchr(args, '/'))
563 goto inval;
564
565 if (!test_bit(CACHEFILES_READY, &cache->flags)) {
566 kerror("cull applied to unready cache");
567 return -EIO;
568 }
569
570 if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
571 kerror("cull applied to dead cache");
572 return -EIO;
573 }
574
575 /* extract the directory dentry from the cwd */
576 fs = current->fs;
577 read_lock(&fs->lock);
578 dir = dget(fs->pwd.dentry);
579 read_unlock(&fs->lock);
580
581 if (!S_ISDIR(dir->d_inode->i_mode))
582 goto notdir;
583
584 cachefiles_begin_secure(cache, &saved_cred);
585 ret = cachefiles_cull(cache, dir, args);
586 cachefiles_end_secure(cache, saved_cred);
587
588 dput(dir);
589 _leave(" = %d", ret);
590 return ret;
591
592notdir:
593 dput(dir);
594 kerror("cull command requires dirfd to be a directory");
595 return -ENOTDIR;
596
597inval:
598 kerror("cull command requires dirfd and filename");
599 return -EINVAL;
600}
601
602/*
603 * set debugging mode
604 * - command: "debug <mask>"
605 */
606static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args)
607{
608 unsigned long mask;
609
610 _enter(",%s", args);
611
612 mask = simple_strtoul(args, &args, 0);
613 if (args[0] != '\0')
614 goto inval;
615
616 cachefiles_debug = mask;
617 _leave(" = 0");
618 return 0;
619
620inval:
621 kerror("debug command requires mask");
622 return -EINVAL;
623}
624
625/*
626 * find out whether an object in the current working directory is in use or not
627 * - command: "inuse <name>"
628 */
629static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args)
630{
631 struct fs_struct *fs;
632 struct dentry *dir;
633 const struct cred *saved_cred;
634 int ret;
635
636 //_enter(",%s", args);
637
638 if (strchr(args, '/'))
639 goto inval;
640
641 if (!test_bit(CACHEFILES_READY, &cache->flags)) {
642 kerror("inuse applied to unready cache");
643 return -EIO;
644 }
645
646 if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
647 kerror("inuse applied to dead cache");
648 return -EIO;
649 }
650
651 /* extract the directory dentry from the cwd */
652 fs = current->fs;
653 read_lock(&fs->lock);
654 dir = dget(fs->pwd.dentry);
655 read_unlock(&fs->lock);
656
657 if (!S_ISDIR(dir->d_inode->i_mode))
658 goto notdir;
659
660 cachefiles_begin_secure(cache, &saved_cred);
661 ret = cachefiles_check_in_use(cache, dir, args);
662 cachefiles_end_secure(cache, saved_cred);
663
664 dput(dir);
665 //_leave(" = %d", ret);
666 return ret;
667
668notdir:
669 dput(dir);
670 kerror("inuse command requires dirfd to be a directory");
671 return -ENOTDIR;
672
673inval:
674 kerror("inuse command requires dirfd and filename");
675 return -EINVAL;
676}
677
678/*
679 * see if we have space for a number of pages and/or a number of files in the
680 * cache
681 */
682int cachefiles_has_space(struct cachefiles_cache *cache,
683 unsigned fnr, unsigned bnr)
684{
685 struct kstatfs stats;
686 int ret;
687
688 //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
689 // (unsigned long long) cache->frun,
690 // (unsigned long long) cache->fcull,
691 // (unsigned long long) cache->fstop,
692 // (unsigned long long) cache->brun,
693 // (unsigned long long) cache->bcull,
694 // (unsigned long long) cache->bstop,
695 // fnr, bnr);
696
697 /* find out how many pages of blockdev are available */
698 memset(&stats, 0, sizeof(stats));
699
700 ret = vfs_statfs(cache->mnt->mnt_root, &stats);
701 if (ret < 0) {
702 if (ret == -EIO)
703 cachefiles_io_error(cache, "statfs failed");
704 _leave(" = %d", ret);
705 return ret;
706 }
707
708 stats.f_bavail >>= cache->bshift;
709
710 //_debug("avail %llu,%llu",
711 // (unsigned long long) stats.f_ffree,
712 // (unsigned long long) stats.f_bavail);
713
714 /* see if there is sufficient space */
715 if (stats.f_ffree > fnr)
716 stats.f_ffree -= fnr;
717 else
718 stats.f_ffree = 0;
719
720 if (stats.f_bavail > bnr)
721 stats.f_bavail -= bnr;
722 else
723 stats.f_bavail = 0;
724
725 ret = -ENOBUFS;
726 if (stats.f_ffree < cache->fstop ||
727 stats.f_bavail < cache->bstop)
728 goto begin_cull;
729
730 ret = 0;
731 if (stats.f_ffree < cache->fcull ||
732 stats.f_bavail < cache->bcull)
733 goto begin_cull;
734
735 if (test_bit(CACHEFILES_CULLING, &cache->flags) &&
736 stats.f_ffree >= cache->frun &&
737 stats.f_bavail >= cache->brun &&
738 test_and_clear_bit(CACHEFILES_CULLING, &cache->flags)
739 ) {
740 _debug("cease culling");
741 cachefiles_state_changed(cache);
742 }
743
744 //_leave(" = 0");
745 return 0;
746
747begin_cull:
748 if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) {
749 _debug("### CULL CACHE ###");
750 cachefiles_state_changed(cache);
751 }
752
753 _leave(" = %d", ret);
754 return ret;
755}
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
new file mode 100644
index 000000000000..1e962348d111
--- /dev/null
+++ b/fs/cachefiles/interface.c
@@ -0,0 +1,449 @@
1/* FS-Cache interface to CacheFiles
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/mount.h>
13#include <linux/buffer_head.h>
14#include "internal.h"
15
16#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
17
18struct cachefiles_lookup_data {
19 struct cachefiles_xattr *auxdata; /* auxiliary data */
20 char *key; /* key path */
21};
22
23static int cachefiles_attr_changed(struct fscache_object *_object);
24
25/*
26 * allocate an object record for a cookie lookup and prepare the lookup data
27 */
28static struct fscache_object *cachefiles_alloc_object(
29 struct fscache_cache *_cache,
30 struct fscache_cookie *cookie)
31{
32 struct cachefiles_lookup_data *lookup_data;
33 struct cachefiles_object *object;
34 struct cachefiles_cache *cache;
35 struct cachefiles_xattr *auxdata;
36 unsigned keylen, auxlen;
37 void *buffer;
38 char *key;
39
40 cache = container_of(_cache, struct cachefiles_cache, cache);
41
42 _enter("{%s},%p,", cache->cache.identifier, cookie);
43
44 lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL);
45 if (!lookup_data)
46 goto nomem_lookup_data;
47
48 /* create a new object record and a temporary leaf image */
49 object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL);
50 if (!object)
51 goto nomem_object;
52
53 ASSERTCMP(object->backer, ==, NULL);
54
55 BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
56 atomic_set(&object->usage, 1);
57
58 fscache_object_init(&object->fscache, cookie, &cache->cache);
59
60 object->type = cookie->def->type;
61
62 /* get hold of the raw key
63 * - stick the length on the front and leave space on the back for the
64 * encoder
65 */
66 buffer = kmalloc((2 + 512) + 3, GFP_KERNEL);
67 if (!buffer)
68 goto nomem_buffer;
69
70 keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512);
71 ASSERTCMP(keylen, <, 512);
72
73 *(uint16_t *)buffer = keylen;
74 ((char *)buffer)[keylen + 2] = 0;
75 ((char *)buffer)[keylen + 3] = 0;
76 ((char *)buffer)[keylen + 4] = 0;
77
78 /* turn the raw key into something that can work with as a filename */
79 key = cachefiles_cook_key(buffer, keylen + 2, object->type);
80 if (!key)
81 goto nomem_key;
82
83 /* get hold of the auxiliary data and prepend the object type */
84 auxdata = buffer;
85 auxlen = 0;
86 if (cookie->def->get_aux) {
87 auxlen = cookie->def->get_aux(cookie->netfs_data,
88 auxdata->data, 511);
89 ASSERTCMP(auxlen, <, 511);
90 }
91
92 auxdata->len = auxlen + 1;
93 auxdata->type = cookie->def->type;
94
95 lookup_data->auxdata = auxdata;
96 lookup_data->key = key;
97 object->lookup_data = lookup_data;
98
99 _leave(" = %p [%p]", &object->fscache, lookup_data);
100 return &object->fscache;
101
102nomem_key:
103 kfree(buffer);
104nomem_buffer:
105 BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
106 kmem_cache_free(cachefiles_object_jar, object);
107 fscache_object_destroyed(&cache->cache);
108nomem_object:
109 kfree(lookup_data);
110nomem_lookup_data:
111 _leave(" = -ENOMEM");
112 return ERR_PTR(-ENOMEM);
113}
114
115/*
116 * attempt to look up the nominated node in this cache
117 */
118static void cachefiles_lookup_object(struct fscache_object *_object)
119{
120 struct cachefiles_lookup_data *lookup_data;
121 struct cachefiles_object *parent, *object;
122 struct cachefiles_cache *cache;
123 const struct cred *saved_cred;
124 int ret;
125
126 _enter("{OBJ%x}", _object->debug_id);
127
128 cache = container_of(_object->cache, struct cachefiles_cache, cache);
129 parent = container_of(_object->parent,
130 struct cachefiles_object, fscache);
131 object = container_of(_object, struct cachefiles_object, fscache);
132 lookup_data = object->lookup_data;
133
134 ASSERTCMP(lookup_data, !=, NULL);
135
136 /* look up the key, creating any missing bits */
137 cachefiles_begin_secure(cache, &saved_cred);
138 ret = cachefiles_walk_to_object(parent, object,
139 lookup_data->key,
140 lookup_data->auxdata);
141 cachefiles_end_secure(cache, saved_cred);
142
143 /* polish off by setting the attributes of non-index files */
144 if (ret == 0 &&
145 object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
146 cachefiles_attr_changed(&object->fscache);
147
148 if (ret < 0) {
149 printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n",
150 ret);
151 fscache_object_lookup_error(&object->fscache);
152 }
153
154 _leave(" [%d]", ret);
155}
156
157/*
158 * indication of lookup completion
159 */
160static void cachefiles_lookup_complete(struct fscache_object *_object)
161{
162 struct cachefiles_object *object;
163
164 object = container_of(_object, struct cachefiles_object, fscache);
165
166 _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data);
167
168 if (object->lookup_data) {
169 kfree(object->lookup_data->key);
170 kfree(object->lookup_data->auxdata);
171 kfree(object->lookup_data);
172 object->lookup_data = NULL;
173 }
174}
175
176/*
177 * increment the usage count on an inode object (may fail if unmounting)
178 */
179static
180struct fscache_object *cachefiles_grab_object(struct fscache_object *_object)
181{
182 struct cachefiles_object *object =
183 container_of(_object, struct cachefiles_object, fscache);
184
185 _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage));
186
187#ifdef CACHEFILES_DEBUG_SLAB
188 ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
189#endif
190
191 atomic_inc(&object->usage);
192 return &object->fscache;
193}
194
195/*
196 * update the auxilliary data for an object object on disk
197 */
198static void cachefiles_update_object(struct fscache_object *_object)
199{
200 struct cachefiles_object *object;
201 struct cachefiles_xattr *auxdata;
202 struct cachefiles_cache *cache;
203 struct fscache_cookie *cookie;
204 const struct cred *saved_cred;
205 unsigned auxlen;
206
207 _enter("{OBJ%x}", _object->debug_id);
208
209 object = container_of(_object, struct cachefiles_object, fscache);
210 cache = container_of(object->fscache.cache, struct cachefiles_cache,
211 cache);
212 cookie = object->fscache.cookie;
213
214 if (!cookie->def->get_aux) {
215 _leave(" [no aux]");
216 return;
217 }
218
219 auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL);
220 if (!auxdata) {
221 _leave(" [nomem]");
222 return;
223 }
224
225 auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511);
226 ASSERTCMP(auxlen, <, 511);
227
228 auxdata->len = auxlen + 1;
229 auxdata->type = cookie->def->type;
230
231 cachefiles_begin_secure(cache, &saved_cred);
232 cachefiles_update_object_xattr(object, auxdata);
233 cachefiles_end_secure(cache, saved_cred);
234 kfree(auxdata);
235 _leave("");
236}
237
238/*
239 * discard the resources pinned by an object and effect retirement if
240 * requested
241 */
242static void cachefiles_drop_object(struct fscache_object *_object)
243{
244 struct cachefiles_object *object;
245 struct cachefiles_cache *cache;
246 const struct cred *saved_cred;
247
248 ASSERT(_object);
249
250 object = container_of(_object, struct cachefiles_object, fscache);
251
252 _enter("{OBJ%x,%d}",
253 object->fscache.debug_id, atomic_read(&object->usage));
254
255 cache = container_of(object->fscache.cache,
256 struct cachefiles_cache, cache);
257
258#ifdef CACHEFILES_DEBUG_SLAB
259 ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
260#endif
261
262 /* delete retired objects */
263 if (object->fscache.state == FSCACHE_OBJECT_RECYCLING &&
264 _object != cache->cache.fsdef
265 ) {
266 _debug("- retire object OBJ%x", object->fscache.debug_id);
267 cachefiles_begin_secure(cache, &saved_cred);
268 cachefiles_delete_object(cache, object);
269 cachefiles_end_secure(cache, saved_cred);
270 }
271
272 /* close the filesystem stuff attached to the object */
273 if (object->backer != object->dentry)
274 dput(object->backer);
275 object->backer = NULL;
276
277 /* note that the object is now inactive */
278 if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
279 write_lock(&cache->active_lock);
280 if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE,
281 &object->flags))
282 BUG();
283 rb_erase(&object->active_node, &cache->active_nodes);
284 wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
285 write_unlock(&cache->active_lock);
286 }
287
288 dput(object->dentry);
289 object->dentry = NULL;
290
291 _leave("");
292}
293
294/*
295 * dispose of a reference to an object
296 */
297static void cachefiles_put_object(struct fscache_object *_object)
298{
299 struct cachefiles_object *object;
300 struct fscache_cache *cache;
301
302 ASSERT(_object);
303
304 object = container_of(_object, struct cachefiles_object, fscache);
305
306 _enter("{OBJ%x,%d}",
307 object->fscache.debug_id, atomic_read(&object->usage));
308
309#ifdef CACHEFILES_DEBUG_SLAB
310 ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000);
311#endif
312
313 ASSERTIFCMP(object->fscache.parent,
314 object->fscache.parent->n_children, >, 0);
315
316 if (atomic_dec_and_test(&object->usage)) {
317 _debug("- kill object OBJ%x", object->fscache.debug_id);
318
319 ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
320 ASSERTCMP(object->fscache.parent, ==, NULL);
321 ASSERTCMP(object->backer, ==, NULL);
322 ASSERTCMP(object->dentry, ==, NULL);
323 ASSERTCMP(object->fscache.n_ops, ==, 0);
324 ASSERTCMP(object->fscache.n_children, ==, 0);
325
326 if (object->lookup_data) {
327 kfree(object->lookup_data->key);
328 kfree(object->lookup_data->auxdata);
329 kfree(object->lookup_data);
330 object->lookup_data = NULL;
331 }
332
333 cache = object->fscache.cache;
334 kmem_cache_free(cachefiles_object_jar, object);
335 fscache_object_destroyed(cache);
336 }
337
338 _leave("");
339}
340
341/*
342 * sync a cache
343 */
344static void cachefiles_sync_cache(struct fscache_cache *_cache)
345{
346 struct cachefiles_cache *cache;
347 const struct cred *saved_cred;
348 int ret;
349
350 _enter("%p", _cache);
351
352 cache = container_of(_cache, struct cachefiles_cache, cache);
353
354 /* make sure all pages pinned by operations on behalf of the netfs are
355 * written to disc */
356 cachefiles_begin_secure(cache, &saved_cred);
357 ret = fsync_super(cache->mnt->mnt_sb);
358 cachefiles_end_secure(cache, saved_cred);
359
360 if (ret == -EIO)
361 cachefiles_io_error(cache,
362 "Attempt to sync backing fs superblock"
363 " returned error %d",
364 ret);
365}
366
367/*
368 * notification the attributes on an object have changed
369 * - called with reads/writes excluded by FS-Cache
370 */
371static int cachefiles_attr_changed(struct fscache_object *_object)
372{
373 struct cachefiles_object *object;
374 struct cachefiles_cache *cache;
375 const struct cred *saved_cred;
376 struct iattr newattrs;
377 uint64_t ni_size;
378 loff_t oi_size;
379 int ret;
380
381 _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size);
382
383 _enter("{OBJ%x},[%llu]",
384 _object->debug_id, (unsigned long long) ni_size);
385
386 object = container_of(_object, struct cachefiles_object, fscache);
387 cache = container_of(object->fscache.cache,
388 struct cachefiles_cache, cache);
389
390 if (ni_size == object->i_size)
391 return 0;
392
393 if (!object->backer)
394 return -ENOBUFS;
395
396 ASSERT(S_ISREG(object->backer->d_inode->i_mode));
397
398 fscache_set_store_limit(&object->fscache, ni_size);
399
400 oi_size = i_size_read(object->backer->d_inode);
401 if (oi_size == ni_size)
402 return 0;
403
404 newattrs.ia_size = ni_size;
405 newattrs.ia_valid = ATTR_SIZE;
406
407 cachefiles_begin_secure(cache, &saved_cred);
408 mutex_lock(&object->backer->d_inode->i_mutex);
409 ret = notify_change(object->backer, &newattrs);
410 mutex_unlock(&object->backer->d_inode->i_mutex);
411 cachefiles_end_secure(cache, saved_cred);
412
413 if (ret == -EIO) {
414 fscache_set_store_limit(&object->fscache, 0);
415 cachefiles_io_error_obj(object, "Size set failed");
416 ret = -ENOBUFS;
417 }
418
419 _leave(" = %d", ret);
420 return ret;
421}
422
423/*
424 * dissociate a cache from all the pages it was backing
425 */
426static void cachefiles_dissociate_pages(struct fscache_cache *cache)
427{
428 _enter("");
429}
430
431const struct fscache_cache_ops cachefiles_cache_ops = {
432 .name = "cachefiles",
433 .alloc_object = cachefiles_alloc_object,
434 .lookup_object = cachefiles_lookup_object,
435 .lookup_complete = cachefiles_lookup_complete,
436 .grab_object = cachefiles_grab_object,
437 .update_object = cachefiles_update_object,
438 .drop_object = cachefiles_drop_object,
439 .put_object = cachefiles_put_object,
440 .sync_cache = cachefiles_sync_cache,
441 .attr_changed = cachefiles_attr_changed,
442 .read_or_alloc_page = cachefiles_read_or_alloc_page,
443 .read_or_alloc_pages = cachefiles_read_or_alloc_pages,
444 .allocate_page = cachefiles_allocate_page,
445 .allocate_pages = cachefiles_allocate_pages,
446 .write_page = cachefiles_write_page,
447 .uncache_page = cachefiles_uncache_page,
448 .dissociate_pages = cachefiles_dissociate_pages,
449};
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
new file mode 100644
index 000000000000..19218e1463d6
--- /dev/null
+++ b/fs/cachefiles/internal.h
@@ -0,0 +1,360 @@
1/* General netfs cache on cache files internal defs
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/fscache-cache.h>
13#include <linux/timer.h>
14#include <linux/wait.h>
15#include <linux/workqueue.h>
16#include <linux/security.h>
17
18struct cachefiles_cache;
19struct cachefiles_object;
20
21extern unsigned cachefiles_debug;
22#define CACHEFILES_DEBUG_KENTER 1
23#define CACHEFILES_DEBUG_KLEAVE 2
24#define CACHEFILES_DEBUG_KDEBUG 4
25
26/*
27 * node records
28 */
29struct cachefiles_object {
30 struct fscache_object fscache; /* fscache handle */
31 struct cachefiles_lookup_data *lookup_data; /* cached lookup data */
32 struct dentry *dentry; /* the file/dir representing this object */
33 struct dentry *backer; /* backing file */
34 loff_t i_size; /* object size */
35 unsigned long flags;
36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
37 atomic_t usage; /* object usage count */
38 uint8_t type; /* object type */
39 uint8_t new; /* T if object new */
40 spinlock_t work_lock;
41 struct rb_node active_node; /* link in active tree (dentry is key) */
42};
43
44extern struct kmem_cache *cachefiles_object_jar;
45
46/*
47 * Cache files cache definition
48 */
49struct cachefiles_cache {
50 struct fscache_cache cache; /* FS-Cache record */
51 struct vfsmount *mnt; /* mountpoint holding the cache */
52 struct dentry *graveyard; /* directory into which dead objects go */
53 struct file *cachefilesd; /* manager daemon handle */
54 const struct cred *cache_cred; /* security override for accessing cache */
55 struct mutex daemon_mutex; /* command serialisation mutex */
56 wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon */
57 struct rb_root active_nodes; /* active nodes (can't be culled) */
58 rwlock_t active_lock; /* lock for active_nodes */
59 atomic_t gravecounter; /* graveyard uniquifier */
60 unsigned frun_percent; /* when to stop culling (% files) */
61 unsigned fcull_percent; /* when to start culling (% files) */
62 unsigned fstop_percent; /* when to stop allocating (% files) */
63 unsigned brun_percent; /* when to stop culling (% blocks) */
64 unsigned bcull_percent; /* when to start culling (% blocks) */
65 unsigned bstop_percent; /* when to stop allocating (% blocks) */
66 unsigned bsize; /* cache's block size */
67 unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
68 uint64_t frun; /* when to stop culling */
69 uint64_t fcull; /* when to start culling */
70 uint64_t fstop; /* when to stop allocating */
71 sector_t brun; /* when to stop culling */
72 sector_t bcull; /* when to start culling */
73 sector_t bstop; /* when to stop allocating */
74 unsigned long flags;
75#define CACHEFILES_READY 0 /* T if cache prepared */
76#define CACHEFILES_DEAD 1 /* T if cache dead */
77#define CACHEFILES_CULLING 2 /* T if cull engaged */
78#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */
79 char *rootdirname; /* name of cache root directory */
80 char *secctx; /* LSM security context */
81 char *tag; /* cache binding tag */
82};
83
84/*
85 * backing file read tracking
86 */
87struct cachefiles_one_read {
88 wait_queue_t monitor; /* link into monitored waitqueue */
89 struct page *back_page; /* backing file page we're waiting for */
90 struct page *netfs_page; /* netfs page we're going to fill */
91 struct fscache_retrieval *op; /* retrieval op covering this */
92 struct list_head op_link; /* link in op's todo list */
93};
94
95/*
96 * backing file write tracking
97 */
98struct cachefiles_one_write {
99 struct page *netfs_page; /* netfs page to copy */
100 struct cachefiles_object *object;
101 struct list_head obj_link; /* link in object's lists */
102 fscache_rw_complete_t end_io_func;
103 void *context;
104};
105
106/*
107 * auxiliary data xattr buffer
108 */
109struct cachefiles_xattr {
110 uint16_t len;
111 uint8_t type;
112 uint8_t data[];
113};
114
115/*
116 * note change of state for daemon
117 */
118static inline void cachefiles_state_changed(struct cachefiles_cache *cache)
119{
120 set_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
121 wake_up_all(&cache->daemon_pollwq);
122}
123
124/*
125 * cf-bind.c
126 */
127extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args);
128extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache);
129
130/*
131 * cf-daemon.c
132 */
133extern const struct file_operations cachefiles_daemon_fops;
134
135extern int cachefiles_has_space(struct cachefiles_cache *cache,
136 unsigned fnr, unsigned bnr);
137
138/*
139 * cf-interface.c
140 */
141extern const struct fscache_cache_ops cachefiles_cache_ops;
142
143/*
144 * cf-key.c
145 */
146extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
147
148/*
149 * cf-namei.c
150 */
151extern int cachefiles_delete_object(struct cachefiles_cache *cache,
152 struct cachefiles_object *object);
153extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
154 struct cachefiles_object *object,
155 const char *key,
156 struct cachefiles_xattr *auxdata);
157extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
158 struct dentry *dir,
159 const char *name);
160
161extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
162 char *filename);
163
164extern int cachefiles_check_in_use(struct cachefiles_cache *cache,
165 struct dentry *dir, char *filename);
166
167/*
168 * cf-proc.c
169 */
170#ifdef CONFIG_CACHEFILES_HISTOGRAM
171extern atomic_t cachefiles_lookup_histogram[HZ];
172extern atomic_t cachefiles_mkdir_histogram[HZ];
173extern atomic_t cachefiles_create_histogram[HZ];
174
175extern int __init cachefiles_proc_init(void);
176extern void cachefiles_proc_cleanup(void);
177static inline
178void cachefiles_hist(atomic_t histogram[], unsigned long start_jif)
179{
180 unsigned long jif = jiffies - start_jif;
181 if (jif >= HZ)
182 jif = HZ - 1;
183 atomic_inc(&histogram[jif]);
184}
185
186#else
187#define cachefiles_proc_init() (0)
188#define cachefiles_proc_cleanup() do {} while (0)
189#define cachefiles_hist(hist, start_jif) do {} while (0)
190#endif
191
192/*
193 * cf-rdwr.c
194 */
195extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
196 struct page *, gfp_t);
197extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *,
198 struct list_head *, unsigned *,
199 gfp_t);
200extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *,
201 gfp_t);
202extern int cachefiles_allocate_pages(struct fscache_retrieval *,
203 struct list_head *, unsigned *, gfp_t);
204extern int cachefiles_write_page(struct fscache_storage *, struct page *);
205extern void cachefiles_uncache_page(struct fscache_object *, struct page *);
206
207/*
208 * cf-security.c
209 */
210extern int cachefiles_get_security_ID(struct cachefiles_cache *cache);
211extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
212 struct dentry *root,
213 const struct cred **_saved_cred);
214
215static inline void cachefiles_begin_secure(struct cachefiles_cache *cache,
216 const struct cred **_saved_cred)
217{
218 *_saved_cred = override_creds(cache->cache_cred);
219}
220
221static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
222 const struct cred *saved_cred)
223{
224 revert_creds(saved_cred);
225}
226
227/*
228 * cf-xattr.c
229 */
230extern int cachefiles_check_object_type(struct cachefiles_object *object);
231extern int cachefiles_set_object_xattr(struct cachefiles_object *object,
232 struct cachefiles_xattr *auxdata);
233extern int cachefiles_update_object_xattr(struct cachefiles_object *object,
234 struct cachefiles_xattr *auxdata);
235extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
236 struct cachefiles_xattr *auxdata);
237extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
238 struct dentry *dentry);
239
240
241/*
242 * error handling
243 */
244#define kerror(FMT, ...) printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__)
245
246#define cachefiles_io_error(___cache, FMT, ...) \
247do { \
248 kerror("I/O Error: " FMT, ##__VA_ARGS__); \
249 fscache_io_error(&(___cache)->cache); \
250 set_bit(CACHEFILES_DEAD, &(___cache)->flags); \
251} while (0)
252
253#define cachefiles_io_error_obj(object, FMT, ...) \
254do { \
255 struct cachefiles_cache *___cache; \
256 \
257 ___cache = container_of((object)->fscache.cache, \
258 struct cachefiles_cache, cache); \
259 cachefiles_io_error(___cache, FMT, ##__VA_ARGS__); \
260} while (0)
261
262
263/*
264 * debug tracing
265 */
266#define dbgprintk(FMT, ...) \
267 printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
268
269/* make sure we maintain the format strings, even when debugging is disabled */
270static inline void _dbprintk(const char *fmt, ...)
271 __attribute__((format(printf, 1, 2)));
272static inline void _dbprintk(const char *fmt, ...)
273{
274}
275
276#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
277#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
278#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
279
280
281#if defined(__KDEBUG)
282#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
283#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
284#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
285
286#elif defined(CONFIG_CACHEFILES_DEBUG)
287#define _enter(FMT, ...) \
288do { \
289 if (cachefiles_debug & CACHEFILES_DEBUG_KENTER) \
290 kenter(FMT, ##__VA_ARGS__); \
291} while (0)
292
293#define _leave(FMT, ...) \
294do { \
295 if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE) \
296 kleave(FMT, ##__VA_ARGS__); \
297} while (0)
298
299#define _debug(FMT, ...) \
300do { \
301 if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG) \
302 kdebug(FMT, ##__VA_ARGS__); \
303} while (0)
304
305#else
306#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
307#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
308#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
309#endif
310
311#if 1 /* defined(__KDEBUGALL) */
312
313#define ASSERT(X) \
314do { \
315 if (unlikely(!(X))) { \
316 printk(KERN_ERR "\n"); \
317 printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
318 BUG(); \
319 } \
320} while (0)
321
322#define ASSERTCMP(X, OP, Y) \
323do { \
324 if (unlikely(!((X) OP (Y)))) { \
325 printk(KERN_ERR "\n"); \
326 printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
327 printk(KERN_ERR "%lx " #OP " %lx is false\n", \
328 (unsigned long)(X), (unsigned long)(Y)); \
329 BUG(); \
330 } \
331} while (0)
332
333#define ASSERTIF(C, X) \
334do { \
335 if (unlikely((C) && !(X))) { \
336 printk(KERN_ERR "\n"); \
337 printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
338 BUG(); \
339 } \
340} while (0)
341
342#define ASSERTIFCMP(C, X, OP, Y) \
343do { \
344 if (unlikely((C) && !((X) OP (Y)))) { \
345 printk(KERN_ERR "\n"); \
346 printk(KERN_ERR "CacheFiles: Assertion failed\n"); \
347 printk(KERN_ERR "%lx " #OP " %lx is false\n", \
348 (unsigned long)(X), (unsigned long)(Y)); \
349 BUG(); \
350 } \
351} while (0)
352
353#else
354
355#define ASSERT(X) do {} while (0)
356#define ASSERTCMP(X, OP, Y) do {} while (0)
357#define ASSERTIF(C, X) do {} while (0)
358#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
359
360#endif
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c
new file mode 100644
index 000000000000..81b8b2b3a674
--- /dev/null
+++ b/fs/cachefiles/key.c
@@ -0,0 +1,159 @@
1/* Key to pathname encoder
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/slab.h>
13#include "internal.h"
14
15static const char cachefiles_charmap[64] =
16 "0123456789" /* 0 - 9 */
17 "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */
18 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */
19 "_-" /* 62 - 63 */
20 ;
21
22static const char cachefiles_filecharmap[256] = {
23 /* we skip space and tab and control chars */
24 [33 ... 46] = 1, /* '!' -> '.' */
25 /* we skip '/' as it's significant to pathwalk */
26 [48 ... 127] = 1, /* '0' -> '~' */
27};
28
29/*
30 * turn the raw key into something cooked
31 * - the raw key should include the length in the two bytes at the front
32 * - the key may be up to 514 bytes in length (including the length word)
33 * - "base64" encode the strange keys, mapping 3 bytes of raw to four of
34 * cooked
35 * - need to cut the cooked key into 252 char lengths (189 raw bytes)
36 */
37char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type)
38{
39 unsigned char csum, ch;
40 unsigned int acc;
41 char *key;
42 int loop, len, max, seg, mark, print;
43
44 _enter(",%d", keylen);
45
46 BUG_ON(keylen < 2 || keylen > 514);
47
48 csum = raw[0] + raw[1];
49 print = 1;
50 for (loop = 2; loop < keylen; loop++) {
51 ch = raw[loop];
52 csum += ch;
53 print &= cachefiles_filecharmap[ch];
54 }
55
56 if (print) {
57 /* if the path is usable ASCII, then we render it directly */
58 max = keylen - 2;
59 max += 2; /* two base64'd length chars on the front */
60 max += 5; /* @checksum/M */
61 max += 3 * 2; /* maximum number of segment dividers (".../M")
62 * is ((514 + 251) / 252) = 3
63 */
64 max += 1; /* NUL on end */
65 } else {
66 /* calculate the maximum length of the cooked key */
67 keylen = (keylen + 2) / 3;
68
69 max = keylen * 4;
70 max += 5; /* @checksum/M */
71 max += 3 * 2; /* maximum number of segment dividers (".../M")
72 * is ((514 + 188) / 189) = 3
73 */
74 max += 1; /* NUL on end */
75 }
76
77 max += 1; /* 2nd NUL on end */
78
79 _debug("max: %d", max);
80
81 key = kmalloc(max, GFP_KERNEL);
82 if (!key)
83 return NULL;
84
85 len = 0;
86
87 /* build the cooked key */
88 sprintf(key, "@%02x%c+", (unsigned) csum, 0);
89 len = 5;
90 mark = len - 1;
91
92 if (print) {
93 acc = *(uint16_t *) raw;
94 raw += 2;
95
96 key[len + 1] = cachefiles_charmap[acc & 63];
97 acc >>= 6;
98 key[len] = cachefiles_charmap[acc & 63];
99 len += 2;
100
101 seg = 250;
102 for (loop = keylen; loop > 0; loop--) {
103 if (seg <= 0) {
104 key[len++] = '\0';
105 mark = len;
106 key[len++] = '+';
107 seg = 252;
108 }
109
110 key[len++] = *raw++;
111 ASSERT(len < max);
112 }
113
114 switch (type) {
115 case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break;
116 case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break;
117 default: type = 'S'; break;
118 }
119 } else {
120 seg = 252;
121 for (loop = keylen; loop > 0; loop--) {
122 if (seg <= 0) {
123 key[len++] = '\0';
124 mark = len;
125 key[len++] = '+';
126 seg = 252;
127 }
128
129 acc = *raw++;
130 acc |= *raw++ << 8;
131 acc |= *raw++ << 16;
132
133 _debug("acc: %06x", acc);
134
135 key[len++] = cachefiles_charmap[acc & 63];
136 acc >>= 6;
137 key[len++] = cachefiles_charmap[acc & 63];
138 acc >>= 6;
139 key[len++] = cachefiles_charmap[acc & 63];
140 acc >>= 6;
141 key[len++] = cachefiles_charmap[acc & 63];
142
143 ASSERT(len < max);
144 }
145
146 switch (type) {
147 case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break;
148 case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break;
149 default: type = 'T'; break;
150 }
151 }
152
153 key[mark] = type;
154 key[len++] = 0;
155 key[len] = 0;
156
157 _leave(" = %p %d", key, len);
158 return key;
159}
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c
new file mode 100644
index 000000000000..4bfa8cf43bf5
--- /dev/null
+++ b/fs/cachefiles/main.c
@@ -0,0 +1,106 @@
1/* Network filesystem caching backend to use cache files on a premounted
2 * filesystem
3 *
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public Licence
9 * as published by the Free Software Foundation; either version
10 * 2 of the Licence, or (at your option) any later version.
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/sched.h>
16#include <linux/completion.h>
17#include <linux/slab.h>
18#include <linux/fs.h>
19#include <linux/file.h>
20#include <linux/namei.h>
21#include <linux/mount.h>
22#include <linux/statfs.h>
23#include <linux/sysctl.h>
24#include <linux/miscdevice.h>
25#include "internal.h"
26
27unsigned cachefiles_debug;
28module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO);
29MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask");
30
31MODULE_DESCRIPTION("Mounted-filesystem based cache");
32MODULE_AUTHOR("Red Hat, Inc.");
33MODULE_LICENSE("GPL");
34
35struct kmem_cache *cachefiles_object_jar;
36
37static struct miscdevice cachefiles_dev = {
38 .minor = MISC_DYNAMIC_MINOR,
39 .name = "cachefiles",
40 .fops = &cachefiles_daemon_fops,
41};
42
43static void cachefiles_object_init_once(void *_object)
44{
45 struct cachefiles_object *object = _object;
46
47 memset(object, 0, sizeof(*object));
48 spin_lock_init(&object->work_lock);
49}
50
51/*
52 * initialise the fs caching module
53 */
54static int __init cachefiles_init(void)
55{
56 int ret;
57
58 ret = misc_register(&cachefiles_dev);
59 if (ret < 0)
60 goto error_dev;
61
62 /* create an object jar */
63 ret = -ENOMEM;
64 cachefiles_object_jar =
65 kmem_cache_create("cachefiles_object_jar",
66 sizeof(struct cachefiles_object),
67 0,
68 SLAB_HWCACHE_ALIGN,
69 cachefiles_object_init_once);
70 if (!cachefiles_object_jar) {
71 printk(KERN_NOTICE
72 "CacheFiles: Failed to allocate an object jar\n");
73 goto error_object_jar;
74 }
75
76 ret = cachefiles_proc_init();
77 if (ret < 0)
78 goto error_proc;
79
80 printk(KERN_INFO "CacheFiles: Loaded\n");
81 return 0;
82
83error_proc:
84 kmem_cache_destroy(cachefiles_object_jar);
85error_object_jar:
86 misc_deregister(&cachefiles_dev);
87error_dev:
88 kerror("failed to register: %d", ret);
89 return ret;
90}
91
92fs_initcall(cachefiles_init);
93
94/*
95 * clean up on module removal
96 */
97static void __exit cachefiles_exit(void)
98{
99 printk(KERN_INFO "CacheFiles: Unloading\n");
100
101 cachefiles_proc_cleanup();
102 kmem_cache_destroy(cachefiles_object_jar);
103 misc_deregister(&cachefiles_dev);
104}
105
106module_exit(cachefiles_exit);
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
new file mode 100644
index 000000000000..4ce818ae39ea
--- /dev/null
+++ b/fs/cachefiles/namei.c
@@ -0,0 +1,771 @@
1/* CacheFiles path walking and related routines
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/sched.h>
14#include <linux/file.h>
15#include <linux/fs.h>
16#include <linux/fsnotify.h>
17#include <linux/quotaops.h>
18#include <linux/xattr.h>
19#include <linux/mount.h>
20#include <linux/namei.h>
21#include <linux/security.h>
22#include "internal.h"
23
24static int cachefiles_wait_bit(void *flags)
25{
26 schedule();
27 return 0;
28}
29
30/*
31 * record the fact that an object is now active
32 */
33static void cachefiles_mark_object_active(struct cachefiles_cache *cache,
34 struct cachefiles_object *object)
35{
36 struct cachefiles_object *xobject;
37 struct rb_node **_p, *_parent = NULL;
38 struct dentry *dentry;
39
40 _enter(",%p", object);
41
42try_again:
43 write_lock(&cache->active_lock);
44
45 if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags))
46 BUG();
47
48 dentry = object->dentry;
49 _p = &cache->active_nodes.rb_node;
50 while (*_p) {
51 _parent = *_p;
52 xobject = rb_entry(_parent,
53 struct cachefiles_object, active_node);
54
55 ASSERT(xobject != object);
56
57 if (xobject->dentry > dentry)
58 _p = &(*_p)->rb_left;
59 else if (xobject->dentry < dentry)
60 _p = &(*_p)->rb_right;
61 else
62 goto wait_for_old_object;
63 }
64
65 rb_link_node(&object->active_node, _parent, _p);
66 rb_insert_color(&object->active_node, &cache->active_nodes);
67
68 write_unlock(&cache->active_lock);
69 _leave("");
70 return;
71
72 /* an old object from a previous incarnation is hogging the slot - we
73 * need to wait for it to be destroyed */
74wait_for_old_object:
75 if (xobject->fscache.state < FSCACHE_OBJECT_DYING) {
76 printk(KERN_ERR "\n");
77 printk(KERN_ERR "CacheFiles: Error:"
78 " Unexpected object collision\n");
79 printk(KERN_ERR "xobject: OBJ%x\n",
80 xobject->fscache.debug_id);
81 printk(KERN_ERR "xobjstate=%s\n",
82 fscache_object_states[xobject->fscache.state]);
83 printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags);
84 printk(KERN_ERR "xobjevent=%lx [%lx]\n",
85 xobject->fscache.events, xobject->fscache.event_mask);
86 printk(KERN_ERR "xops=%u inp=%u exc=%u\n",
87 xobject->fscache.n_ops, xobject->fscache.n_in_progress,
88 xobject->fscache.n_exclusive);
89 printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n",
90 xobject->fscache.cookie,
91 xobject->fscache.cookie->parent,
92 xobject->fscache.cookie->netfs_data,
93 xobject->fscache.cookie->flags);
94 printk(KERN_ERR "xparent=%p\n",
95 xobject->fscache.parent);
96 printk(KERN_ERR "object: OBJ%x\n",
97 object->fscache.debug_id);
98 printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n",
99 object->fscache.cookie,
100 object->fscache.cookie->parent,
101 object->fscache.cookie->netfs_data,
102 object->fscache.cookie->flags);
103 printk(KERN_ERR "parent=%p\n",
104 object->fscache.parent);
105 BUG();
106 }
107 atomic_inc(&xobject->usage);
108 write_unlock(&cache->active_lock);
109
110 _debug(">>> wait");
111 wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE,
112 cachefiles_wait_bit, TASK_UNINTERRUPTIBLE);
113 _debug("<<< waited");
114
115 cache->cache.ops->put_object(&xobject->fscache);
116 goto try_again;
117}
118
119/*
120 * delete an object representation from the cache
121 * - file backed objects are unlinked
122 * - directory backed objects are stuffed into the graveyard for userspace to
123 * delete
124 * - unlocks the directory mutex
125 */
126static int cachefiles_bury_object(struct cachefiles_cache *cache,
127 struct dentry *dir,
128 struct dentry *rep)
129{
130 struct dentry *grave, *trap;
131 char nbuffer[8 + 8 + 1];
132 int ret;
133
134 _enter(",'%*.*s','%*.*s'",
135 dir->d_name.len, dir->d_name.len, dir->d_name.name,
136 rep->d_name.len, rep->d_name.len, rep->d_name.name);
137
138 /* non-directories can just be unlinked */
139 if (!S_ISDIR(rep->d_inode->i_mode)) {
140 _debug("unlink stale object");
141 ret = vfs_unlink(dir->d_inode, rep);
142
143 mutex_unlock(&dir->d_inode->i_mutex);
144
145 if (ret == -EIO)
146 cachefiles_io_error(cache, "Unlink failed");
147
148 _leave(" = %d", ret);
149 return ret;
150 }
151
152 /* directories have to be moved to the graveyard */
153 _debug("move stale object to graveyard");
154 mutex_unlock(&dir->d_inode->i_mutex);
155
156try_again:
157 /* first step is to make up a grave dentry in the graveyard */
158 sprintf(nbuffer, "%08x%08x",
159 (uint32_t) get_seconds(),
160 (uint32_t) atomic_inc_return(&cache->gravecounter));
161
162 /* do the multiway lock magic */
163 trap = lock_rename(cache->graveyard, dir);
164
165 /* do some checks before getting the grave dentry */
166 if (rep->d_parent != dir) {
167 /* the entry was probably culled when we dropped the parent dir
168 * lock */
169 unlock_rename(cache->graveyard, dir);
170 _leave(" = 0 [culled?]");
171 return 0;
172 }
173
174 if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) {
175 unlock_rename(cache->graveyard, dir);
176 cachefiles_io_error(cache, "Graveyard no longer a directory");
177 return -EIO;
178 }
179
180 if (trap == rep) {
181 unlock_rename(cache->graveyard, dir);
182 cachefiles_io_error(cache, "May not make directory loop");
183 return -EIO;
184 }
185
186 if (d_mountpoint(rep)) {
187 unlock_rename(cache->graveyard, dir);
188 cachefiles_io_error(cache, "Mountpoint in cache");
189 return -EIO;
190 }
191
192 grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
193 if (IS_ERR(grave)) {
194 unlock_rename(cache->graveyard, dir);
195
196 if (PTR_ERR(grave) == -ENOMEM) {
197 _leave(" = -ENOMEM");
198 return -ENOMEM;
199 }
200
201 cachefiles_io_error(cache, "Lookup error %ld",
202 PTR_ERR(grave));
203 return -EIO;
204 }
205
206 if (grave->d_inode) {
207 unlock_rename(cache->graveyard, dir);
208 dput(grave);
209 grave = NULL;
210 cond_resched();
211 goto try_again;
212 }
213
214 if (d_mountpoint(grave)) {
215 unlock_rename(cache->graveyard, dir);
216 dput(grave);
217 cachefiles_io_error(cache, "Mountpoint in graveyard");
218 return -EIO;
219 }
220
221 /* target should not be an ancestor of source */
222 if (trap == grave) {
223 unlock_rename(cache->graveyard, dir);
224 dput(grave);
225 cachefiles_io_error(cache, "May not make directory loop");
226 return -EIO;
227 }
228
229 /* attempt the rename */
230 ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave);
231 if (ret != 0 && ret != -ENOMEM)
232 cachefiles_io_error(cache, "Rename failed with error %d", ret);
233
234 unlock_rename(cache->graveyard, dir);
235 dput(grave);
236 _leave(" = 0");
237 return 0;
238}
239
240/*
241 * delete an object representation from the cache
242 */
243int cachefiles_delete_object(struct cachefiles_cache *cache,
244 struct cachefiles_object *object)
245{
246 struct dentry *dir;
247 int ret;
248
249 _enter(",{%p}", object->dentry);
250
251 ASSERT(object->dentry);
252 ASSERT(object->dentry->d_inode);
253 ASSERT(object->dentry->d_parent);
254
255 dir = dget_parent(object->dentry);
256
257 mutex_lock(&dir->d_inode->i_mutex);
258 ret = cachefiles_bury_object(cache, dir, object->dentry);
259
260 dput(dir);
261 _leave(" = %d", ret);
262 return ret;
263}
264
265/*
266 * walk from the parent object to the child object through the backing
267 * filesystem, creating directories as we go
268 */
269int cachefiles_walk_to_object(struct cachefiles_object *parent,
270 struct cachefiles_object *object,
271 const char *key,
272 struct cachefiles_xattr *auxdata)
273{
274 struct cachefiles_cache *cache;
275 struct dentry *dir, *next = NULL;
276 unsigned long start;
277 const char *name;
278 int ret, nlen;
279
280 _enter("{%p},,%s,", parent->dentry, key);
281
282 cache = container_of(parent->fscache.cache,
283 struct cachefiles_cache, cache);
284
285 ASSERT(parent->dentry);
286 ASSERT(parent->dentry->d_inode);
287
288 if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) {
289 // TODO: convert file to dir
290 _leave("looking up in none directory");
291 return -ENOBUFS;
292 }
293
294 dir = dget(parent->dentry);
295
296advance:
297 /* attempt to transit the first directory component */
298 name = key;
299 nlen = strlen(key);
300
301 /* key ends in a double NUL */
302 key = key + nlen + 1;
303 if (!*key)
304 key = NULL;
305
306lookup_again:
307 /* search the current directory for the element name */
308 _debug("lookup '%s'", name);
309
310 mutex_lock(&dir->d_inode->i_mutex);
311
312 start = jiffies;
313 next = lookup_one_len(name, dir, nlen);
314 cachefiles_hist(cachefiles_lookup_histogram, start);
315 if (IS_ERR(next))
316 goto lookup_error;
317
318 _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
319
320 if (!key)
321 object->new = !next->d_inode;
322
323 /* if this element of the path doesn't exist, then the lookup phase
324 * failed, and we can release any readers in the certain knowledge that
325 * there's nothing for them to actually read */
326 if (!next->d_inode)
327 fscache_object_lookup_negative(&object->fscache);
328
329 /* we need to create the object if it's negative */
330 if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
331 /* index objects and intervening tree levels must be subdirs */
332 if (!next->d_inode) {
333 ret = cachefiles_has_space(cache, 1, 0);
334 if (ret < 0)
335 goto create_error;
336
337 start = jiffies;
338 ret = vfs_mkdir(dir->d_inode, next, 0);
339 cachefiles_hist(cachefiles_mkdir_histogram, start);
340 if (ret < 0)
341 goto create_error;
342
343 ASSERT(next->d_inode);
344
345 _debug("mkdir -> %p{%p{ino=%lu}}",
346 next, next->d_inode, next->d_inode->i_ino);
347
348 } else if (!S_ISDIR(next->d_inode->i_mode)) {
349 kerror("inode %lu is not a directory",
350 next->d_inode->i_ino);
351 ret = -ENOBUFS;
352 goto error;
353 }
354
355 } else {
356 /* non-index objects start out life as files */
357 if (!next->d_inode) {
358 ret = cachefiles_has_space(cache, 1, 0);
359 if (ret < 0)
360 goto create_error;
361
362 start = jiffies;
363 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
364 cachefiles_hist(cachefiles_create_histogram, start);
365 if (ret < 0)
366 goto create_error;
367
368 ASSERT(next->d_inode);
369
370 _debug("create -> %p{%p{ino=%lu}}",
371 next, next->d_inode, next->d_inode->i_ino);
372
373 } else if (!S_ISDIR(next->d_inode->i_mode) &&
374 !S_ISREG(next->d_inode->i_mode)
375 ) {
376 kerror("inode %lu is not a file or directory",
377 next->d_inode->i_ino);
378 ret = -ENOBUFS;
379 goto error;
380 }
381 }
382
383 /* process the next component */
384 if (key) {
385 _debug("advance");
386 mutex_unlock(&dir->d_inode->i_mutex);
387 dput(dir);
388 dir = next;
389 next = NULL;
390 goto advance;
391 }
392
393 /* we've found the object we were looking for */
394 object->dentry = next;
395
396 /* if we've found that the terminal object exists, then we need to
397 * check its attributes and delete it if it's out of date */
398 if (!object->new) {
399 _debug("validate '%*.*s'",
400 next->d_name.len, next->d_name.len, next->d_name.name);
401
402 ret = cachefiles_check_object_xattr(object, auxdata);
403 if (ret == -ESTALE) {
404 /* delete the object (the deleter drops the directory
405 * mutex) */
406 object->dentry = NULL;
407
408 ret = cachefiles_bury_object(cache, dir, next);
409 dput(next);
410 next = NULL;
411
412 if (ret < 0)
413 goto delete_error;
414
415 _debug("redo lookup");
416 goto lookup_again;
417 }
418 }
419
420 /* note that we're now using this object */
421 cachefiles_mark_object_active(cache, object);
422
423 mutex_unlock(&dir->d_inode->i_mutex);
424 dput(dir);
425 dir = NULL;
426
427 _debug("=== OBTAINED_OBJECT ===");
428
429 if (object->new) {
430 /* attach data to a newly constructed terminal object */
431 ret = cachefiles_set_object_xattr(object, auxdata);
432 if (ret < 0)
433 goto check_error;
434 } else {
435 /* always update the atime on an object we've just looked up
436 * (this is used to keep track of culling, and atimes are only
437 * updated by read, write and readdir but not lookup or
438 * open) */
439 touch_atime(cache->mnt, next);
440 }
441
442 /* open a file interface onto a data file */
443 if (object->type != FSCACHE_COOKIE_TYPE_INDEX) {
444 if (S_ISREG(object->dentry->d_inode->i_mode)) {
445 const struct address_space_operations *aops;
446
447 ret = -EPERM;
448 aops = object->dentry->d_inode->i_mapping->a_ops;
449 if (!aops->bmap)
450 goto check_error;
451
452 object->backer = object->dentry;
453 } else {
454 BUG(); // TODO: open file in data-class subdir
455 }
456 }
457
458 object->new = 0;
459 fscache_obtained_object(&object->fscache);
460
461 _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
462 return 0;
463
464create_error:
465 _debug("create error %d", ret);
466 if (ret == -EIO)
467 cachefiles_io_error(cache, "Create/mkdir failed");
468 goto error;
469
470check_error:
471 _debug("check error %d", ret);
472 write_lock(&cache->active_lock);
473 rb_erase(&object->active_node, &cache->active_nodes);
474 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
475 wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
476 write_unlock(&cache->active_lock);
477
478 dput(object->dentry);
479 object->dentry = NULL;
480 goto error_out;
481
482delete_error:
483 _debug("delete error %d", ret);
484 goto error_out2;
485
486lookup_error:
487 _debug("lookup error %ld", PTR_ERR(next));
488 ret = PTR_ERR(next);
489 if (ret == -EIO)
490 cachefiles_io_error(cache, "Lookup failed");
491 next = NULL;
492error:
493 mutex_unlock(&dir->d_inode->i_mutex);
494 dput(next);
495error_out2:
496 dput(dir);
497error_out:
498 if (ret == -ENOSPC)
499 ret = -ENOBUFS;
500
501 _leave(" = error %d", -ret);
502 return ret;
503}
504
505/*
506 * get a subdirectory
507 */
508struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
509 struct dentry *dir,
510 const char *dirname)
511{
512 struct dentry *subdir;
513 unsigned long start;
514 int ret;
515
516 _enter(",,%s", dirname);
517
518 /* search the current directory for the element name */
519 mutex_lock(&dir->d_inode->i_mutex);
520
521 start = jiffies;
522 subdir = lookup_one_len(dirname, dir, strlen(dirname));
523 cachefiles_hist(cachefiles_lookup_histogram, start);
524 if (IS_ERR(subdir)) {
525 if (PTR_ERR(subdir) == -ENOMEM)
526 goto nomem_d_alloc;
527 goto lookup_error;
528 }
529
530 _debug("subdir -> %p %s",
531 subdir, subdir->d_inode ? "positive" : "negative");
532
533 /* we need to create the subdir if it doesn't exist yet */
534 if (!subdir->d_inode) {
535 ret = cachefiles_has_space(cache, 1, 0);
536 if (ret < 0)
537 goto mkdir_error;
538
539 _debug("attempt mkdir");
540
541 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
542 if (ret < 0)
543 goto mkdir_error;
544
545 ASSERT(subdir->d_inode);
546
547 _debug("mkdir -> %p{%p{ino=%lu}}",
548 subdir,
549 subdir->d_inode,
550 subdir->d_inode->i_ino);
551 }
552
553 mutex_unlock(&dir->d_inode->i_mutex);
554
555 /* we need to make sure the subdir is a directory */
556 ASSERT(subdir->d_inode);
557
558 if (!S_ISDIR(subdir->d_inode->i_mode)) {
559 kerror("%s is not a directory", dirname);
560 ret = -EIO;
561 goto check_error;
562 }
563
564 ret = -EPERM;
565 if (!subdir->d_inode->i_op ||
566 !subdir->d_inode->i_op->setxattr ||
567 !subdir->d_inode->i_op->getxattr ||
568 !subdir->d_inode->i_op->lookup ||
569 !subdir->d_inode->i_op->mkdir ||
570 !subdir->d_inode->i_op->create ||
571 !subdir->d_inode->i_op->rename ||
572 !subdir->d_inode->i_op->rmdir ||
573 !subdir->d_inode->i_op->unlink)
574 goto check_error;
575
576 _leave(" = [%lu]", subdir->d_inode->i_ino);
577 return subdir;
578
579check_error:
580 dput(subdir);
581 _leave(" = %d [check]", ret);
582 return ERR_PTR(ret);
583
584mkdir_error:
585 mutex_unlock(&dir->d_inode->i_mutex);
586 dput(subdir);
587 kerror("mkdir %s failed with error %d", dirname, ret);
588 return ERR_PTR(ret);
589
590lookup_error:
591 mutex_unlock(&dir->d_inode->i_mutex);
592 ret = PTR_ERR(subdir);
593 kerror("Lookup %s failed with error %d", dirname, ret);
594 return ERR_PTR(ret);
595
596nomem_d_alloc:
597 mutex_unlock(&dir->d_inode->i_mutex);
598 _leave(" = -ENOMEM");
599 return ERR_PTR(-ENOMEM);
600}
601
602/*
603 * find out if an object is in use or not
604 * - if finds object and it's not in use:
605 * - returns a pointer to the object and a reference on it
606 * - returns with the directory locked
607 */
608static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
609 struct dentry *dir,
610 char *filename)
611{
612 struct cachefiles_object *object;
613 struct rb_node *_n;
614 struct dentry *victim;
615 unsigned long start;
616 int ret;
617
618 //_enter(",%*.*s/,%s",
619 // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
620
621 /* look up the victim */
622 mutex_lock_nested(&dir->d_inode->i_mutex, 1);
623
624 start = jiffies;
625 victim = lookup_one_len(filename, dir, strlen(filename));
626 cachefiles_hist(cachefiles_lookup_histogram, start);
627 if (IS_ERR(victim))
628 goto lookup_error;
629
630 //_debug("victim -> %p %s",
631 // victim, victim->d_inode ? "positive" : "negative");
632
633 /* if the object is no longer there then we probably retired the object
634 * at the netfs's request whilst the cull was in progress
635 */
636 if (!victim->d_inode) {
637 mutex_unlock(&dir->d_inode->i_mutex);
638 dput(victim);
639 _leave(" = -ENOENT [absent]");
640 return ERR_PTR(-ENOENT);
641 }
642
643 /* check to see if we're using this object */
644 read_lock(&cache->active_lock);
645
646 _n = cache->active_nodes.rb_node;
647
648 while (_n) {
649 object = rb_entry(_n, struct cachefiles_object, active_node);
650
651 if (object->dentry > victim)
652 _n = _n->rb_left;
653 else if (object->dentry < victim)
654 _n = _n->rb_right;
655 else
656 goto object_in_use;
657 }
658
659 read_unlock(&cache->active_lock);
660
661 //_leave(" = %p", victim);
662 return victim;
663
664object_in_use:
665 read_unlock(&cache->active_lock);
666 mutex_unlock(&dir->d_inode->i_mutex);
667 dput(victim);
668 //_leave(" = -EBUSY [in use]");
669 return ERR_PTR(-EBUSY);
670
671lookup_error:
672 mutex_unlock(&dir->d_inode->i_mutex);
673 ret = PTR_ERR(victim);
674 if (ret == -ENOENT) {
675 /* file or dir now absent - probably retired by netfs */
676 _leave(" = -ESTALE [absent]");
677 return ERR_PTR(-ESTALE);
678 }
679
680 if (ret == -EIO) {
681 cachefiles_io_error(cache, "Lookup failed");
682 } else if (ret != -ENOMEM) {
683 kerror("Internal error: %d", ret);
684 ret = -EIO;
685 }
686
687 _leave(" = %d", ret);
688 return ERR_PTR(ret);
689}
690
691/*
692 * cull an object if it's not in use
693 * - called only by cache manager daemon
694 */
695int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
696 char *filename)
697{
698 struct dentry *victim;
699 int ret;
700
701 _enter(",%*.*s/,%s",
702 dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
703
704 victim = cachefiles_check_active(cache, dir, filename);
705 if (IS_ERR(victim))
706 return PTR_ERR(victim);
707
708 _debug("victim -> %p %s",
709 victim, victim->d_inode ? "positive" : "negative");
710
711 /* okay... the victim is not being used so we can cull it
712 * - start by marking it as stale
713 */
714 _debug("victim is cullable");
715
716 ret = cachefiles_remove_object_xattr(cache, victim);
717 if (ret < 0)
718 goto error_unlock;
719
720 /* actually remove the victim (drops the dir mutex) */
721 _debug("bury");
722
723 ret = cachefiles_bury_object(cache, dir, victim);
724 if (ret < 0)
725 goto error;
726
727 dput(victim);
728 _leave(" = 0");
729 return 0;
730
731error_unlock:
732 mutex_unlock(&dir->d_inode->i_mutex);
733error:
734 dput(victim);
735 if (ret == -ENOENT) {
736 /* file or dir now absent - probably retired by netfs */
737 _leave(" = -ESTALE [absent]");
738 return -ESTALE;
739 }
740
741 if (ret != -ENOMEM) {
742 kerror("Internal error: %d", ret);
743 ret = -EIO;
744 }
745
746 _leave(" = %d", ret);
747 return ret;
748}
749
750/*
751 * find out if an object is in use or not
752 * - called only by cache manager daemon
753 * - returns -EBUSY or 0 to indicate whether an object is in use or not
754 */
755int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
756 char *filename)
757{
758 struct dentry *victim;
759
760 //_enter(",%*.*s/,%s",
761 // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
762
763 victim = cachefiles_check_active(cache, dir, filename);
764 if (IS_ERR(victim))
765 return PTR_ERR(victim);
766
767 mutex_unlock(&dir->d_inode->i_mutex);
768 dput(victim);
769 //_leave(" = 0");
770 return 0;
771}
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c
new file mode 100644
index 000000000000..eccd33941199
--- /dev/null
+++ b/fs/cachefiles/proc.c
@@ -0,0 +1,134 @@
1/* CacheFiles statistics
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/proc_fs.h>
14#include <linux/seq_file.h>
15#include "internal.h"
16
17atomic_t cachefiles_lookup_histogram[HZ];
18atomic_t cachefiles_mkdir_histogram[HZ];
19atomic_t cachefiles_create_histogram[HZ];
20
21/*
22 * display the latency histogram
23 */
24static int cachefiles_histogram_show(struct seq_file *m, void *v)
25{
26 unsigned long index;
27 unsigned x, y, z, t;
28
29 switch ((unsigned long) v) {
30 case 1:
31 seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n");
32 return 0;
33 case 2:
34 seq_puts(m, "===== ===== ========= ========= =========\n");
35 return 0;
36 default:
37 index = (unsigned long) v - 3;
38 x = atomic_read(&cachefiles_lookup_histogram[index]);
39 y = atomic_read(&cachefiles_mkdir_histogram[index]);
40 z = atomic_read(&cachefiles_create_histogram[index]);
41 if (x == 0 && y == 0 && z == 0)
42 return 0;
43
44 t = (index * 1000) / HZ;
45
46 seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z);
47 return 0;
48 }
49}
50
51/*
52 * set up the iterator to start reading from the first line
53 */
54static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos)
55{
56 if ((unsigned long long)*_pos >= HZ + 2)
57 return NULL;
58 if (*_pos == 0)
59 *_pos = 1;
60 return (void *)(unsigned long) *_pos;
61}
62
63/*
64 * move to the next line
65 */
66static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos)
67{
68 (*pos)++;
69 return (unsigned long long)*pos > HZ + 2 ?
70 NULL : (void *)(unsigned long) *pos;
71}
72
73/*
74 * clean up after reading
75 */
76static void cachefiles_histogram_stop(struct seq_file *m, void *v)
77{
78}
79
80static const struct seq_operations cachefiles_histogram_ops = {
81 .start = cachefiles_histogram_start,
82 .stop = cachefiles_histogram_stop,
83 .next = cachefiles_histogram_next,
84 .show = cachefiles_histogram_show,
85};
86
87/*
88 * open "/proc/fs/cachefiles/XXX" which provide statistics summaries
89 */
90static int cachefiles_histogram_open(struct inode *inode, struct file *file)
91{
92 return seq_open(file, &cachefiles_histogram_ops);
93}
94
95static const struct file_operations cachefiles_histogram_fops = {
96 .owner = THIS_MODULE,
97 .open = cachefiles_histogram_open,
98 .read = seq_read,
99 .llseek = seq_lseek,
100 .release = seq_release,
101};
102
103/*
104 * initialise the /proc/fs/cachefiles/ directory
105 */
106int __init cachefiles_proc_init(void)
107{
108 _enter("");
109
110 if (!proc_mkdir("fs/cachefiles", NULL))
111 goto error_dir;
112
113 if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL,
114 &cachefiles_histogram_fops))
115 goto error_histogram;
116
117 _leave(" = 0");
118 return 0;
119
120error_histogram:
121 remove_proc_entry("fs/cachefiles", NULL);
122error_dir:
123 _leave(" = -ENOMEM");
124 return -ENOMEM;
125}
126
127/*
128 * clean up the /proc/fs/cachefiles/ directory
129 */
130void cachefiles_proc_cleanup(void)
131{
132 remove_proc_entry("fs/cachefiles/histogram", NULL);
133 remove_proc_entry("fs/cachefiles", NULL);
134}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
new file mode 100644
index 000000000000..a69787e7dd96
--- /dev/null
+++ b/fs/cachefiles/rdwr.c
@@ -0,0 +1,879 @@
1/* Storage object read/write
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/mount.h>
13#include <linux/file.h>
14#include "internal.h"
15
16/*
17 * detect wake up events generated by the unlocking of pages in which we're
18 * interested
19 * - we use this to detect read completion of backing pages
20 * - the caller holds the waitqueue lock
21 */
22static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
23 int sync, void *_key)
24{
25 struct cachefiles_one_read *monitor =
26 container_of(wait, struct cachefiles_one_read, monitor);
27 struct cachefiles_object *object;
28 struct wait_bit_key *key = _key;
29 struct page *page = wait->private;
30
31 ASSERT(key);
32
33 _enter("{%lu},%u,%d,{%p,%u}",
34 monitor->netfs_page->index, mode, sync,
35 key->flags, key->bit_nr);
36
37 if (key->flags != &page->flags ||
38 key->bit_nr != PG_locked)
39 return 0;
40
41 _debug("--- monitor %p %lx ---", page, page->flags);
42
43 if (!PageUptodate(page) && !PageError(page))
44 dump_stack();
45
46 /* remove from the waitqueue */
47 list_del(&wait->task_list);
48
49 /* move onto the action list and queue for FS-Cache thread pool */
50 ASSERT(monitor->op);
51
52 object = container_of(monitor->op->op.object,
53 struct cachefiles_object, fscache);
54
55 spin_lock(&object->work_lock);
56 list_add_tail(&monitor->op_link, &monitor->op->to_do);
57 spin_unlock(&object->work_lock);
58
59 fscache_enqueue_retrieval(monitor->op);
60 return 0;
61}
62
63/*
64 * copy data from backing pages to netfs pages to complete a read operation
65 * - driven by FS-Cache's thread pool
66 */
67static void cachefiles_read_copier(struct fscache_operation *_op)
68{
69 struct cachefiles_one_read *monitor;
70 struct cachefiles_object *object;
71 struct fscache_retrieval *op;
72 struct pagevec pagevec;
73 int error, max;
74
75 op = container_of(_op, struct fscache_retrieval, op);
76 object = container_of(op->op.object,
77 struct cachefiles_object, fscache);
78
79 _enter("{ino=%lu}", object->backer->d_inode->i_ino);
80
81 pagevec_init(&pagevec, 0);
82
83 max = 8;
84 spin_lock_irq(&object->work_lock);
85
86 while (!list_empty(&op->to_do)) {
87 monitor = list_entry(op->to_do.next,
88 struct cachefiles_one_read, op_link);
89 list_del(&monitor->op_link);
90
91 spin_unlock_irq(&object->work_lock);
92
93 _debug("- copy {%lu}", monitor->back_page->index);
94
95 error = -EIO;
96 if (PageUptodate(monitor->back_page)) {
97 copy_highpage(monitor->netfs_page, monitor->back_page);
98
99 pagevec_add(&pagevec, monitor->netfs_page);
100 fscache_mark_pages_cached(monitor->op, &pagevec);
101 error = 0;
102 }
103
104 if (error)
105 cachefiles_io_error_obj(
106 object,
107 "Readpage failed on backing file %lx",
108 (unsigned long) monitor->back_page->flags);
109
110 page_cache_release(monitor->back_page);
111
112 fscache_end_io(op, monitor->netfs_page, error);
113 page_cache_release(monitor->netfs_page);
114 fscache_put_retrieval(op);
115 kfree(monitor);
116
117 /* let the thread pool have some air occasionally */
118 max--;
119 if (max < 0 || need_resched()) {
120 if (!list_empty(&op->to_do))
121 fscache_enqueue_retrieval(op);
122 _leave(" [maxed out]");
123 return;
124 }
125
126 spin_lock_irq(&object->work_lock);
127 }
128
129 spin_unlock_irq(&object->work_lock);
130 _leave("");
131}
132
133/*
134 * read the corresponding page to the given set from the backing file
135 * - an uncertain page is simply discarded, to be tried again another time
136 */
137static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
138 struct fscache_retrieval *op,
139 struct page *netpage,
140 struct pagevec *pagevec)
141{
142 struct cachefiles_one_read *monitor;
143 struct address_space *bmapping;
144 struct page *newpage, *backpage;
145 int ret;
146
147 _enter("");
148
149 pagevec_reinit(pagevec);
150
151 _debug("read back %p{%lu,%d}",
152 netpage, netpage->index, page_count(netpage));
153
154 monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
155 if (!monitor)
156 goto nomem;
157
158 monitor->netfs_page = netpage;
159 monitor->op = fscache_get_retrieval(op);
160
161 init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
162
163 /* attempt to get hold of the backing page */
164 bmapping = object->backer->d_inode->i_mapping;
165 newpage = NULL;
166
167 for (;;) {
168 backpage = find_get_page(bmapping, netpage->index);
169 if (backpage)
170 goto backing_page_already_present;
171
172 if (!newpage) {
173 newpage = page_cache_alloc_cold(bmapping);
174 if (!newpage)
175 goto nomem_monitor;
176 }
177
178 ret = add_to_page_cache(newpage, bmapping,
179 netpage->index, GFP_KERNEL);
180 if (ret == 0)
181 goto installed_new_backing_page;
182 if (ret != -EEXIST)
183 goto nomem_page;
184 }
185
186 /* we've installed a new backing page, so now we need to add it
187 * to the LRU list and start it reading */
188installed_new_backing_page:
189 _debug("- new %p", newpage);
190
191 backpage = newpage;
192 newpage = NULL;
193
194 page_cache_get(backpage);
195 pagevec_add(pagevec, backpage);
196 __pagevec_lru_add_file(pagevec);
197
198read_backing_page:
199 ret = bmapping->a_ops->readpage(NULL, backpage);
200 if (ret < 0)
201 goto read_error;
202
203 /* set the monitor to transfer the data across */
204monitor_backing_page:
205 _debug("- monitor add");
206
207 /* install the monitor */
208 page_cache_get(monitor->netfs_page);
209 page_cache_get(backpage);
210 monitor->back_page = backpage;
211 monitor->monitor.private = backpage;
212 add_page_wait_queue(backpage, &monitor->monitor);
213 monitor = NULL;
214
215 /* but the page may have been read before the monitor was installed, so
216 * the monitor may miss the event - so we have to ensure that we do get
217 * one in such a case */
218 if (trylock_page(backpage)) {
219 _debug("jumpstart %p {%lx}", backpage, backpage->flags);
220 unlock_page(backpage);
221 }
222 goto success;
223
224 /* if the backing page is already present, it can be in one of
225 * three states: read in progress, read failed or read okay */
226backing_page_already_present:
227 _debug("- present");
228
229 if (newpage) {
230 page_cache_release(newpage);
231 newpage = NULL;
232 }
233
234 if (PageError(backpage))
235 goto io_error;
236
237 if (PageUptodate(backpage))
238 goto backing_page_already_uptodate;
239
240 if (!trylock_page(backpage))
241 goto monitor_backing_page;
242 _debug("read %p {%lx}", backpage, backpage->flags);
243 goto read_backing_page;
244
245 /* the backing page is already up to date, attach the netfs
246 * page to the pagecache and LRU and copy the data across */
247backing_page_already_uptodate:
248 _debug("- uptodate");
249
250 pagevec_add(pagevec, netpage);
251 fscache_mark_pages_cached(op, pagevec);
252
253 copy_highpage(netpage, backpage);
254 fscache_end_io(op, netpage, 0);
255
256success:
257 _debug("success");
258 ret = 0;
259
260out:
261 if (backpage)
262 page_cache_release(backpage);
263 if (monitor) {
264 fscache_put_retrieval(monitor->op);
265 kfree(monitor);
266 }
267 _leave(" = %d", ret);
268 return ret;
269
270read_error:
271 _debug("read error %d", ret);
272 if (ret == -ENOMEM)
273 goto out;
274io_error:
275 cachefiles_io_error_obj(object, "Page read error on backing file");
276 ret = -ENOBUFS;
277 goto out;
278
279nomem_page:
280 page_cache_release(newpage);
281nomem_monitor:
282 fscache_put_retrieval(monitor->op);
283 kfree(monitor);
284nomem:
285 _leave(" = -ENOMEM");
286 return -ENOMEM;
287}
288
289/*
290 * read a page from the cache or allocate a block in which to store it
291 * - cache withdrawal is prevented by the caller
292 * - returns -EINTR if interrupted
293 * - returns -ENOMEM if ran out of memory
294 * - returns -ENOBUFS if no buffers can be made available
295 * - returns -ENOBUFS if page is beyond EOF
296 * - if the page is backed by a block in the cache:
297 * - a read will be started which will call the callback on completion
298 * - 0 will be returned
299 * - else if the page is unbacked:
300 * - the metadata will be retained
301 * - -ENODATA will be returned
302 */
303int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
304 struct page *page,
305 gfp_t gfp)
306{
307 struct cachefiles_object *object;
308 struct cachefiles_cache *cache;
309 struct pagevec pagevec;
310 struct inode *inode;
311 sector_t block0, block;
312 unsigned shift;
313 int ret;
314
315 object = container_of(op->op.object,
316 struct cachefiles_object, fscache);
317 cache = container_of(object->fscache.cache,
318 struct cachefiles_cache, cache);
319
320 _enter("{%p},{%lx},,,", object, page->index);
321
322 if (!object->backer)
323 return -ENOBUFS;
324
325 inode = object->backer->d_inode;
326 ASSERT(S_ISREG(inode->i_mode));
327 ASSERT(inode->i_mapping->a_ops->bmap);
328 ASSERT(inode->i_mapping->a_ops->readpages);
329
330 /* calculate the shift required to use bmap */
331 if (inode->i_sb->s_blocksize > PAGE_SIZE)
332 return -ENOBUFS;
333
334 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
335
336 op->op.flags = FSCACHE_OP_FAST;
337 op->op.processor = cachefiles_read_copier;
338
339 pagevec_init(&pagevec, 0);
340
341 /* we assume the absence or presence of the first block is a good
342 * enough indication for the page as a whole
343 * - TODO: don't use bmap() for this as it is _not_ actually good
344 * enough for this as it doesn't indicate errors, but it's all we've
345 * got for the moment
346 */
347 block0 = page->index;
348 block0 <<= shift;
349
350 block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0);
351 _debug("%llx -> %llx",
352 (unsigned long long) block0,
353 (unsigned long long) block);
354
355 if (block) {
356 /* submit the apparently valid page to the backing fs to be
357 * read from disk */
358 ret = cachefiles_read_backing_file_one(object, op, page,
359 &pagevec);
360 } else if (cachefiles_has_space(cache, 0, 1) == 0) {
361 /* there's space in the cache we can use */
362 pagevec_add(&pagevec, page);
363 fscache_mark_pages_cached(op, &pagevec);
364 ret = -ENODATA;
365 } else {
366 ret = -ENOBUFS;
367 }
368
369 _leave(" = %d", ret);
370 return ret;
371}
372
373/*
374 * read the corresponding pages to the given set from the backing file
375 * - any uncertain pages are simply discarded, to be tried again another time
376 */
377static int cachefiles_read_backing_file(struct cachefiles_object *object,
378 struct fscache_retrieval *op,
379 struct list_head *list,
380 struct pagevec *mark_pvec)
381{
382 struct cachefiles_one_read *monitor = NULL;
383 struct address_space *bmapping = object->backer->d_inode->i_mapping;
384 struct pagevec lru_pvec;
385 struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
386 int ret = 0;
387
388 _enter("");
389
390 pagevec_init(&lru_pvec, 0);
391
392 list_for_each_entry_safe(netpage, _n, list, lru) {
393 list_del(&netpage->lru);
394
395 _debug("read back %p{%lu,%d}",
396 netpage, netpage->index, page_count(netpage));
397
398 if (!monitor) {
399 monitor = kzalloc(sizeof(*monitor), GFP_KERNEL);
400 if (!monitor)
401 goto nomem;
402
403 monitor->op = fscache_get_retrieval(op);
404 init_waitqueue_func_entry(&monitor->monitor,
405 cachefiles_read_waiter);
406 }
407
408 for (;;) {
409 backpage = find_get_page(bmapping, netpage->index);
410 if (backpage)
411 goto backing_page_already_present;
412
413 if (!newpage) {
414 newpage = page_cache_alloc_cold(bmapping);
415 if (!newpage)
416 goto nomem;
417 }
418
419 ret = add_to_page_cache(newpage, bmapping,
420 netpage->index, GFP_KERNEL);
421 if (ret == 0)
422 goto installed_new_backing_page;
423 if (ret != -EEXIST)
424 goto nomem;
425 }
426
427 /* we've installed a new backing page, so now we need to add it
428 * to the LRU list and start it reading */
429 installed_new_backing_page:
430 _debug("- new %p", newpage);
431
432 backpage = newpage;
433 newpage = NULL;
434
435 page_cache_get(backpage);
436 if (!pagevec_add(&lru_pvec, backpage))
437 __pagevec_lru_add_file(&lru_pvec);
438
439 reread_backing_page:
440 ret = bmapping->a_ops->readpage(NULL, backpage);
441 if (ret < 0)
442 goto read_error;
443
444 /* add the netfs page to the pagecache and LRU, and set the
445 * monitor to transfer the data across */
446 monitor_backing_page:
447 _debug("- monitor add");
448
449 ret = add_to_page_cache(netpage, op->mapping, netpage->index,
450 GFP_KERNEL);
451 if (ret < 0) {
452 if (ret == -EEXIST) {
453 page_cache_release(netpage);
454 continue;
455 }
456 goto nomem;
457 }
458
459 page_cache_get(netpage);
460 if (!pagevec_add(&lru_pvec, netpage))
461 __pagevec_lru_add_file(&lru_pvec);
462
463 /* install a monitor */
464 page_cache_get(netpage);
465 monitor->netfs_page = netpage;
466
467 page_cache_get(backpage);
468 monitor->back_page = backpage;
469 monitor->monitor.private = backpage;
470 add_page_wait_queue(backpage, &monitor->monitor);
471 monitor = NULL;
472
473 /* but the page may have been read before the monitor was
474 * installed, so the monitor may miss the event - so we have to
475 * ensure that we do get one in such a case */
476 if (trylock_page(backpage)) {
477 _debug("2unlock %p {%lx}", backpage, backpage->flags);
478 unlock_page(backpage);
479 }
480
481 page_cache_release(backpage);
482 backpage = NULL;
483
484 page_cache_release(netpage);
485 netpage = NULL;
486 continue;
487
488 /* if the backing page is already present, it can be in one of
489 * three states: read in progress, read failed or read okay */
490 backing_page_already_present:
491 _debug("- present %p", backpage);
492
493 if (PageError(backpage))
494 goto io_error;
495
496 if (PageUptodate(backpage))
497 goto backing_page_already_uptodate;
498
499 _debug("- not ready %p{%lx}", backpage, backpage->flags);
500
501 if (!trylock_page(backpage))
502 goto monitor_backing_page;
503
504 if (PageError(backpage)) {
505 _debug("error %lx", backpage->flags);
506 unlock_page(backpage);
507 goto io_error;
508 }
509
510 if (PageUptodate(backpage))
511 goto backing_page_already_uptodate_unlock;
512
513 /* we've locked a page that's neither up to date nor erroneous,
514 * so we need to attempt to read it again */
515 goto reread_backing_page;
516
517 /* the backing page is already up to date, attach the netfs
518 * page to the pagecache and LRU and copy the data across */
519 backing_page_already_uptodate_unlock:
520 _debug("uptodate %lx", backpage->flags);
521 unlock_page(backpage);
522 backing_page_already_uptodate:
523 _debug("- uptodate");
524
525 ret = add_to_page_cache(netpage, op->mapping, netpage->index,
526 GFP_KERNEL);
527 if (ret < 0) {
528 if (ret == -EEXIST) {
529 page_cache_release(netpage);
530 continue;
531 }
532 goto nomem;
533 }
534
535 copy_highpage(netpage, backpage);
536
537 page_cache_release(backpage);
538 backpage = NULL;
539
540 if (!pagevec_add(mark_pvec, netpage))
541 fscache_mark_pages_cached(op, mark_pvec);
542
543 page_cache_get(netpage);
544 if (!pagevec_add(&lru_pvec, netpage))
545 __pagevec_lru_add_file(&lru_pvec);
546
547 fscache_end_io(op, netpage, 0);
548 page_cache_release(netpage);
549 netpage = NULL;
550 continue;
551 }
552
553 netpage = NULL;
554
555 _debug("out");
556
557out:
558 /* tidy up */
559 pagevec_lru_add_file(&lru_pvec);
560
561 if (newpage)
562 page_cache_release(newpage);
563 if (netpage)
564 page_cache_release(netpage);
565 if (backpage)
566 page_cache_release(backpage);
567 if (monitor) {
568 fscache_put_retrieval(op);
569 kfree(monitor);
570 }
571
572 list_for_each_entry_safe(netpage, _n, list, lru) {
573 list_del(&netpage->lru);
574 page_cache_release(netpage);
575 }
576
577 _leave(" = %d", ret);
578 return ret;
579
580nomem:
581 _debug("nomem");
582 ret = -ENOMEM;
583 goto out;
584
585read_error:
586 _debug("read error %d", ret);
587 if (ret == -ENOMEM)
588 goto out;
589io_error:
590 cachefiles_io_error_obj(object, "Page read error on backing file");
591 ret = -ENOBUFS;
592 goto out;
593}
594
595/*
596 * read a list of pages from the cache or allocate blocks in which to store
597 * them
598 */
599int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
600 struct list_head *pages,
601 unsigned *nr_pages,
602 gfp_t gfp)
603{
604 struct cachefiles_object *object;
605 struct cachefiles_cache *cache;
606 struct list_head backpages;
607 struct pagevec pagevec;
608 struct inode *inode;
609 struct page *page, *_n;
610 unsigned shift, nrbackpages;
611 int ret, ret2, space;
612
613 object = container_of(op->op.object,
614 struct cachefiles_object, fscache);
615 cache = container_of(object->fscache.cache,
616 struct cachefiles_cache, cache);
617
618 _enter("{OBJ%x,%d},,%d,,",
619 object->fscache.debug_id, atomic_read(&op->op.usage),
620 *nr_pages);
621
622 if (!object->backer)
623 return -ENOBUFS;
624
625 space = 1;
626 if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
627 space = 0;
628
629 inode = object->backer->d_inode;
630 ASSERT(S_ISREG(inode->i_mode));
631 ASSERT(inode->i_mapping->a_ops->bmap);
632 ASSERT(inode->i_mapping->a_ops->readpages);
633
634 /* calculate the shift required to use bmap */
635 if (inode->i_sb->s_blocksize > PAGE_SIZE)
636 return -ENOBUFS;
637
638 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
639
640 pagevec_init(&pagevec, 0);
641
642 op->op.flags = FSCACHE_OP_FAST;
643 op->op.processor = cachefiles_read_copier;
644
645 INIT_LIST_HEAD(&backpages);
646 nrbackpages = 0;
647
648 ret = space ? -ENODATA : -ENOBUFS;
649 list_for_each_entry_safe(page, _n, pages, lru) {
650 sector_t block0, block;
651
652 /* we assume the absence or presence of the first block is a
653 * good enough indication for the page as a whole
654 * - TODO: don't use bmap() for this as it is _not_ actually
655 * good enough for this as it doesn't indicate errors, but
656 * it's all we've got for the moment
657 */
658 block0 = page->index;
659 block0 <<= shift;
660
661 block = inode->i_mapping->a_ops->bmap(inode->i_mapping,
662 block0);
663 _debug("%llx -> %llx",
664 (unsigned long long) block0,
665 (unsigned long long) block);
666
667 if (block) {
668 /* we have data - add it to the list to give to the
669 * backing fs */
670 list_move(&page->lru, &backpages);
671 (*nr_pages)--;
672 nrbackpages++;
673 } else if (space && pagevec_add(&pagevec, page) == 0) {
674 fscache_mark_pages_cached(op, &pagevec);
675 ret = -ENODATA;
676 }
677 }
678
679 if (pagevec_count(&pagevec) > 0)
680 fscache_mark_pages_cached(op, &pagevec);
681
682 if (list_empty(pages))
683 ret = 0;
684
685 /* submit the apparently valid pages to the backing fs to be read from
686 * disk */
687 if (nrbackpages > 0) {
688 ret2 = cachefiles_read_backing_file(object, op, &backpages,
689 &pagevec);
690 if (ret2 == -ENOMEM || ret2 == -EINTR)
691 ret = ret2;
692 }
693
694 if (pagevec_count(&pagevec) > 0)
695 fscache_mark_pages_cached(op, &pagevec);
696
697 _leave(" = %d [nr=%u%s]",
698 ret, *nr_pages, list_empty(pages) ? " empty" : "");
699 return ret;
700}
701
702/*
703 * allocate a block in the cache in which to store a page
704 * - cache withdrawal is prevented by the caller
705 * - returns -EINTR if interrupted
706 * - returns -ENOMEM if ran out of memory
707 * - returns -ENOBUFS if no buffers can be made available
708 * - returns -ENOBUFS if page is beyond EOF
709 * - otherwise:
710 * - the metadata will be retained
711 * - 0 will be returned
712 */
713int cachefiles_allocate_page(struct fscache_retrieval *op,
714 struct page *page,
715 gfp_t gfp)
716{
717 struct cachefiles_object *object;
718 struct cachefiles_cache *cache;
719 struct pagevec pagevec;
720 int ret;
721
722 object = container_of(op->op.object,
723 struct cachefiles_object, fscache);
724 cache = container_of(object->fscache.cache,
725 struct cachefiles_cache, cache);
726
727 _enter("%p,{%lx},", object, page->index);
728
729 ret = cachefiles_has_space(cache, 0, 1);
730 if (ret == 0) {
731 pagevec_init(&pagevec, 0);
732 pagevec_add(&pagevec, page);
733 fscache_mark_pages_cached(op, &pagevec);
734 } else {
735 ret = -ENOBUFS;
736 }
737
738 _leave(" = %d", ret);
739 return ret;
740}
741
742/*
743 * allocate blocks in the cache in which to store a set of pages
744 * - cache withdrawal is prevented by the caller
745 * - returns -EINTR if interrupted
746 * - returns -ENOMEM if ran out of memory
747 * - returns -ENOBUFS if some buffers couldn't be made available
748 * - returns -ENOBUFS if some pages are beyond EOF
749 * - otherwise:
750 * - -ENODATA will be returned
751 * - metadata will be retained for any page marked
752 */
753int cachefiles_allocate_pages(struct fscache_retrieval *op,
754 struct list_head *pages,
755 unsigned *nr_pages,
756 gfp_t gfp)
757{
758 struct cachefiles_object *object;
759 struct cachefiles_cache *cache;
760 struct pagevec pagevec;
761 struct page *page;
762 int ret;
763
764 object = container_of(op->op.object,
765 struct cachefiles_object, fscache);
766 cache = container_of(object->fscache.cache,
767 struct cachefiles_cache, cache);
768
769 _enter("%p,,,%d,", object, *nr_pages);
770
771 ret = cachefiles_has_space(cache, 0, *nr_pages);
772 if (ret == 0) {
773 pagevec_init(&pagevec, 0);
774
775 list_for_each_entry(page, pages, lru) {
776 if (pagevec_add(&pagevec, page) == 0)
777 fscache_mark_pages_cached(op, &pagevec);
778 }
779
780 if (pagevec_count(&pagevec) > 0)
781 fscache_mark_pages_cached(op, &pagevec);
782 ret = -ENODATA;
783 } else {
784 ret = -ENOBUFS;
785 }
786
787 _leave(" = %d", ret);
788 return ret;
789}
790
791/*
792 * request a page be stored in the cache
793 * - cache withdrawal is prevented by the caller
794 * - this request may be ignored if there's no cache block available, in which
795 * case -ENOBUFS will be returned
796 * - if the op is in progress, 0 will be returned
797 */
798int cachefiles_write_page(struct fscache_storage *op, struct page *page)
799{
800 struct cachefiles_object *object;
801 struct cachefiles_cache *cache;
802 mm_segment_t old_fs;
803 struct file *file;
804 loff_t pos;
805 void *data;
806 int ret;
807
808 ASSERT(op != NULL);
809 ASSERT(page != NULL);
810
811 object = container_of(op->op.object,
812 struct cachefiles_object, fscache);
813
814 _enter("%p,%p{%lx},,,", object, page, page->index);
815
816 if (!object->backer) {
817 _leave(" = -ENOBUFS");
818 return -ENOBUFS;
819 }
820
821 ASSERT(S_ISREG(object->backer->d_inode->i_mode));
822
823 cache = container_of(object->fscache.cache,
824 struct cachefiles_cache, cache);
825
826 /* write the page to the backing filesystem and let it store it in its
827 * own time */
828 dget(object->backer);
829 mntget(cache->mnt);
830 file = dentry_open(object->backer, cache->mnt, O_RDWR,
831 cache->cache_cred);
832 if (IS_ERR(file)) {
833 ret = PTR_ERR(file);
834 } else {
835 ret = -EIO;
836 if (file->f_op->write) {
837 pos = (loff_t) page->index << PAGE_SHIFT;
838 data = kmap(page);
839 old_fs = get_fs();
840 set_fs(KERNEL_DS);
841 ret = file->f_op->write(
842 file, (const void __user *) data, PAGE_SIZE,
843 &pos);
844 set_fs(old_fs);
845 kunmap(page);
846 if (ret != PAGE_SIZE)
847 ret = -EIO;
848 }
849 fput(file);
850 }
851
852 if (ret < 0) {
853 if (ret == -EIO)
854 cachefiles_io_error_obj(
855 object, "Write page to backing file failed");
856 ret = -ENOBUFS;
857 }
858
859 _leave(" = %d", ret);
860 return ret;
861}
862
863/*
864 * detach a backing block from a page
865 * - cache withdrawal is prevented by the caller
866 */
867void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
868{
869 struct cachefiles_object *object;
870 struct cachefiles_cache *cache;
871
872 object = container_of(_object, struct cachefiles_object, fscache);
873 cache = container_of(object->fscache.cache,
874 struct cachefiles_cache, cache);
875
876 _enter("%p,{%lu}", object, page->index);
877
878 spin_unlock(&object->fscache.cookie->lock);
879}
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
new file mode 100644
index 000000000000..b5808cdb2232
--- /dev/null
+++ b/fs/cachefiles/security.c
@@ -0,0 +1,116 @@
1/* CacheFiles security management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/fs.h>
13#include <linux/cred.h>
14#include "internal.h"
15
16/*
17 * determine the security context within which we access the cache from within
18 * the kernel
19 */
20int cachefiles_get_security_ID(struct cachefiles_cache *cache)
21{
22 struct cred *new;
23 int ret;
24
25 _enter("{%s}", cache->secctx);
26
27 new = prepare_kernel_cred(current);
28 if (!new) {
29 ret = -ENOMEM;
30 goto error;
31 }
32
33 if (cache->secctx) {
34 ret = set_security_override_from_ctx(new, cache->secctx);
35 if (ret < 0) {
36 put_cred(new);
37 printk(KERN_ERR "CacheFiles:"
38 " Security denies permission to nominate"
39 " security context: error %d\n",
40 ret);
41 goto error;
42 }
43 }
44
45 cache->cache_cred = new;
46 ret = 0;
47error:
48 _leave(" = %d", ret);
49 return ret;
50}
51
52/*
53 * see if mkdir and create can be performed in the root directory
54 */
55static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
56 struct dentry *root)
57{
58 int ret;
59
60 ret = security_inode_mkdir(root->d_inode, root, 0);
61 if (ret < 0) {
62 printk(KERN_ERR "CacheFiles:"
63 " Security denies permission to make dirs: error %d",
64 ret);
65 return ret;
66 }
67
68 ret = security_inode_create(root->d_inode, root, 0);
69 if (ret < 0)
70 printk(KERN_ERR "CacheFiles:"
71 " Security denies permission to create files: error %d",
72 ret);
73
74 return ret;
75}
76
77/*
78 * check the security details of the on-disk cache
79 * - must be called with security override in force
80 */
81int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
82 struct dentry *root,
83 const struct cred **_saved_cred)
84{
85 struct cred *new;
86 int ret;
87
88 _enter("");
89
90 /* duplicate the cache creds for COW (the override is currently in
91 * force, so we can use prepare_creds() to do this) */
92 new = prepare_creds();
93 if (!new)
94 return -ENOMEM;
95
96 cachefiles_end_secure(cache, *_saved_cred);
97
98 /* use the cache root dir's security context as the basis with
99 * which create files */
100 ret = set_create_files_as(new, root->d_inode);
101 if (ret < 0) {
102 _leave(" = %d [cfa]", ret);
103 return ret;
104 }
105
106 put_cred(cache->cache_cred);
107 cache->cache_cred = new;
108
109 cachefiles_begin_secure(cache, _saved_cred);
110 ret = cachefiles_check_cache_dir(cache, root);
111
112 if (ret == -EOPNOTSUPP)
113 ret = 0;
114 _leave(" = %d", ret);
115 return ret;
116}
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
new file mode 100644
index 000000000000..f3e7a0bf068b
--- /dev/null
+++ b/fs/cachefiles/xattr.c
@@ -0,0 +1,291 @@
1/* CacheFiles extended attribute management
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/sched.h>
14#include <linux/file.h>
15#include <linux/fs.h>
16#include <linux/fsnotify.h>
17#include <linux/quotaops.h>
18#include <linux/xattr.h>
19#include "internal.h"
20
21static const char cachefiles_xattr_cache[] =
22 XATTR_USER_PREFIX "CacheFiles.cache";
23
24/*
25 * check the type label on an object
26 * - done using xattrs
27 */
28int cachefiles_check_object_type(struct cachefiles_object *object)
29{
30 struct dentry *dentry = object->dentry;
31 char type[3], xtype[3];
32 int ret;
33
34 ASSERT(dentry);
35 ASSERT(dentry->d_inode);
36
37 if (!object->fscache.cookie)
38 strcpy(type, "C3");
39 else
40 snprintf(type, 3, "%02x", object->fscache.cookie->def->type);
41
42 _enter("%p{%s}", object, type);
43
44 /* attempt to install a type label directly */
45 ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2,
46 XATTR_CREATE);
47 if (ret == 0) {
48 _debug("SET"); /* we succeeded */
49 goto error;
50 }
51
52 if (ret != -EEXIST) {
53 kerror("Can't set xattr on %*.*s [%lu] (err %d)",
54 dentry->d_name.len, dentry->d_name.len,
55 dentry->d_name.name, dentry->d_inode->i_ino,
56 -ret);
57 goto error;
58 }
59
60 /* read the current type label */
61 ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3);
62 if (ret < 0) {
63 if (ret == -ERANGE)
64 goto bad_type_length;
65
66 kerror("Can't read xattr on %*.*s [%lu] (err %d)",
67 dentry->d_name.len, dentry->d_name.len,
68 dentry->d_name.name, dentry->d_inode->i_ino,
69 -ret);
70 goto error;
71 }
72
73 /* check the type is what we're expecting */
74 if (ret != 2)
75 goto bad_type_length;
76
77 if (xtype[0] != type[0] || xtype[1] != type[1])
78 goto bad_type;
79
80 ret = 0;
81
82error:
83 _leave(" = %d", ret);
84 return ret;
85
86bad_type_length:
87 kerror("Cache object %lu type xattr length incorrect",
88 dentry->d_inode->i_ino);
89 ret = -EIO;
90 goto error;
91
92bad_type:
93 xtype[2] = 0;
94 kerror("Cache object %*.*s [%lu] type %s not %s",
95 dentry->d_name.len, dentry->d_name.len,
96 dentry->d_name.name, dentry->d_inode->i_ino,
97 xtype, type);
98 ret = -EIO;
99 goto error;
100}
101
102/*
103 * set the state xattr on a cache file
104 */
105int cachefiles_set_object_xattr(struct cachefiles_object *object,
106 struct cachefiles_xattr *auxdata)
107{
108 struct dentry *dentry = object->dentry;
109 int ret;
110
111 ASSERT(object->fscache.cookie);
112 ASSERT(dentry);
113
114 _enter("%p,#%d", object, auxdata->len);
115
116 /* attempt to install the cache metadata directly */
117 _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
118
119 ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
120 &auxdata->type, auxdata->len,
121 XATTR_CREATE);
122 if (ret < 0 && ret != -ENOMEM)
123 cachefiles_io_error_obj(
124 object,
125 "Failed to set xattr with error %d", ret);
126
127 _leave(" = %d", ret);
128 return ret;
129}
130
131/*
132 * update the state xattr on a cache file
133 */
134int cachefiles_update_object_xattr(struct cachefiles_object *object,
135 struct cachefiles_xattr *auxdata)
136{
137 struct dentry *dentry = object->dentry;
138 int ret;
139
140 ASSERT(object->fscache.cookie);
141 ASSERT(dentry);
142
143 _enter("%p,#%d", object, auxdata->len);
144
145 /* attempt to install the cache metadata directly */
146 _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len);
147
148 ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
149 &auxdata->type, auxdata->len,
150 XATTR_REPLACE);
151 if (ret < 0 && ret != -ENOMEM)
152 cachefiles_io_error_obj(
153 object,
154 "Failed to update xattr with error %d", ret);
155
156 _leave(" = %d", ret);
157 return ret;
158}
159
160/*
161 * check the state xattr on a cache file
162 * - return -ESTALE if the object should be deleted
163 */
164int cachefiles_check_object_xattr(struct cachefiles_object *object,
165 struct cachefiles_xattr *auxdata)
166{
167 struct cachefiles_xattr *auxbuf;
168 struct dentry *dentry = object->dentry;
169 int ret;
170
171 _enter("%p,#%d", object, auxdata->len);
172
173 ASSERT(dentry);
174 ASSERT(dentry->d_inode);
175
176 auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
177 if (!auxbuf) {
178 _leave(" = -ENOMEM");
179 return -ENOMEM;
180 }
181
182 /* read the current type label */
183 ret = vfs_getxattr(dentry, cachefiles_xattr_cache,
184 &auxbuf->type, 512 + 1);
185 if (ret < 0) {
186 if (ret == -ENODATA)
187 goto stale; /* no attribute - power went off
188 * mid-cull? */
189
190 if (ret == -ERANGE)
191 goto bad_type_length;
192
193 cachefiles_io_error_obj(object,
194 "Can't read xattr on %lu (err %d)",
195 dentry->d_inode->i_ino, -ret);
196 goto error;
197 }
198
199 /* check the on-disk object */
200 if (ret < 1)
201 goto bad_type_length;
202
203 if (auxbuf->type != auxdata->type)
204 goto stale;
205
206 auxbuf->len = ret;
207
208 /* consult the netfs */
209 if (object->fscache.cookie->def->check_aux) {
210 enum fscache_checkaux result;
211 unsigned int dlen;
212
213 dlen = auxbuf->len - 1;
214
215 _debug("checkaux %s #%u",
216 object->fscache.cookie->def->name, dlen);
217
218 result = fscache_check_aux(&object->fscache,
219 &auxbuf->data, dlen);
220
221 switch (result) {
222 /* entry okay as is */
223 case FSCACHE_CHECKAUX_OKAY:
224 goto okay;
225
226 /* entry requires update */
227 case FSCACHE_CHECKAUX_NEEDS_UPDATE:
228 break;
229
230 /* entry requires deletion */
231 case FSCACHE_CHECKAUX_OBSOLETE:
232 goto stale;
233
234 default:
235 BUG();
236 }
237
238 /* update the current label */
239 ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
240 &auxdata->type, auxdata->len,
241 XATTR_REPLACE);
242 if (ret < 0) {
243 cachefiles_io_error_obj(object,
244 "Can't update xattr on %lu"
245 " (error %d)",
246 dentry->d_inode->i_ino, -ret);
247 goto error;
248 }
249 }
250
251okay:
252 ret = 0;
253
254error:
255 kfree(auxbuf);
256 _leave(" = %d", ret);
257 return ret;
258
259bad_type_length:
260 kerror("Cache object %lu xattr length incorrect",
261 dentry->d_inode->i_ino);
262 ret = -EIO;
263 goto error;
264
265stale:
266 ret = -ESTALE;
267 goto error;
268}
269
270/*
271 * remove the object's xattr to mark it stale
272 */
273int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
274 struct dentry *dentry)
275{
276 int ret;
277
278 ret = vfs_removexattr(dentry, cachefiles_xattr_cache);
279 if (ret < 0) {
280 if (ret == -ENOENT || ret == -ENODATA)
281 ret = 0;
282 else if (ret != -ENOMEM)
283 cachefiles_io_error(cache,
284 "Can't remove xattr from %lu"
285 " (error %d)",
286 dentry->d_inode->i_ino, -ret);
287 }
288
289 _leave(" = %d", ret);
290 return ret;
291}
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
new file mode 100644
index 000000000000..9bbb8ce7bea0
--- /dev/null
+++ b/fs/fscache/Kconfig
@@ -0,0 +1,56 @@
1
2config FSCACHE
3 tristate "General filesystem local caching manager"
4 depends on EXPERIMENTAL
5 select SLOW_WORK
6 help
7 This option enables a generic filesystem caching manager that can be
8 used by various network and other filesystems to cache data locally.
9 Different sorts of caches can be plugged in, depending on the
10 resources available.
11
12 See Documentation/filesystems/caching/fscache.txt for more information.
13
14config FSCACHE_STATS
15 bool "Gather statistical information on local caching"
16 depends on FSCACHE && PROC_FS
17 help
18 This option causes statistical information to be gathered on local
19 caching and exported through file:
20
21 /proc/fs/fscache/stats
22
23 The gathering of statistics adds a certain amount of overhead to
24 execution as there are a quite a few stats gathered, and on a
25 multi-CPU system these may be on cachelines that keep bouncing
26 between CPUs. On the other hand, the stats are very useful for
27 debugging purposes. Saying 'Y' here is recommended.
28
29 See Documentation/filesystems/caching/fscache.txt for more information.
30
31config FSCACHE_HISTOGRAM
32 bool "Gather latency information on local caching"
33 depends on FSCACHE && PROC_FS
34 help
35 This option causes latency information to be gathered on local
36 caching and exported through file:
37
38 /proc/fs/fscache/histogram
39
40 The generation of this histogram adds a certain amount of overhead to
41 execution as there are a number of points at which data is gathered,
42 and on a multi-CPU system these may be on cachelines that keep
43 bouncing between CPUs. On the other hand, the histogram may be
44 useful for debugging purposes. Saying 'N' here is recommended.
45
46 See Documentation/filesystems/caching/fscache.txt for more information.
47
48config FSCACHE_DEBUG
49 bool "Debug FS-Cache"
50 depends on FSCACHE
51 help
52 This permits debugging to be dynamically enabled in the local caching
53 management module. If this is set, the debugging output may be
54 enabled by setting bits in /sys/modules/fscache/parameter/debug.
55
56 See Documentation/filesystems/caching/fscache.txt for more information.
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
new file mode 100644
index 000000000000..91571b95aacc
--- /dev/null
+++ b/fs/fscache/Makefile
@@ -0,0 +1,19 @@
1#
2# Makefile for general filesystem caching code
3#
4
5fscache-y := \
6 cache.o \
7 cookie.o \
8 fsdef.o \
9 main.o \
10 netfs.o \
11 object.o \
12 operation.o \
13 page.o
14
15fscache-$(CONFIG_PROC_FS) += proc.o
16fscache-$(CONFIG_FSCACHE_STATS) += stats.o
17fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o
18
19obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
new file mode 100644
index 000000000000..e21985bbb1fb
--- /dev/null
+++ b/fs/fscache/cache.c
@@ -0,0 +1,415 @@
1/* FS-Cache cache handling
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL CACHE
13#include <linux/module.h>
14#include <linux/slab.h>
15#include "internal.h"
16
17LIST_HEAD(fscache_cache_list);
18DECLARE_RWSEM(fscache_addremove_sem);
19DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq);
20EXPORT_SYMBOL(fscache_cache_cleared_wq);
21
22static LIST_HEAD(fscache_cache_tag_list);
23
24/*
25 * look up a cache tag
26 */
27struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name)
28{
29 struct fscache_cache_tag *tag, *xtag;
30
31 /* firstly check for the existence of the tag under read lock */
32 down_read(&fscache_addremove_sem);
33
34 list_for_each_entry(tag, &fscache_cache_tag_list, link) {
35 if (strcmp(tag->name, name) == 0) {
36 atomic_inc(&tag->usage);
37 up_read(&fscache_addremove_sem);
38 return tag;
39 }
40 }
41
42 up_read(&fscache_addremove_sem);
43
44 /* the tag does not exist - create a candidate */
45 xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL);
46 if (!xtag)
47 /* return a dummy tag if out of memory */
48 return ERR_PTR(-ENOMEM);
49
50 atomic_set(&xtag->usage, 1);
51 strcpy(xtag->name, name);
52
53 /* write lock, search again and add if still not present */
54 down_write(&fscache_addremove_sem);
55
56 list_for_each_entry(tag, &fscache_cache_tag_list, link) {
57 if (strcmp(tag->name, name) == 0) {
58 atomic_inc(&tag->usage);
59 up_write(&fscache_addremove_sem);
60 kfree(xtag);
61 return tag;
62 }
63 }
64
65 list_add_tail(&xtag->link, &fscache_cache_tag_list);
66 up_write(&fscache_addremove_sem);
67 return xtag;
68}
69
70/*
71 * release a reference to a cache tag
72 */
73void __fscache_release_cache_tag(struct fscache_cache_tag *tag)
74{
75 if (tag != ERR_PTR(-ENOMEM)) {
76 down_write(&fscache_addremove_sem);
77
78 if (atomic_dec_and_test(&tag->usage))
79 list_del_init(&tag->link);
80 else
81 tag = NULL;
82
83 up_write(&fscache_addremove_sem);
84
85 kfree(tag);
86 }
87}
88
89/*
90 * select a cache in which to store an object
91 * - the cache addremove semaphore must be at least read-locked by the caller
92 * - the object will never be an index
93 */
94struct fscache_cache *fscache_select_cache_for_object(
95 struct fscache_cookie *cookie)
96{
97 struct fscache_cache_tag *tag;
98 struct fscache_object *object;
99 struct fscache_cache *cache;
100
101 _enter("");
102
103 if (list_empty(&fscache_cache_list)) {
104 _leave(" = NULL [no cache]");
105 return NULL;
106 }
107
108 /* we check the parent to determine the cache to use */
109 spin_lock(&cookie->lock);
110
111 /* the first in the parent's backing list should be the preferred
112 * cache */
113 if (!hlist_empty(&cookie->backing_objects)) {
114 object = hlist_entry(cookie->backing_objects.first,
115 struct fscache_object, cookie_link);
116
117 cache = object->cache;
118 if (object->state >= FSCACHE_OBJECT_DYING ||
119 test_bit(FSCACHE_IOERROR, &cache->flags))
120 cache = NULL;
121
122 spin_unlock(&cookie->lock);
123 _leave(" = %p [parent]", cache);
124 return cache;
125 }
126
127 /* the parent is unbacked */
128 if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
129 /* cookie not an index and is unbacked */
130 spin_unlock(&cookie->lock);
131 _leave(" = NULL [cookie ub,ni]");
132 return NULL;
133 }
134
135 spin_unlock(&cookie->lock);
136
137 if (!cookie->def->select_cache)
138 goto no_preference;
139
140 /* ask the netfs for its preference */
141 tag = cookie->def->select_cache(cookie->parent->netfs_data,
142 cookie->netfs_data);
143 if (!tag)
144 goto no_preference;
145
146 if (tag == ERR_PTR(-ENOMEM)) {
147 _leave(" = NULL [nomem tag]");
148 return NULL;
149 }
150
151 if (!tag->cache) {
152 _leave(" = NULL [unbacked tag]");
153 return NULL;
154 }
155
156 if (test_bit(FSCACHE_IOERROR, &tag->cache->flags))
157 return NULL;
158
159 _leave(" = %p [specific]", tag->cache);
160 return tag->cache;
161
162no_preference:
163 /* netfs has no preference - just select first cache */
164 cache = list_entry(fscache_cache_list.next,
165 struct fscache_cache, link);
166 _leave(" = %p [first]", cache);
167 return cache;
168}
169
170/**
171 * fscache_init_cache - Initialise a cache record
172 * @cache: The cache record to be initialised
173 * @ops: The cache operations to be installed in that record
174 * @idfmt: Format string to define identifier
175 * @...: sprintf-style arguments
176 *
177 * Initialise a record of a cache and fill in the name.
178 *
179 * See Documentation/filesystems/caching/backend-api.txt for a complete
180 * description.
181 */
182void fscache_init_cache(struct fscache_cache *cache,
183 const struct fscache_cache_ops *ops,
184 const char *idfmt,
185 ...)
186{
187 va_list va;
188
189 memset(cache, 0, sizeof(*cache));
190
191 cache->ops = ops;
192
193 va_start(va, idfmt);
194 vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va);
195 va_end(va);
196
197 INIT_WORK(&cache->op_gc, fscache_operation_gc);
198 INIT_LIST_HEAD(&cache->link);
199 INIT_LIST_HEAD(&cache->object_list);
200 INIT_LIST_HEAD(&cache->op_gc_list);
201 spin_lock_init(&cache->object_list_lock);
202 spin_lock_init(&cache->op_gc_list_lock);
203}
204EXPORT_SYMBOL(fscache_init_cache);
205
206/**
207 * fscache_add_cache - Declare a cache as being open for business
208 * @cache: The record describing the cache
209 * @ifsdef: The record of the cache object describing the top-level index
210 * @tagname: The tag describing this cache
211 *
212 * Add a cache to the system, making it available for netfs's to use.
213 *
214 * See Documentation/filesystems/caching/backend-api.txt for a complete
215 * description.
216 */
217int fscache_add_cache(struct fscache_cache *cache,
218 struct fscache_object *ifsdef,
219 const char *tagname)
220{
221 struct fscache_cache_tag *tag;
222
223 BUG_ON(!cache->ops);
224 BUG_ON(!ifsdef);
225
226 cache->flags = 0;
227 ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
228 ifsdef->state = FSCACHE_OBJECT_ACTIVE;
229
230 if (!tagname)
231 tagname = cache->identifier;
232
233 BUG_ON(!tagname[0]);
234
235 _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname);
236
237 /* we use the cache tag to uniquely identify caches */
238 tag = __fscache_lookup_cache_tag(tagname);
239 if (IS_ERR(tag))
240 goto nomem;
241
242 if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags))
243 goto tag_in_use;
244
245 cache->kobj = kobject_create_and_add(tagname, fscache_root);
246 if (!cache->kobj)
247 goto error;
248
249 ifsdef->cookie = &fscache_fsdef_index;
250 ifsdef->cache = cache;
251 cache->fsdef = ifsdef;
252
253 down_write(&fscache_addremove_sem);
254
255 tag->cache = cache;
256 cache->tag = tag;
257
258 /* add the cache to the list */
259 list_add(&cache->link, &fscache_cache_list);
260
261 /* add the cache's netfs definition index object to the cache's
262 * list */
263 spin_lock(&cache->object_list_lock);
264 list_add_tail(&ifsdef->cache_link, &cache->object_list);
265 spin_unlock(&cache->object_list_lock);
266
267 /* add the cache's netfs definition index object to the top level index
268 * cookie as a known backing object */
269 spin_lock(&fscache_fsdef_index.lock);
270
271 hlist_add_head(&ifsdef->cookie_link,
272 &fscache_fsdef_index.backing_objects);
273
274 atomic_inc(&fscache_fsdef_index.usage);
275
276 /* done */
277 spin_unlock(&fscache_fsdef_index.lock);
278 up_write(&fscache_addremove_sem);
279
280 printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n",
281 cache->tag->name, cache->ops->name);
282 kobject_uevent(cache->kobj, KOBJ_ADD);
283
284 _leave(" = 0 [%s]", cache->identifier);
285 return 0;
286
287tag_in_use:
288 printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname);
289 __fscache_release_cache_tag(tag);
290 _leave(" = -EXIST");
291 return -EEXIST;
292
293error:
294 __fscache_release_cache_tag(tag);
295 _leave(" = -EINVAL");
296 return -EINVAL;
297
298nomem:
299 _leave(" = -ENOMEM");
300 return -ENOMEM;
301}
302EXPORT_SYMBOL(fscache_add_cache);
303
304/**
305 * fscache_io_error - Note a cache I/O error
306 * @cache: The record describing the cache
307 *
308 * Note that an I/O error occurred in a cache and that it should no longer be
309 * used for anything. This also reports the error into the kernel log.
310 *
311 * See Documentation/filesystems/caching/backend-api.txt for a complete
312 * description.
313 */
314void fscache_io_error(struct fscache_cache *cache)
315{
316 set_bit(FSCACHE_IOERROR, &cache->flags);
317
318 printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n",
319 cache->ops->name);
320}
321EXPORT_SYMBOL(fscache_io_error);
322
323/*
324 * request withdrawal of all the objects in a cache
325 * - all the objects being withdrawn are moved onto the supplied list
326 */
327static void fscache_withdraw_all_objects(struct fscache_cache *cache,
328 struct list_head *dying_objects)
329{
330 struct fscache_object *object;
331
332 spin_lock(&cache->object_list_lock);
333
334 while (!list_empty(&cache->object_list)) {
335 object = list_entry(cache->object_list.next,
336 struct fscache_object, cache_link);
337 list_move_tail(&object->cache_link, dying_objects);
338
339 _debug("withdraw %p", object->cookie);
340
341 spin_lock(&object->lock);
342 spin_unlock(&cache->object_list_lock);
343 fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW);
344 spin_unlock(&object->lock);
345
346 cond_resched();
347 spin_lock(&cache->object_list_lock);
348 }
349
350 spin_unlock(&cache->object_list_lock);
351}
352
353/**
354 * fscache_withdraw_cache - Withdraw a cache from the active service
355 * @cache: The record describing the cache
356 *
357 * Withdraw a cache from service, unbinding all its cache objects from the
358 * netfs cookies they're currently representing.
359 *
360 * See Documentation/filesystems/caching/backend-api.txt for a complete
361 * description.
362 */
363void fscache_withdraw_cache(struct fscache_cache *cache)
364{
365 LIST_HEAD(dying_objects);
366
367 _enter("");
368
369 printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n",
370 cache->tag->name);
371
372 /* make the cache unavailable for cookie acquisition */
373 if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags))
374 BUG();
375
376 down_write(&fscache_addremove_sem);
377 list_del_init(&cache->link);
378 cache->tag->cache = NULL;
379 up_write(&fscache_addremove_sem);
380
381 /* make sure all pages pinned by operations on behalf of the netfs are
382 * written to disk */
383 cache->ops->sync_cache(cache);
384
385 /* dissociate all the netfs pages backed by this cache from the block
386 * mappings in the cache */
387 cache->ops->dissociate_pages(cache);
388
389 /* we now have to destroy all the active objects pertaining to this
390 * cache - which we do by passing them off to thread pool to be
391 * disposed of */
392 _debug("destroy");
393
394 fscache_withdraw_all_objects(cache, &dying_objects);
395
396 /* wait for all extant objects to finish their outstanding operations
397 * and go away */
398 _debug("wait for finish");
399 wait_event(fscache_cache_cleared_wq,
400 atomic_read(&cache->object_count) == 0);
401 _debug("wait for clearance");
402 wait_event(fscache_cache_cleared_wq,
403 list_empty(&cache->object_list));
404 _debug("cleared");
405 ASSERT(list_empty(&dying_objects));
406
407 kobject_put(cache->kobj);
408
409 clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags);
410 fscache_release_cache_tag(cache->tag);
411 cache->tag = NULL;
412
413 _leave("");
414}
415EXPORT_SYMBOL(fscache_withdraw_cache);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
new file mode 100644
index 000000000000..72fd18f6c71f
--- /dev/null
+++ b/fs/fscache/cookie.c
@@ -0,0 +1,500 @@
1/* netfs cookie management
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * See Documentation/filesystems/caching/netfs-api.txt for more information on
12 * the netfs API.
13 */
14
15#define FSCACHE_DEBUG_LEVEL COOKIE
16#include <linux/module.h>
17#include <linux/slab.h>
18#include "internal.h"
19
20struct kmem_cache *fscache_cookie_jar;
21
22static atomic_t fscache_object_debug_id = ATOMIC_INIT(0);
23
24static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie);
25static int fscache_alloc_object(struct fscache_cache *cache,
26 struct fscache_cookie *cookie);
27static int fscache_attach_object(struct fscache_cookie *cookie,
28 struct fscache_object *object);
29
30/*
31 * initialise an cookie jar slab element prior to any use
32 */
33void fscache_cookie_init_once(void *_cookie)
34{
35 struct fscache_cookie *cookie = _cookie;
36
37 memset(cookie, 0, sizeof(*cookie));
38 spin_lock_init(&cookie->lock);
39 INIT_HLIST_HEAD(&cookie->backing_objects);
40}
41
42/*
43 * request a cookie to represent an object (index, datafile, xattr, etc)
44 * - parent specifies the parent object
45 * - the top level index cookie for each netfs is stored in the fscache_netfs
46 * struct upon registration
47 * - def points to the definition
48 * - the netfs_data will be passed to the functions pointed to in *def
49 * - all attached caches will be searched to see if they contain this object
50 * - index objects aren't stored on disk until there's a dependent file that
51 * needs storing
52 * - other objects are stored in a selected cache immediately, and all the
53 * indices forming the path to it are instantiated if necessary
54 * - we never let on to the netfs about errors
55 * - we may set a negative cookie pointer, but that's okay
56 */
57struct fscache_cookie *__fscache_acquire_cookie(
58 struct fscache_cookie *parent,
59 const struct fscache_cookie_def *def,
60 void *netfs_data)
61{
62 struct fscache_cookie *cookie;
63
64 BUG_ON(!def);
65
66 _enter("{%s},{%s},%p",
67 parent ? (char *) parent->def->name : "<no-parent>",
68 def->name, netfs_data);
69
70 fscache_stat(&fscache_n_acquires);
71
72 /* if there's no parent cookie, then we don't create one here either */
73 if (!parent) {
74 fscache_stat(&fscache_n_acquires_null);
75 _leave(" [no parent]");
76 return NULL;
77 }
78
79 /* validate the definition */
80 BUG_ON(!def->get_key);
81 BUG_ON(!def->name[0]);
82
83 BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX &&
84 parent->def->type != FSCACHE_COOKIE_TYPE_INDEX);
85
86 /* allocate and initialise a cookie */
87 cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL);
88 if (!cookie) {
89 fscache_stat(&fscache_n_acquires_oom);
90 _leave(" [ENOMEM]");
91 return NULL;
92 }
93
94 atomic_set(&cookie->usage, 1);
95 atomic_set(&cookie->n_children, 0);
96
97 atomic_inc(&parent->usage);
98 atomic_inc(&parent->n_children);
99
100 cookie->def = def;
101 cookie->parent = parent;
102 cookie->netfs_data = netfs_data;
103 cookie->flags = 0;
104
105 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS);
106
107 switch (cookie->def->type) {
108 case FSCACHE_COOKIE_TYPE_INDEX:
109 fscache_stat(&fscache_n_cookie_index);
110 break;
111 case FSCACHE_COOKIE_TYPE_DATAFILE:
112 fscache_stat(&fscache_n_cookie_data);
113 break;
114 default:
115 fscache_stat(&fscache_n_cookie_special);
116 break;
117 }
118
119 /* if the object is an index then we need do nothing more here - we
120 * create indices on disk when we need them as an index may exist in
121 * multiple caches */
122 if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) {
123 if (fscache_acquire_non_index_cookie(cookie) < 0) {
124 atomic_dec(&parent->n_children);
125 __fscache_cookie_put(cookie);
126 fscache_stat(&fscache_n_acquires_nobufs);
127 _leave(" = NULL");
128 return NULL;
129 }
130 }
131
132 fscache_stat(&fscache_n_acquires_ok);
133 _leave(" = %p", cookie);
134 return cookie;
135}
136EXPORT_SYMBOL(__fscache_acquire_cookie);
137
138/*
139 * acquire a non-index cookie
140 * - this must make sure the index chain is instantiated and instantiate the
141 * object representation too
142 */
143static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie)
144{
145 struct fscache_object *object;
146 struct fscache_cache *cache;
147 uint64_t i_size;
148 int ret;
149
150 _enter("");
151
152 cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE;
153
154 /* now we need to see whether the backing objects for this cookie yet
155 * exist, if not there'll be nothing to search */
156 down_read(&fscache_addremove_sem);
157
158 if (list_empty(&fscache_cache_list)) {
159 up_read(&fscache_addremove_sem);
160 _leave(" = 0 [no caches]");
161 return 0;
162 }
163
164 /* select a cache in which to store the object */
165 cache = fscache_select_cache_for_object(cookie->parent);
166 if (!cache) {
167 up_read(&fscache_addremove_sem);
168 fscache_stat(&fscache_n_acquires_no_cache);
169 _leave(" = -ENOMEDIUM [no cache]");
170 return -ENOMEDIUM;
171 }
172
173 _debug("cache %s", cache->tag->name);
174
175 cookie->flags =
176 (1 << FSCACHE_COOKIE_LOOKING_UP) |
177 (1 << FSCACHE_COOKIE_CREATING) |
178 (1 << FSCACHE_COOKIE_NO_DATA_YET);
179
180 /* ask the cache to allocate objects for this cookie and its parent
181 * chain */
182 ret = fscache_alloc_object(cache, cookie);
183 if (ret < 0) {
184 up_read(&fscache_addremove_sem);
185 _leave(" = %d", ret);
186 return ret;
187 }
188
189 /* pass on how big the object we're caching is supposed to be */
190 cookie->def->get_attr(cookie->netfs_data, &i_size);
191
192 spin_lock(&cookie->lock);
193 if (hlist_empty(&cookie->backing_objects)) {
194 spin_unlock(&cookie->lock);
195 goto unavailable;
196 }
197
198 object = hlist_entry(cookie->backing_objects.first,
199 struct fscache_object, cookie_link);
200
201 fscache_set_store_limit(object, i_size);
202
203 /* initiate the process of looking up all the objects in the chain
204 * (done by fscache_initialise_object()) */
205 fscache_enqueue_object(object);
206
207 spin_unlock(&cookie->lock);
208
209 /* we may be required to wait for lookup to complete at this point */
210 if (!fscache_defer_lookup) {
211 _debug("non-deferred lookup %p", &cookie->flags);
212 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
213 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
214 _debug("complete");
215 if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags))
216 goto unavailable;
217 }
218
219 up_read(&fscache_addremove_sem);
220 _leave(" = 0 [deferred]");
221 return 0;
222
223unavailable:
224 up_read(&fscache_addremove_sem);
225 _leave(" = -ENOBUFS");
226 return -ENOBUFS;
227}
228
229/*
230 * recursively allocate cache object records for a cookie/cache combination
231 * - caller must be holding the addremove sem
232 */
233static int fscache_alloc_object(struct fscache_cache *cache,
234 struct fscache_cookie *cookie)
235{
236 struct fscache_object *object;
237 struct hlist_node *_n;
238 int ret;
239
240 _enter("%p,%p{%s}", cache, cookie, cookie->def->name);
241
242 spin_lock(&cookie->lock);
243 hlist_for_each_entry(object, _n, &cookie->backing_objects,
244 cookie_link) {
245 if (object->cache == cache)
246 goto object_already_extant;
247 }
248 spin_unlock(&cookie->lock);
249
250 /* ask the cache to allocate an object (we may end up with duplicate
251 * objects at this stage, but we sort that out later) */
252 object = cache->ops->alloc_object(cache, cookie);
253 if (IS_ERR(object)) {
254 fscache_stat(&fscache_n_object_no_alloc);
255 ret = PTR_ERR(object);
256 goto error;
257 }
258
259 fscache_stat(&fscache_n_object_alloc);
260
261 object->debug_id = atomic_inc_return(&fscache_object_debug_id);
262
263 _debug("ALLOC OBJ%x: %s {%lx}",
264 object->debug_id, cookie->def->name, object->events);
265
266 ret = fscache_alloc_object(cache, cookie->parent);
267 if (ret < 0)
268 goto error_put;
269
270 /* only attach if we managed to allocate all we needed, otherwise
271 * discard the object we just allocated and instead use the one
272 * attached to the cookie */
273 if (fscache_attach_object(cookie, object) < 0)
274 cache->ops->put_object(object);
275
276 _leave(" = 0");
277 return 0;
278
279object_already_extant:
280 ret = -ENOBUFS;
281 if (object->state >= FSCACHE_OBJECT_DYING) {
282 spin_unlock(&cookie->lock);
283 goto error;
284 }
285 spin_unlock(&cookie->lock);
286 _leave(" = 0 [found]");
287 return 0;
288
289error_put:
290 cache->ops->put_object(object);
291error:
292 _leave(" = %d", ret);
293 return ret;
294}
295
296/*
297 * attach a cache object to a cookie
298 */
299static int fscache_attach_object(struct fscache_cookie *cookie,
300 struct fscache_object *object)
301{
302 struct fscache_object *p;
303 struct fscache_cache *cache = object->cache;
304 struct hlist_node *_n;
305 int ret;
306
307 _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id);
308
309 spin_lock(&cookie->lock);
310
311 /* there may be multiple initial creations of this object, but we only
312 * want one */
313 ret = -EEXIST;
314 hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) {
315 if (p->cache == object->cache) {
316 if (p->state >= FSCACHE_OBJECT_DYING)
317 ret = -ENOBUFS;
318 goto cant_attach_object;
319 }
320 }
321
322 /* pin the parent object */
323 spin_lock_nested(&cookie->parent->lock, 1);
324 hlist_for_each_entry(p, _n, &cookie->parent->backing_objects,
325 cookie_link) {
326 if (p->cache == object->cache) {
327 if (p->state >= FSCACHE_OBJECT_DYING) {
328 ret = -ENOBUFS;
329 spin_unlock(&cookie->parent->lock);
330 goto cant_attach_object;
331 }
332 object->parent = p;
333 spin_lock(&p->lock);
334 p->n_children++;
335 spin_unlock(&p->lock);
336 break;
337 }
338 }
339 spin_unlock(&cookie->parent->lock);
340
341 /* attach to the cache's object list */
342 if (list_empty(&object->cache_link)) {
343 spin_lock(&cache->object_list_lock);
344 list_add(&object->cache_link, &cache->object_list);
345 spin_unlock(&cache->object_list_lock);
346 }
347
348 /* attach to the cookie */
349 object->cookie = cookie;
350 atomic_inc(&cookie->usage);
351 hlist_add_head(&object->cookie_link, &cookie->backing_objects);
352 ret = 0;
353
354cant_attach_object:
355 spin_unlock(&cookie->lock);
356 _leave(" = %d", ret);
357 return ret;
358}
359
360/*
361 * update the index entries backing a cookie
362 */
363void __fscache_update_cookie(struct fscache_cookie *cookie)
364{
365 struct fscache_object *object;
366 struct hlist_node *_p;
367
368 fscache_stat(&fscache_n_updates);
369
370 if (!cookie) {
371 fscache_stat(&fscache_n_updates_null);
372 _leave(" [no cookie]");
373 return;
374 }
375
376 _enter("{%s}", cookie->def->name);
377
378 BUG_ON(!cookie->def->get_aux);
379
380 spin_lock(&cookie->lock);
381
382 /* update the index entry on disk in each cache backing this cookie */
383 hlist_for_each_entry(object, _p,
384 &cookie->backing_objects, cookie_link) {
385 fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE);
386 }
387
388 spin_unlock(&cookie->lock);
389 _leave("");
390}
391EXPORT_SYMBOL(__fscache_update_cookie);
392
393/*
394 * release a cookie back to the cache
395 * - the object will be marked as recyclable on disk if retire is true
396 * - all dependents of this cookie must have already been unregistered
397 * (indices/files/pages)
398 */
399void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
400{
401 struct fscache_cache *cache;
402 struct fscache_object *object;
403 unsigned long event;
404
405 fscache_stat(&fscache_n_relinquishes);
406
407 if (!cookie) {
408 fscache_stat(&fscache_n_relinquishes_null);
409 _leave(" [no cookie]");
410 return;
411 }
412
413 _enter("%p{%s,%p},%d",
414 cookie, cookie->def->name, cookie->netfs_data, retire);
415
416 if (atomic_read(&cookie->n_children) != 0) {
417 printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n",
418 cookie->def->name);
419 BUG();
420 }
421
422 /* wait for the cookie to finish being instantiated (or to fail) */
423 if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) {
424 fscache_stat(&fscache_n_relinquishes_waitcrt);
425 wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING,
426 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
427 }
428
429 event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
430
431 /* detach pointers back to the netfs */
432 spin_lock(&cookie->lock);
433
434 cookie->netfs_data = NULL;
435 cookie->def = NULL;
436
437 /* break links with all the active objects */
438 while (!hlist_empty(&cookie->backing_objects)) {
439 object = hlist_entry(cookie->backing_objects.first,
440 struct fscache_object,
441 cookie_link);
442
443 _debug("RELEASE OBJ%x", object->debug_id);
444
445 /* detach each cache object from the object cookie */
446 spin_lock(&object->lock);
447 hlist_del_init(&object->cookie_link);
448
449 cache = object->cache;
450 object->cookie = NULL;
451 fscache_raise_event(object, event);
452 spin_unlock(&object->lock);
453
454 if (atomic_dec_and_test(&cookie->usage))
455 /* the cookie refcount shouldn't be reduced to 0 yet */
456 BUG();
457 }
458
459 spin_unlock(&cookie->lock);
460
461 if (cookie->parent) {
462 ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0);
463 ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0);
464 atomic_dec(&cookie->parent->n_children);
465 }
466
467 /* finally dispose of the cookie */
468 ASSERTCMP(atomic_read(&cookie->usage), >, 0);
469 fscache_cookie_put(cookie);
470
471 _leave("");
472}
473EXPORT_SYMBOL(__fscache_relinquish_cookie);
474
475/*
476 * destroy a cookie
477 */
478void __fscache_cookie_put(struct fscache_cookie *cookie)
479{
480 struct fscache_cookie *parent;
481
482 _enter("%p", cookie);
483
484 for (;;) {
485 _debug("FREE COOKIE %p", cookie);
486 parent = cookie->parent;
487 BUG_ON(!hlist_empty(&cookie->backing_objects));
488 kmem_cache_free(fscache_cookie_jar, cookie);
489
490 if (!parent)
491 break;
492
493 cookie = parent;
494 BUG_ON(atomic_read(&cookie->usage) <= 0);
495 if (!atomic_dec_and_test(&cookie->usage))
496 break;
497 }
498
499 _leave("");
500}
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c
new file mode 100644
index 000000000000..f5b4baee7352
--- /dev/null
+++ b/fs/fscache/fsdef.c
@@ -0,0 +1,144 @@
1/* Filesystem index definition
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL CACHE
13#include <linux/module.h>
14#include "internal.h"
15
16static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
17 void *buffer, uint16_t bufmax);
18
19static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
20 void *buffer, uint16_t bufmax);
21
22static
23enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data,
24 const void *data,
25 uint16_t datalen);
26
27/*
28 * The root index is owned by FS-Cache itself.
29 *
30 * When a netfs requests caching facilities, FS-Cache will, if one doesn't
31 * already exist, create an entry in the root index with the key being the name
32 * of the netfs ("AFS" for example), and the auxiliary data holding the index
33 * structure version supplied by the netfs:
34 *
35 * FSDEF
36 * |
37 * +-----------+
38 * | |
39 * NFS AFS
40 * [v=1] [v=1]
41 *
42 * If an entry with the appropriate name does already exist, the version is
43 * compared. If the version is different, the entire subtree from that entry
44 * will be discarded and a new entry created.
45 *
46 * The new entry will be an index, and a cookie referring to it will be passed
47 * to the netfs. This is then the root handle by which the netfs accesses the
48 * cache. It can create whatever objects it likes in that index, including
49 * further indices.
50 */
51static struct fscache_cookie_def fscache_fsdef_index_def = {
52 .name = ".FS-Cache",
53 .type = FSCACHE_COOKIE_TYPE_INDEX,
54};
55
56struct fscache_cookie fscache_fsdef_index = {
57 .usage = ATOMIC_INIT(1),
58 .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock),
59 .backing_objects = HLIST_HEAD_INIT,
60 .def = &fscache_fsdef_index_def,
61};
62EXPORT_SYMBOL(fscache_fsdef_index);
63
64/*
65 * Definition of an entry in the root index. Each entry is an index, keyed to
66 * a specific netfs and only applicable to a particular version of the index
67 * structure used by that netfs.
68 */
69struct fscache_cookie_def fscache_fsdef_netfs_def = {
70 .name = "FSDEF.netfs",
71 .type = FSCACHE_COOKIE_TYPE_INDEX,
72 .get_key = fscache_fsdef_netfs_get_key,
73 .get_aux = fscache_fsdef_netfs_get_aux,
74 .check_aux = fscache_fsdef_netfs_check_aux,
75};
76
77/*
78 * get the key data for an FSDEF index record - this is the name of the netfs
79 * for which this entry is created
80 */
81static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data,
82 void *buffer, uint16_t bufmax)
83{
84 const struct fscache_netfs *netfs = cookie_netfs_data;
85 unsigned klen;
86
87 _enter("{%s.%u},", netfs->name, netfs->version);
88
89 klen = strlen(netfs->name);
90 if (klen > bufmax)
91 return 0;
92
93 memcpy(buffer, netfs->name, klen);
94 return klen;
95}
96
97/*
98 * get the auxiliary data for an FSDEF index record - this is the index
99 * structure version number of the netfs for which this version is created
100 */
101static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data,
102 void *buffer, uint16_t bufmax)
103{
104 const struct fscache_netfs *netfs = cookie_netfs_data;
105 unsigned dlen;
106
107 _enter("{%s.%u},", netfs->name, netfs->version);
108
109 dlen = sizeof(uint32_t);
110 if (dlen > bufmax)
111 return 0;
112
113 memcpy(buffer, &netfs->version, dlen);
114 return dlen;
115}
116
117/*
118 * check that the index structure version number stored in the auxiliary data
119 * matches the one the netfs gave us
120 */
121static enum fscache_checkaux fscache_fsdef_netfs_check_aux(
122 void *cookie_netfs_data,
123 const void *data,
124 uint16_t datalen)
125{
126 struct fscache_netfs *netfs = cookie_netfs_data;
127 uint32_t version;
128
129 _enter("{%s},,%hu", netfs->name, datalen);
130
131 if (datalen != sizeof(version)) {
132 _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version));
133 return FSCACHE_CHECKAUX_OBSOLETE;
134 }
135
136 memcpy(&version, data, sizeof(version));
137 if (version != netfs->version) {
138 _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version);
139 return FSCACHE_CHECKAUX_OBSOLETE;
140 }
141
142 _leave(" = OKAY");
143 return FSCACHE_CHECKAUX_OKAY;
144}
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c
new file mode 100644
index 000000000000..bad496748a59
--- /dev/null
+++ b/fs/fscache/histogram.c
@@ -0,0 +1,109 @@
1/* FS-Cache latency histogram
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL THREAD
13#include <linux/module.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
16#include "internal.h"
17
18atomic_t fscache_obj_instantiate_histogram[HZ];
19atomic_t fscache_objs_histogram[HZ];
20atomic_t fscache_ops_histogram[HZ];
21atomic_t fscache_retrieval_delay_histogram[HZ];
22atomic_t fscache_retrieval_histogram[HZ];
23
24/*
25 * display the time-taken histogram
26 */
27static int fscache_histogram_show(struct seq_file *m, void *v)
28{
29 unsigned long index;
30 unsigned n[5], t;
31
32 switch ((unsigned long) v) {
33 case 1:
34 seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS "
35 " RETRV DLY RETRIEVLS\n");
36 return 0;
37 case 2:
38 seq_puts(m, "===== ===== ========= ========= ========="
39 " ========= =========\n");
40 return 0;
41 default:
42 index = (unsigned long) v - 3;
43 n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]);
44 n[1] = atomic_read(&fscache_ops_histogram[index]);
45 n[2] = atomic_read(&fscache_objs_histogram[index]);
46 n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]);
47 n[4] = atomic_read(&fscache_retrieval_histogram[index]);
48 if (!(n[0] | n[1] | n[2] | n[3] | n[4]))
49 return 0;
50
51 t = (index * 1000) / HZ;
52
53 seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n",
54 index, t, n[0], n[1], n[2], n[3], n[4]);
55 return 0;
56 }
57}
58
59/*
60 * set up the iterator to start reading from the first line
61 */
62static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos)
63{
64 if ((unsigned long long)*_pos >= HZ + 2)
65 return NULL;
66 if (*_pos == 0)
67 *_pos = 1;
68 return (void *)(unsigned long) *_pos;
69}
70
71/*
72 * move to the next line
73 */
74static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos)
75{
76 (*pos)++;
77 return (unsigned long long)*pos > HZ + 2 ?
78 NULL : (void *)(unsigned long) *pos;
79}
80
81/*
82 * clean up after reading
83 */
84static void fscache_histogram_stop(struct seq_file *m, void *v)
85{
86}
87
88static const struct seq_operations fscache_histogram_ops = {
89 .start = fscache_histogram_start,
90 .stop = fscache_histogram_stop,
91 .next = fscache_histogram_next,
92 .show = fscache_histogram_show,
93};
94
95/*
96 * open "/proc/fs/fscache/histogram" to provide latency data
97 */
98static int fscache_histogram_open(struct inode *inode, struct file *file)
99{
100 return seq_open(file, &fscache_histogram_ops);
101}
102
103const struct file_operations fscache_histogram_fops = {
104 .owner = THIS_MODULE,
105 .open = fscache_histogram_open,
106 .read = seq_read,
107 .llseek = seq_lseek,
108 .release = seq_release,
109};
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
new file mode 100644
index 000000000000..e0cbd16f6dc9
--- /dev/null
+++ b/fs/fscache/internal.h
@@ -0,0 +1,380 @@
1/* Internal definitions for FS-Cache
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12/*
13 * Lock order, in the order in which multiple locks should be obtained:
14 * - fscache_addremove_sem
15 * - cookie->lock
16 * - cookie->parent->lock
17 * - cache->object_list_lock
18 * - object->lock
19 * - object->parent->lock
20 * - fscache_thread_lock
21 *
22 */
23
24#include <linux/fscache-cache.h>
25#include <linux/sched.h>
26
27#define FSCACHE_MIN_THREADS 4
28#define FSCACHE_MAX_THREADS 32
29
30/*
31 * fsc-cache.c
32 */
33extern struct list_head fscache_cache_list;
34extern struct rw_semaphore fscache_addremove_sem;
35
36extern struct fscache_cache *fscache_select_cache_for_object(
37 struct fscache_cookie *);
38
39/*
40 * fsc-cookie.c
41 */
42extern struct kmem_cache *fscache_cookie_jar;
43
44extern void fscache_cookie_init_once(void *);
45extern void __fscache_cookie_put(struct fscache_cookie *);
46
47/*
48 * fsc-fsdef.c
49 */
50extern struct fscache_cookie fscache_fsdef_index;
51extern struct fscache_cookie_def fscache_fsdef_netfs_def;
52
53/*
54 * fsc-histogram.c
55 */
56#ifdef CONFIG_FSCACHE_HISTOGRAM
57extern atomic_t fscache_obj_instantiate_histogram[HZ];
58extern atomic_t fscache_objs_histogram[HZ];
59extern atomic_t fscache_ops_histogram[HZ];
60extern atomic_t fscache_retrieval_delay_histogram[HZ];
61extern atomic_t fscache_retrieval_histogram[HZ];
62
63static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif)
64{
65 unsigned long jif = jiffies - start_jif;
66 if (jif >= HZ)
67 jif = HZ - 1;
68 atomic_inc(&histogram[jif]);
69}
70
71extern const struct file_operations fscache_histogram_fops;
72
73#else
74#define fscache_hist(hist, start_jif) do {} while (0)
75#endif
76
77/*
78 * fsc-main.c
79 */
80extern unsigned fscache_defer_lookup;
81extern unsigned fscache_defer_create;
82extern unsigned fscache_debug;
83extern struct kobject *fscache_root;
84
85extern int fscache_wait_bit(void *);
86extern int fscache_wait_bit_interruptible(void *);
87
88/*
89 * fsc-object.c
90 */
91extern void fscache_withdrawing_object(struct fscache_cache *,
92 struct fscache_object *);
93extern void fscache_enqueue_object(struct fscache_object *);
94
95/*
96 * fsc-operation.c
97 */
98extern int fscache_submit_exclusive_op(struct fscache_object *,
99 struct fscache_operation *);
100extern int fscache_submit_op(struct fscache_object *,
101 struct fscache_operation *);
102extern void fscache_abort_object(struct fscache_object *);
103extern void fscache_start_operations(struct fscache_object *);
104extern void fscache_operation_gc(struct work_struct *);
105
106/*
107 * fsc-proc.c
108 */
109#ifdef CONFIG_PROC_FS
110extern int __init fscache_proc_init(void);
111extern void fscache_proc_cleanup(void);
112#else
113#define fscache_proc_init() (0)
114#define fscache_proc_cleanup() do {} while (0)
115#endif
116
117/*
118 * fsc-stats.c
119 */
120#ifdef CONFIG_FSCACHE_STATS
121extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS];
122extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS];
123
124extern atomic_t fscache_n_op_pend;
125extern atomic_t fscache_n_op_run;
126extern atomic_t fscache_n_op_enqueue;
127extern atomic_t fscache_n_op_deferred_release;
128extern atomic_t fscache_n_op_release;
129extern atomic_t fscache_n_op_gc;
130
131extern atomic_t fscache_n_attr_changed;
132extern atomic_t fscache_n_attr_changed_ok;
133extern atomic_t fscache_n_attr_changed_nobufs;
134extern atomic_t fscache_n_attr_changed_nomem;
135extern atomic_t fscache_n_attr_changed_calls;
136
137extern atomic_t fscache_n_allocs;
138extern atomic_t fscache_n_allocs_ok;
139extern atomic_t fscache_n_allocs_wait;
140extern atomic_t fscache_n_allocs_nobufs;
141extern atomic_t fscache_n_alloc_ops;
142extern atomic_t fscache_n_alloc_op_waits;
143
144extern atomic_t fscache_n_retrievals;
145extern atomic_t fscache_n_retrievals_ok;
146extern atomic_t fscache_n_retrievals_wait;
147extern atomic_t fscache_n_retrievals_nodata;
148extern atomic_t fscache_n_retrievals_nobufs;
149extern atomic_t fscache_n_retrievals_intr;
150extern atomic_t fscache_n_retrievals_nomem;
151extern atomic_t fscache_n_retrieval_ops;
152extern atomic_t fscache_n_retrieval_op_waits;
153
154extern atomic_t fscache_n_stores;
155extern atomic_t fscache_n_stores_ok;
156extern atomic_t fscache_n_stores_again;
157extern atomic_t fscache_n_stores_nobufs;
158extern atomic_t fscache_n_stores_oom;
159extern atomic_t fscache_n_store_ops;
160extern atomic_t fscache_n_store_calls;
161
162extern atomic_t fscache_n_marks;
163extern atomic_t fscache_n_uncaches;
164
165extern atomic_t fscache_n_acquires;
166extern atomic_t fscache_n_acquires_null;
167extern atomic_t fscache_n_acquires_no_cache;
168extern atomic_t fscache_n_acquires_ok;
169extern atomic_t fscache_n_acquires_nobufs;
170extern atomic_t fscache_n_acquires_oom;
171
172extern atomic_t fscache_n_updates;
173extern atomic_t fscache_n_updates_null;
174extern atomic_t fscache_n_updates_run;
175
176extern atomic_t fscache_n_relinquishes;
177extern atomic_t fscache_n_relinquishes_null;
178extern atomic_t fscache_n_relinquishes_waitcrt;
179
180extern atomic_t fscache_n_cookie_index;
181extern atomic_t fscache_n_cookie_data;
182extern atomic_t fscache_n_cookie_special;
183
184extern atomic_t fscache_n_object_alloc;
185extern atomic_t fscache_n_object_no_alloc;
186extern atomic_t fscache_n_object_lookups;
187extern atomic_t fscache_n_object_lookups_negative;
188extern atomic_t fscache_n_object_lookups_positive;
189extern atomic_t fscache_n_object_created;
190extern atomic_t fscache_n_object_avail;
191extern atomic_t fscache_n_object_dead;
192
193extern atomic_t fscache_n_checkaux_none;
194extern atomic_t fscache_n_checkaux_okay;
195extern atomic_t fscache_n_checkaux_update;
196extern atomic_t fscache_n_checkaux_obsolete;
197
198static inline void fscache_stat(atomic_t *stat)
199{
200 atomic_inc(stat);
201}
202
203extern const struct file_operations fscache_stats_fops;
204#else
205
206#define fscache_stat(stat) do {} while (0)
207#endif
208
209/*
210 * raise an event on an object
211 * - if the event is not masked for that object, then the object is
212 * queued for attention by the thread pool.
213 */
214static inline void fscache_raise_event(struct fscache_object *object,
215 unsigned event)
216{
217 if (!test_and_set_bit(event, &object->events) &&
218 test_bit(event, &object->event_mask))
219 fscache_enqueue_object(object);
220}
221
222/*
223 * drop a reference to a cookie
224 */
225static inline void fscache_cookie_put(struct fscache_cookie *cookie)
226{
227 BUG_ON(atomic_read(&cookie->usage) <= 0);
228 if (atomic_dec_and_test(&cookie->usage))
229 __fscache_cookie_put(cookie);
230}
231
232/*
233 * get an extra reference to a netfs retrieval context
234 */
235static inline
236void *fscache_get_context(struct fscache_cookie *cookie, void *context)
237{
238 if (cookie->def->get_context)
239 cookie->def->get_context(cookie->netfs_data, context);
240 return context;
241}
242
243/*
244 * release a reference to a netfs retrieval context
245 */
246static inline
247void fscache_put_context(struct fscache_cookie *cookie, void *context)
248{
249 if (cookie->def->put_context)
250 cookie->def->put_context(cookie->netfs_data, context);
251}
252
253/*****************************************************************************/
254/*
255 * debug tracing
256 */
257#define dbgprintk(FMT, ...) \
258 printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)
259
260/* make sure we maintain the format strings, even when debugging is disabled */
261static inline __attribute__((format(printf, 1, 2)))
262void _dbprintk(const char *fmt, ...)
263{
264}
265
266#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
267#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
268#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)
269
270#define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
271
272#ifdef __KDEBUG
273#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
274#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
275#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)
276
277#elif defined(CONFIG_FSCACHE_DEBUG)
278#define _enter(FMT, ...) \
279do { \
280 if (__do_kdebug(ENTER)) \
281 kenter(FMT, ##__VA_ARGS__); \
282} while (0)
283
284#define _leave(FMT, ...) \
285do { \
286 if (__do_kdebug(LEAVE)) \
287 kleave(FMT, ##__VA_ARGS__); \
288} while (0)
289
290#define _debug(FMT, ...) \
291do { \
292 if (__do_kdebug(DEBUG)) \
293 kdebug(FMT, ##__VA_ARGS__); \
294} while (0)
295
296#else
297#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
298#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
299#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
300#endif
301
302/*
303 * determine whether a particular optional debugging point should be logged
304 * - we need to go through three steps to persuade cpp to correctly join the
305 * shorthand in FSCACHE_DEBUG_LEVEL with its prefix
306 */
307#define ____do_kdebug(LEVEL, POINT) \
308 unlikely((fscache_debug & \
309 (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
310#define ___do_kdebug(LEVEL, POINT) \
311 ____do_kdebug(LEVEL, POINT)
312#define __do_kdebug(POINT) \
313 ___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)
314
315#define FSCACHE_DEBUG_CACHE 0
316#define FSCACHE_DEBUG_COOKIE 1
317#define FSCACHE_DEBUG_PAGE 2
318#define FSCACHE_DEBUG_OPERATION 3
319
320#define FSCACHE_POINT_ENTER 1
321#define FSCACHE_POINT_LEAVE 2
322#define FSCACHE_POINT_DEBUG 4
323
324#ifndef FSCACHE_DEBUG_LEVEL
325#define FSCACHE_DEBUG_LEVEL CACHE
326#endif
327
328/*
329 * assertions
330 */
331#if 1 /* defined(__KDEBUGALL) */
332
333#define ASSERT(X) \
334do { \
335 if (unlikely(!(X))) { \
336 printk(KERN_ERR "\n"); \
337 printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
338 BUG(); \
339 } \
340} while (0)
341
342#define ASSERTCMP(X, OP, Y) \
343do { \
344 if (unlikely(!((X) OP (Y)))) { \
345 printk(KERN_ERR "\n"); \
346 printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
347 printk(KERN_ERR "%lx " #OP " %lx is false\n", \
348 (unsigned long)(X), (unsigned long)(Y)); \
349 BUG(); \
350 } \
351} while (0)
352
353#define ASSERTIF(C, X) \
354do { \
355 if (unlikely((C) && !(X))) { \
356 printk(KERN_ERR "\n"); \
357 printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
358 BUG(); \
359 } \
360} while (0)
361
362#define ASSERTIFCMP(C, X, OP, Y) \
363do { \
364 if (unlikely((C) && !((X) OP (Y)))) { \
365 printk(KERN_ERR "\n"); \
366 printk(KERN_ERR "FS-Cache: Assertion failed\n"); \
367 printk(KERN_ERR "%lx " #OP " %lx is false\n", \
368 (unsigned long)(X), (unsigned long)(Y)); \
369 BUG(); \
370 } \
371} while (0)
372
373#else
374
375#define ASSERT(X) do {} while (0)
376#define ASSERTCMP(X, OP, Y) do {} while (0)
377#define ASSERTIF(C, X) do {} while (0)
378#define ASSERTIFCMP(C, X, OP, Y) do {} while (0)
379
380#endif /* assert or not */
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
new file mode 100644
index 000000000000..4de41b597499
--- /dev/null
+++ b/fs/fscache/main.c
@@ -0,0 +1,124 @@
1/* General filesystem local caching manager
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL CACHE
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/sched.h>
16#include <linux/completion.h>
17#include <linux/slab.h>
18#include "internal.h"
19
20MODULE_DESCRIPTION("FS Cache Manager");
21MODULE_AUTHOR("Red Hat, Inc.");
22MODULE_LICENSE("GPL");
23
24unsigned fscache_defer_lookup = 1;
25module_param_named(defer_lookup, fscache_defer_lookup, uint,
26 S_IWUSR | S_IRUGO);
27MODULE_PARM_DESC(fscache_defer_lookup,
28 "Defer cookie lookup to background thread");
29
30unsigned fscache_defer_create = 1;
31module_param_named(defer_create, fscache_defer_create, uint,
32 S_IWUSR | S_IRUGO);
33MODULE_PARM_DESC(fscache_defer_create,
34 "Defer cookie creation to background thread");
35
36unsigned fscache_debug;
37module_param_named(debug, fscache_debug, uint,
38 S_IWUSR | S_IRUGO);
39MODULE_PARM_DESC(fscache_debug,
40 "FS-Cache debugging mask");
41
42struct kobject *fscache_root;
43
44/*
45 * initialise the fs caching module
46 */
47static int __init fscache_init(void)
48{
49 int ret;
50
51 ret = slow_work_register_user();
52 if (ret < 0)
53 goto error_slow_work;
54
55 ret = fscache_proc_init();
56 if (ret < 0)
57 goto error_proc;
58
59 fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar",
60 sizeof(struct fscache_cookie),
61 0,
62 0,
63 fscache_cookie_init_once);
64 if (!fscache_cookie_jar) {
65 printk(KERN_NOTICE
66 "FS-Cache: Failed to allocate a cookie jar\n");
67 ret = -ENOMEM;
68 goto error_cookie_jar;
69 }
70
71 fscache_root = kobject_create_and_add("fscache", kernel_kobj);
72 if (!fscache_root)
73 goto error_kobj;
74
75 printk(KERN_NOTICE "FS-Cache: Loaded\n");
76 return 0;
77
78error_kobj:
79 kmem_cache_destroy(fscache_cookie_jar);
80error_cookie_jar:
81 fscache_proc_cleanup();
82error_proc:
83 slow_work_unregister_user();
84error_slow_work:
85 return ret;
86}
87
88fs_initcall(fscache_init);
89
90/*
91 * clean up on module removal
92 */
93static void __exit fscache_exit(void)
94{
95 _enter("");
96
97 kobject_put(fscache_root);
98 kmem_cache_destroy(fscache_cookie_jar);
99 fscache_proc_cleanup();
100 slow_work_unregister_user();
101 printk(KERN_NOTICE "FS-Cache: Unloaded\n");
102}
103
104module_exit(fscache_exit);
105
106/*
107 * wait_on_bit() sleep function for uninterruptible waiting
108 */
109int fscache_wait_bit(void *flags)
110{
111 schedule();
112 return 0;
113}
114EXPORT_SYMBOL(fscache_wait_bit);
115
116/*
117 * wait_on_bit() sleep function for interruptible waiting
118 */
119int fscache_wait_bit_interruptible(void *flags)
120{
121 schedule();
122 return signal_pending(current);
123}
124EXPORT_SYMBOL(fscache_wait_bit_interruptible);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
new file mode 100644
index 000000000000..e028b8eb1c40
--- /dev/null
+++ b/fs/fscache/netfs.c
@@ -0,0 +1,103 @@
1/* FS-Cache netfs (client) registration
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL COOKIE
13#include <linux/module.h>
14#include <linux/slab.h>
15#include "internal.h"
16
17static LIST_HEAD(fscache_netfs_list);
18
19/*
20 * register a network filesystem for caching
21 */
22int __fscache_register_netfs(struct fscache_netfs *netfs)
23{
24 struct fscache_netfs *ptr;
25 int ret;
26
27 _enter("{%s}", netfs->name);
28
29 INIT_LIST_HEAD(&netfs->link);
30
31 /* allocate a cookie for the primary index */
32 netfs->primary_index =
33 kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL);
34
35 if (!netfs->primary_index) {
36 _leave(" = -ENOMEM");
37 return -ENOMEM;
38 }
39
40 /* initialise the primary index cookie */
41 atomic_set(&netfs->primary_index->usage, 1);
42 atomic_set(&netfs->primary_index->n_children, 0);
43
44 netfs->primary_index->def = &fscache_fsdef_netfs_def;
45 netfs->primary_index->parent = &fscache_fsdef_index;
46 netfs->primary_index->netfs_data = netfs;
47
48 atomic_inc(&netfs->primary_index->parent->usage);
49 atomic_inc(&netfs->primary_index->parent->n_children);
50
51 spin_lock_init(&netfs->primary_index->lock);
52 INIT_HLIST_HEAD(&netfs->primary_index->backing_objects);
53
54 /* check the netfs type is not already present */
55 down_write(&fscache_addremove_sem);
56
57 ret = -EEXIST;
58 list_for_each_entry(ptr, &fscache_netfs_list, link) {
59 if (strcmp(ptr->name, netfs->name) == 0)
60 goto already_registered;
61 }
62
63 list_add(&netfs->link, &fscache_netfs_list);
64 ret = 0;
65
66 printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n",
67 netfs->name);
68
69already_registered:
70 up_write(&fscache_addremove_sem);
71
72 if (ret < 0) {
73 netfs->primary_index->parent = NULL;
74 __fscache_cookie_put(netfs->primary_index);
75 netfs->primary_index = NULL;
76 }
77
78 _leave(" = %d", ret);
79 return ret;
80}
81EXPORT_SYMBOL(__fscache_register_netfs);
82
83/*
84 * unregister a network filesystem from the cache
85 * - all cookies must have been released first
86 */
87void __fscache_unregister_netfs(struct fscache_netfs *netfs)
88{
89 _enter("{%s.%u}", netfs->name, netfs->version);
90
91 down_write(&fscache_addremove_sem);
92
93 list_del(&netfs->link);
94 fscache_relinquish_cookie(netfs->primary_index, 0);
95
96 up_write(&fscache_addremove_sem);
97
98 printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n",
99 netfs->name);
100
101 _leave("");
102}
103EXPORT_SYMBOL(__fscache_unregister_netfs);
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
new file mode 100644
index 000000000000..392a41b1b79d
--- /dev/null
+++ b/fs/fscache/object.c
@@ -0,0 +1,810 @@
1/* FS-Cache object state machine handler
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * See Documentation/filesystems/caching/object.txt for a description of the
12 * object state machine and the in-kernel representations.
13 */
14
15#define FSCACHE_DEBUG_LEVEL COOKIE
16#include <linux/module.h>
17#include "internal.h"
18
19const char *fscache_object_states[] = {
20 [FSCACHE_OBJECT_INIT] = "OBJECT_INIT",
21 [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP",
22 [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING",
23 [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE",
24 [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE",
25 [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING",
26 [FSCACHE_OBJECT_DYING] = "OBJECT_DYING",
27 [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING",
28 [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT",
29 [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING",
30 [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING",
31 [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING",
32 [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD",
33};
34EXPORT_SYMBOL(fscache_object_states);
35
36static void fscache_object_slow_work_put_ref(struct slow_work *);
37static int fscache_object_slow_work_get_ref(struct slow_work *);
38static void fscache_object_slow_work_execute(struct slow_work *);
39static void fscache_initialise_object(struct fscache_object *);
40static void fscache_lookup_object(struct fscache_object *);
41static void fscache_object_available(struct fscache_object *);
42static void fscache_release_object(struct fscache_object *);
43static void fscache_withdraw_object(struct fscache_object *);
44static void fscache_enqueue_dependents(struct fscache_object *);
45static void fscache_dequeue_object(struct fscache_object *);
46
47const struct slow_work_ops fscache_object_slow_work_ops = {
48 .get_ref = fscache_object_slow_work_get_ref,
49 .put_ref = fscache_object_slow_work_put_ref,
50 .execute = fscache_object_slow_work_execute,
51};
52EXPORT_SYMBOL(fscache_object_slow_work_ops);
53
54/*
55 * we need to notify the parent when an op completes that we had outstanding
56 * upon it
57 */
58static inline void fscache_done_parent_op(struct fscache_object *object)
59{
60 struct fscache_object *parent = object->parent;
61
62 _enter("OBJ%x {OBJ%x,%x}",
63 object->debug_id, parent->debug_id, parent->n_ops);
64
65 spin_lock_nested(&parent->lock, 1);
66 parent->n_ops--;
67 parent->n_obj_ops--;
68 if (parent->n_ops == 0)
69 fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
70 spin_unlock(&parent->lock);
71}
72
73/*
74 * process events that have been sent to an object's state machine
75 * - initiates parent lookup
76 * - does object lookup
77 * - does object creation
78 * - does object recycling and retirement
79 * - does object withdrawal
80 */
81static void fscache_object_state_machine(struct fscache_object *object)
82{
83 enum fscache_object_state new_state;
84
85 ASSERT(object != NULL);
86
87 _enter("{OBJ%x,%s,%lx}",
88 object->debug_id, fscache_object_states[object->state],
89 object->events);
90
91 switch (object->state) {
92 /* wait for the parent object to become ready */
93 case FSCACHE_OBJECT_INIT:
94 object->event_mask =
95 ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED);
96 fscache_initialise_object(object);
97 goto done;
98
99 /* look up the object metadata on disk */
100 case FSCACHE_OBJECT_LOOKING_UP:
101 fscache_lookup_object(object);
102 goto lookup_transit;
103
104 /* create the object metadata on disk */
105 case FSCACHE_OBJECT_CREATING:
106 fscache_lookup_object(object);
107 goto lookup_transit;
108
109 /* handle an object becoming available; start pending
110 * operations and queue dependent operations for processing */
111 case FSCACHE_OBJECT_AVAILABLE:
112 fscache_object_available(object);
113 goto active_transit;
114
115 /* normal running state */
116 case FSCACHE_OBJECT_ACTIVE:
117 goto active_transit;
118
119 /* update the object metadata on disk */
120 case FSCACHE_OBJECT_UPDATING:
121 clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
122 fscache_stat(&fscache_n_updates_run);
123 object->cache->ops->update_object(object);
124 goto active_transit;
125
126 /* handle an object dying during lookup or creation */
127 case FSCACHE_OBJECT_LC_DYING:
128 object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
129 object->cache->ops->lookup_complete(object);
130
131 spin_lock(&object->lock);
132 object->state = FSCACHE_OBJECT_DYING;
133 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
134 &object->cookie->flags))
135 wake_up_bit(&object->cookie->flags,
136 FSCACHE_COOKIE_CREATING);
137 spin_unlock(&object->lock);
138
139 fscache_done_parent_op(object);
140
141 /* wait for completion of all active operations on this object
142 * and the death of all child objects of this object */
143 case FSCACHE_OBJECT_DYING:
144 dying:
145 clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events);
146 spin_lock(&object->lock);
147 _debug("dying OBJ%x {%d,%d}",
148 object->debug_id, object->n_ops, object->n_children);
149 if (object->n_ops == 0 && object->n_children == 0) {
150 object->event_mask &=
151 ~(1 << FSCACHE_OBJECT_EV_CLEARED);
152 object->event_mask |=
153 (1 << FSCACHE_OBJECT_EV_WITHDRAW) |
154 (1 << FSCACHE_OBJECT_EV_RETIRE) |
155 (1 << FSCACHE_OBJECT_EV_RELEASE) |
156 (1 << FSCACHE_OBJECT_EV_ERROR);
157 } else {
158 object->event_mask &=
159 ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
160 (1 << FSCACHE_OBJECT_EV_RETIRE) |
161 (1 << FSCACHE_OBJECT_EV_RELEASE) |
162 (1 << FSCACHE_OBJECT_EV_ERROR));
163 object->event_mask |=
164 1 << FSCACHE_OBJECT_EV_CLEARED;
165 }
166 spin_unlock(&object->lock);
167 fscache_enqueue_dependents(object);
168 goto terminal_transit;
169
170 /* handle an abort during initialisation */
171 case FSCACHE_OBJECT_ABORT_INIT:
172 _debug("handle abort init %lx", object->events);
173 object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
174
175 spin_lock(&object->lock);
176 fscache_dequeue_object(object);
177
178 object->state = FSCACHE_OBJECT_DYING;
179 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
180 &object->cookie->flags))
181 wake_up_bit(&object->cookie->flags,
182 FSCACHE_COOKIE_CREATING);
183 spin_unlock(&object->lock);
184 goto dying;
185
186 /* handle the netfs releasing an object and possibly marking it
187 * obsolete too */
188 case FSCACHE_OBJECT_RELEASING:
189 case FSCACHE_OBJECT_RECYCLING:
190 object->event_mask &=
191 ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
192 (1 << FSCACHE_OBJECT_EV_RETIRE) |
193 (1 << FSCACHE_OBJECT_EV_RELEASE) |
194 (1 << FSCACHE_OBJECT_EV_ERROR));
195 fscache_release_object(object);
196 spin_lock(&object->lock);
197 object->state = FSCACHE_OBJECT_DEAD;
198 spin_unlock(&object->lock);
199 fscache_stat(&fscache_n_object_dead);
200 goto terminal_transit;
201
202 /* handle the parent cache of this object being withdrawn from
203 * active service */
204 case FSCACHE_OBJECT_WITHDRAWING:
205 object->event_mask &=
206 ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) |
207 (1 << FSCACHE_OBJECT_EV_RETIRE) |
208 (1 << FSCACHE_OBJECT_EV_RELEASE) |
209 (1 << FSCACHE_OBJECT_EV_ERROR));
210 fscache_withdraw_object(object);
211 spin_lock(&object->lock);
212 object->state = FSCACHE_OBJECT_DEAD;
213 spin_unlock(&object->lock);
214 fscache_stat(&fscache_n_object_dead);
215 goto terminal_transit;
216
217 /* complain about the object being woken up once it is
218 * deceased */
219 case FSCACHE_OBJECT_DEAD:
220 printk(KERN_ERR "FS-Cache:"
221 " Unexpected event in dead state %lx\n",
222 object->events & object->event_mask);
223 BUG();
224
225 default:
226 printk(KERN_ERR "FS-Cache: Unknown object state %u\n",
227 object->state);
228 BUG();
229 }
230
231 /* determine the transition from a lookup state */
232lookup_transit:
233 switch (fls(object->events & object->event_mask) - 1) {
234 case FSCACHE_OBJECT_EV_WITHDRAW:
235 case FSCACHE_OBJECT_EV_RETIRE:
236 case FSCACHE_OBJECT_EV_RELEASE:
237 case FSCACHE_OBJECT_EV_ERROR:
238 new_state = FSCACHE_OBJECT_LC_DYING;
239 goto change_state;
240 case FSCACHE_OBJECT_EV_REQUEUE:
241 goto done;
242 case -1:
243 goto done; /* sleep until event */
244 default:
245 goto unsupported_event;
246 }
247
248 /* determine the transition from an active state */
249active_transit:
250 switch (fls(object->events & object->event_mask) - 1) {
251 case FSCACHE_OBJECT_EV_WITHDRAW:
252 case FSCACHE_OBJECT_EV_RETIRE:
253 case FSCACHE_OBJECT_EV_RELEASE:
254 case FSCACHE_OBJECT_EV_ERROR:
255 new_state = FSCACHE_OBJECT_DYING;
256 goto change_state;
257 case FSCACHE_OBJECT_EV_UPDATE:
258 new_state = FSCACHE_OBJECT_UPDATING;
259 goto change_state;
260 case -1:
261 new_state = FSCACHE_OBJECT_ACTIVE;
262 goto change_state; /* sleep until event */
263 default:
264 goto unsupported_event;
265 }
266
267 /* determine the transition from a terminal state */
268terminal_transit:
269 switch (fls(object->events & object->event_mask) - 1) {
270 case FSCACHE_OBJECT_EV_WITHDRAW:
271 new_state = FSCACHE_OBJECT_WITHDRAWING;
272 goto change_state;
273 case FSCACHE_OBJECT_EV_RETIRE:
274 new_state = FSCACHE_OBJECT_RECYCLING;
275 goto change_state;
276 case FSCACHE_OBJECT_EV_RELEASE:
277 new_state = FSCACHE_OBJECT_RELEASING;
278 goto change_state;
279 case FSCACHE_OBJECT_EV_ERROR:
280 new_state = FSCACHE_OBJECT_WITHDRAWING;
281 goto change_state;
282 case FSCACHE_OBJECT_EV_CLEARED:
283 new_state = FSCACHE_OBJECT_DYING;
284 goto change_state;
285 case -1:
286 goto done; /* sleep until event */
287 default:
288 goto unsupported_event;
289 }
290
291change_state:
292 spin_lock(&object->lock);
293 object->state = new_state;
294 spin_unlock(&object->lock);
295
296done:
297 _leave(" [->%s]", fscache_object_states[object->state]);
298 return;
299
300unsupported_event:
301 printk(KERN_ERR "FS-Cache:"
302 " Unsupported event %lx [mask %lx] in state %s\n",
303 object->events, object->event_mask,
304 fscache_object_states[object->state]);
305 BUG();
306}
307
308/*
309 * execute an object
310 */
311static void fscache_object_slow_work_execute(struct slow_work *work)
312{
313 struct fscache_object *object =
314 container_of(work, struct fscache_object, work);
315 unsigned long start;
316
317 _enter("{OBJ%x}", object->debug_id);
318
319 clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
320
321 start = jiffies;
322 fscache_object_state_machine(object);
323 fscache_hist(fscache_objs_histogram, start);
324 if (object->events & object->event_mask)
325 fscache_enqueue_object(object);
326}
327
328/*
329 * initialise an object
330 * - check the specified object's parent to see if we can make use of it
331 * immediately to do a creation
332 * - we may need to start the process of creating a parent and we need to wait
333 * for the parent's lookup and creation to complete if it's not there yet
334 * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
335 * leaf-most cookies of the object and all its children
336 */
337static void fscache_initialise_object(struct fscache_object *object)
338{
339 struct fscache_object *parent;
340
341 _enter("");
342 ASSERT(object->cookie != NULL);
343 ASSERT(object->cookie->parent != NULL);
344 ASSERT(list_empty(&object->work.link));
345
346 if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) |
347 (1 << FSCACHE_OBJECT_EV_RELEASE) |
348 (1 << FSCACHE_OBJECT_EV_RETIRE) |
349 (1 << FSCACHE_OBJECT_EV_WITHDRAW))) {
350 _debug("abort init %lx", object->events);
351 spin_lock(&object->lock);
352 object->state = FSCACHE_OBJECT_ABORT_INIT;
353 spin_unlock(&object->lock);
354 return;
355 }
356
357 spin_lock(&object->cookie->lock);
358 spin_lock_nested(&object->cookie->parent->lock, 1);
359
360 parent = object->parent;
361 if (!parent) {
362 _debug("no parent");
363 set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
364 } else {
365 spin_lock(&object->lock);
366 spin_lock_nested(&parent->lock, 1);
367 _debug("parent %s", fscache_object_states[parent->state]);
368
369 if (parent->state >= FSCACHE_OBJECT_DYING) {
370 _debug("bad parent");
371 set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
372 } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) {
373 _debug("wait");
374
375 /* we may get woken up in this state by child objects
376 * binding on to us, so we need to make sure we don't
377 * add ourself to the list multiple times */
378 if (list_empty(&object->dep_link)) {
379 object->cache->ops->grab_object(object);
380 list_add(&object->dep_link,
381 &parent->dependents);
382
383 /* fscache_acquire_non_index_cookie() uses this
384 * to wake the chain up */
385 if (parent->state == FSCACHE_OBJECT_INIT)
386 fscache_enqueue_object(parent);
387 }
388 } else {
389 _debug("go");
390 parent->n_ops++;
391 parent->n_obj_ops++;
392 object->lookup_jif = jiffies;
393 object->state = FSCACHE_OBJECT_LOOKING_UP;
394 set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
395 }
396
397 spin_unlock(&parent->lock);
398 spin_unlock(&object->lock);
399 }
400
401 spin_unlock(&object->cookie->parent->lock);
402 spin_unlock(&object->cookie->lock);
403 _leave("");
404}
405
406/*
407 * look an object up in the cache from which it was allocated
408 * - we hold an "access lock" on the parent object, so the parent object cannot
409 * be withdrawn by either party till we've finished
410 * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the
411 * leaf-most cookies of the object and all its children
412 */
413static void fscache_lookup_object(struct fscache_object *object)
414{
415 struct fscache_cookie *cookie = object->cookie;
416 struct fscache_object *parent;
417
418 _enter("");
419
420 parent = object->parent;
421 ASSERT(parent != NULL);
422 ASSERTCMP(parent->n_ops, >, 0);
423 ASSERTCMP(parent->n_obj_ops, >, 0);
424
425 /* make sure the parent is still available */
426 ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE);
427
428 if (parent->state >= FSCACHE_OBJECT_DYING ||
429 test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
430 _debug("unavailable");
431 set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events);
432 _leave("");
433 return;
434 }
435
436 _debug("LOOKUP \"%s/%s\" in \"%s\"",
437 parent->cookie->def->name, cookie->def->name,
438 object->cache->tag->name);
439
440 fscache_stat(&fscache_n_object_lookups);
441 object->cache->ops->lookup_object(object);
442
443 if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events))
444 set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
445
446 _leave("");
447}
448
449/**
450 * fscache_object_lookup_negative - Note negative cookie lookup
451 * @object: Object pointing to cookie to mark
452 *
453 * Note negative lookup, permitting those waiting to read data from an already
454 * existing backing object to continue as there's no data for them to read.
455 */
456void fscache_object_lookup_negative(struct fscache_object *object)
457{
458 struct fscache_cookie *cookie = object->cookie;
459
460 _enter("{OBJ%x,%s}",
461 object->debug_id, fscache_object_states[object->state]);
462
463 spin_lock(&object->lock);
464 if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
465 fscache_stat(&fscache_n_object_lookups_negative);
466
467 /* transit here to allow write requests to begin stacking up
468 * and read requests to begin returning ENODATA */
469 object->state = FSCACHE_OBJECT_CREATING;
470 spin_unlock(&object->lock);
471
472 set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags);
473 set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
474
475 _debug("wake up lookup %p", &cookie->flags);
476 smp_mb__before_clear_bit();
477 clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
478 smp_mb__after_clear_bit();
479 wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
480 set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
481 } else {
482 ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
483 spin_unlock(&object->lock);
484 }
485
486 _leave("");
487}
488EXPORT_SYMBOL(fscache_object_lookup_negative);
489
490/**
491 * fscache_obtained_object - Note successful object lookup or creation
492 * @object: Object pointing to cookie to mark
493 *
494 * Note successful lookup and/or creation, permitting those waiting to write
495 * data to a backing object to continue.
496 *
497 * Note that after calling this, an object's cookie may be relinquished by the
498 * netfs, and so must be accessed with object lock held.
499 */
500void fscache_obtained_object(struct fscache_object *object)
501{
502 struct fscache_cookie *cookie = object->cookie;
503
504 _enter("{OBJ%x,%s}",
505 object->debug_id, fscache_object_states[object->state]);
506
507 /* if we were still looking up, then we must have a positive lookup
508 * result, in which case there may be data available */
509 spin_lock(&object->lock);
510 if (object->state == FSCACHE_OBJECT_LOOKING_UP) {
511 fscache_stat(&fscache_n_object_lookups_positive);
512
513 clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
514
515 object->state = FSCACHE_OBJECT_AVAILABLE;
516 spin_unlock(&object->lock);
517
518 smp_mb__before_clear_bit();
519 clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags);
520 smp_mb__after_clear_bit();
521 wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
522 set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
523 } else {
524 ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING);
525 fscache_stat(&fscache_n_object_created);
526
527 object->state = FSCACHE_OBJECT_AVAILABLE;
528 spin_unlock(&object->lock);
529 set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
530 smp_wmb();
531 }
532
533 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags))
534 wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING);
535
536 _leave("");
537}
538EXPORT_SYMBOL(fscache_obtained_object);
539
540/*
541 * handle an object that has just become available
542 */
543static void fscache_object_available(struct fscache_object *object)
544{
545 _enter("{OBJ%x}", object->debug_id);
546
547 spin_lock(&object->lock);
548
549 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags))
550 wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING);
551
552 fscache_done_parent_op(object);
553 if (object->n_in_progress == 0) {
554 if (object->n_ops > 0) {
555 ASSERTCMP(object->n_ops, >=, object->n_obj_ops);
556 ASSERTIF(object->n_ops > object->n_obj_ops,
557 !list_empty(&object->pending_ops));
558 fscache_start_operations(object);
559 } else {
560 ASSERT(list_empty(&object->pending_ops));
561 }
562 }
563 spin_unlock(&object->lock);
564
565 object->cache->ops->lookup_complete(object);
566 fscache_enqueue_dependents(object);
567
568 fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif);
569 fscache_stat(&fscache_n_object_avail);
570
571 _leave("");
572}
573
574/*
575 * drop an object's attachments
576 */
577static void fscache_drop_object(struct fscache_object *object)
578{
579 struct fscache_object *parent = object->parent;
580 struct fscache_cache *cache = object->cache;
581
582 _enter("{OBJ%x,%d}", object->debug_id, object->n_children);
583
584 spin_lock(&cache->object_list_lock);
585 list_del_init(&object->cache_link);
586 spin_unlock(&cache->object_list_lock);
587
588 cache->ops->drop_object(object);
589
590 if (parent) {
591 _debug("release parent OBJ%x {%d}",
592 parent->debug_id, parent->n_children);
593
594 spin_lock(&parent->lock);
595 parent->n_children--;
596 if (parent->n_children == 0)
597 fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED);
598 spin_unlock(&parent->lock);
599 object->parent = NULL;
600 }
601
602 /* this just shifts the object release to the slow work processor */
603 object->cache->ops->put_object(object);
604
605 _leave("");
606}
607
608/*
609 * release or recycle an object that the netfs has discarded
610 */
611static void fscache_release_object(struct fscache_object *object)
612{
613 _enter("");
614
615 fscache_drop_object(object);
616}
617
618/*
619 * withdraw an object from active service
620 */
621static void fscache_withdraw_object(struct fscache_object *object)
622{
623 struct fscache_cookie *cookie;
624 bool detached;
625
626 _enter("");
627
628 spin_lock(&object->lock);
629 cookie = object->cookie;
630 if (cookie) {
631 /* need to get the cookie lock before the object lock, starting
632 * from the object pointer */
633 atomic_inc(&cookie->usage);
634 spin_unlock(&object->lock);
635
636 detached = false;
637 spin_lock(&cookie->lock);
638 spin_lock(&object->lock);
639
640 if (object->cookie == cookie) {
641 hlist_del_init(&object->cookie_link);
642 object->cookie = NULL;
643 detached = true;
644 }
645 spin_unlock(&cookie->lock);
646 fscache_cookie_put(cookie);
647 if (detached)
648 fscache_cookie_put(cookie);
649 }
650
651 spin_unlock(&object->lock);
652
653 fscache_drop_object(object);
654}
655
656/*
657 * withdraw an object from active service at the behest of the cache
658 * - need break the links to a cached object cookie
659 * - called under two situations:
660 * (1) recycler decides to reclaim an in-use object
661 * (2) a cache is unmounted
662 * - have to take care as the cookie can be being relinquished by the netfs
663 * simultaneously
664 * - the object is pinned by the caller holding a refcount on it
665 */
666void fscache_withdrawing_object(struct fscache_cache *cache,
667 struct fscache_object *object)
668{
669 bool enqueue = false;
670
671 _enter(",OBJ%x", object->debug_id);
672
673 spin_lock(&object->lock);
674 if (object->state < FSCACHE_OBJECT_WITHDRAWING) {
675 object->state = FSCACHE_OBJECT_WITHDRAWING;
676 enqueue = true;
677 }
678 spin_unlock(&object->lock);
679
680 if (enqueue)
681 fscache_enqueue_object(object);
682
683 _leave("");
684}
685
686/*
687 * allow the slow work item processor to get a ref on an object
688 */
689static int fscache_object_slow_work_get_ref(struct slow_work *work)
690{
691 struct fscache_object *object =
692 container_of(work, struct fscache_object, work);
693
694 return object->cache->ops->grab_object(object) ? 0 : -EAGAIN;
695}
696
697/*
698 * allow the slow work item processor to discard a ref on a work item
699 */
700static void fscache_object_slow_work_put_ref(struct slow_work *work)
701{
702 struct fscache_object *object =
703 container_of(work, struct fscache_object, work);
704
705 return object->cache->ops->put_object(object);
706}
707
708/*
709 * enqueue an object for metadata-type processing
710 */
711void fscache_enqueue_object(struct fscache_object *object)
712{
713 _enter("{OBJ%x}", object->debug_id);
714
715 slow_work_enqueue(&object->work);
716}
717
718/*
719 * enqueue the dependents of an object for metadata-type processing
720 * - the caller must hold the object's lock
721 * - this may cause an already locked object to wind up being processed again
722 */
723static void fscache_enqueue_dependents(struct fscache_object *object)
724{
725 struct fscache_object *dep;
726
727 _enter("{OBJ%x}", object->debug_id);
728
729 if (list_empty(&object->dependents))
730 return;
731
732 spin_lock(&object->lock);
733
734 while (!list_empty(&object->dependents)) {
735 dep = list_entry(object->dependents.next,
736 struct fscache_object, dep_link);
737 list_del_init(&dep->dep_link);
738
739
740 /* sort onto appropriate lists */
741 fscache_enqueue_object(dep);
742 dep->cache->ops->put_object(dep);
743
744 if (!list_empty(&object->dependents))
745 cond_resched_lock(&object->lock);
746 }
747
748 spin_unlock(&object->lock);
749}
750
751/*
752 * remove an object from whatever queue it's waiting on
753 * - the caller must hold object->lock
754 */
755void fscache_dequeue_object(struct fscache_object *object)
756{
757 _enter("{OBJ%x}", object->debug_id);
758
759 if (!list_empty(&object->dep_link)) {
760 spin_lock(&object->parent->lock);
761 list_del_init(&object->dep_link);
762 spin_unlock(&object->parent->lock);
763 }
764
765 _leave("");
766}
767
768/**
769 * fscache_check_aux - Ask the netfs whether an object on disk is still valid
770 * @object: The object to ask about
771 * @data: The auxiliary data for the object
772 * @datalen: The size of the auxiliary data
773 *
774 * This function consults the netfs about the coherency state of an object
775 */
776enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
777 const void *data, uint16_t datalen)
778{
779 enum fscache_checkaux result;
780
781 if (!object->cookie->def->check_aux) {
782 fscache_stat(&fscache_n_checkaux_none);
783 return FSCACHE_CHECKAUX_OKAY;
784 }
785
786 result = object->cookie->def->check_aux(object->cookie->netfs_data,
787 data, datalen);
788 switch (result) {
789 /* entry okay as is */
790 case FSCACHE_CHECKAUX_OKAY:
791 fscache_stat(&fscache_n_checkaux_okay);
792 break;
793
794 /* entry requires update */
795 case FSCACHE_CHECKAUX_NEEDS_UPDATE:
796 fscache_stat(&fscache_n_checkaux_update);
797 break;
798
799 /* entry requires deletion */
800 case FSCACHE_CHECKAUX_OBSOLETE:
801 fscache_stat(&fscache_n_checkaux_obsolete);
802 break;
803
804 default:
805 BUG();
806 }
807
808 return result;
809}
810EXPORT_SYMBOL(fscache_check_aux);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
new file mode 100644
index 000000000000..e7f8d53b8b6b
--- /dev/null
+++ b/fs/fscache/operation.c
@@ -0,0 +1,459 @@
1/* FS-Cache worker operation management routines
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * See Documentation/filesystems/caching/operations.txt
12 */
13
14#define FSCACHE_DEBUG_LEVEL OPERATION
15#include <linux/module.h>
16#include "internal.h"
17
18atomic_t fscache_op_debug_id;
19EXPORT_SYMBOL(fscache_op_debug_id);
20
21/**
22 * fscache_enqueue_operation - Enqueue an operation for processing
23 * @op: The operation to enqueue
24 *
25 * Enqueue an operation for processing by the FS-Cache thread pool.
26 *
27 * This will get its own ref on the object.
28 */
29void fscache_enqueue_operation(struct fscache_operation *op)
30{
31 _enter("{OBJ%x OP%x,%u}",
32 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
33
34 ASSERT(op->processor != NULL);
35 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
36 ASSERTCMP(atomic_read(&op->usage), >, 0);
37
38 if (list_empty(&op->pend_link)) {
39 switch (op->flags & FSCACHE_OP_TYPE) {
40 case FSCACHE_OP_FAST:
41 _debug("queue fast");
42 atomic_inc(&op->usage);
43 if (!schedule_work(&op->fast_work))
44 fscache_put_operation(op);
45 break;
46 case FSCACHE_OP_SLOW:
47 _debug("queue slow");
48 slow_work_enqueue(&op->slow_work);
49 break;
50 case FSCACHE_OP_MYTHREAD:
51 _debug("queue for caller's attention");
52 break;
53 default:
54 printk(KERN_ERR "FS-Cache: Unexpected op type %lx",
55 op->flags);
56 BUG();
57 break;
58 }
59 fscache_stat(&fscache_n_op_enqueue);
60 }
61}
62EXPORT_SYMBOL(fscache_enqueue_operation);
63
64/*
65 * start an op running
66 */
67static void fscache_run_op(struct fscache_object *object,
68 struct fscache_operation *op)
69{
70 object->n_in_progress++;
71 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
72 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
73 if (op->processor)
74 fscache_enqueue_operation(op);
75 fscache_stat(&fscache_n_op_run);
76}
77
78/*
79 * submit an exclusive operation for an object
80 * - other ops are excluded from running simultaneously with this one
81 * - this gets any extra refs it needs on an op
82 */
83int fscache_submit_exclusive_op(struct fscache_object *object,
84 struct fscache_operation *op)
85{
86 int ret;
87
88 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
89
90 spin_lock(&object->lock);
91 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
92 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
93
94 ret = -ENOBUFS;
95 if (fscache_object_is_active(object)) {
96 op->object = object;
97 object->n_ops++;
98 object->n_exclusive++; /* reads and writes must wait */
99
100 if (object->n_ops > 0) {
101 atomic_inc(&op->usage);
102 list_add_tail(&op->pend_link, &object->pending_ops);
103 fscache_stat(&fscache_n_op_pend);
104 } else if (!list_empty(&object->pending_ops)) {
105 atomic_inc(&op->usage);
106 list_add_tail(&op->pend_link, &object->pending_ops);
107 fscache_stat(&fscache_n_op_pend);
108 fscache_start_operations(object);
109 } else {
110 ASSERTCMP(object->n_in_progress, ==, 0);
111 fscache_run_op(object, op);
112 }
113
114 /* need to issue a new write op after this */
115 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
116 ret = 0;
117 } else if (object->state == FSCACHE_OBJECT_CREATING) {
118 op->object = object;
119 object->n_ops++;
120 object->n_exclusive++; /* reads and writes must wait */
121 atomic_inc(&op->usage);
122 list_add_tail(&op->pend_link, &object->pending_ops);
123 fscache_stat(&fscache_n_op_pend);
124 ret = 0;
125 } else {
126 /* not allowed to submit ops in any other state */
127 BUG();
128 }
129
130 spin_unlock(&object->lock);
131 return ret;
132}
133
134/*
135 * report an unexpected submission
136 */
137static void fscache_report_unexpected_submission(struct fscache_object *object,
138 struct fscache_operation *op,
139 unsigned long ostate)
140{
141 static bool once_only;
142 struct fscache_operation *p;
143 unsigned n;
144
145 if (once_only)
146 return;
147 once_only = true;
148
149 kdebug("unexpected submission OP%x [OBJ%x %s]",
150 op->debug_id, object->debug_id,
151 fscache_object_states[object->state]);
152 kdebug("objstate=%s [%s]",
153 fscache_object_states[object->state],
154 fscache_object_states[ostate]);
155 kdebug("objflags=%lx", object->flags);
156 kdebug("objevent=%lx [%lx]", object->events, object->event_mask);
157 kdebug("ops=%u inp=%u exc=%u",
158 object->n_ops, object->n_in_progress, object->n_exclusive);
159
160 if (!list_empty(&object->pending_ops)) {
161 n = 0;
162 list_for_each_entry(p, &object->pending_ops, pend_link) {
163 ASSERTCMP(p->object, ==, object);
164 kdebug("%p %p", op->processor, op->release);
165 n++;
166 }
167
168 kdebug("n=%u", n);
169 }
170
171 dump_stack();
172}
173
174/*
175 * submit an operation for an object
176 * - objects may be submitted only in the following states:
177 * - during object creation (write ops may be submitted)
178 * - whilst the object is active
179 * - after an I/O error incurred in one of the two above states (op rejected)
180 * - this gets any extra refs it needs on an op
181 */
182int fscache_submit_op(struct fscache_object *object,
183 struct fscache_operation *op)
184{
185 unsigned long ostate;
186 int ret;
187
188 _enter("{OBJ%x OP%x},{%u}",
189 object->debug_id, op->debug_id, atomic_read(&op->usage));
190
191 ASSERTCMP(atomic_read(&op->usage), >, 0);
192
193 spin_lock(&object->lock);
194 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
195 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
196
197 ostate = object->state;
198 smp_rmb();
199
200 if (fscache_object_is_active(object)) {
201 op->object = object;
202 object->n_ops++;
203
204 if (object->n_exclusive > 0) {
205 atomic_inc(&op->usage);
206 list_add_tail(&op->pend_link, &object->pending_ops);
207 fscache_stat(&fscache_n_op_pend);
208 } else if (!list_empty(&object->pending_ops)) {
209 atomic_inc(&op->usage);
210 list_add_tail(&op->pend_link, &object->pending_ops);
211 fscache_stat(&fscache_n_op_pend);
212 fscache_start_operations(object);
213 } else {
214 ASSERTCMP(object->n_exclusive, ==, 0);
215 fscache_run_op(object, op);
216 }
217 ret = 0;
218 } else if (object->state == FSCACHE_OBJECT_CREATING) {
219 op->object = object;
220 object->n_ops++;
221 atomic_inc(&op->usage);
222 list_add_tail(&op->pend_link, &object->pending_ops);
223 fscache_stat(&fscache_n_op_pend);
224 ret = 0;
225 } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
226 fscache_report_unexpected_submission(object, op, ostate);
227 ASSERT(!fscache_object_is_active(object));
228 ret = -ENOBUFS;
229 } else {
230 ret = -ENOBUFS;
231 }
232
233 spin_unlock(&object->lock);
234 return ret;
235}
236
237/*
238 * queue an object for withdrawal on error, aborting all following asynchronous
239 * operations
240 */
241void fscache_abort_object(struct fscache_object *object)
242{
243 _enter("{OBJ%x}", object->debug_id);
244
245 fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR);
246}
247
248/*
249 * jump start the operation processing on an object
250 * - caller must hold object->lock
251 */
252void fscache_start_operations(struct fscache_object *object)
253{
254 struct fscache_operation *op;
255 bool stop = false;
256
257 while (!list_empty(&object->pending_ops) && !stop) {
258 op = list_entry(object->pending_ops.next,
259 struct fscache_operation, pend_link);
260
261 if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
262 if (object->n_in_progress > 0)
263 break;
264 stop = true;
265 }
266 list_del_init(&op->pend_link);
267 object->n_in_progress++;
268
269 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
270 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
271 if (op->processor)
272 fscache_enqueue_operation(op);
273
274 /* the pending queue was holding a ref on the object */
275 fscache_put_operation(op);
276 }
277
278 ASSERTCMP(object->n_in_progress, <=, object->n_ops);
279
280 _debug("woke %d ops on OBJ%x",
281 object->n_in_progress, object->debug_id);
282}
283
284/*
285 * release an operation
286 * - queues pending ops if this is the last in-progress op
287 */
288void fscache_put_operation(struct fscache_operation *op)
289{
290 struct fscache_object *object;
291 struct fscache_cache *cache;
292
293 _enter("{OBJ%x OP%x,%d}",
294 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
295
296 ASSERTCMP(atomic_read(&op->usage), >, 0);
297
298 if (!atomic_dec_and_test(&op->usage))
299 return;
300
301 _debug("PUT OP");
302 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
303 BUG();
304
305 fscache_stat(&fscache_n_op_release);
306
307 if (op->release) {
308 op->release(op);
309 op->release = NULL;
310 }
311
312 object = op->object;
313
314 /* now... we may get called with the object spinlock held, so we
315 * complete the cleanup here only if we can immediately acquire the
316 * lock, and defer it otherwise */
317 if (!spin_trylock(&object->lock)) {
318 _debug("defer put");
319 fscache_stat(&fscache_n_op_deferred_release);
320
321 cache = object->cache;
322 spin_lock(&cache->op_gc_list_lock);
323 list_add_tail(&op->pend_link, &cache->op_gc_list);
324 spin_unlock(&cache->op_gc_list_lock);
325 schedule_work(&cache->op_gc);
326 _leave(" [defer]");
327 return;
328 }
329
330 if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
331 ASSERTCMP(object->n_exclusive, >, 0);
332 object->n_exclusive--;
333 }
334
335 ASSERTCMP(object->n_in_progress, >, 0);
336 object->n_in_progress--;
337 if (object->n_in_progress == 0)
338 fscache_start_operations(object);
339
340 ASSERTCMP(object->n_ops, >, 0);
341 object->n_ops--;
342 if (object->n_ops == 0)
343 fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
344
345 spin_unlock(&object->lock);
346
347 kfree(op);
348 _leave(" [done]");
349}
350EXPORT_SYMBOL(fscache_put_operation);
351
352/*
353 * garbage collect operations that have had their release deferred
354 */
355void fscache_operation_gc(struct work_struct *work)
356{
357 struct fscache_operation *op;
358 struct fscache_object *object;
359 struct fscache_cache *cache =
360 container_of(work, struct fscache_cache, op_gc);
361 int count = 0;
362
363 _enter("");
364
365 do {
366 spin_lock(&cache->op_gc_list_lock);
367 if (list_empty(&cache->op_gc_list)) {
368 spin_unlock(&cache->op_gc_list_lock);
369 break;
370 }
371
372 op = list_entry(cache->op_gc_list.next,
373 struct fscache_operation, pend_link);
374 list_del(&op->pend_link);
375 spin_unlock(&cache->op_gc_list_lock);
376
377 object = op->object;
378
379 _debug("GC DEFERRED REL OBJ%x OP%x",
380 object->debug_id, op->debug_id);
381 fscache_stat(&fscache_n_op_gc);
382
383 ASSERTCMP(atomic_read(&op->usage), ==, 0);
384
385 spin_lock(&object->lock);
386 if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) {
387 ASSERTCMP(object->n_exclusive, >, 0);
388 object->n_exclusive--;
389 }
390
391 ASSERTCMP(object->n_in_progress, >, 0);
392 object->n_in_progress--;
393 if (object->n_in_progress == 0)
394 fscache_start_operations(object);
395
396 ASSERTCMP(object->n_ops, >, 0);
397 object->n_ops--;
398 if (object->n_ops == 0)
399 fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED);
400
401 spin_unlock(&object->lock);
402
403 } while (count++ < 20);
404
405 if (!list_empty(&cache->op_gc_list))
406 schedule_work(&cache->op_gc);
407
408 _leave("");
409}
410
411/*
412 * allow the slow work item processor to get a ref on an operation
413 */
414static int fscache_op_get_ref(struct slow_work *work)
415{
416 struct fscache_operation *op =
417 container_of(work, struct fscache_operation, slow_work);
418
419 atomic_inc(&op->usage);
420 return 0;
421}
422
423/*
424 * allow the slow work item processor to discard a ref on an operation
425 */
426static void fscache_op_put_ref(struct slow_work *work)
427{
428 struct fscache_operation *op =
429 container_of(work, struct fscache_operation, slow_work);
430
431 fscache_put_operation(op);
432}
433
434/*
435 * execute an operation using the slow thread pool to provide processing context
436 * - the caller holds a ref to this object, so we don't need to hold one
437 */
438static void fscache_op_execute(struct slow_work *work)
439{
440 struct fscache_operation *op =
441 container_of(work, struct fscache_operation, slow_work);
442 unsigned long start;
443
444 _enter("{OBJ%x OP%x,%d}",
445 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
446
447 ASSERT(op->processor != NULL);
448 start = jiffies;
449 op->processor(op);
450 fscache_hist(fscache_ops_histogram, start);
451
452 _leave("");
453}
454
455const struct slow_work_ops fscache_op_slow_work_ops = {
456 .get_ref = fscache_op_get_ref,
457 .put_ref = fscache_op_put_ref,
458 .execute = fscache_op_execute,
459};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
new file mode 100644
index 000000000000..2568e0eb644f
--- /dev/null
+++ b/fs/fscache/page.c
@@ -0,0 +1,816 @@
1/* Cache page management and data I/O routines
2 *
3 * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL PAGE
13#include <linux/module.h>
14#include <linux/fscache-cache.h>
15#include <linux/buffer_head.h>
16#include <linux/pagevec.h>
17#include "internal.h"
18
19/*
20 * check to see if a page is being written to the cache
21 */
22bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page)
23{
24 void *val;
25
26 rcu_read_lock();
27 val = radix_tree_lookup(&cookie->stores, page->index);
28 rcu_read_unlock();
29
30 return val != NULL;
31}
32EXPORT_SYMBOL(__fscache_check_page_write);
33
34/*
35 * wait for a page to finish being written to the cache
36 */
37void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page)
38{
39 wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0);
40
41 wait_event(*wq, !__fscache_check_page_write(cookie, page));
42}
43EXPORT_SYMBOL(__fscache_wait_on_page_write);
44
45/*
46 * note that a page has finished being written to the cache
47 */
48static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page)
49{
50 struct page *xpage;
51
52 spin_lock(&cookie->lock);
53 xpage = radix_tree_delete(&cookie->stores, page->index);
54 spin_unlock(&cookie->lock);
55 ASSERT(xpage != NULL);
56
57 wake_up_bit(&cookie->flags, 0);
58}
59
60/*
61 * actually apply the changed attributes to a cache object
62 */
63static void fscache_attr_changed_op(struct fscache_operation *op)
64{
65 struct fscache_object *object = op->object;
66
67 _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id);
68
69 fscache_stat(&fscache_n_attr_changed_calls);
70
71 if (fscache_object_is_active(object) &&
72 object->cache->ops->attr_changed(object) < 0)
73 fscache_abort_object(object);
74
75 _leave("");
76}
77
78/*
79 * notification that the attributes on an object have changed
80 */
81int __fscache_attr_changed(struct fscache_cookie *cookie)
82{
83 struct fscache_operation *op;
84 struct fscache_object *object;
85
86 _enter("%p", cookie);
87
88 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
89
90 fscache_stat(&fscache_n_attr_changed);
91
92 op = kzalloc(sizeof(*op), GFP_KERNEL);
93 if (!op) {
94 fscache_stat(&fscache_n_attr_changed_nomem);
95 _leave(" = -ENOMEM");
96 return -ENOMEM;
97 }
98
99 fscache_operation_init(op, NULL);
100 fscache_operation_init_slow(op, fscache_attr_changed_op);
101 op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
102
103 spin_lock(&cookie->lock);
104
105 if (hlist_empty(&cookie->backing_objects))
106 goto nobufs;
107 object = hlist_entry(cookie->backing_objects.first,
108 struct fscache_object, cookie_link);
109
110 if (fscache_submit_exclusive_op(object, op) < 0)
111 goto nobufs;
112 spin_unlock(&cookie->lock);
113 fscache_stat(&fscache_n_attr_changed_ok);
114 fscache_put_operation(op);
115 _leave(" = 0");
116 return 0;
117
118nobufs:
119 spin_unlock(&cookie->lock);
120 kfree(op);
121 fscache_stat(&fscache_n_attr_changed_nobufs);
122 _leave(" = %d", -ENOBUFS);
123 return -ENOBUFS;
124}
125EXPORT_SYMBOL(__fscache_attr_changed);
126
127/*
128 * handle secondary execution given to a retrieval op on behalf of the
129 * cache
130 */
131static void fscache_retrieval_work(struct work_struct *work)
132{
133 struct fscache_retrieval *op =
134 container_of(work, struct fscache_retrieval, op.fast_work);
135 unsigned long start;
136
137 _enter("{OP%x}", op->op.debug_id);
138
139 start = jiffies;
140 op->op.processor(&op->op);
141 fscache_hist(fscache_ops_histogram, start);
142 fscache_put_operation(&op->op);
143}
144
145/*
146 * release a retrieval op reference
147 */
148static void fscache_release_retrieval_op(struct fscache_operation *_op)
149{
150 struct fscache_retrieval *op =
151 container_of(_op, struct fscache_retrieval, op);
152
153 _enter("{OP%x}", op->op.debug_id);
154
155 fscache_hist(fscache_retrieval_histogram, op->start_time);
156 if (op->context)
157 fscache_put_context(op->op.object->cookie, op->context);
158
159 _leave("");
160}
161
162/*
163 * allocate a retrieval op
164 */
165static struct fscache_retrieval *fscache_alloc_retrieval(
166 struct address_space *mapping,
167 fscache_rw_complete_t end_io_func,
168 void *context)
169{
170 struct fscache_retrieval *op;
171
172 /* allocate a retrieval operation and attempt to submit it */
173 op = kzalloc(sizeof(*op), GFP_NOIO);
174 if (!op) {
175 fscache_stat(&fscache_n_retrievals_nomem);
176 return NULL;
177 }
178
179 fscache_operation_init(&op->op, fscache_release_retrieval_op);
180 op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING);
181 op->mapping = mapping;
182 op->end_io_func = end_io_func;
183 op->context = context;
184 op->start_time = jiffies;
185 INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
186 INIT_LIST_HEAD(&op->to_do);
187 return op;
188}
189
190/*
191 * wait for a deferred lookup to complete
192 */
193static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
194{
195 unsigned long jif;
196
197 _enter("");
198
199 if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) {
200 _leave(" = 0 [imm]");
201 return 0;
202 }
203
204 fscache_stat(&fscache_n_retrievals_wait);
205
206 jif = jiffies;
207 if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP,
208 fscache_wait_bit_interruptible,
209 TASK_INTERRUPTIBLE) != 0) {
210 fscache_stat(&fscache_n_retrievals_intr);
211 _leave(" = -ERESTARTSYS");
212 return -ERESTARTSYS;
213 }
214
215 ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags));
216
217 smp_rmb();
218 fscache_hist(fscache_retrieval_delay_histogram, jif);
219 _leave(" = 0 [dly]");
220 return 0;
221}
222
223/*
224 * read a page from the cache or allocate a block in which to store it
225 * - we return:
226 * -ENOMEM - out of memory, nothing done
227 * -ERESTARTSYS - interrupted
228 * -ENOBUFS - no backing object available in which to cache the block
229 * -ENODATA - no data available in the backing object for this block
230 * 0 - dispatched a read - it'll call end_io_func() when finished
231 */
232int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
233 struct page *page,
234 fscache_rw_complete_t end_io_func,
235 void *context,
236 gfp_t gfp)
237{
238 struct fscache_retrieval *op;
239 struct fscache_object *object;
240 int ret;
241
242 _enter("%p,%p,,,", cookie, page);
243
244 fscache_stat(&fscache_n_retrievals);
245
246 if (hlist_empty(&cookie->backing_objects))
247 goto nobufs;
248
249 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
250 ASSERTCMP(page, !=, NULL);
251
252 if (fscache_wait_for_deferred_lookup(cookie) < 0)
253 return -ERESTARTSYS;
254
255 op = fscache_alloc_retrieval(page->mapping, end_io_func, context);
256 if (!op) {
257 _leave(" = -ENOMEM");
258 return -ENOMEM;
259 }
260
261 spin_lock(&cookie->lock);
262
263 if (hlist_empty(&cookie->backing_objects))
264 goto nobufs_unlock;
265 object = hlist_entry(cookie->backing_objects.first,
266 struct fscache_object, cookie_link);
267
268 ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
269
270 if (fscache_submit_op(object, &op->op) < 0)
271 goto nobufs_unlock;
272 spin_unlock(&cookie->lock);
273
274 fscache_stat(&fscache_n_retrieval_ops);
275
276 /* pin the netfs read context in case we need to do the actual netfs
277 * read because we've encountered a cache read failure */
278 fscache_get_context(object->cookie, op->context);
279
280 /* we wait for the operation to become active, and then process it
281 * *here*, in this thread, and not in the thread pool */
282 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
283 _debug(">>> WT");
284 fscache_stat(&fscache_n_retrieval_op_waits);
285 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
286 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
287 _debug("<<< GO");
288 }
289
290 /* ask the cache to honour the operation */
291 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
292 ret = object->cache->ops->allocate_page(op, page, gfp);
293 if (ret == 0)
294 ret = -ENODATA;
295 } else {
296 ret = object->cache->ops->read_or_alloc_page(op, page, gfp);
297 }
298
299 if (ret == -ENOMEM)
300 fscache_stat(&fscache_n_retrievals_nomem);
301 else if (ret == -ERESTARTSYS)
302 fscache_stat(&fscache_n_retrievals_intr);
303 else if (ret == -ENODATA)
304 fscache_stat(&fscache_n_retrievals_nodata);
305 else if (ret < 0)
306 fscache_stat(&fscache_n_retrievals_nobufs);
307 else
308 fscache_stat(&fscache_n_retrievals_ok);
309
310 fscache_put_retrieval(op);
311 _leave(" = %d", ret);
312 return ret;
313
314nobufs_unlock:
315 spin_unlock(&cookie->lock);
316 kfree(op);
317nobufs:
318 fscache_stat(&fscache_n_retrievals_nobufs);
319 _leave(" = -ENOBUFS");
320 return -ENOBUFS;
321}
322EXPORT_SYMBOL(__fscache_read_or_alloc_page);
323
324/*
325 * read a list of page from the cache or allocate a block in which to store
326 * them
327 * - we return:
328 * -ENOMEM - out of memory, some pages may be being read
329 * -ERESTARTSYS - interrupted, some pages may be being read
330 * -ENOBUFS - no backing object or space available in which to cache any
331 * pages not being read
332 * -ENODATA - no data available in the backing object for some or all of
333 * the pages
334 * 0 - dispatched a read on all pages
335 *
336 * end_io_func() will be called for each page read from the cache as it is
337 * finishes being read
338 *
339 * any pages for which a read is dispatched will be removed from pages and
340 * nr_pages
341 */
342int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
343 struct address_space *mapping,
344 struct list_head *pages,
345 unsigned *nr_pages,
346 fscache_rw_complete_t end_io_func,
347 void *context,
348 gfp_t gfp)
349{
350 fscache_pages_retrieval_func_t func;
351 struct fscache_retrieval *op;
352 struct fscache_object *object;
353 int ret;
354
355 _enter("%p,,%d,,,", cookie, *nr_pages);
356
357 fscache_stat(&fscache_n_retrievals);
358
359 if (hlist_empty(&cookie->backing_objects))
360 goto nobufs;
361
362 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
363 ASSERTCMP(*nr_pages, >, 0);
364 ASSERT(!list_empty(pages));
365
366 if (fscache_wait_for_deferred_lookup(cookie) < 0)
367 return -ERESTARTSYS;
368
369 op = fscache_alloc_retrieval(mapping, end_io_func, context);
370 if (!op)
371 return -ENOMEM;
372
373 spin_lock(&cookie->lock);
374
375 if (hlist_empty(&cookie->backing_objects))
376 goto nobufs_unlock;
377 object = hlist_entry(cookie->backing_objects.first,
378 struct fscache_object, cookie_link);
379
380 if (fscache_submit_op(object, &op->op) < 0)
381 goto nobufs_unlock;
382 spin_unlock(&cookie->lock);
383
384 fscache_stat(&fscache_n_retrieval_ops);
385
386 /* pin the netfs read context in case we need to do the actual netfs
387 * read because we've encountered a cache read failure */
388 fscache_get_context(object->cookie, op->context);
389
390 /* we wait for the operation to become active, and then process it
391 * *here*, in this thread, and not in the thread pool */
392 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
393 _debug(">>> WT");
394 fscache_stat(&fscache_n_retrieval_op_waits);
395 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
396 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
397 _debug("<<< GO");
398 }
399
400 /* ask the cache to honour the operation */
401 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags))
402 func = object->cache->ops->allocate_pages;
403 else
404 func = object->cache->ops->read_or_alloc_pages;
405 ret = func(op, pages, nr_pages, gfp);
406
407 if (ret == -ENOMEM)
408 fscache_stat(&fscache_n_retrievals_nomem);
409 else if (ret == -ERESTARTSYS)
410 fscache_stat(&fscache_n_retrievals_intr);
411 else if (ret == -ENODATA)
412 fscache_stat(&fscache_n_retrievals_nodata);
413 else if (ret < 0)
414 fscache_stat(&fscache_n_retrievals_nobufs);
415 else
416 fscache_stat(&fscache_n_retrievals_ok);
417
418 fscache_put_retrieval(op);
419 _leave(" = %d", ret);
420 return ret;
421
422nobufs_unlock:
423 spin_unlock(&cookie->lock);
424 kfree(op);
425nobufs:
426 fscache_stat(&fscache_n_retrievals_nobufs);
427 _leave(" = -ENOBUFS");
428 return -ENOBUFS;
429}
430EXPORT_SYMBOL(__fscache_read_or_alloc_pages);
431
432/*
433 * allocate a block in the cache on which to store a page
434 * - we return:
435 * -ENOMEM - out of memory, nothing done
436 * -ERESTARTSYS - interrupted
437 * -ENOBUFS - no backing object available in which to cache the block
438 * 0 - block allocated
439 */
440int __fscache_alloc_page(struct fscache_cookie *cookie,
441 struct page *page,
442 gfp_t gfp)
443{
444 struct fscache_retrieval *op;
445 struct fscache_object *object;
446 int ret;
447
448 _enter("%p,%p,,,", cookie, page);
449
450 fscache_stat(&fscache_n_allocs);
451
452 if (hlist_empty(&cookie->backing_objects))
453 goto nobufs;
454
455 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
456 ASSERTCMP(page, !=, NULL);
457
458 if (fscache_wait_for_deferred_lookup(cookie) < 0)
459 return -ERESTARTSYS;
460
461 op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
462 if (!op)
463 return -ENOMEM;
464
465 spin_lock(&cookie->lock);
466
467 if (hlist_empty(&cookie->backing_objects))
468 goto nobufs_unlock;
469 object = hlist_entry(cookie->backing_objects.first,
470 struct fscache_object, cookie_link);
471
472 if (fscache_submit_op(object, &op->op) < 0)
473 goto nobufs_unlock;
474 spin_unlock(&cookie->lock);
475
476 fscache_stat(&fscache_n_alloc_ops);
477
478 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) {
479 _debug(">>> WT");
480 fscache_stat(&fscache_n_alloc_op_waits);
481 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
482 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
483 _debug("<<< GO");
484 }
485
486 /* ask the cache to honour the operation */
487 ret = object->cache->ops->allocate_page(op, page, gfp);
488
489 if (ret < 0)
490 fscache_stat(&fscache_n_allocs_nobufs);
491 else
492 fscache_stat(&fscache_n_allocs_ok);
493
494 fscache_put_retrieval(op);
495 _leave(" = %d", ret);
496 return ret;
497
498nobufs_unlock:
499 spin_unlock(&cookie->lock);
500 kfree(op);
501nobufs:
502 fscache_stat(&fscache_n_allocs_nobufs);
503 _leave(" = -ENOBUFS");
504 return -ENOBUFS;
505}
506EXPORT_SYMBOL(__fscache_alloc_page);
507
508/*
509 * release a write op reference
510 */
511static void fscache_release_write_op(struct fscache_operation *_op)
512{
513 _enter("{OP%x}", _op->debug_id);
514}
515
516/*
517 * perform the background storage of a page into the cache
518 */
519static void fscache_write_op(struct fscache_operation *_op)
520{
521 struct fscache_storage *op =
522 container_of(_op, struct fscache_storage, op);
523 struct fscache_object *object = op->op.object;
524 struct fscache_cookie *cookie = object->cookie;
525 struct page *page;
526 unsigned n;
527 void *results[1];
528 int ret;
529
530 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
531
532 spin_lock(&cookie->lock);
533 spin_lock(&object->lock);
534
535 if (!fscache_object_is_active(object)) {
536 spin_unlock(&object->lock);
537 spin_unlock(&cookie->lock);
538 _leave("");
539 return;
540 }
541
542 fscache_stat(&fscache_n_store_calls);
543
544 /* find a page to store */
545 page = NULL;
546 n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1,
547 FSCACHE_COOKIE_PENDING_TAG);
548 if (n != 1)
549 goto superseded;
550 page = results[0];
551 _debug("gang %d [%lx]", n, page->index);
552 if (page->index > op->store_limit)
553 goto superseded;
554
555 radix_tree_tag_clear(&cookie->stores, page->index,
556 FSCACHE_COOKIE_PENDING_TAG);
557
558 spin_unlock(&object->lock);
559 spin_unlock(&cookie->lock);
560
561 if (page) {
562 ret = object->cache->ops->write_page(op, page);
563 fscache_end_page_write(cookie, page);
564 page_cache_release(page);
565 if (ret < 0)
566 fscache_abort_object(object);
567 else
568 fscache_enqueue_operation(&op->op);
569 }
570
571 _leave("");
572 return;
573
574superseded:
575 /* this writer is going away and there aren't any more things to
576 * write */
577 _debug("cease");
578 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
579 spin_unlock(&object->lock);
580 spin_unlock(&cookie->lock);
581 _leave("");
582}
583
584/*
585 * request a page be stored in the cache
586 * - returns:
587 * -ENOMEM - out of memory, nothing done
588 * -ENOBUFS - no backing object available in which to cache the page
589 * 0 - dispatched a write - it'll call end_io_func() when finished
590 *
591 * if the cookie still has a backing object at this point, that object can be
592 * in one of a few states with respect to storage processing:
593 *
594 * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is
595 * set)
596 *
597 * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred
598 * fill op)
599 *
600 * (b) writes deferred till post-creation (mark page for writing and
601 * return immediately)
602 *
603 * (2) negative lookup, object created, initial fill being made from netfs
604 * (FSCACHE_COOKIE_INITIAL_FILL is set)
605 *
606 * (a) fill point not yet reached this page (mark page for writing and
607 * return)
608 *
609 * (b) fill point passed this page (queue op to store this page)
610 *
611 * (3) object extant (queue op to store this page)
612 *
613 * any other state is invalid
614 */
615int __fscache_write_page(struct fscache_cookie *cookie,
616 struct page *page,
617 gfp_t gfp)
618{
619 struct fscache_storage *op;
620 struct fscache_object *object;
621 int ret;
622
623 _enter("%p,%x,", cookie, (u32) page->flags);
624
625 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
626 ASSERT(PageFsCache(page));
627
628 fscache_stat(&fscache_n_stores);
629
630 op = kzalloc(sizeof(*op), GFP_NOIO);
631 if (!op)
632 goto nomem;
633
634 fscache_operation_init(&op->op, fscache_release_write_op);
635 fscache_operation_init_slow(&op->op, fscache_write_op);
636 op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
637
638 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
639 if (ret < 0)
640 goto nomem_free;
641
642 ret = -ENOBUFS;
643 spin_lock(&cookie->lock);
644
645 if (hlist_empty(&cookie->backing_objects))
646 goto nobufs;
647 object = hlist_entry(cookie->backing_objects.first,
648 struct fscache_object, cookie_link);
649 if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
650 goto nobufs;
651
652 /* add the page to the pending-storage radix tree on the backing
653 * object */
654 spin_lock(&object->lock);
655
656 _debug("store limit %llx", (unsigned long long) object->store_limit);
657
658 ret = radix_tree_insert(&cookie->stores, page->index, page);
659 if (ret < 0) {
660 if (ret == -EEXIST)
661 goto already_queued;
662 _debug("insert failed %d", ret);
663 goto nobufs_unlock_obj;
664 }
665
666 radix_tree_tag_set(&cookie->stores, page->index,
667 FSCACHE_COOKIE_PENDING_TAG);
668 page_cache_get(page);
669
670 /* we only want one writer at a time, but we do need to queue new
671 * writers after exclusive ops */
672 if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags))
673 goto already_pending;
674
675 spin_unlock(&object->lock);
676
677 op->op.debug_id = atomic_inc_return(&fscache_op_debug_id);
678 op->store_limit = object->store_limit;
679
680 if (fscache_submit_op(object, &op->op) < 0)
681 goto submit_failed;
682
683 spin_unlock(&cookie->lock);
684 radix_tree_preload_end();
685 fscache_stat(&fscache_n_store_ops);
686 fscache_stat(&fscache_n_stores_ok);
687
688 /* the slow work queue now carries its own ref on the object */
689 fscache_put_operation(&op->op);
690 _leave(" = 0");
691 return 0;
692
693already_queued:
694 fscache_stat(&fscache_n_stores_again);
695already_pending:
696 spin_unlock(&object->lock);
697 spin_unlock(&cookie->lock);
698 radix_tree_preload_end();
699 kfree(op);
700 fscache_stat(&fscache_n_stores_ok);
701 _leave(" = 0");
702 return 0;
703
704submit_failed:
705 radix_tree_delete(&cookie->stores, page->index);
706 page_cache_release(page);
707 ret = -ENOBUFS;
708 goto nobufs;
709
710nobufs_unlock_obj:
711 spin_unlock(&object->lock);
712nobufs:
713 spin_unlock(&cookie->lock);
714 radix_tree_preload_end();
715 kfree(op);
716 fscache_stat(&fscache_n_stores_nobufs);
717 _leave(" = -ENOBUFS");
718 return -ENOBUFS;
719
720nomem_free:
721 kfree(op);
722nomem:
723 fscache_stat(&fscache_n_stores_oom);
724 _leave(" = -ENOMEM");
725 return -ENOMEM;
726}
727EXPORT_SYMBOL(__fscache_write_page);
728
729/*
730 * remove a page from the cache
731 */
732void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
733{
734 struct fscache_object *object;
735
736 _enter(",%p", page);
737
738 ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
739 ASSERTCMP(page, !=, NULL);
740
741 fscache_stat(&fscache_n_uncaches);
742
743 /* cache withdrawal may beat us to it */
744 if (!PageFsCache(page))
745 goto done;
746
747 /* get the object */
748 spin_lock(&cookie->lock);
749
750 if (hlist_empty(&cookie->backing_objects)) {
751 ClearPageFsCache(page);
752 goto done_unlock;
753 }
754
755 object = hlist_entry(cookie->backing_objects.first,
756 struct fscache_object, cookie_link);
757
758 /* there might now be stuff on disk we could read */
759 clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags);
760
761 /* only invoke the cache backend if we managed to mark the page
762 * uncached here; this deals with synchronisation vs withdrawal */
763 if (TestClearPageFsCache(page) &&
764 object->cache->ops->uncache_page) {
765 /* the cache backend releases the cookie lock */
766 object->cache->ops->uncache_page(object, page);
767 goto done;
768 }
769
770done_unlock:
771 spin_unlock(&cookie->lock);
772done:
773 _leave("");
774}
775EXPORT_SYMBOL(__fscache_uncache_page);
776
777/**
778 * fscache_mark_pages_cached - Mark pages as being cached
779 * @op: The retrieval op pages are being marked for
780 * @pagevec: The pages to be marked
781 *
782 * Mark a bunch of netfs pages as being cached. After this is called,
783 * the netfs must call fscache_uncache_page() to remove the mark.
784 */
785void fscache_mark_pages_cached(struct fscache_retrieval *op,
786 struct pagevec *pagevec)
787{
788 struct fscache_cookie *cookie = op->op.object->cookie;
789 unsigned long loop;
790
791#ifdef CONFIG_FSCACHE_STATS
792 atomic_add(pagevec->nr, &fscache_n_marks);
793#endif
794
795 for (loop = 0; loop < pagevec->nr; loop++) {
796 struct page *page = pagevec->pages[loop];
797
798 _debug("- mark %p{%lx}", page, page->index);
799 if (TestSetPageFsCache(page)) {
800 static bool once_only;
801 if (!once_only) {
802 once_only = true;
803 printk(KERN_WARNING "FS-Cache:"
804 " Cookie type %s marked page %lx"
805 " multiple times\n",
806 cookie->def->name, page->index);
807 }
808 }
809 }
810
811 if (cookie->def->mark_pages_cached)
812 cookie->def->mark_pages_cached(cookie->netfs_data,
813 op->mapping, pagevec);
814 pagevec_reinit(pagevec);
815}
816EXPORT_SYMBOL(fscache_mark_pages_cached);
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
new file mode 100644
index 000000000000..beeab44bc31a
--- /dev/null
+++ b/fs/fscache/proc.c
@@ -0,0 +1,68 @@
1/* FS-Cache statistics viewing interface
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL OPERATION
13#include <linux/module.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
16#include "internal.h"
17
18/*
19 * initialise the /proc/fs/fscache/ directory
20 */
21int __init fscache_proc_init(void)
22{
23 _enter("");
24
25 if (!proc_mkdir("fs/fscache", NULL))
26 goto error_dir;
27
28#ifdef CONFIG_FSCACHE_STATS
29 if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
30 &fscache_stats_fops))
31 goto error_stats;
32#endif
33
34#ifdef CONFIG_FSCACHE_HISTOGRAM
35 if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
36 &fscache_histogram_fops))
37 goto error_histogram;
38#endif
39
40 _leave(" = 0");
41 return 0;
42
43#ifdef CONFIG_FSCACHE_HISTOGRAM
44error_histogram:
45#endif
46#ifdef CONFIG_FSCACHE_STATS
47 remove_proc_entry("fs/fscache/stats", NULL);
48error_stats:
49#endif
50 remove_proc_entry("fs/fscache", NULL);
51error_dir:
52 _leave(" = -ENOMEM");
53 return -ENOMEM;
54}
55
56/*
57 * clean up the /proc/fs/fscache/ directory
58 */
59void fscache_proc_cleanup(void)
60{
61#ifdef CONFIG_FSCACHE_HISTOGRAM
62 remove_proc_entry("fs/fscache/histogram", NULL);
63#endif
64#ifdef CONFIG_FSCACHE_STATS
65 remove_proc_entry("fs/fscache/stats", NULL);
66#endif
67 remove_proc_entry("fs/fscache", NULL);
68}
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
new file mode 100644
index 000000000000..65deb99e756b
--- /dev/null
+++ b/fs/fscache/stats.c
@@ -0,0 +1,212 @@
1/* FS-Cache statistics
2 *
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL THREAD
13#include <linux/module.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
16#include "internal.h"
17
18/*
19 * operation counters
20 */
21atomic_t fscache_n_op_pend;
22atomic_t fscache_n_op_run;
23atomic_t fscache_n_op_enqueue;
24atomic_t fscache_n_op_requeue;
25atomic_t fscache_n_op_deferred_release;
26atomic_t fscache_n_op_release;
27atomic_t fscache_n_op_gc;
28
29atomic_t fscache_n_attr_changed;
30atomic_t fscache_n_attr_changed_ok;
31atomic_t fscache_n_attr_changed_nobufs;
32atomic_t fscache_n_attr_changed_nomem;
33atomic_t fscache_n_attr_changed_calls;
34
35atomic_t fscache_n_allocs;
36atomic_t fscache_n_allocs_ok;
37atomic_t fscache_n_allocs_wait;
38atomic_t fscache_n_allocs_nobufs;
39atomic_t fscache_n_alloc_ops;
40atomic_t fscache_n_alloc_op_waits;
41
42atomic_t fscache_n_retrievals;
43atomic_t fscache_n_retrievals_ok;
44atomic_t fscache_n_retrievals_wait;
45atomic_t fscache_n_retrievals_nodata;
46atomic_t fscache_n_retrievals_nobufs;
47atomic_t fscache_n_retrievals_intr;
48atomic_t fscache_n_retrievals_nomem;
49atomic_t fscache_n_retrieval_ops;
50atomic_t fscache_n_retrieval_op_waits;
51
52atomic_t fscache_n_stores;
53atomic_t fscache_n_stores_ok;
54atomic_t fscache_n_stores_again;
55atomic_t fscache_n_stores_nobufs;
56atomic_t fscache_n_stores_oom;
57atomic_t fscache_n_store_ops;
58atomic_t fscache_n_store_calls;
59
60atomic_t fscache_n_marks;
61atomic_t fscache_n_uncaches;
62
63atomic_t fscache_n_acquires;
64atomic_t fscache_n_acquires_null;
65atomic_t fscache_n_acquires_no_cache;
66atomic_t fscache_n_acquires_ok;
67atomic_t fscache_n_acquires_nobufs;
68atomic_t fscache_n_acquires_oom;
69
70atomic_t fscache_n_updates;
71atomic_t fscache_n_updates_null;
72atomic_t fscache_n_updates_run;
73
74atomic_t fscache_n_relinquishes;
75atomic_t fscache_n_relinquishes_null;
76atomic_t fscache_n_relinquishes_waitcrt;
77
78atomic_t fscache_n_cookie_index;
79atomic_t fscache_n_cookie_data;
80atomic_t fscache_n_cookie_special;
81
82atomic_t fscache_n_object_alloc;
83atomic_t fscache_n_object_no_alloc;
84atomic_t fscache_n_object_lookups;
85atomic_t fscache_n_object_lookups_negative;
86atomic_t fscache_n_object_lookups_positive;
87atomic_t fscache_n_object_created;
88atomic_t fscache_n_object_avail;
89atomic_t fscache_n_object_dead;
90
91atomic_t fscache_n_checkaux_none;
92atomic_t fscache_n_checkaux_okay;
93atomic_t fscache_n_checkaux_update;
94atomic_t fscache_n_checkaux_obsolete;
95
96/*
97 * display the general statistics
98 */
99static int fscache_stats_show(struct seq_file *m, void *v)
100{
101 seq_puts(m, "FS-Cache statistics\n");
102
103 seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n",
104 atomic_read(&fscache_n_cookie_index),
105 atomic_read(&fscache_n_cookie_data),
106 atomic_read(&fscache_n_cookie_special));
107
108 seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n",
109 atomic_read(&fscache_n_object_alloc),
110 atomic_read(&fscache_n_object_no_alloc),
111 atomic_read(&fscache_n_object_avail),
112 atomic_read(&fscache_n_object_dead));
113 seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n",
114 atomic_read(&fscache_n_checkaux_none),
115 atomic_read(&fscache_n_checkaux_okay),
116 atomic_read(&fscache_n_checkaux_update),
117 atomic_read(&fscache_n_checkaux_obsolete));
118
119 seq_printf(m, "Pages : mrk=%u unc=%u\n",
120 atomic_read(&fscache_n_marks),
121 atomic_read(&fscache_n_uncaches));
122
123 seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u"
124 " oom=%u\n",
125 atomic_read(&fscache_n_acquires),
126 atomic_read(&fscache_n_acquires_null),
127 atomic_read(&fscache_n_acquires_no_cache),
128 atomic_read(&fscache_n_acquires_ok),
129 atomic_read(&fscache_n_acquires_nobufs),
130 atomic_read(&fscache_n_acquires_oom));
131
132 seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n",
133 atomic_read(&fscache_n_object_lookups),
134 atomic_read(&fscache_n_object_lookups_negative),
135 atomic_read(&fscache_n_object_lookups_positive),
136 atomic_read(&fscache_n_object_created));
137
138 seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
139 atomic_read(&fscache_n_updates),
140 atomic_read(&fscache_n_updates_null),
141 atomic_read(&fscache_n_updates_run));
142
143 seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n",
144 atomic_read(&fscache_n_relinquishes),
145 atomic_read(&fscache_n_relinquishes_null),
146 atomic_read(&fscache_n_relinquishes_waitcrt));
147
148 seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n",
149 atomic_read(&fscache_n_attr_changed),
150 atomic_read(&fscache_n_attr_changed_ok),
151 atomic_read(&fscache_n_attr_changed_nobufs),
152 atomic_read(&fscache_n_attr_changed_nomem),
153 atomic_read(&fscache_n_attr_changed_calls));
154
155 seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n",
156 atomic_read(&fscache_n_allocs),
157 atomic_read(&fscache_n_allocs_ok),
158 atomic_read(&fscache_n_allocs_wait),
159 atomic_read(&fscache_n_allocs_nobufs));
160 seq_printf(m, "Allocs : ops=%u owt=%u\n",
161 atomic_read(&fscache_n_alloc_ops),
162 atomic_read(&fscache_n_alloc_op_waits));
163
164 seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u"
165 " int=%u oom=%u\n",
166 atomic_read(&fscache_n_retrievals),
167 atomic_read(&fscache_n_retrievals_ok),
168 atomic_read(&fscache_n_retrievals_wait),
169 atomic_read(&fscache_n_retrievals_nodata),
170 atomic_read(&fscache_n_retrievals_nobufs),
171 atomic_read(&fscache_n_retrievals_intr),
172 atomic_read(&fscache_n_retrievals_nomem));
173 seq_printf(m, "Retrvls: ops=%u owt=%u\n",
174 atomic_read(&fscache_n_retrieval_ops),
175 atomic_read(&fscache_n_retrieval_op_waits));
176
177 seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n",
178 atomic_read(&fscache_n_stores),
179 atomic_read(&fscache_n_stores_ok),
180 atomic_read(&fscache_n_stores_again),
181 atomic_read(&fscache_n_stores_nobufs),
182 atomic_read(&fscache_n_stores_oom));
183 seq_printf(m, "Stores : ops=%u run=%u\n",
184 atomic_read(&fscache_n_store_ops),
185 atomic_read(&fscache_n_store_calls));
186
187 seq_printf(m, "Ops : pend=%u run=%u enq=%u\n",
188 atomic_read(&fscache_n_op_pend),
189 atomic_read(&fscache_n_op_run),
190 atomic_read(&fscache_n_op_enqueue));
191 seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n",
192 atomic_read(&fscache_n_op_deferred_release),
193 atomic_read(&fscache_n_op_release),
194 atomic_read(&fscache_n_op_gc));
195 return 0;
196}
197
198/*
199 * open "/proc/fs/fscache/stats" allowing provision of a statistical summary
200 */
201static int fscache_stats_open(struct inode *inode, struct file *file)
202{
203 return single_open(file, fscache_stats_show, NULL);
204}
205
206const struct file_operations fscache_stats_fops = {
207 .owner = THIS_MODULE,
208 .open = fscache_stats_open,
209 .read = seq_read,
210 .llseek = seq_lseek,
211 .release = seq_release,
212};
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 36fe20d6eba2..e67f3ec07736 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -84,3 +84,11 @@ config ROOT_NFS
84 <file:Documentation/filesystems/nfsroot.txt>. 84 <file:Documentation/filesystems/nfsroot.txt>.
85 85
86 Most people say N here. 86 Most people say N here.
87
88config NFS_FSCACHE
89 bool "Provide NFS client caching support (EXPERIMENTAL)"
90 depends on EXPERIMENTAL
91 depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y
92 help
93 Say Y here if you want NFS data to be cached locally on disc through
94 the general filesystem cache manager
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index ac6170c594a3..845159814de2 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,3 +15,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
15 callback.o callback_xdr.o callback_proc.o \ 15 callback.o callback_xdr.o callback_proc.o \
16 nfs4namespace.o 16 nfs4namespace.o
17nfs-$(CONFIG_SYSCTL) += sysctl.o 17nfs-$(CONFIG_SYSCTL) += sysctl.o
18nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index aba38017bdef..75c9cd2aa119 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -45,6 +45,7 @@
45#include "delegation.h" 45#include "delegation.h"
46#include "iostat.h" 46#include "iostat.h"
47#include "internal.h" 47#include "internal.h"
48#include "fscache.h"
48 49
49#define NFSDBG_FACILITY NFSDBG_CLIENT 50#define NFSDBG_FACILITY NFSDBG_CLIENT
50 51
@@ -154,6 +155,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
154 if (!IS_ERR(cred)) 155 if (!IS_ERR(cred))
155 clp->cl_machine_cred = cred; 156 clp->cl_machine_cred = cred;
156 157
158 nfs_fscache_get_client_cookie(clp);
159
157 return clp; 160 return clp;
158 161
159error_3: 162error_3:
@@ -187,6 +190,8 @@ static void nfs_free_client(struct nfs_client *clp)
187 190
188 nfs4_shutdown_client(clp); 191 nfs4_shutdown_client(clp);
189 192
193 nfs_fscache_release_client_cookie(clp);
194
190 /* -EIO all pending I/O */ 195 /* -EIO all pending I/O */
191 if (!IS_ERR(clp->cl_rpcclient)) 196 if (!IS_ERR(clp->cl_rpcclient))
192 rpc_shutdown_client(clp->cl_rpcclient); 197 rpc_shutdown_client(clp->cl_rpcclient);
@@ -760,6 +765,7 @@ static int nfs_init_server(struct nfs_server *server,
760 765
761 /* Initialise the client representation from the mount data */ 766 /* Initialise the client representation from the mount data */
762 server->flags = data->flags; 767 server->flags = data->flags;
768 server->options = data->options;
763 769
764 if (data->rsize) 770 if (data->rsize)
765 server->rsize = nfs_block_size(data->rsize, NULL); 771 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1148,6 +1154,7 @@ static int nfs4_init_server(struct nfs_server *server,
1148 /* Initialise the client representation from the mount data */ 1154 /* Initialise the client representation from the mount data */
1149 server->flags = data->flags; 1155 server->flags = data->flags;
1150 server->caps |= NFS_CAP_ATOMIC_OPEN; 1156 server->caps |= NFS_CAP_ATOMIC_OPEN;
1157 server->options = data->options;
1151 1158
1152 /* Get a client record */ 1159 /* Get a client record */
1153 error = nfs4_set_client(server, 1160 error = nfs4_set_client(server,
@@ -1559,7 +1566,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1559 1566
1560 /* display header on line 1 */ 1567 /* display header on line 1 */
1561 if (v == &nfs_volume_list) { 1568 if (v == &nfs_volume_list) {
1562 seq_puts(m, "NV SERVER PORT DEV FSID\n"); 1569 seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
1563 return 0; 1570 return 0;
1564 } 1571 }
1565 /* display one transport per line on subsequent lines */ 1572 /* display one transport per line on subsequent lines */
@@ -1573,12 +1580,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
1573 (unsigned long long) server->fsid.major, 1580 (unsigned long long) server->fsid.major,
1574 (unsigned long long) server->fsid.minor); 1581 (unsigned long long) server->fsid.minor);
1575 1582
1576 seq_printf(m, "v%u %s %s %-7s %-17s\n", 1583 seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
1577 clp->rpc_ops->version, 1584 clp->rpc_ops->version,
1578 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), 1585 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
1579 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), 1586 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
1580 dev, 1587 dev,
1581 fsid); 1588 fsid,
1589 nfs_server_fscache_state(server));
1582 1590
1583 return 0; 1591 return 0;
1584} 1592}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0abf3f331f56..3523b895eb4b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -35,6 +35,7 @@
35#include "delegation.h" 35#include "delegation.h"
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h"
38 39
39#define NFSDBG_FACILITY NFSDBG_FILE 40#define NFSDBG_FACILITY NFSDBG_FILE
40 41
@@ -409,6 +410,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
409 return copied; 410 return copied;
410} 411}
411 412
413/*
414 * Partially or wholly invalidate a page
415 * - Release the private state associated with a page if undergoing complete
416 * page invalidation
417 * - Called if either PG_private or PG_fscache is set on the page
418 * - Caller holds page lock
419 */
412static void nfs_invalidate_page(struct page *page, unsigned long offset) 420static void nfs_invalidate_page(struct page *page, unsigned long offset)
413{ 421{
414 dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); 422 dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset);
@@ -417,23 +425,43 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
417 return; 425 return;
418 /* Cancel any unstarted writes on this page */ 426 /* Cancel any unstarted writes on this page */
419 nfs_wb_page_cancel(page->mapping->host, page); 427 nfs_wb_page_cancel(page->mapping->host, page);
428
429 nfs_fscache_invalidate_page(page, page->mapping->host);
420} 430}
421 431
432/*
433 * Attempt to release the private state associated with a page
434 * - Called if either PG_private or PG_fscache is set on the page
435 * - Caller holds page lock
436 * - Return true (may release page) or false (may not)
437 */
422static int nfs_release_page(struct page *page, gfp_t gfp) 438static int nfs_release_page(struct page *page, gfp_t gfp)
423{ 439{
424 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); 440 dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
425 441
426 /* If PagePrivate() is set, then the page is not freeable */ 442 /* If PagePrivate() is set, then the page is not freeable */
427 return 0; 443 if (PagePrivate(page))
444 return 0;
445 return nfs_fscache_release_page(page, gfp);
428} 446}
429 447
448/*
449 * Attempt to clear the private state associated with a page when an error
450 * occurs that requires the cached contents of an inode to be written back or
451 * destroyed
452 * - Called if either PG_private or fscache is set on the page
453 * - Caller holds page lock
454 * - Return 0 if successful, -error otherwise
455 */
430static int nfs_launder_page(struct page *page) 456static int nfs_launder_page(struct page *page)
431{ 457{
432 struct inode *inode = page->mapping->host; 458 struct inode *inode = page->mapping->host;
459 struct nfs_inode *nfsi = NFS_I(inode);
433 460
434 dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", 461 dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
435 inode->i_ino, (long long)page_offset(page)); 462 inode->i_ino, (long long)page_offset(page));
436 463
464 nfs_fscache_wait_on_page_write(nfsi, page);
437 return nfs_wb_page(inode, page); 465 return nfs_wb_page(inode, page);
438} 466}
439 467
@@ -451,6 +479,11 @@ const struct address_space_operations nfs_file_aops = {
451 .launder_page = nfs_launder_page, 479 .launder_page = nfs_launder_page,
452}; 480};
453 481
482/*
483 * Notification that a PTE pointing to an NFS page is about to be made
484 * writable, implying that someone is about to modify the page through a
485 * shared-writable mapping
486 */
454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 487static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
455{ 488{
456 struct page *page = vmf->page; 489 struct page *page = vmf->page;
@@ -465,6 +498,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
465 filp->f_mapping->host->i_ino, 498 filp->f_mapping->host->i_ino,
466 (long long)page_offset(page)); 499 (long long)page_offset(page));
467 500
501 /* make sure the cache has finished storing the page */
502 nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
503
468 lock_page(page); 504 lock_page(page);
469 mapping = page->mapping; 505 mapping = page->mapping;
470 if (mapping != dentry->d_inode->i_mapping) 506 if (mapping != dentry->d_inode->i_mapping)
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c
new file mode 100644
index 000000000000..5b1006480bc2
--- /dev/null
+++ b/fs/nfs/fscache-index.c
@@ -0,0 +1,337 @@
1/* NFS FS-Cache index structure definition
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sched.h>
15#include <linux/mm.h>
16#include <linux/nfs_fs.h>
17#include <linux/nfs_fs_sb.h>
18#include <linux/in6.h>
19
20#include "internal.h"
21#include "fscache.h"
22
23#define NFSDBG_FACILITY NFSDBG_FSCACHE
24
25/*
26 * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks
27 * the cookie for the top-level index object for NFS into here. The top-level
28 * index can than have other cache objects inserted into it.
29 */
30struct fscache_netfs nfs_fscache_netfs = {
31 .name = "nfs",
32 .version = 0,
33};
34
35/*
36 * Register NFS for caching
37 */
38int nfs_fscache_register(void)
39{
40 return fscache_register_netfs(&nfs_fscache_netfs);
41}
42
43/*
44 * Unregister NFS for caching
45 */
46void nfs_fscache_unregister(void)
47{
48 fscache_unregister_netfs(&nfs_fscache_netfs);
49}
50
51/*
52 * Layout of the key for an NFS server cache object.
53 */
54struct nfs_server_key {
55 uint16_t nfsversion; /* NFS protocol version */
56 uint16_t family; /* address family */
57 uint16_t port; /* IP port */
58 union {
59 struct in_addr ipv4_addr; /* IPv4 address */
60 struct in6_addr ipv6_addr; /* IPv6 address */
61 } addr[0];
62};
63
64/*
65 * Generate a key to describe a server in the main NFS index
66 * - We return the length of the key, or 0 if we can't generate one
67 */
68static uint16_t nfs_server_get_key(const void *cookie_netfs_data,
69 void *buffer, uint16_t bufmax)
70{
71 const struct nfs_client *clp = cookie_netfs_data;
72 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr;
73 const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr;
74 struct nfs_server_key *key = buffer;
75 uint16_t len = sizeof(struct nfs_server_key);
76
77 key->nfsversion = clp->rpc_ops->version;
78 key->family = clp->cl_addr.ss_family;
79
80 memset(key, 0, len);
81
82 switch (clp->cl_addr.ss_family) {
83 case AF_INET:
84 key->port = sin->sin_port;
85 key->addr[0].ipv4_addr = sin->sin_addr;
86 len += sizeof(key->addr[0].ipv4_addr);
87 break;
88
89 case AF_INET6:
90 key->port = sin6->sin6_port;
91 key->addr[0].ipv6_addr = sin6->sin6_addr;
92 len += sizeof(key->addr[0].ipv6_addr);
93 break;
94
95 default:
96 printk(KERN_WARNING "NFS: Unknown network family '%d'\n",
97 clp->cl_addr.ss_family);
98 len = 0;
99 break;
100 }
101
102 return len;
103}
104
105/*
106 * Define the server object for FS-Cache. This is used to describe a server
107 * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and
108 * server address parameters.
109 */
110const struct fscache_cookie_def nfs_fscache_server_index_def = {
111 .name = "NFS.server",
112 .type = FSCACHE_COOKIE_TYPE_INDEX,
113 .get_key = nfs_server_get_key,
114};
115
116/*
117 * Generate a key to describe a superblock key in the main NFS index
118 */
119static uint16_t nfs_super_get_key(const void *cookie_netfs_data,
120 void *buffer, uint16_t bufmax)
121{
122 const struct nfs_fscache_key *key;
123 const struct nfs_server *nfss = cookie_netfs_data;
124 uint16_t len;
125
126 key = nfss->fscache_key;
127 len = sizeof(key->key) + key->key.uniq_len;
128 if (len > bufmax) {
129 len = 0;
130 } else {
131 memcpy(buffer, &key->key, sizeof(key->key));
132 memcpy(buffer + sizeof(key->key),
133 key->key.uniquifier, key->key.uniq_len);
134 }
135
136 return len;
137}
138
139/*
140 * Define the superblock object for FS-Cache. This is used to describe a
141 * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS
142 * parameters that might cause a separate superblock.
143 */
144const struct fscache_cookie_def nfs_fscache_super_index_def = {
145 .name = "NFS.super",
146 .type = FSCACHE_COOKIE_TYPE_INDEX,
147 .get_key = nfs_super_get_key,
148};
149
150/*
151 * Definition of the auxiliary data attached to NFS inode storage objects
152 * within the cache.
153 *
154 * The contents of this struct are recorded in the on-disk local cache in the
155 * auxiliary data attached to the data storage object backing an inode. This
156 * permits coherency to be managed when a new inode binds to an already extant
157 * cache object.
158 */
159struct nfs_fscache_inode_auxdata {
160 struct timespec mtime;
161 struct timespec ctime;
162 loff_t size;
163 u64 change_attr;
164};
165
166/*
167 * Generate a key to describe an NFS inode in an NFS server's index
168 */
169static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data,
170 void *buffer, uint16_t bufmax)
171{
172 const struct nfs_inode *nfsi = cookie_netfs_data;
173 uint16_t nsize;
174
175 /* use the inode's NFS filehandle as the key */
176 nsize = nfsi->fh.size;
177 memcpy(buffer, nfsi->fh.data, nsize);
178 return nsize;
179}
180
181/*
182 * Get certain file attributes from the netfs data
183 * - This function can be absent for an index
184 * - Not permitted to return an error
185 * - The netfs data from the cookie being used as the source is presented
186 */
187static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data,
188 uint64_t *size)
189{
190 const struct nfs_inode *nfsi = cookie_netfs_data;
191
192 *size = nfsi->vfs_inode.i_size;
193}
194
195/*
196 * Get the auxiliary data from netfs data
197 * - This function can be absent if the index carries no state data
198 * - Should store the auxiliary data in the buffer
199 * - Should return the amount of amount stored
200 * - Not permitted to return an error
201 * - The netfs data from the cookie being used as the source is presented
202 */
203static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data,
204 void *buffer, uint16_t bufmax)
205{
206 struct nfs_fscache_inode_auxdata auxdata;
207 const struct nfs_inode *nfsi = cookie_netfs_data;
208
209 memset(&auxdata, 0, sizeof(auxdata));
210 auxdata.size = nfsi->vfs_inode.i_size;
211 auxdata.mtime = nfsi->vfs_inode.i_mtime;
212 auxdata.ctime = nfsi->vfs_inode.i_ctime;
213
214 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
215 auxdata.change_attr = nfsi->change_attr;
216
217 if (bufmax > sizeof(auxdata))
218 bufmax = sizeof(auxdata);
219
220 memcpy(buffer, &auxdata, bufmax);
221 return bufmax;
222}
223
224/*
225 * Consult the netfs about the state of an object
226 * - This function can be absent if the index carries no state data
227 * - The netfs data from the cookie being used as the target is
228 * presented, as is the auxiliary data
229 */
230static
231enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data,
232 const void *data,
233 uint16_t datalen)
234{
235 struct nfs_fscache_inode_auxdata auxdata;
236 struct nfs_inode *nfsi = cookie_netfs_data;
237
238 if (datalen != sizeof(auxdata))
239 return FSCACHE_CHECKAUX_OBSOLETE;
240
241 memset(&auxdata, 0, sizeof(auxdata));
242 auxdata.size = nfsi->vfs_inode.i_size;
243 auxdata.mtime = nfsi->vfs_inode.i_mtime;
244 auxdata.ctime = nfsi->vfs_inode.i_ctime;
245
246 if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4)
247 auxdata.change_attr = nfsi->change_attr;
248
249 if (memcmp(data, &auxdata, datalen) != 0)
250 return FSCACHE_CHECKAUX_OBSOLETE;
251
252 return FSCACHE_CHECKAUX_OKAY;
253}
254
255/*
256 * Indication from FS-Cache that the cookie is no longer cached
257 * - This function is called when the backing store currently caching a cookie
258 * is removed
259 * - The netfs should use this to clean up any markers indicating cached pages
260 * - This is mandatory for any object that may have data
261 */
262static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data)
263{
264 struct nfs_inode *nfsi = cookie_netfs_data;
265 struct pagevec pvec;
266 pgoff_t first;
267 int loop, nr_pages;
268
269 pagevec_init(&pvec, 0);
270 first = 0;
271
272 dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi);
273
274 for (;;) {
275 /* grab a bunch of pages to unmark */
276 nr_pages = pagevec_lookup(&pvec,
277 nfsi->vfs_inode.i_mapping,
278 first,
279 PAGEVEC_SIZE - pagevec_count(&pvec));
280 if (!nr_pages)
281 break;
282
283 for (loop = 0; loop < nr_pages; loop++)
284 ClearPageFsCache(pvec.pages[loop]);
285
286 first = pvec.pages[nr_pages - 1]->index + 1;
287
288 pvec.nr = nr_pages;
289 pagevec_release(&pvec);
290 cond_resched();
291 }
292}
293
294/*
295 * Get an extra reference on a read context.
296 * - This function can be absent if the completion function doesn't require a
297 * context.
298 * - The read context is passed back to NFS in the event that a data read on the
299 * cache fails with EIO - in which case the server must be contacted to
300 * retrieve the data, which requires the read context for security.
301 */
302static void nfs_fh_get_context(void *cookie_netfs_data, void *context)
303{
304 get_nfs_open_context(context);
305}
306
307/*
308 * Release an extra reference on a read context.
309 * - This function can be absent if the completion function doesn't require a
310 * context.
311 */
312static void nfs_fh_put_context(void *cookie_netfs_data, void *context)
313{
314 if (context)
315 put_nfs_open_context(context);
316}
317
318/*
319 * Define the inode object for FS-Cache. This is used to describe an inode
320 * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for
321 * an inode.
322 *
323 * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime
324 * held in the cache auxiliary data for the data storage object with those in
325 * the inode struct in memory.
326 */
327const struct fscache_cookie_def nfs_fscache_inode_object_def = {
328 .name = "NFS.fh",
329 .type = FSCACHE_COOKIE_TYPE_DATAFILE,
330 .get_key = nfs_fscache_inode_get_key,
331 .get_attr = nfs_fscache_inode_get_attr,
332 .get_aux = nfs_fscache_inode_get_aux,
333 .check_aux = nfs_fscache_inode_check_aux,
334 .now_uncached = nfs_fscache_inode_now_uncached,
335 .get_context = nfs_fh_get_context,
336 .put_context = nfs_fh_put_context,
337};
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
new file mode 100644
index 000000000000..379be678cb7e
--- /dev/null
+++ b/fs/nfs/fscache.c
@@ -0,0 +1,523 @@
1/* NFS filesystem cache interface
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/init.h>
13#include <linux/kernel.h>
14#include <linux/sched.h>
15#include <linux/mm.h>
16#include <linux/nfs_fs.h>
17#include <linux/nfs_fs_sb.h>
18#include <linux/in6.h>
19#include <linux/seq_file.h>
20
21#include "internal.h"
22#include "iostat.h"
23#include "fscache.h"
24
25#define NFSDBG_FACILITY NFSDBG_FSCACHE
26
27static struct rb_root nfs_fscache_keys = RB_ROOT;
28static DEFINE_SPINLOCK(nfs_fscache_keys_lock);
29
30/*
31 * Get the per-client index cookie for an NFS client if the appropriate mount
32 * flag was set
33 * - We always try and get an index cookie for the client, but get filehandle
34 * cookies on a per-superblock basis, depending on the mount flags
35 */
36void nfs_fscache_get_client_cookie(struct nfs_client *clp)
37{
38 /* create a cache index for looking up filehandles */
39 clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index,
40 &nfs_fscache_server_index_def,
41 clp);
42 dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n",
43 clp, clp->fscache);
44}
45
46/*
47 * Dispose of a per-client cookie
48 */
49void nfs_fscache_release_client_cookie(struct nfs_client *clp)
50{
51 dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n",
52 clp, clp->fscache);
53
54 fscache_relinquish_cookie(clp->fscache, 0);
55 clp->fscache = NULL;
56}
57
58/*
59 * Get the cache cookie for an NFS superblock. We have to handle
60 * uniquification here because the cache doesn't do it for us.
61 */
62void nfs_fscache_get_super_cookie(struct super_block *sb,
63 struct nfs_parsed_mount_data *data)
64{
65 struct nfs_fscache_key *key, *xkey;
66 struct nfs_server *nfss = NFS_SB(sb);
67 struct rb_node **p, *parent;
68 const char *uniq = data->fscache_uniq ?: "";
69 int diff, ulen;
70
71 ulen = strlen(uniq);
72 key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL);
73 if (!key)
74 return;
75
76 key->nfs_client = nfss->nfs_client;
77 key->key.super.s_flags = sb->s_flags & NFS_MS_MASK;
78 key->key.nfs_server.flags = nfss->flags;
79 key->key.nfs_server.rsize = nfss->rsize;
80 key->key.nfs_server.wsize = nfss->wsize;
81 key->key.nfs_server.acregmin = nfss->acregmin;
82 key->key.nfs_server.acregmax = nfss->acregmax;
83 key->key.nfs_server.acdirmin = nfss->acdirmin;
84 key->key.nfs_server.acdirmax = nfss->acdirmax;
85 key->key.nfs_server.fsid = nfss->fsid;
86 key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor;
87
88 key->key.uniq_len = ulen;
89 memcpy(key->key.uniquifier, uniq, ulen);
90
91 spin_lock(&nfs_fscache_keys_lock);
92 p = &nfs_fscache_keys.rb_node;
93 parent = NULL;
94 while (*p) {
95 parent = *p;
96 xkey = rb_entry(parent, struct nfs_fscache_key, node);
97
98 if (key->nfs_client < xkey->nfs_client)
99 goto go_left;
100 if (key->nfs_client > xkey->nfs_client)
101 goto go_right;
102
103 diff = memcmp(&key->key, &xkey->key, sizeof(key->key));
104 if (diff < 0)
105 goto go_left;
106 if (diff > 0)
107 goto go_right;
108
109 if (key->key.uniq_len == 0)
110 goto non_unique;
111 diff = memcmp(key->key.uniquifier,
112 xkey->key.uniquifier,
113 key->key.uniq_len);
114 if (diff < 0)
115 goto go_left;
116 if (diff > 0)
117 goto go_right;
118 goto non_unique;
119
120 go_left:
121 p = &(*p)->rb_left;
122 continue;
123 go_right:
124 p = &(*p)->rb_right;
125 }
126
127 rb_link_node(&key->node, parent, p);
128 rb_insert_color(&key->node, &nfs_fscache_keys);
129 spin_unlock(&nfs_fscache_keys_lock);
130 nfss->fscache_key = key;
131
132 /* create a cache index for looking up filehandles */
133 nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache,
134 &nfs_fscache_super_index_def,
135 nfss);
136 dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n",
137 nfss, nfss->fscache);
138 return;
139
140non_unique:
141 spin_unlock(&nfs_fscache_keys_lock);
142 kfree(key);
143 nfss->fscache_key = NULL;
144 nfss->fscache = NULL;
145 printk(KERN_WARNING "NFS:"
146 " Cache request denied due to non-unique superblock keys\n");
147}
148
149/*
150 * release a per-superblock cookie
151 */
152void nfs_fscache_release_super_cookie(struct super_block *sb)
153{
154 struct nfs_server *nfss = NFS_SB(sb);
155
156 dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n",
157 nfss, nfss->fscache);
158
159 fscache_relinquish_cookie(nfss->fscache, 0);
160 nfss->fscache = NULL;
161
162 if (nfss->fscache_key) {
163 spin_lock(&nfs_fscache_keys_lock);
164 rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys);
165 spin_unlock(&nfs_fscache_keys_lock);
166 kfree(nfss->fscache_key);
167 nfss->fscache_key = NULL;
168 }
169}
170
171/*
172 * Initialise the per-inode cache cookie pointer for an NFS inode.
173 */
174void nfs_fscache_init_inode_cookie(struct inode *inode)
175{
176 NFS_I(inode)->fscache = NULL;
177 if (S_ISREG(inode->i_mode))
178 set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
179}
180
181/*
182 * Get the per-inode cache cookie for an NFS inode.
183 */
184static void nfs_fscache_enable_inode_cookie(struct inode *inode)
185{
186 struct super_block *sb = inode->i_sb;
187 struct nfs_inode *nfsi = NFS_I(inode);
188
189 if (nfsi->fscache || !NFS_FSCACHE(inode))
190 return;
191
192 if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) {
193 nfsi->fscache = fscache_acquire_cookie(
194 NFS_SB(sb)->fscache,
195 &nfs_fscache_inode_object_def,
196 nfsi);
197
198 dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n",
199 sb, nfsi, nfsi->fscache);
200 }
201}
202
203/*
204 * Release a per-inode cookie.
205 */
206void nfs_fscache_release_inode_cookie(struct inode *inode)
207{
208 struct nfs_inode *nfsi = NFS_I(inode);
209
210 dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n",
211 nfsi, nfsi->fscache);
212
213 fscache_relinquish_cookie(nfsi->fscache, 0);
214 nfsi->fscache = NULL;
215}
216
217/*
218 * Retire a per-inode cookie, destroying the data attached to it.
219 */
220void nfs_fscache_zap_inode_cookie(struct inode *inode)
221{
222 struct nfs_inode *nfsi = NFS_I(inode);
223
224 dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n",
225 nfsi, nfsi->fscache);
226
227 fscache_relinquish_cookie(nfsi->fscache, 1);
228 nfsi->fscache = NULL;
229}
230
231/*
232 * Turn off the cache with regard to a per-inode cookie if opened for writing,
233 * invalidating all the pages in the page cache relating to the associated
234 * inode to clear the per-page caching.
235 */
236static void nfs_fscache_disable_inode_cookie(struct inode *inode)
237{
238 clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
239
240 if (NFS_I(inode)->fscache) {
241 dfprintk(FSCACHE,
242 "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode));
243
244 /* Need to invalidate any mapped pages that were read in before
245 * turning off the cache.
246 */
247 if (inode->i_mapping && inode->i_mapping->nrpages)
248 invalidate_inode_pages2(inode->i_mapping);
249
250 nfs_fscache_zap_inode_cookie(inode);
251 }
252}
253
254/*
255 * wait_on_bit() sleep function for uninterruptible waiting
256 */
257static int nfs_fscache_wait_bit(void *flags)
258{
259 schedule();
260 return 0;
261}
262
263/*
264 * Lock against someone else trying to also acquire or relinquish a cookie
265 */
266static inline void nfs_fscache_inode_lock(struct inode *inode)
267{
268 struct nfs_inode *nfsi = NFS_I(inode);
269
270 while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags))
271 wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK,
272 nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE);
273}
274
275/*
276 * Unlock cookie management lock
277 */
278static inline void nfs_fscache_inode_unlock(struct inode *inode)
279{
280 struct nfs_inode *nfsi = NFS_I(inode);
281
282 smp_mb__before_clear_bit();
283 clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags);
284 smp_mb__after_clear_bit();
285 wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK);
286}
287
288/*
289 * Decide if we should enable or disable local caching for this inode.
290 * - For now, with NFS, only regular files that are open read-only will be able
291 * to use the cache.
292 * - May be invoked multiple times in parallel by parallel nfs_open() functions.
293 */
294void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
295{
296 if (NFS_FSCACHE(inode)) {
297 nfs_fscache_inode_lock(inode);
298 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
299 nfs_fscache_disable_inode_cookie(inode);
300 else
301 nfs_fscache_enable_inode_cookie(inode);
302 nfs_fscache_inode_unlock(inode);
303 }
304}
305
306/*
307 * Replace a per-inode cookie due to revalidation detecting a file having
308 * changed on the server.
309 */
310void nfs_fscache_reset_inode_cookie(struct inode *inode)
311{
312 struct nfs_inode *nfsi = NFS_I(inode);
313 struct nfs_server *nfss = NFS_SERVER(inode);
314 struct fscache_cookie *old = nfsi->fscache;
315
316 nfs_fscache_inode_lock(inode);
317 if (nfsi->fscache) {
318 /* retire the current fscache cache and get a new one */
319 fscache_relinquish_cookie(nfsi->fscache, 1);
320
321 nfsi->fscache = fscache_acquire_cookie(
322 nfss->nfs_client->fscache,
323 &nfs_fscache_inode_object_def,
324 nfsi);
325
326 dfprintk(FSCACHE,
327 "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n",
328 nfss, nfsi, old, nfsi->fscache);
329 }
330 nfs_fscache_inode_unlock(inode);
331}
332
333/*
334 * Release the caching state associated with a page, if the page isn't busy
335 * interacting with the cache.
336 * - Returns true (can release page) or false (page busy).
337 */
338int nfs_fscache_release_page(struct page *page, gfp_t gfp)
339{
340 struct nfs_inode *nfsi = NFS_I(page->mapping->host);
341 struct fscache_cookie *cookie = nfsi->fscache;
342
343 BUG_ON(!cookie);
344
345 if (fscache_check_page_write(cookie, page)) {
346 if (!(gfp & __GFP_WAIT))
347 return 0;
348 fscache_wait_on_page_write(cookie, page);
349 }
350
351 if (PageFsCache(page)) {
352 dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
353 cookie, page, nfsi);
354
355 fscache_uncache_page(cookie, page);
356 nfs_add_fscache_stats(page->mapping->host,
357 NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
358 }
359
360 return 1;
361}
362
363/*
364 * Release the caching state associated with a page if undergoing complete page
365 * invalidation.
366 */
367void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
368{
369 struct nfs_inode *nfsi = NFS_I(inode);
370 struct fscache_cookie *cookie = nfsi->fscache;
371
372 BUG_ON(!cookie);
373
374 dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n",
375 cookie, page, nfsi);
376
377 fscache_wait_on_page_write(cookie, page);
378
379 BUG_ON(!PageLocked(page));
380 fscache_uncache_page(cookie, page);
381 nfs_add_fscache_stats(page->mapping->host,
382 NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
383}
384
385/*
386 * Handle completion of a page being read from the cache.
387 * - Called in process (keventd) context.
388 */
389static void nfs_readpage_from_fscache_complete(struct page *page,
390 void *context,
391 int error)
392{
393 dfprintk(FSCACHE,
394 "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n",
395 page, context, error);
396
397 /* if the read completes with an error, we just unlock the page and let
398 * the VM reissue the readpage */
399 if (!error) {
400 SetPageUptodate(page);
401 unlock_page(page);
402 } else {
403 error = nfs_readpage_async(context, page->mapping->host, page);
404 if (error)
405 unlock_page(page);
406 }
407}
408
409/*
410 * Retrieve a page from fscache
411 */
412int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
413 struct inode *inode, struct page *page)
414{
415 int ret;
416
417 dfprintk(FSCACHE,
418 "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n",
419 NFS_I(inode)->fscache, page, page->index, page->flags, inode);
420
421 ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache,
422 page,
423 nfs_readpage_from_fscache_complete,
424 ctx,
425 GFP_KERNEL);
426
427 switch (ret) {
428 case 0: /* read BIO submitted (page in fscache) */
429 dfprintk(FSCACHE,
430 "NFS: readpage_from_fscache: BIO submitted\n");
431 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
432 return ret;
433
434 case -ENOBUFS: /* inode not in cache */
435 case -ENODATA: /* page not in cache */
436 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
437 dfprintk(FSCACHE,
438 "NFS: readpage_from_fscache %d\n", ret);
439 return 1;
440
441 default:
442 dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret);
443 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
444 }
445 return ret;
446}
447
448/*
449 * Retrieve a set of pages from fscache
450 */
451int __nfs_readpages_from_fscache(struct nfs_open_context *ctx,
452 struct inode *inode,
453 struct address_space *mapping,
454 struct list_head *pages,
455 unsigned *nr_pages)
456{
457 int ret, npages = *nr_pages;
458
459 dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n",
460 NFS_I(inode)->fscache, npages, inode);
461
462 ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache,
463 mapping, pages, nr_pages,
464 nfs_readpage_from_fscache_complete,
465 ctx,
466 mapping_gfp_mask(mapping));
467 if (*nr_pages < npages)
468 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK,
469 npages);
470 if (*nr_pages > 0)
471 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL,
472 *nr_pages);
473
474 switch (ret) {
475 case 0: /* read submitted to the cache for all pages */
476 BUG_ON(!list_empty(pages));
477 BUG_ON(*nr_pages != 0);
478 dfprintk(FSCACHE,
479 "NFS: nfs_getpages_from_fscache: submitted\n");
480
481 return ret;
482
483 case -ENOBUFS: /* some pages aren't cached and can't be */
484 case -ENODATA: /* some pages aren't cached */
485 dfprintk(FSCACHE,
486 "NFS: nfs_getpages_from_fscache: no page: %d\n", ret);
487 return 1;
488
489 default:
490 dfprintk(FSCACHE,
491 "NFS: nfs_getpages_from_fscache: ret %d\n", ret);
492 }
493
494 return ret;
495}
496
497/*
498 * Store a newly fetched page in fscache
499 * - PG_fscache must be set on the page
500 */
501void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
502{
503 int ret;
504
505 dfprintk(FSCACHE,
506 "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n",
507 NFS_I(inode)->fscache, page, page->index, page->flags, sync);
508
509 ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL);
510 dfprintk(FSCACHE,
511 "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n",
512 page, page->index, page->flags, ret);
513
514 if (ret != 0) {
515 fscache_uncache_page(NFS_I(inode)->fscache, page);
516 nfs_add_fscache_stats(inode,
517 NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
518 nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
519 } else {
520 nfs_add_fscache_stats(inode,
521 NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
522 }
523}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
new file mode 100644
index 000000000000..6e809bb0ff08
--- /dev/null
+++ b/fs/nfs/fscache.h
@@ -0,0 +1,220 @@
1/* NFS filesystem cache interface definitions
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#ifndef _NFS_FSCACHE_H
13#define _NFS_FSCACHE_H
14
15#include <linux/nfs_fs.h>
16#include <linux/nfs_mount.h>
17#include <linux/nfs4_mount.h>
18#include <linux/fscache.h>
19
20#ifdef CONFIG_NFS_FSCACHE
21
22/*
23 * set of NFS FS-Cache objects that form a superblock key
24 */
25struct nfs_fscache_key {
26 struct rb_node node;
27 struct nfs_client *nfs_client; /* the server */
28
29 /* the elements of the unique key - as used by nfs_compare_super() and
30 * nfs_compare_mount_options() to distinguish superblocks */
31 struct {
32 struct {
33 unsigned long s_flags; /* various flags
34 * (& NFS_MS_MASK) */
35 } super;
36
37 struct {
38 struct nfs_fsid fsid;
39 int flags;
40 unsigned int rsize; /* read size */
41 unsigned int wsize; /* write size */
42 unsigned int acregmin; /* attr cache timeouts */
43 unsigned int acregmax;
44 unsigned int acdirmin;
45 unsigned int acdirmax;
46 } nfs_server;
47
48 struct {
49 rpc_authflavor_t au_flavor;
50 } rpc_auth;
51
52 /* uniquifier - can be used if nfs_server.flags includes
53 * NFS_MOUNT_UNSHARED */
54 u8 uniq_len;
55 char uniquifier[0];
56 } key;
57};
58
59/*
60 * fscache-index.c
61 */
62extern struct fscache_netfs nfs_fscache_netfs;
63extern const struct fscache_cookie_def nfs_fscache_server_index_def;
64extern const struct fscache_cookie_def nfs_fscache_super_index_def;
65extern const struct fscache_cookie_def nfs_fscache_inode_object_def;
66
67extern int nfs_fscache_register(void);
68extern void nfs_fscache_unregister(void);
69
70/*
71 * fscache.c
72 */
73extern void nfs_fscache_get_client_cookie(struct nfs_client *);
74extern void nfs_fscache_release_client_cookie(struct nfs_client *);
75
76extern void nfs_fscache_get_super_cookie(struct super_block *,
77 struct nfs_parsed_mount_data *);
78extern void nfs_fscache_release_super_cookie(struct super_block *);
79
80extern void nfs_fscache_init_inode_cookie(struct inode *);
81extern void nfs_fscache_release_inode_cookie(struct inode *);
82extern void nfs_fscache_zap_inode_cookie(struct inode *);
83extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *);
84extern void nfs_fscache_reset_inode_cookie(struct inode *);
85
86extern void __nfs_fscache_invalidate_page(struct page *, struct inode *);
87extern int nfs_fscache_release_page(struct page *, gfp_t);
88
89extern int __nfs_readpage_from_fscache(struct nfs_open_context *,
90 struct inode *, struct page *);
91extern int __nfs_readpages_from_fscache(struct nfs_open_context *,
92 struct inode *, struct address_space *,
93 struct list_head *, unsigned *);
94extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int);
95
96/*
97 * wait for a page to complete writing to the cache
98 */
99static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
100 struct page *page)
101{
102 if (PageFsCache(page))
103 fscache_wait_on_page_write(nfsi->fscache, page);
104}
105
106/*
107 * release the caching state associated with a page if undergoing complete page
108 * invalidation
109 */
110static inline void nfs_fscache_invalidate_page(struct page *page,
111 struct inode *inode)
112{
113 if (PageFsCache(page))
114 __nfs_fscache_invalidate_page(page, inode);
115}
116
117/*
118 * Retrieve a page from an inode data storage object.
119 */
120static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
121 struct inode *inode,
122 struct page *page)
123{
124 if (NFS_I(inode)->fscache)
125 return __nfs_readpage_from_fscache(ctx, inode, page);
126 return -ENOBUFS;
127}
128
129/*
130 * Retrieve a set of pages from an inode data storage object.
131 */
132static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
133 struct inode *inode,
134 struct address_space *mapping,
135 struct list_head *pages,
136 unsigned *nr_pages)
137{
138 if (NFS_I(inode)->fscache)
139 return __nfs_readpages_from_fscache(ctx, inode, mapping, pages,
140 nr_pages);
141 return -ENOBUFS;
142}
143
144/*
145 * Store a page newly fetched from the server in an inode data storage object
146 * in the cache.
147 */
148static inline void nfs_readpage_to_fscache(struct inode *inode,
149 struct page *page,
150 int sync)
151{
152 if (PageFsCache(page))
153 __nfs_readpage_to_fscache(inode, page, sync);
154}
155
156/*
157 * indicate the client caching state as readable text
158 */
159static inline const char *nfs_server_fscache_state(struct nfs_server *server)
160{
161 if (server->fscache && (server->options & NFS_OPTION_FSCACHE))
162 return "yes";
163 return "no ";
164}
165
166
167#else /* CONFIG_NFS_FSCACHE */
168static inline int nfs_fscache_register(void) { return 0; }
169static inline void nfs_fscache_unregister(void) {}
170
171static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp) {}
172static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {}
173
174static inline void nfs_fscache_get_super_cookie(
175 struct super_block *sb,
176 struct nfs_parsed_mount_data *data)
177{
178}
179static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {}
180
181static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {}
182static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {}
183static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {}
184static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
185 struct file *filp) {}
186static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {}
187
188static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp)
189{
190 return 1; /* True: may release page */
191}
192static inline void nfs_fscache_invalidate_page(struct page *page,
193 struct inode *inode) {}
194static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
195 struct page *page) {}
196
197static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
198 struct inode *inode,
199 struct page *page)
200{
201 return -ENOBUFS;
202}
203static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
204 struct inode *inode,
205 struct address_space *mapping,
206 struct list_head *pages,
207 unsigned *nr_pages)
208{
209 return -ENOBUFS;
210}
211static inline void nfs_readpage_to_fscache(struct inode *inode,
212 struct page *page, int sync) {}
213
214static inline const char *nfs_server_fscache_state(struct nfs_server *server)
215{
216 return "no ";
217}
218
219#endif /* CONFIG_NFS_FSCACHE */
220#endif /* _NFS_FSCACHE_H */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a834d1d850b7..64f87194d390 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -46,6 +46,7 @@
46#include "delegation.h" 46#include "delegation.h"
47#include "iostat.h" 47#include "iostat.h"
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h"
49 50
50#define NFSDBG_FACILITY NFSDBG_VFS 51#define NFSDBG_FACILITY NFSDBG_VFS
51 52
@@ -121,6 +122,7 @@ void nfs_clear_inode(struct inode *inode)
121 BUG_ON(!list_empty(&NFS_I(inode)->open_files)); 122 BUG_ON(!list_empty(&NFS_I(inode)->open_files));
122 nfs_zap_acl_cache(inode); 123 nfs_zap_acl_cache(inode);
123 nfs_access_zap_cache(inode); 124 nfs_access_zap_cache(inode);
125 nfs_fscache_release_inode_cookie(inode);
124} 126}
125 127
126/** 128/**
@@ -355,6 +357,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
355 nfsi->attrtimeo_timestamp = now; 357 nfsi->attrtimeo_timestamp = now;
356 nfsi->access_cache = RB_ROOT; 358 nfsi->access_cache = RB_ROOT;
357 359
360 nfs_fscache_init_inode_cookie(inode);
361
358 unlock_new_inode(inode); 362 unlock_new_inode(inode);
359 } else 363 } else
360 nfs_refresh_inode(inode, fattr); 364 nfs_refresh_inode(inode, fattr);
@@ -686,6 +690,7 @@ int nfs_open(struct inode *inode, struct file *filp)
686 ctx->mode = filp->f_mode; 690 ctx->mode = filp->f_mode;
687 nfs_file_set_open_context(filp, ctx); 691 nfs_file_set_open_context(filp, ctx);
688 put_nfs_open_context(ctx); 692 put_nfs_open_context(ctx);
693 nfs_fscache_set_inode_cookie(inode, filp);
689 return 0; 694 return 0;
690} 695}
691 696
@@ -786,6 +791,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa
786 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); 791 memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
787 spin_unlock(&inode->i_lock); 792 spin_unlock(&inode->i_lock);
788 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); 793 nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
794 nfs_fscache_reset_inode_cookie(inode);
789 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", 795 dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
790 inode->i_sb->s_id, (long long)NFS_FILEID(inode)); 796 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
791 return 0; 797 return 0;
@@ -1030,6 +1036,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
1030 spin_lock(&inode->i_lock); 1036 spin_lock(&inode->i_lock);
1031 status = nfs_refresh_inode_locked(inode, fattr); 1037 status = nfs_refresh_inode_locked(inode, fattr);
1032 spin_unlock(&inode->i_lock); 1038 spin_unlock(&inode->i_lock);
1039
1033 return status; 1040 return status;
1034} 1041}
1035 1042
@@ -1436,6 +1443,10 @@ static int __init init_nfs_fs(void)
1436{ 1443{
1437 int err; 1444 int err;
1438 1445
1446 err = nfs_fscache_register();
1447 if (err < 0)
1448 goto out7;
1449
1439 err = nfsiod_start(); 1450 err = nfsiod_start();
1440 if (err) 1451 if (err)
1441 goto out6; 1452 goto out6;
@@ -1488,6 +1499,8 @@ out4:
1488out5: 1499out5:
1489 nfsiod_stop(); 1500 nfsiod_stop();
1490out6: 1501out6:
1502 nfs_fscache_unregister();
1503out7:
1491 return err; 1504 return err;
1492} 1505}
1493 1506
@@ -1498,6 +1511,7 @@ static void __exit exit_nfs_fs(void)
1498 nfs_destroy_readpagecache(); 1511 nfs_destroy_readpagecache();
1499 nfs_destroy_inodecache(); 1512 nfs_destroy_inodecache();
1500 nfs_destroy_nfspagecache(); 1513 nfs_destroy_nfspagecache();
1514 nfs_fscache_unregister();
1501#ifdef CONFIG_PROC_FS 1515#ifdef CONFIG_PROC_FS
1502 rpc_proc_unregister("nfs"); 1516 rpc_proc_unregister("nfs");
1503#endif 1517#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2041f68ff1cc..e4d6a8348adf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,6 +5,8 @@
5#include <linux/mount.h> 5#include <linux/mount.h>
6#include <linux/security.h> 6#include <linux/security.h>
7 7
8#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
9
8struct nfs_string; 10struct nfs_string;
9 11
10/* Maximum number of readahead requests 12/* Maximum number of readahead requests
@@ -37,10 +39,12 @@ struct nfs_parsed_mount_data {
37 int acregmin, acregmax, 39 int acregmin, acregmax,
38 acdirmin, acdirmax; 40 acdirmin, acdirmax;
39 int namlen; 41 int namlen;
42 unsigned int options;
40 unsigned int bsize; 43 unsigned int bsize;
41 unsigned int auth_flavor_len; 44 unsigned int auth_flavor_len;
42 rpc_authflavor_t auth_flavors[1]; 45 rpc_authflavor_t auth_flavors[1];
43 char *client_address; 46 char *client_address;
47 char *fscache_uniq;
44 48
45 struct { 49 struct {
46 struct sockaddr_storage address; 50 struct sockaddr_storage address;
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index a36952810032..a2ab2529b5ca 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -16,6 +16,9 @@
16 16
17struct nfs_iostats { 17struct nfs_iostats {
18 unsigned long long bytes[__NFSIOS_BYTESMAX]; 18 unsigned long long bytes[__NFSIOS_BYTESMAX];
19#ifdef CONFIG_NFS_FSCACHE
20 unsigned long long fscache[__NFSIOS_FSCACHEMAX];
21#endif
19 unsigned long events[__NFSIOS_COUNTSMAX]; 22 unsigned long events[__NFSIOS_COUNTSMAX];
20} ____cacheline_aligned; 23} ____cacheline_aligned;
21 24
@@ -57,6 +60,21 @@ static inline void nfs_add_stats(const struct inode *inode,
57 nfs_add_server_stats(NFS_SERVER(inode), stat, addend); 60 nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
58} 61}
59 62
63#ifdef CONFIG_NFS_FSCACHE
64static inline void nfs_add_fscache_stats(struct inode *inode,
65 enum nfs_stat_fscachecounters stat,
66 unsigned long addend)
67{
68 struct nfs_iostats *iostats;
69 int cpu;
70
71 cpu = get_cpu();
72 iostats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, cpu);
73 iostats->fscache[stat] += addend;
74 put_cpu_no_resched();
75}
76#endif
77
60static inline struct nfs_iostats *nfs_alloc_iostats(void) 78static inline struct nfs_iostats *nfs_alloc_iostats(void)
61{ 79{
62 return alloc_percpu(struct nfs_iostats); 80 return alloc_percpu(struct nfs_iostats);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f856004bb7fa..4ace3c50a8eb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,6 +24,7 @@
24 24
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h"
27 28
28#define NFSDBG_FACILITY NFSDBG_PAGECACHE 29#define NFSDBG_FACILITY NFSDBG_PAGECACHE
29 30
@@ -111,8 +112,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
111 } 112 }
112} 113}
113 114
114static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 115int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 struct page *page) 116 struct page *page)
116{ 117{
117 LIST_HEAD(one_request); 118 LIST_HEAD(one_request);
118 struct nfs_page *new; 119 struct nfs_page *new;
@@ -139,6 +140,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
139 140
140static void nfs_readpage_release(struct nfs_page *req) 141static void nfs_readpage_release(struct nfs_page *req)
141{ 142{
143 struct inode *d_inode = req->wb_context->path.dentry->d_inode;
144
145 if (PageUptodate(req->wb_page))
146 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
147
142 unlock_page(req->wb_page); 148 unlock_page(req->wb_page);
143 149
144 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 150 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
@@ -510,8 +516,15 @@ int nfs_readpage(struct file *file, struct page *page)
510 } else 516 } else
511 ctx = get_nfs_open_context(nfs_file_open_context(file)); 517 ctx = get_nfs_open_context(nfs_file_open_context(file));
512 518
519 if (!IS_SYNC(inode)) {
520 error = nfs_readpage_from_fscache(ctx, inode, page);
521 if (error == 0)
522 goto out;
523 }
524
513 error = nfs_readpage_async(ctx, inode, page); 525 error = nfs_readpage_async(ctx, inode, page);
514 526
527out:
515 put_nfs_open_context(ctx); 528 put_nfs_open_context(ctx);
516 return error; 529 return error;
517out_unlock: 530out_unlock:
@@ -584,6 +597,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
584 return -EBADF; 597 return -EBADF;
585 } else 598 } else
586 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); 599 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
600
601 /* attempt to read as many of the pages as possible from the cache
602 * - this returns -ENOBUFS immediately if the cookie is negative
603 */
604 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
605 pages, &nr_pages);
606 if (ret == 0)
607 goto read_complete; /* all pages were read */
608
587 if (rsize < PAGE_CACHE_SIZE) 609 if (rsize < PAGE_CACHE_SIZE)
588 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 610 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
589 else 611 else
@@ -594,6 +616,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
594 nfs_pageio_complete(&pgio); 616 nfs_pageio_complete(&pgio);
595 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 617 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
596 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 618 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
619read_complete:
597 put_nfs_open_context(desc.ctx); 620 put_nfs_open_context(desc.ctx);
598out: 621out:
599 return ret; 622 return ret;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0942fcbbad3c..82eaadbff408 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -60,6 +60,7 @@
60#include "delegation.h" 60#include "delegation.h"
61#include "iostat.h" 61#include "iostat.h"
62#include "internal.h" 62#include "internal.h"
63#include "fscache.h"
63 64
64#define NFSDBG_FACILITY NFSDBG_VFS 65#define NFSDBG_FACILITY NFSDBG_VFS
65 66
@@ -76,6 +77,7 @@ enum {
76 Opt_rdirplus, Opt_nordirplus, 77 Opt_rdirplus, Opt_nordirplus,
77 Opt_sharecache, Opt_nosharecache, 78 Opt_sharecache, Opt_nosharecache,
78 Opt_resvport, Opt_noresvport, 79 Opt_resvport, Opt_noresvport,
80 Opt_fscache, Opt_nofscache,
79 81
80 /* Mount options that take integer arguments */ 82 /* Mount options that take integer arguments */
81 Opt_port, 83 Opt_port,
@@ -93,6 +95,7 @@ enum {
93 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, 95 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
94 Opt_addr, Opt_mountaddr, Opt_clientaddr, 96 Opt_addr, Opt_mountaddr, Opt_clientaddr,
95 Opt_lookupcache, 97 Opt_lookupcache,
98 Opt_fscache_uniq,
96 99
97 /* Special mount options */ 100 /* Special mount options */
98 Opt_userspace, Opt_deprecated, Opt_sloppy, 101 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -132,6 +135,9 @@ static const match_table_t nfs_mount_option_tokens = {
132 { Opt_nosharecache, "nosharecache" }, 135 { Opt_nosharecache, "nosharecache" },
133 { Opt_resvport, "resvport" }, 136 { Opt_resvport, "resvport" },
134 { Opt_noresvport, "noresvport" }, 137 { Opt_noresvport, "noresvport" },
138 { Opt_fscache, "fsc" },
139 { Opt_fscache_uniq, "fsc=%s" },
140 { Opt_nofscache, "nofsc" },
135 141
136 { Opt_port, "port=%u" }, 142 { Opt_port, "port=%u" },
137 { Opt_rsize, "rsize=%u" }, 143 { Opt_rsize, "rsize=%u" },
@@ -563,6 +569,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
563 if (clp->rpc_ops->version == 4) 569 if (clp->rpc_ops->version == 4)
564 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); 570 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
565#endif 571#endif
572 if (nfss->options & NFS_OPTION_FSCACHE)
573 seq_printf(m, ",fsc");
566} 574}
567 575
568/* 576/*
@@ -641,6 +649,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
641 totals.events[i] += stats->events[i]; 649 totals.events[i] += stats->events[i];
642 for (i = 0; i < __NFSIOS_BYTESMAX; i++) 650 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
643 totals.bytes[i] += stats->bytes[i]; 651 totals.bytes[i] += stats->bytes[i];
652#ifdef CONFIG_NFS_FSCACHE
653 for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
654 totals.fscache[i] += stats->fscache[i];
655#endif
644 656
645 preempt_enable(); 657 preempt_enable();
646 } 658 }
@@ -651,6 +663,13 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
651 seq_printf(m, "\n\tbytes:\t"); 663 seq_printf(m, "\n\tbytes:\t");
652 for (i = 0; i < __NFSIOS_BYTESMAX; i++) 664 for (i = 0; i < __NFSIOS_BYTESMAX; i++)
653 seq_printf(m, "%Lu ", totals.bytes[i]); 665 seq_printf(m, "%Lu ", totals.bytes[i]);
666#ifdef CONFIG_NFS_FSCACHE
667 if (nfss->options & NFS_OPTION_FSCACHE) {
668 seq_printf(m, "\n\tfsc:\t");
669 for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
670 seq_printf(m, "%Lu ", totals.bytes[i]);
671 }
672#endif
654 seq_printf(m, "\n"); 673 seq_printf(m, "\n");
655 674
656 rpc_print_iostats(m, nfss->client); 675 rpc_print_iostats(m, nfss->client);
@@ -1044,6 +1063,24 @@ static int nfs_parse_mount_options(char *raw,
1044 case Opt_noresvport: 1063 case Opt_noresvport:
1045 mnt->flags |= NFS_MOUNT_NORESVPORT; 1064 mnt->flags |= NFS_MOUNT_NORESVPORT;
1046 break; 1065 break;
1066 case Opt_fscache:
1067 mnt->options |= NFS_OPTION_FSCACHE;
1068 kfree(mnt->fscache_uniq);
1069 mnt->fscache_uniq = NULL;
1070 break;
1071 case Opt_nofscache:
1072 mnt->options &= ~NFS_OPTION_FSCACHE;
1073 kfree(mnt->fscache_uniq);
1074 mnt->fscache_uniq = NULL;
1075 break;
1076 case Opt_fscache_uniq:
1077 string = match_strdup(args);
1078 if (!string)
1079 goto out_nomem;
1080 kfree(mnt->fscache_uniq);
1081 mnt->fscache_uniq = string;
1082 mnt->options |= NFS_OPTION_FSCACHE;
1083 break;
1047 1084
1048 /* 1085 /*
1049 * options that take numeric values 1086 * options that take numeric values
@@ -1870,8 +1907,6 @@ static void nfs_clone_super(struct super_block *sb,
1870 nfs_initialise_sb(sb); 1907 nfs_initialise_sb(sb);
1871} 1908}
1872 1909
1873#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
1874
1875static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) 1910static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
1876{ 1911{
1877 const struct nfs_server *a = s->s_fs_info; 1912 const struct nfs_server *a = s->s_fs_info;
@@ -2036,6 +2071,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2036 if (!s->s_root) { 2071 if (!s->s_root) {
2037 /* initial superblock/root creation */ 2072 /* initial superblock/root creation */
2038 nfs_fill_super(s, data); 2073 nfs_fill_super(s, data);
2074 nfs_fscache_get_super_cookie(s, data);
2039 } 2075 }
2040 2076
2041 mntroot = nfs_get_root(s, mntfh); 2077 mntroot = nfs_get_root(s, mntfh);
@@ -2056,6 +2092,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2056out: 2092out:
2057 kfree(data->nfs_server.hostname); 2093 kfree(data->nfs_server.hostname);
2058 kfree(data->mount_server.hostname); 2094 kfree(data->mount_server.hostname);
2095 kfree(data->fscache_uniq);
2059 security_free_mnt_opts(&data->lsm_opts); 2096 security_free_mnt_opts(&data->lsm_opts);
2060out_free_fh: 2097out_free_fh:
2061 kfree(mntfh); 2098 kfree(mntfh);
@@ -2083,6 +2120,7 @@ static void nfs_kill_super(struct super_block *s)
2083 2120
2084 bdi_unregister(&server->backing_dev_info); 2121 bdi_unregister(&server->backing_dev_info);
2085 kill_anon_super(s); 2122 kill_anon_super(s);
2123 nfs_fscache_release_super_cookie(s);
2086 nfs_free_server(server); 2124 nfs_free_server(server);
2087} 2125}
2088 2126
@@ -2390,6 +2428,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2390 if (!s->s_root) { 2428 if (!s->s_root) {
2391 /* initial superblock/root creation */ 2429 /* initial superblock/root creation */
2392 nfs4_fill_super(s); 2430 nfs4_fill_super(s);
2431 nfs_fscache_get_super_cookie(s, data);
2393 } 2432 }
2394 2433
2395 mntroot = nfs4_get_root(s, mntfh); 2434 mntroot = nfs4_get_root(s, mntfh);
@@ -2411,6 +2450,7 @@ out:
2411 kfree(data->client_address); 2450 kfree(data->client_address);
2412 kfree(data->nfs_server.export_path); 2451 kfree(data->nfs_server.export_path);
2413 kfree(data->nfs_server.hostname); 2452 kfree(data->nfs_server.hostname);
2453 kfree(data->fscache_uniq);
2414 security_free_mnt_opts(&data->lsm_opts); 2454 security_free_mnt_opts(&data->lsm_opts);
2415out_free_fh: 2455out_free_fh:
2416 kfree(mntfh); 2456 kfree(mntfh);
@@ -2437,6 +2477,7 @@ static void nfs4_kill_super(struct super_block *sb)
2437 kill_anon_super(sb); 2477 kill_anon_super(sb);
2438 2478
2439 nfs4_renewd_prepare_shutdown(server); 2479 nfs4_renewd_prepare_shutdown(server);
2480 nfs_fscache_release_super_cookie(sb);
2440 nfs_free_server(server); 2481 nfs_free_server(server);
2441} 2482}
2442 2483
diff --git a/fs/splice.c b/fs/splice.c
index 4ed0ba44a966..dd727d43e5b7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -59,7 +59,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
59 */ 59 */
60 wait_on_page_writeback(page); 60 wait_on_page_writeback(page);
61 61
62 if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) 62 if (page_has_private(page) &&
63 !try_to_release_page(page, GFP_KERNEL))
63 goto out_unlock; 64 goto out_unlock;
64 65
65 /* 66 /*
diff --git a/fs/super.c b/fs/super.c
index 2ba481518ba7..77cb4ec919b9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -287,6 +287,7 @@ int fsync_super(struct super_block *sb)
287 __fsync_super(sb); 287 __fsync_super(sb);
288 return sync_blockdev(sb->s_bdev); 288 return sync_blockdev(sb->s_bdev);
289} 289}
290EXPORT_SYMBOL_GPL(fsync_super);
290 291
291/** 292/**
292 * generic_shutdown_super - common helper for ->kill_sb() 293 * generic_shutdown_super - common helper for ->kill_sb()
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
new file mode 100644
index 000000000000..84d3532dd3ea
--- /dev/null
+++ b/include/linux/fscache-cache.h
@@ -0,0 +1,505 @@
1/* General filesystem caching backing cache interface
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * NOTE!!! See:
12 *
13 * Documentation/filesystems/caching/backend-api.txt
14 *
15 * for a description of the cache backend interface declared here.
16 */
17
18#ifndef _LINUX_FSCACHE_CACHE_H
19#define _LINUX_FSCACHE_CACHE_H
20
21#include <linux/fscache.h>
22#include <linux/sched.h>
23#include <linux/slow-work.h>
24
25#define NR_MAXCACHES BITS_PER_LONG
26
27struct fscache_cache;
28struct fscache_cache_ops;
29struct fscache_object;
30struct fscache_operation;
31
32/*
33 * cache tag definition
34 */
35struct fscache_cache_tag {
36 struct list_head link;
37 struct fscache_cache *cache; /* cache referred to by this tag */
38 unsigned long flags;
39#define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */
40 atomic_t usage;
41 char name[0]; /* tag name */
42};
43
44/*
45 * cache definition
46 */
47struct fscache_cache {
48 const struct fscache_cache_ops *ops;
49 struct fscache_cache_tag *tag; /* tag representing this cache */
50 struct kobject *kobj; /* system representation of this cache */
51 struct list_head link; /* link in list of caches */
52 size_t max_index_size; /* maximum size of index data */
53 char identifier[36]; /* cache label */
54
55 /* node management */
56 struct work_struct op_gc; /* operation garbage collector */
57 struct list_head object_list; /* list of data/index objects */
58 struct list_head op_gc_list; /* list of ops to be deleted */
59 spinlock_t object_list_lock;
60 spinlock_t op_gc_list_lock;
61 atomic_t object_count; /* no. of live objects in this cache */
62 struct fscache_object *fsdef; /* object for the fsdef index */
63 unsigned long flags;
64#define FSCACHE_IOERROR 0 /* cache stopped on I/O error */
65#define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */
66};
67
68extern wait_queue_head_t fscache_cache_cleared_wq;
69
70/*
71 * operation to be applied to a cache object
72 * - retrieval initiation operations are done in the context of the process
73 * that issued them, and not in an async thread pool
74 */
75typedef void (*fscache_operation_release_t)(struct fscache_operation *op);
76typedef void (*fscache_operation_processor_t)(struct fscache_operation *op);
77
78struct fscache_operation {
79 union {
80 struct work_struct fast_work; /* record for fast ops */
81 struct slow_work slow_work; /* record for (very) slow ops */
82 };
83 struct list_head pend_link; /* link in object->pending_ops */
84 struct fscache_object *object; /* object to be operated upon */
85
86 unsigned long flags;
87#define FSCACHE_OP_TYPE 0x000f /* operation type */
88#define FSCACHE_OP_FAST 0x0001 /* - fast op, processor may not sleep for disk */
89#define FSCACHE_OP_SLOW 0x0002 /* - (very) slow op, processor may sleep for disk */
90#define FSCACHE_OP_MYTHREAD 0x0003 /* - processing is done be issuing thread, not pool */
91#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */
92#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */
93#define FSCACHE_OP_DEAD 6 /* op is now dead */
94
95 atomic_t usage;
96 unsigned debug_id; /* debugging ID */
97
98 /* operation processor callback
99 * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform
100 * the op in a non-pool thread */
101 fscache_operation_processor_t processor;
102
103 /* operation releaser */
104 fscache_operation_release_t release;
105};
106
107extern atomic_t fscache_op_debug_id;
108extern const struct slow_work_ops fscache_op_slow_work_ops;
109
110extern void fscache_enqueue_operation(struct fscache_operation *);
111extern void fscache_put_operation(struct fscache_operation *);
112
113/**
114 * fscache_operation_init - Do basic initialisation of an operation
115 * @op: The operation to initialise
116 * @release: The release function to assign
117 *
118 * Do basic initialisation of an operation. The caller must still set flags,
119 * object, either fast_work or slow_work if necessary, and processor if needed.
120 */
121static inline void fscache_operation_init(struct fscache_operation *op,
122 fscache_operation_release_t release)
123{
124 atomic_set(&op->usage, 1);
125 op->debug_id = atomic_inc_return(&fscache_op_debug_id);
126 op->release = release;
127 INIT_LIST_HEAD(&op->pend_link);
128}
129
130/**
131 * fscache_operation_init_slow - Do additional initialisation of a slow op
132 * @op: The operation to initialise
133 * @processor: The processor function to assign
134 *
135 * Do additional initialisation of an operation as required for slow work.
136 */
137static inline
138void fscache_operation_init_slow(struct fscache_operation *op,
139 fscache_operation_processor_t processor)
140{
141 op->processor = processor;
142 slow_work_init(&op->slow_work, &fscache_op_slow_work_ops);
143}
144
145/*
146 * data read operation
147 */
148struct fscache_retrieval {
149 struct fscache_operation op;
150 struct address_space *mapping; /* netfs pages */
151 fscache_rw_complete_t end_io_func; /* function to call on I/O completion */
152 void *context; /* netfs read context (pinned) */
153 struct list_head to_do; /* list of things to be done by the backend */
154 unsigned long start_time; /* time at which retrieval started */
155};
156
157typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op,
158 struct page *page,
159 gfp_t gfp);
160
161typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op,
162 struct list_head *pages,
163 unsigned *nr_pages,
164 gfp_t gfp);
165
166/**
167 * fscache_get_retrieval - Get an extra reference on a retrieval operation
168 * @op: The retrieval operation to get a reference on
169 *
170 * Get an extra reference on a retrieval operation.
171 */
172static inline
173struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op)
174{
175 atomic_inc(&op->op.usage);
176 return op;
177}
178
179/**
180 * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing
181 * @op: The retrieval operation affected
182 *
183 * Enqueue a retrieval operation for processing by the FS-Cache thread pool.
184 */
185static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
186{
187 fscache_enqueue_operation(&op->op);
188}
189
190/**
191 * fscache_put_retrieval - Drop a reference to a retrieval operation
192 * @op: The retrieval operation affected
193 *
194 * Drop a reference to a retrieval operation.
195 */
196static inline void fscache_put_retrieval(struct fscache_retrieval *op)
197{
198 fscache_put_operation(&op->op);
199}
200
201/*
202 * cached page storage work item
203 * - used to do three things:
204 * - batch writes to the cache
205 * - do cache writes asynchronously
206 * - defer writes until cache object lookup completion
207 */
208struct fscache_storage {
209 struct fscache_operation op;
210 pgoff_t store_limit; /* don't write more than this */
211};
212
213/*
214 * cache operations
215 */
216struct fscache_cache_ops {
217 /* name of cache provider */
218 const char *name;
219
220 /* allocate an object record for a cookie */
221 struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
222 struct fscache_cookie *cookie);
223
224 /* look up the object for a cookie */
225 void (*lookup_object)(struct fscache_object *object);
226
227 /* finished looking up */
228 void (*lookup_complete)(struct fscache_object *object);
229
230 /* increment the usage count on this object (may fail if unmounting) */
231 struct fscache_object *(*grab_object)(struct fscache_object *object);
232
233 /* pin an object in the cache */
234 int (*pin_object)(struct fscache_object *object);
235
236 /* unpin an object in the cache */
237 void (*unpin_object)(struct fscache_object *object);
238
239 /* store the updated auxilliary data on an object */
240 void (*update_object)(struct fscache_object *object);
241
242 /* discard the resources pinned by an object and effect retirement if
243 * necessary */
244 void (*drop_object)(struct fscache_object *object);
245
246 /* dispose of a reference to an object */
247 void (*put_object)(struct fscache_object *object);
248
249 /* sync a cache */
250 void (*sync_cache)(struct fscache_cache *cache);
251
252 /* notification that the attributes of a non-index object (such as
253 * i_size) have changed */
254 int (*attr_changed)(struct fscache_object *object);
255
256 /* reserve space for an object's data and associated metadata */
257 int (*reserve_space)(struct fscache_object *object, loff_t i_size);
258
259 /* request a backing block for a page be read or allocated in the
260 * cache */
261 fscache_page_retrieval_func_t read_or_alloc_page;
262
263 /* request backing blocks for a list of pages be read or allocated in
264 * the cache */
265 fscache_pages_retrieval_func_t read_or_alloc_pages;
266
267 /* request a backing block for a page be allocated in the cache so that
268 * it can be written directly */
269 fscache_page_retrieval_func_t allocate_page;
270
271 /* request backing blocks for pages be allocated in the cache so that
272 * they can be written directly */
273 fscache_pages_retrieval_func_t allocate_pages;
274
275 /* write a page to its backing block in the cache */
276 int (*write_page)(struct fscache_storage *op, struct page *page);
277
278 /* detach backing block from a page (optional)
279 * - must release the cookie lock before returning
280 * - may sleep
281 */
282 void (*uncache_page)(struct fscache_object *object,
283 struct page *page);
284
285 /* dissociate a cache from all the pages it was backing */
286 void (*dissociate_pages)(struct fscache_cache *cache);
287};
288
289/*
290 * data file or index object cookie
291 * - a file will only appear in one cache
292 * - a request to cache a file may or may not be honoured, subject to
293 * constraints such as disk space
294 * - indices are created on disk just-in-time
295 */
296struct fscache_cookie {
297 atomic_t usage; /* number of users of this cookie */
298 atomic_t n_children; /* number of children of this cookie */
299 spinlock_t lock;
300 struct hlist_head backing_objects; /* object(s) backing this file/index */
301 const struct fscache_cookie_def *def; /* definition */
302 struct fscache_cookie *parent; /* parent of this entry */
303 void *netfs_data; /* back pointer to netfs */
304 struct radix_tree_root stores; /* pages to be stored on this cookie */
305#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */
306
307 unsigned long flags;
308#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */
309#define FSCACHE_COOKIE_CREATING 1 /* T if non-index object being created still */
310#define FSCACHE_COOKIE_NO_DATA_YET 2 /* T if new object with no cached data yet */
311#define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */
312#define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */
313#define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */
314};
315
316extern struct fscache_cookie fscache_fsdef_index;
317
318/*
319 * on-disk cache file or index handle
320 */
321struct fscache_object {
322 enum fscache_object_state {
323 FSCACHE_OBJECT_INIT, /* object in initial unbound state */
324 FSCACHE_OBJECT_LOOKING_UP, /* looking up object */
325 FSCACHE_OBJECT_CREATING, /* creating object */
326
327 /* active states */
328 FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */
329 FSCACHE_OBJECT_ACTIVE, /* object is usable */
330 FSCACHE_OBJECT_UPDATING, /* object is updating */
331
332 /* terminal states */
333 FSCACHE_OBJECT_DYING, /* object waiting for accessors to finish */
334 FSCACHE_OBJECT_LC_DYING, /* object cleaning up after lookup/create */
335 FSCACHE_OBJECT_ABORT_INIT, /* abort the init state */
336 FSCACHE_OBJECT_RELEASING, /* releasing object */
337 FSCACHE_OBJECT_RECYCLING, /* retiring object */
338 FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */
339 FSCACHE_OBJECT_DEAD, /* object is now dead */
340 } state;
341
342 int debug_id; /* debugging ID */
343 int n_children; /* number of child objects */
344 int n_ops; /* number of ops outstanding on object */
345 int n_obj_ops; /* number of object ops outstanding on object */
346 int n_in_progress; /* number of ops in progress */
347 int n_exclusive; /* number of exclusive ops queued */
348 spinlock_t lock; /* state and operations lock */
349
350 unsigned long lookup_jif; /* time at which lookup started */
351 unsigned long event_mask; /* events this object is interested in */
352 unsigned long events; /* events to be processed by this object
353 * (order is important - using fls) */
354#define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */
355#define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */
356#define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */
357#define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */
358#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */
359#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */
360#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */
361
362 unsigned long flags;
363#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */
364#define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */
365#define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */
366
367 struct list_head cache_link; /* link in cache->object_list */
368 struct hlist_node cookie_link; /* link in cookie->backing_objects */
369 struct fscache_cache *cache; /* cache that supplied this object */
370 struct fscache_cookie *cookie; /* netfs's file/index object */
371 struct fscache_object *parent; /* parent object */
372 struct slow_work work; /* attention scheduling record */
373 struct list_head dependents; /* FIFO of dependent objects */
374 struct list_head dep_link; /* link in parent's dependents list */
375 struct list_head pending_ops; /* unstarted operations on this object */
376 pgoff_t store_limit; /* current storage limit */
377};
378
379extern const char *fscache_object_states[];
380
381#define fscache_object_is_active(obj) \
382 (!test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \
383 (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \
384 (obj)->state < FSCACHE_OBJECT_DYING)
385
386extern const struct slow_work_ops fscache_object_slow_work_ops;
387
388/**
389 * fscache_object_init - Initialise a cache object description
390 * @object: Object description
391 *
392 * Initialise a cache object description to its basic values.
393 *
394 * See Documentation/filesystems/caching/backend-api.txt for a complete
395 * description.
396 */
397static inline
398void fscache_object_init(struct fscache_object *object,
399 struct fscache_cookie *cookie,
400 struct fscache_cache *cache)
401{
402 atomic_inc(&cache->object_count);
403
404 object->state = FSCACHE_OBJECT_INIT;
405 spin_lock_init(&object->lock);
406 INIT_LIST_HEAD(&object->cache_link);
407 INIT_HLIST_NODE(&object->cookie_link);
408 vslow_work_init(&object->work, &fscache_object_slow_work_ops);
409 INIT_LIST_HEAD(&object->dependents);
410 INIT_LIST_HEAD(&object->dep_link);
411 INIT_LIST_HEAD(&object->pending_ops);
412 object->n_children = 0;
413 object->n_ops = object->n_in_progress = object->n_exclusive = 0;
414 object->events = object->event_mask = 0;
415 object->flags = 0;
416 object->store_limit = 0;
417 object->cache = cache;
418 object->cookie = cookie;
419 object->parent = NULL;
420}
421
422extern void fscache_object_lookup_negative(struct fscache_object *object);
423extern void fscache_obtained_object(struct fscache_object *object);
424
425/**
426 * fscache_object_destroyed - Note destruction of an object in a cache
427 * @cache: The cache from which the object came
428 *
429 * Note the destruction and deallocation of an object record in a cache.
430 */
431static inline void fscache_object_destroyed(struct fscache_cache *cache)
432{
433 if (atomic_dec_and_test(&cache->object_count))
434 wake_up_all(&fscache_cache_cleared_wq);
435}
436
437/**
438 * fscache_object_lookup_error - Note an object encountered an error
439 * @object: The object on which the error was encountered
440 *
441 * Note that an object encountered a fatal error (usually an I/O error) and
442 * that it should be withdrawn as soon as possible.
443 */
444static inline void fscache_object_lookup_error(struct fscache_object *object)
445{
446 set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events);
447}
448
449/**
450 * fscache_set_store_limit - Set the maximum size to be stored in an object
451 * @object: The object to set the maximum on
452 * @i_size: The limit to set in bytes
453 *
454 * Set the maximum size an object is permitted to reach, implying the highest
455 * byte that may be written. Intended to be called by the attr_changed() op.
456 *
457 * See Documentation/filesystems/caching/backend-api.txt for a complete
458 * description.
459 */
460static inline
461void fscache_set_store_limit(struct fscache_object *object, loff_t i_size)
462{
463 object->store_limit = i_size >> PAGE_SHIFT;
464 if (i_size & ~PAGE_MASK)
465 object->store_limit++;
466}
467
468/**
469 * fscache_end_io - End a retrieval operation on a page
470 * @op: The FS-Cache operation covering the retrieval
471 * @page: The page that was to be fetched
472 * @error: The error code (0 if successful)
473 *
474 * Note the end of an operation to retrieve a page, as covered by a particular
475 * operation record.
476 */
477static inline void fscache_end_io(struct fscache_retrieval *op,
478 struct page *page, int error)
479{
480 op->end_io_func(page, op->context, error);
481}
482
483/*
484 * out-of-line cache backend functions
485 */
486extern void fscache_init_cache(struct fscache_cache *cache,
487 const struct fscache_cache_ops *ops,
488 const char *idfmt,
489 ...) __attribute__ ((format (printf, 3, 4)));
490
491extern int fscache_add_cache(struct fscache_cache *cache,
492 struct fscache_object *fsdef,
493 const char *tagname);
494extern void fscache_withdraw_cache(struct fscache_cache *cache);
495
496extern void fscache_io_error(struct fscache_cache *cache);
497
498extern void fscache_mark_pages_cached(struct fscache_retrieval *op,
499 struct pagevec *pagevec);
500
501extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
502 const void *data,
503 uint16_t datalen);
504
505#endif /* _LINUX_FSCACHE_CACHE_H */
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
new file mode 100644
index 000000000000..6d8ee466e0a0
--- /dev/null
+++ b/include/linux/fscache.h
@@ -0,0 +1,618 @@
1/* General filesystem caching interface
2 *
3 * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * NOTE!!! See:
12 *
13 * Documentation/filesystems/caching/netfs-api.txt
14 *
15 * for a description of the network filesystem interface declared here.
16 */
17
18#ifndef _LINUX_FSCACHE_H
19#define _LINUX_FSCACHE_H
20
21#include <linux/fs.h>
22#include <linux/list.h>
23#include <linux/pagemap.h>
24#include <linux/pagevec.h>
25
26#if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
27#define fscache_available() (1)
28#define fscache_cookie_valid(cookie) (cookie)
29#else
30#define fscache_available() (0)
31#define fscache_cookie_valid(cookie) (0)
32#endif
33
34
35/*
36 * overload PG_private_2 to give us PG_fscache - this is used to indicate that
37 * a page is currently backed by a local disk cache
38 */
39#define PageFsCache(page) PagePrivate2((page))
40#define SetPageFsCache(page) SetPagePrivate2((page))
41#define ClearPageFsCache(page) ClearPagePrivate2((page))
42#define TestSetPageFsCache(page) TestSetPagePrivate2((page))
43#define TestClearPageFsCache(page) TestClearPagePrivate2((page))
44
45/* pattern used to fill dead space in an index entry */
46#define FSCACHE_INDEX_DEADFILL_PATTERN 0x79
47
48struct pagevec;
49struct fscache_cache_tag;
50struct fscache_cookie;
51struct fscache_netfs;
52
53typedef void (*fscache_rw_complete_t)(struct page *page,
54 void *context,
55 int error);
56
57/* result of index entry consultation */
58enum fscache_checkaux {
59 FSCACHE_CHECKAUX_OKAY, /* entry okay as is */
60 FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */
61 FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */
62};
63
64/*
65 * fscache cookie definition
66 */
67struct fscache_cookie_def {
68 /* name of cookie type */
69 char name[16];
70
71 /* cookie type */
72 uint8_t type;
73#define FSCACHE_COOKIE_TYPE_INDEX 0
74#define FSCACHE_COOKIE_TYPE_DATAFILE 1
75
76 /* select the cache into which to insert an entry in this index
77 * - optional
78 * - should return a cache identifier or NULL to cause the cache to be
79 * inherited from the parent if possible or the first cache picked
80 * for a non-index file if not
81 */
82 struct fscache_cache_tag *(*select_cache)(
83 const void *parent_netfs_data,
84 const void *cookie_netfs_data);
85
86 /* get an index key
87 * - should store the key data in the buffer
88 * - should return the amount of amount stored
89 * - not permitted to return an error
90 * - the netfs data from the cookie being used as the source is
91 * presented
92 */
93 uint16_t (*get_key)(const void *cookie_netfs_data,
94 void *buffer,
95 uint16_t bufmax);
96
97 /* get certain file attributes from the netfs data
98 * - this function can be absent for an index
99 * - not permitted to return an error
100 * - the netfs data from the cookie being used as the source is
101 * presented
102 */
103 void (*get_attr)(const void *cookie_netfs_data, uint64_t *size);
104
105 /* get the auxilliary data from netfs data
106 * - this function can be absent if the index carries no state data
107 * - should store the auxilliary data in the buffer
108 * - should return the amount of amount stored
109 * - not permitted to return an error
110 * - the netfs data from the cookie being used as the source is
111 * presented
112 */
113 uint16_t (*get_aux)(const void *cookie_netfs_data,
114 void *buffer,
115 uint16_t bufmax);
116
117 /* consult the netfs about the state of an object
118 * - this function can be absent if the index carries no state data
119 * - the netfs data from the cookie being used as the target is
120 * presented, as is the auxilliary data
121 */
122 enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
123 const void *data,
124 uint16_t datalen);
125
126 /* get an extra reference on a read context
127 * - this function can be absent if the completion function doesn't
128 * require a context
129 */
130 void (*get_context)(void *cookie_netfs_data, void *context);
131
132 /* release an extra reference on a read context
133 * - this function can be absent if the completion function doesn't
134 * require a context
135 */
136 void (*put_context)(void *cookie_netfs_data, void *context);
137
138 /* indicate pages that now have cache metadata retained
139 * - this function should mark the specified pages as now being cached
140 * - the pages will have been marked with PG_fscache before this is
141 * called, so this is optional
142 */
143 void (*mark_pages_cached)(void *cookie_netfs_data,
144 struct address_space *mapping,
145 struct pagevec *cached_pvec);
146
147 /* indicate the cookie is no longer cached
148 * - this function is called when the backing store currently caching
149 * a cookie is removed
150 * - the netfs should use this to clean up any markers indicating
151 * cached pages
152 * - this is mandatory for any object that may have data
153 */
154 void (*now_uncached)(void *cookie_netfs_data);
155};
156
157/*
158 * fscache cached network filesystem type
159 * - name, version and ops must be filled in before registration
160 * - all other fields will be set during registration
161 */
162struct fscache_netfs {
163 uint32_t version; /* indexing version */
164 const char *name; /* filesystem name */
165 struct fscache_cookie *primary_index;
166 struct list_head link; /* internal link */
167};
168
169/*
170 * slow-path functions for when there is actually caching available, and the
171 * netfs does actually have a valid token
172 * - these are not to be called directly
173 * - these are undefined symbols when FS-Cache is not configured and the
174 * optimiser takes care of not using them
175 */
176extern int __fscache_register_netfs(struct fscache_netfs *);
177extern void __fscache_unregister_netfs(struct fscache_netfs *);
178extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *);
179extern void __fscache_release_cache_tag(struct fscache_cache_tag *);
180
181extern struct fscache_cookie *__fscache_acquire_cookie(
182 struct fscache_cookie *,
183 const struct fscache_cookie_def *,
184 void *);
185extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
186extern void __fscache_update_cookie(struct fscache_cookie *);
187extern int __fscache_attr_changed(struct fscache_cookie *);
188extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
189 struct page *,
190 fscache_rw_complete_t,
191 void *,
192 gfp_t);
193extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
194 struct address_space *,
195 struct list_head *,
196 unsigned *,
197 fscache_rw_complete_t,
198 void *,
199 gfp_t);
200extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
201extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t);
202extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
203extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
204extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
205
206/**
207 * fscache_register_netfs - Register a filesystem as desiring caching services
208 * @netfs: The description of the filesystem
209 *
210 * Register a filesystem as desiring caching services if they're available.
211 *
212 * See Documentation/filesystems/caching/netfs-api.txt for a complete
213 * description.
214 */
215static inline
216int fscache_register_netfs(struct fscache_netfs *netfs)
217{
218 if (fscache_available())
219 return __fscache_register_netfs(netfs);
220 else
221 return 0;
222}
223
224/**
225 * fscache_unregister_netfs - Indicate that a filesystem no longer desires
226 * caching services
227 * @netfs: The description of the filesystem
228 *
229 * Indicate that a filesystem no longer desires caching services for the
230 * moment.
231 *
232 * See Documentation/filesystems/caching/netfs-api.txt for a complete
233 * description.
234 */
235static inline
236void fscache_unregister_netfs(struct fscache_netfs *netfs)
237{
238 if (fscache_available())
239 __fscache_unregister_netfs(netfs);
240}
241
242/**
243 * fscache_lookup_cache_tag - Look up a cache tag
244 * @name: The name of the tag to search for
245 *
246 * Acquire a specific cache referral tag that can be used to select a specific
247 * cache in which to cache an index.
248 *
249 * See Documentation/filesystems/caching/netfs-api.txt for a complete
250 * description.
251 */
252static inline
253struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name)
254{
255 if (fscache_available())
256 return __fscache_lookup_cache_tag(name);
257 else
258 return NULL;
259}
260
261/**
262 * fscache_release_cache_tag - Release a cache tag
263 * @tag: The tag to release
264 *
265 * Release a reference to a cache referral tag previously looked up.
266 *
267 * See Documentation/filesystems/caching/netfs-api.txt for a complete
268 * description.
269 */
270static inline
271void fscache_release_cache_tag(struct fscache_cache_tag *tag)
272{
273 if (fscache_available())
274 __fscache_release_cache_tag(tag);
275}
276
277/**
278 * fscache_acquire_cookie - Acquire a cookie to represent a cache object
279 * @parent: The cookie that's to be the parent of this one
280 * @def: A description of the cache object, including callback operations
281 * @netfs_data: An arbitrary piece of data to be kept in the cookie to
282 * represent the cache object to the netfs
283 *
284 * This function is used to inform FS-Cache about part of an index hierarchy
285 * that can be used to locate files. This is done by requesting a cookie for
286 * each index in the path to the file.
287 *
288 * See Documentation/filesystems/caching/netfs-api.txt for a complete
289 * description.
290 */
291static inline
292struct fscache_cookie *fscache_acquire_cookie(
293 struct fscache_cookie *parent,
294 const struct fscache_cookie_def *def,
295 void *netfs_data)
296{
297 if (fscache_cookie_valid(parent))
298 return __fscache_acquire_cookie(parent, def, netfs_data);
299 else
300 return NULL;
301}
302
303/**
304 * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding
305 * it
306 * @cookie: The cookie being returned
307 * @retire: True if the cache object the cookie represents is to be discarded
308 *
309 * This function returns a cookie to the cache, forcibly discarding the
310 * associated cache object if retire is set to true.
311 *
312 * See Documentation/filesystems/caching/netfs-api.txt for a complete
313 * description.
314 */
315static inline
316void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
317{
318 if (fscache_cookie_valid(cookie))
319 __fscache_relinquish_cookie(cookie, retire);
320}
321
322/**
323 * fscache_update_cookie - Request that a cache object be updated
324 * @cookie: The cookie representing the cache object
325 *
326 * Request an update of the index data for the cache object associated with the
327 * cookie.
328 *
329 * See Documentation/filesystems/caching/netfs-api.txt for a complete
330 * description.
331 */
332static inline
333void fscache_update_cookie(struct fscache_cookie *cookie)
334{
335 if (fscache_cookie_valid(cookie))
336 __fscache_update_cookie(cookie);
337}
338
339/**
340 * fscache_pin_cookie - Pin a data-storage cache object in its cache
341 * @cookie: The cookie representing the cache object
342 *
343 * Permit data-storage cache objects to be pinned in the cache.
344 *
345 * See Documentation/filesystems/caching/netfs-api.txt for a complete
346 * description.
347 */
348static inline
349int fscache_pin_cookie(struct fscache_cookie *cookie)
350{
351 return -ENOBUFS;
352}
353
354/**
355 * fscache_pin_cookie - Unpin a data-storage cache object in its cache
356 * @cookie: The cookie representing the cache object
357 *
358 * Permit data-storage cache objects to be unpinned from the cache.
359 *
360 * See Documentation/filesystems/caching/netfs-api.txt for a complete
361 * description.
362 */
363static inline
364void fscache_unpin_cookie(struct fscache_cookie *cookie)
365{
366}
367
368/**
369 * fscache_attr_changed - Notify cache that an object's attributes changed
370 * @cookie: The cookie representing the cache object
371 *
372 * Send a notification to the cache indicating that an object's attributes have
373 * changed. This includes the data size. These attributes will be obtained
374 * through the get_attr() cookie definition op.
375 *
376 * See Documentation/filesystems/caching/netfs-api.txt for a complete
377 * description.
378 */
379static inline
380int fscache_attr_changed(struct fscache_cookie *cookie)
381{
382 if (fscache_cookie_valid(cookie))
383 return __fscache_attr_changed(cookie);
384 else
385 return -ENOBUFS;
386}
387
388/**
389 * fscache_reserve_space - Reserve data space for a cached object
390 * @cookie: The cookie representing the cache object
391 * @i_size: The amount of space to be reserved
392 *
393 * Reserve an amount of space in the cache for the cache object attached to a
394 * cookie so that a write to that object within the space can always be
395 * honoured.
396 *
397 * See Documentation/filesystems/caching/netfs-api.txt for a complete
398 * description.
399 */
400static inline
401int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size)
402{
403 return -ENOBUFS;
404}
405
406/**
407 * fscache_read_or_alloc_page - Read a page from the cache or allocate a block
408 * in which to store it
409 * @cookie: The cookie representing the cache object
410 * @page: The netfs page to fill if possible
411 * @end_io_func: The callback to invoke when and if the page is filled
412 * @context: An arbitrary piece of data to pass on to end_io_func()
413 * @gfp: The conditions under which memory allocation should be made
414 *
415 * Read a page from the cache, or if that's not possible make a potential
416 * one-block reservation in the cache into which the page may be stored once
417 * fetched from the server.
418 *
419 * If the page is not backed by the cache object, or if it there's some reason
420 * it can't be, -ENOBUFS will be returned and nothing more will be done for
421 * that page.
422 *
423 * Else, if that page is backed by the cache, a read will be initiated directly
424 * to the netfs's page and 0 will be returned by this function. The
425 * end_io_func() callback will be invoked when the operation terminates on a
426 * completion or failure. Note that the callback may be invoked before the
427 * return.
428 *
429 * Else, if the page is unbacked, -ENODATA is returned and a block may have
430 * been allocated in the cache.
431 *
432 * See Documentation/filesystems/caching/netfs-api.txt for a complete
433 * description.
434 */
435static inline
436int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
437 struct page *page,
438 fscache_rw_complete_t end_io_func,
439 void *context,
440 gfp_t gfp)
441{
442 if (fscache_cookie_valid(cookie))
443 return __fscache_read_or_alloc_page(cookie, page, end_io_func,
444 context, gfp);
445 else
446 return -ENOBUFS;
447}
448
449/**
450 * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate
451 * blocks in which to store them
452 * @cookie: The cookie representing the cache object
453 * @mapping: The netfs inode mapping to which the pages will be attached
454 * @pages: A list of potential netfs pages to be filled
455 * @end_io_func: The callback to invoke when and if each page is filled
456 * @context: An arbitrary piece of data to pass on to end_io_func()
457 * @gfp: The conditions under which memory allocation should be made
458 *
459 * Read a set of pages from the cache, or if that's not possible, attempt to
460 * make a potential one-block reservation for each page in the cache into which
461 * that page may be stored once fetched from the server.
462 *
463 * If some pages are not backed by the cache object, or if it there's some
464 * reason they can't be, -ENOBUFS will be returned and nothing more will be
465 * done for that pages.
466 *
467 * Else, if some of the pages are backed by the cache, a read will be initiated
468 * directly to the netfs's page and 0 will be returned by this function. The
469 * end_io_func() callback will be invoked when the operation terminates on a
470 * completion or failure. Note that the callback may be invoked before the
471 * return.
472 *
473 * Else, if a page is unbacked, -ENODATA is returned and a block may have
474 * been allocated in the cache.
475 *
476 * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in
477 * regard to different pages, the return values are prioritised in that order.
478 * Any pages submitted for reading are removed from the pages list.
479 *
480 * See Documentation/filesystems/caching/netfs-api.txt for a complete
481 * description.
482 */
483static inline
484int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
485 struct address_space *mapping,
486 struct list_head *pages,
487 unsigned *nr_pages,
488 fscache_rw_complete_t end_io_func,
489 void *context,
490 gfp_t gfp)
491{
492 if (fscache_cookie_valid(cookie))
493 return __fscache_read_or_alloc_pages(cookie, mapping, pages,
494 nr_pages, end_io_func,
495 context, gfp);
496 else
497 return -ENOBUFS;
498}
499
500/**
501 * fscache_alloc_page - Allocate a block in which to store a page
502 * @cookie: The cookie representing the cache object
503 * @page: The netfs page to allocate a page for
504 * @gfp: The conditions under which memory allocation should be made
505 *
506 * Request Allocation a block in the cache in which to store a netfs page
507 * without retrieving any contents from the cache.
508 *
509 * If the page is not backed by a file then -ENOBUFS will be returned and
510 * nothing more will be done, and no reservation will be made.
511 *
512 * Else, a block will be allocated if one wasn't already, and 0 will be
513 * returned
514 *
515 * See Documentation/filesystems/caching/netfs-api.txt for a complete
516 * description.
517 */
518static inline
519int fscache_alloc_page(struct fscache_cookie *cookie,
520 struct page *page,
521 gfp_t gfp)
522{
523 if (fscache_cookie_valid(cookie))
524 return __fscache_alloc_page(cookie, page, gfp);
525 else
526 return -ENOBUFS;
527}
528
529/**
530 * fscache_write_page - Request storage of a page in the cache
531 * @cookie: The cookie representing the cache object
532 * @page: The netfs page to store
533 * @gfp: The conditions under which memory allocation should be made
534 *
535 * Request the contents of the netfs page be written into the cache. This
536 * request may be ignored if no cache block is currently allocated, in which
537 * case it will return -ENOBUFS.
538 *
539 * If a cache block was already allocated, a write will be initiated and 0 will
540 * be returned. The PG_fscache_write page bit is set immediately and will then
541 * be cleared at the completion of the write to indicate the success or failure
542 * of the operation. Note that the completion may happen before the return.
543 *
544 * See Documentation/filesystems/caching/netfs-api.txt for a complete
545 * description.
546 */
547static inline
548int fscache_write_page(struct fscache_cookie *cookie,
549 struct page *page,
550 gfp_t gfp)
551{
552 if (fscache_cookie_valid(cookie))
553 return __fscache_write_page(cookie, page, gfp);
554 else
555 return -ENOBUFS;
556}
557
558/**
559 * fscache_uncache_page - Indicate that caching is no longer required on a page
560 * @cookie: The cookie representing the cache object
561 * @page: The netfs page that was being cached.
562 *
563 * Tell the cache that we no longer want a page to be cached and that it should
564 * remove any knowledge of the netfs page it may have.
565 *
566 * Note that this cannot cancel any outstanding I/O operations between this
567 * page and the cache.
568 *
569 * See Documentation/filesystems/caching/netfs-api.txt for a complete
570 * description.
571 */
572static inline
573void fscache_uncache_page(struct fscache_cookie *cookie,
574 struct page *page)
575{
576 if (fscache_cookie_valid(cookie))
577 __fscache_uncache_page(cookie, page);
578}
579
580/**
581 * fscache_check_page_write - Ask if a page is being writing to the cache
582 * @cookie: The cookie representing the cache object
583 * @page: The netfs page that is being cached.
584 *
585 * Ask the cache if a page is being written to the cache.
586 *
587 * See Documentation/filesystems/caching/netfs-api.txt for a complete
588 * description.
589 */
590static inline
591bool fscache_check_page_write(struct fscache_cookie *cookie,
592 struct page *page)
593{
594 if (fscache_cookie_valid(cookie))
595 return __fscache_check_page_write(cookie, page);
596 return false;
597}
598
599/**
600 * fscache_wait_on_page_write - Wait for a page to complete writing to the cache
601 * @cookie: The cookie representing the cache object
602 * @page: The netfs page that is being cached.
603 *
604 * Ask the cache to wake us up when a page is no longer being written to the
605 * cache.
606 *
607 * See Documentation/filesystems/caching/netfs-api.txt for a complete
608 * description.
609 */
610static inline
611void fscache_wait_on_page_write(struct fscache_cookie *cookie,
612 struct page *page)
613{
614 if (fscache_cookie_valid(cookie))
615 __fscache_wait_on_page_write(cookie, page);
616}
617
618#endif /* _LINUX_FSCACHE_H */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index bde2557c2a9c..fdffb413b192 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,6 +185,9 @@ struct nfs_inode {
185 fmode_t delegation_state; 185 fmode_t delegation_state;
186 struct rw_semaphore rwsem; 186 struct rw_semaphore rwsem;
187#endif /* CONFIG_NFS_V4*/ 187#endif /* CONFIG_NFS_V4*/
188#ifdef CONFIG_NFS_FSCACHE
189 struct fscache_cookie *fscache;
190#endif
188 struct inode vfs_inode; 191 struct inode vfs_inode;
189}; 192};
190 193
@@ -207,6 +210,8 @@ struct nfs_inode {
207#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ 210#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */
208#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ 211#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */
209#define NFS_INO_FLUSHING (4) /* inode is flushing out data */ 212#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
213#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
214#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
210 215
211static inline struct nfs_inode *NFS_I(const struct inode *inode) 216static inline struct nfs_inode *NFS_I(const struct inode *inode)
212{ 217{
@@ -260,6 +265,11 @@ static inline int NFS_STALE(const struct inode *inode)
260 return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 265 return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
261} 266}
262 267
268static inline int NFS_FSCACHE(const struct inode *inode)
269{
270 return test_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags);
271}
272
263static inline __u64 NFS_FILEID(const struct inode *inode) 273static inline __u64 NFS_FILEID(const struct inode *inode)
264{ 274{
265 return NFS_I(inode)->fileid; 275 return NFS_I(inode)->fileid;
@@ -506,6 +516,8 @@ extern int nfs_readpages(struct file *, struct address_space *,
506 struct list_head *, unsigned); 516 struct list_head *, unsigned);
507extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); 517extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
508extern void nfs_readdata_release(void *data); 518extern void nfs_readdata_release(void *data);
519extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
520 struct page *);
509 521
510/* 522/*
511 * Allocate nfs_read_data structures 523 * Allocate nfs_read_data structures
@@ -583,6 +595,7 @@ extern void * nfs_root_data(void);
583#define NFSDBG_CALLBACK 0x0100 595#define NFSDBG_CALLBACK 0x0100
584#define NFSDBG_CLIENT 0x0200 596#define NFSDBG_CLIENT 0x0200
585#define NFSDBG_MOUNT 0x0400 597#define NFSDBG_MOUNT 0x0400
598#define NFSDBG_FSCACHE 0x0800
586#define NFSDBG_ALL 0xFFFF 599#define NFSDBG_ALL 0xFFFF
587 600
588#ifdef __KERNEL__ 601#ifdef __KERNEL__
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 29b1e40dce99..6ad75948cbf7 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -64,6 +64,10 @@ struct nfs_client {
64 char cl_ipaddr[48]; 64 char cl_ipaddr[48];
65 unsigned char cl_id_uniquifier; 65 unsigned char cl_id_uniquifier;
66#endif 66#endif
67
68#ifdef CONFIG_NFS_FSCACHE
69 struct fscache_cookie *fscache; /* client index cache cookie */
70#endif
67}; 71};
68 72
69/* 73/*
@@ -96,12 +100,19 @@ struct nfs_server {
96 unsigned int acdirmin; 100 unsigned int acdirmin;
97 unsigned int acdirmax; 101 unsigned int acdirmax;
98 unsigned int namelen; 102 unsigned int namelen;
103 unsigned int options; /* extra options enabled by mount */
104#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
99 105
100 struct nfs_fsid fsid; 106 struct nfs_fsid fsid;
101 __u64 maxfilesize; /* maximum file size */ 107 __u64 maxfilesize; /* maximum file size */
102 unsigned long mount_time; /* when this fs was mounted */ 108 unsigned long mount_time; /* when this fs was mounted */
103 dev_t s_dev; /* superblock dev numbers */ 109 dev_t s_dev; /* superblock dev numbers */
104 110
111#ifdef CONFIG_NFS_FSCACHE
112 struct nfs_fscache_key *fscache_key; /* unique key for superblock */
113 struct fscache_cookie *fscache; /* superblock cookie */
114#endif
115
105#ifdef CONFIG_NFS_V4 116#ifdef CONFIG_NFS_V4
106 u32 attr_bitmask[2];/* V4 bitmask representing the set 117 u32 attr_bitmask[2];/* V4 bitmask representing the set
107 of attributes supported on this 118 of attributes supported on this
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
index 1cb9a3fed2b3..68b10f5f8907 100644
--- a/include/linux/nfs_iostat.h
+++ b/include/linux/nfs_iostat.h
@@ -116,4 +116,16 @@ enum nfs_stat_eventcounters {
116 __NFSIOS_COUNTSMAX, 116 __NFSIOS_COUNTSMAX,
117}; 117};
118 118
119/*
120 * NFS local caching servicing counters
121 */
122enum nfs_stat_fscachecounters {
123 NFSIOS_FSCACHE_PAGES_READ_OK,
124 NFSIOS_FSCACHE_PAGES_READ_FAIL,
125 NFSIOS_FSCACHE_PAGES_WRITTEN_OK,
126 NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL,
127 NFSIOS_FSCACHE_PAGES_UNCACHED,
128 __NFSIOS_FSCACHEMAX,
129};
130
119#endif /* _LINUX_NFS_IOSTAT */ 131#endif /* _LINUX_NFS_IOSTAT */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 61df1779b2a5..62214c7d2d93 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -82,6 +82,7 @@ enum pageflags {
82 PG_arch_1, 82 PG_arch_1,
83 PG_reserved, 83 PG_reserved,
84 PG_private, /* If pagecache, has fs-private data */ 84 PG_private, /* If pagecache, has fs-private data */
85 PG_private_2, /* If pagecache, has fs aux data */
85 PG_writeback, /* Page is under writeback */ 86 PG_writeback, /* Page is under writeback */
86#ifdef CONFIG_PAGEFLAGS_EXTENDED 87#ifdef CONFIG_PAGEFLAGS_EXTENDED
87 PG_head, /* A head page */ 88 PG_head, /* A head page */
@@ -108,6 +109,12 @@ enum pageflags {
108 /* Filesystems */ 109 /* Filesystems */
109 PG_checked = PG_owner_priv_1, 110 PG_checked = PG_owner_priv_1,
110 111
112 /* Two page bits are conscripted by FS-Cache to maintain local caching
113 * state. These bits are set on pages belonging to the netfs's inodes
114 * when those inodes are being locally cached.
115 */
116 PG_fscache = PG_private_2, /* page backed by cache */
117
111 /* XEN */ 118 /* XEN */
112 PG_pinned = PG_owner_priv_1, 119 PG_pinned = PG_owner_priv_1,
113 PG_savepinned = PG_dirty, 120 PG_savepinned = PG_dirty,
@@ -182,7 +189,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
182 189
183struct page; /* forward declaration */ 190struct page; /* forward declaration */
184 191
185TESTPAGEFLAG(Locked, locked) 192TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked)
186PAGEFLAG(Error, error) 193PAGEFLAG(Error, error)
187PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) 194PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
188PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) 195PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
@@ -194,8 +201,6 @@ PAGEFLAG(Checked, checked) /* Used by some filesystems */
194PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ 201PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */
195PAGEFLAG(SavePinned, savepinned); /* Xen */ 202PAGEFLAG(SavePinned, savepinned); /* Xen */
196PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) 203PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
197PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
198 __SETPAGEFLAG(Private, private)
199PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) 204PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
200 205
201__PAGEFLAG(SlobPage, slob_page) 206__PAGEFLAG(SlobPage, slob_page)
@@ -205,6 +210,16 @@ __PAGEFLAG(SlubFrozen, slub_frozen)
205__PAGEFLAG(SlubDebug, slub_debug) 210__PAGEFLAG(SlubDebug, slub_debug)
206 211
207/* 212/*
213 * Private page markings that may be used by the filesystem that owns the page
214 * for its own purposes.
215 * - PG_private and PG_private_2 cause releasepage() and co to be invoked
216 */
217PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private)
218 __CLEARPAGEFLAG(Private, private)
219PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2)
220PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
221
222/*
208 * Only test-and-set exist for PG_writeback. The unconditional operators are 223 * Only test-and-set exist for PG_writeback. The unconditional operators are
209 * risky: they bypass page accounting. 224 * risky: they bypass page accounting.
210 */ 225 */
@@ -384,9 +399,10 @@ static inline void __ClearPageTail(struct page *page)
384 * these flags set. It they are, there is a problem. 399 * these flags set. It they are, there is a problem.
385 */ 400 */
386#define PAGE_FLAGS_CHECK_AT_FREE \ 401#define PAGE_FLAGS_CHECK_AT_FREE \
387 (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ 402 (1 << PG_lru | 1 << PG_locked | \
388 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 403 1 << PG_private | 1 << PG_private_2 | \
389 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ 404 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
405 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
390 __PG_UNEVICTABLE | __PG_MLOCKED) 406 __PG_UNEVICTABLE | __PG_MLOCKED)
391 407
392/* 408/*
@@ -397,4 +413,16 @@ static inline void __ClearPageTail(struct page *page)
397#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) 413#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
398 414
399#endif /* !__GENERATING_BOUNDS_H */ 415#endif /* !__GENERATING_BOUNDS_H */
416
417/**
418 * page_has_private - Determine if page has private stuff
419 * @page: The page to be checked
420 *
421 * Determine if a page has private stuff, indicating that release routines
422 * should be invoked upon it.
423 */
424#define page_has_private(page) \
425 ((page)->flags & ((1 << PG_private) | \
426 (1 << PG_private_2)))
427
400#endif /* PAGE_FLAGS_H */ 428#endif /* PAGE_FLAGS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 076a7dc67c2b..34da5230faab 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -384,6 +384,11 @@ static inline void wait_on_page_writeback(struct page *page)
384extern void end_page_writeback(struct page *page); 384extern void end_page_writeback(struct page *page);
385 385
386/* 386/*
387 * Add an arbitrary waiter to a page's wait queue
388 */
389extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
390
391/*
387 * Fault a userspace page into pagetables. Return non-zero on a fault. 392 * Fault a userspace page into pagetables. Return non-zero on a fault.
388 * 393 *
389 * This assumes that two userspace pages are always sufficient. That's 394 * This assumes that two userspace pages are always sufficient. That's
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
new file mode 100644
index 000000000000..85958277f83d
--- /dev/null
+++ b/include/linux/slow-work.h
@@ -0,0 +1,95 @@
1/* Worker thread pool for slow items, such as filesystem lookups or mkdirs
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 *
11 * See Documentation/slow-work.txt
12 */
13
14#ifndef _LINUX_SLOW_WORK_H
15#define _LINUX_SLOW_WORK_H
16
17#ifdef CONFIG_SLOW_WORK
18
19#include <linux/sysctl.h>
20
21struct slow_work;
22
23/*
24 * The operations used to support slow work items
25 */
26struct slow_work_ops {
27 /* get a ref on a work item
28 * - return 0 if successful, -ve if not
29 */
30 int (*get_ref)(struct slow_work *work);
31
32 /* discard a ref to a work item */
33 void (*put_ref)(struct slow_work *work);
34
35 /* execute a work item */
36 void (*execute)(struct slow_work *work);
37};
38
39/*
40 * A slow work item
41 * - A reference is held on the parent object by the thread pool when it is
42 * queued
43 */
44struct slow_work {
45 unsigned long flags;
46#define SLOW_WORK_PENDING 0 /* item pending (further) execution */
47#define SLOW_WORK_EXECUTING 1 /* item currently executing */
48#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */
49#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */
50 const struct slow_work_ops *ops; /* operations table for this item */
51 struct list_head link; /* link in queue */
52};
53
54/**
55 * slow_work_init - Initialise a slow work item
56 * @work: The work item to initialise
57 * @ops: The operations to use to handle the slow work item
58 *
59 * Initialise a slow work item.
60 */
61static inline void slow_work_init(struct slow_work *work,
62 const struct slow_work_ops *ops)
63{
64 work->flags = 0;
65 work->ops = ops;
66 INIT_LIST_HEAD(&work->link);
67}
68
69/**
70 * slow_work_init - Initialise a very slow work item
71 * @work: The work item to initialise
72 * @ops: The operations to use to handle the slow work item
73 *
74 * Initialise a very slow work item. This item will be restricted such that
75 * only a certain number of the pool threads will be able to execute items of
76 * this type.
77 */
78static inline void vslow_work_init(struct slow_work *work,
79 const struct slow_work_ops *ops)
80{
81 work->flags = 1 << SLOW_WORK_VERY_SLOW;
82 work->ops = ops;
83 INIT_LIST_HEAD(&work->link);
84}
85
86extern int slow_work_enqueue(struct slow_work *work);
87extern int slow_work_register_user(void);
88extern void slow_work_unregister_user(void);
89
90#ifdef CONFIG_SYSCTL
91extern ctl_table slow_work_sysctls[];
92#endif
93
94#endif /* CONFIG_SLOW_WORK */
95#endif /* _LINUX_SLOW_WORK_H */
diff --git a/init/Kconfig b/init/Kconfig
index 1398a14b0191..236a79377b8e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1014,6 +1014,18 @@ config MARKERS
1014 1014
1015source "arch/Kconfig" 1015source "arch/Kconfig"
1016 1016
1017config SLOW_WORK
1018 default n
1019 bool "Enable slow work thread pool"
1020 help
1021 The slow work thread pool provides a number of dynamically allocated
1022 threads that can be used by the kernel to perform operations that
1023 take a relatively long time.
1024
1025 An example of this would be CacheFiles doing a path lookup followed
1026 by a series of mkdirs and a create call, all of which have to touch
1027 disk.
1028
1017endmenu # General setup 1029endmenu # General setup
1018 1030
1019config HAVE_GENERIC_DMA_COHERENT 1031config HAVE_GENERIC_DMA_COHERENT
diff --git a/kernel/Makefile b/kernel/Makefile
index e4791b3ba55d..bab1dffe37e9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
93obj-$(CONFIG_FUNCTION_TRACER) += trace/ 93obj-$(CONFIG_FUNCTION_TRACER) += trace/
94obj-$(CONFIG_TRACING) += trace/ 94obj-$(CONFIG_TRACING) += trace/
95obj-$(CONFIG_SMP) += sched_cpupri.o 95obj-$(CONFIG_SMP) += sched_cpupri.o
96obj-$(CONFIG_SLOW_WORK) += slow-work.o
96 97
97ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 98ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
98# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 99# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
new file mode 100644
index 000000000000..cf2bc01186ef
--- /dev/null
+++ b/kernel/slow-work.c
@@ -0,0 +1,640 @@
1/* Worker thread pool for slow items, such as filesystem lookups or mkdirs
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 *
11 * See Documentation/slow-work.txt
12 */
13
14#include <linux/module.h>
15#include <linux/slow-work.h>
16#include <linux/kthread.h>
17#include <linux/freezer.h>
18#include <linux/wait.h>
19
20#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of
21 * things to do */
22#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after
23 * OOM */
24
25static void slow_work_cull_timeout(unsigned long);
26static void slow_work_oom_timeout(unsigned long);
27
28#ifdef CONFIG_SYSCTL
29static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
30 void __user *, size_t *, loff_t *);
31
32static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
33 void __user *, size_t *, loff_t *);
34#endif
35
36/*
37 * The pool of threads has at least min threads in it as long as someone is
38 * using the facility, and may have as many as max.
39 *
40 * A portion of the pool may be processing very slow operations.
41 */
42static unsigned slow_work_min_threads = 2;
43static unsigned slow_work_max_threads = 4;
44static unsigned vslow_work_proportion = 50; /* % of threads that may process
45 * very slow work */
46
47#ifdef CONFIG_SYSCTL
48static const int slow_work_min_min_threads = 2;
49static int slow_work_max_max_threads = 255;
50static const int slow_work_min_vslow = 1;
51static const int slow_work_max_vslow = 99;
52
53ctl_table slow_work_sysctls[] = {
54 {
55 .ctl_name = CTL_UNNUMBERED,
56 .procname = "min-threads",
57 .data = &slow_work_min_threads,
58 .maxlen = sizeof(unsigned),
59 .mode = 0644,
60 .proc_handler = slow_work_min_threads_sysctl,
61 .extra1 = (void *) &slow_work_min_min_threads,
62 .extra2 = &slow_work_max_threads,
63 },
64 {
65 .ctl_name = CTL_UNNUMBERED,
66 .procname = "max-threads",
67 .data = &slow_work_max_threads,
68 .maxlen = sizeof(unsigned),
69 .mode = 0644,
70 .proc_handler = slow_work_max_threads_sysctl,
71 .extra1 = &slow_work_min_threads,
72 .extra2 = (void *) &slow_work_max_max_threads,
73 },
74 {
75 .ctl_name = CTL_UNNUMBERED,
76 .procname = "vslow-percentage",
77 .data = &vslow_work_proportion,
78 .maxlen = sizeof(unsigned),
79 .mode = 0644,
80 .proc_handler = &proc_dointvec_minmax,
81 .extra1 = (void *) &slow_work_min_vslow,
82 .extra2 = (void *) &slow_work_max_vslow,
83 },
84 { .ctl_name = 0 }
85};
86#endif
87
88/*
89 * The active state of the thread pool
90 */
91static atomic_t slow_work_thread_count;
92static atomic_t vslow_work_executing_count;
93
94static bool slow_work_may_not_start_new_thread;
95static bool slow_work_cull; /* cull a thread due to lack of activity */
96static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
97static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
98static struct slow_work slow_work_new_thread; /* new thread starter */
99
100/*
101 * The queues of work items and the lock governing access to them. These are
102 * shared between all the CPUs. It doesn't make sense to have per-CPU queues
103 * as the number of threads bears no relation to the number of CPUs.
104 *
105 * There are two queues of work items: one for slow work items, and one for
106 * very slow work items.
107 */
108static LIST_HEAD(slow_work_queue);
109static LIST_HEAD(vslow_work_queue);
110static DEFINE_SPINLOCK(slow_work_queue_lock);
111
112/*
113 * The thread controls. A variable used to signal to the threads that they
114 * should exit when the queue is empty, a waitqueue used by the threads to wait
115 * for signals, and a completion set by the last thread to exit.
116 */
117static bool slow_work_threads_should_exit;
118static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
119static DECLARE_COMPLETION(slow_work_last_thread_exited);
120
121/*
122 * The number of users of the thread pool and its lock. Whilst this is zero we
123 * have no threads hanging around, and when this reaches zero, we wait for all
124 * active or queued work items to complete and kill all the threads we do have.
125 */
126static int slow_work_user_count;
127static DEFINE_MUTEX(slow_work_user_lock);
128
129/*
130 * Calculate the maximum number of active threads in the pool that are
131 * permitted to process very slow work items.
132 *
133 * The answer is rounded up to at least 1, but may not equal or exceed the
134 * maximum number of the threads in the pool. This means we always have at
135 * least one thread that can process slow work items, and we always have at
136 * least one thread that won't get tied up doing so.
137 */
138static unsigned slow_work_calc_vsmax(void)
139{
140 unsigned vsmax;
141
142 vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
143 vsmax /= 100;
144 vsmax = max(vsmax, 1U);
145 return min(vsmax, slow_work_max_threads - 1);
146}
147
148/*
149 * Attempt to execute stuff queued on a slow thread. Return true if we managed
150 * it, false if there was nothing to do.
151 */
152static bool slow_work_execute(void)
153{
154 struct slow_work *work = NULL;
155 unsigned vsmax;
156 bool very_slow;
157
158 vsmax = slow_work_calc_vsmax();
159
160 /* see if we can schedule a new thread to be started if we're not
161 * keeping up with the work */
162 if (!waitqueue_active(&slow_work_thread_wq) &&
163 (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
164 atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
165 !slow_work_may_not_start_new_thread)
166 slow_work_enqueue(&slow_work_new_thread);
167
168 /* find something to execute */
169 spin_lock_irq(&slow_work_queue_lock);
170 if (!list_empty(&vslow_work_queue) &&
171 atomic_read(&vslow_work_executing_count) < vsmax) {
172 work = list_entry(vslow_work_queue.next,
173 struct slow_work, link);
174 if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
175 BUG();
176 list_del_init(&work->link);
177 atomic_inc(&vslow_work_executing_count);
178 very_slow = true;
179 } else if (!list_empty(&slow_work_queue)) {
180 work = list_entry(slow_work_queue.next,
181 struct slow_work, link);
182 if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
183 BUG();
184 list_del_init(&work->link);
185 very_slow = false;
186 } else {
187 very_slow = false; /* avoid the compiler warning */
188 }
189 spin_unlock_irq(&slow_work_queue_lock);
190
191 if (!work)
192 return false;
193
194 if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
195 BUG();
196
197 work->ops->execute(work);
198
199 if (very_slow)
200 atomic_dec(&vslow_work_executing_count);
201 clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
202
203 /* if someone tried to enqueue the item whilst we were executing it,
204 * then it'll be left unenqueued to avoid multiple threads trying to
205 * execute it simultaneously
206 *
207 * there is, however, a race between us testing the pending flag and
208 * getting the spinlock, and between the enqueuer setting the pending
209 * flag and getting the spinlock, so we use a deferral bit to tell us
210 * if the enqueuer got there first
211 */
212 if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
213 spin_lock_irq(&slow_work_queue_lock);
214
215 if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
216 test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
217 goto auto_requeue;
218
219 spin_unlock_irq(&slow_work_queue_lock);
220 }
221
222 work->ops->put_ref(work);
223 return true;
224
225auto_requeue:
226 /* we must complete the enqueue operation
227 * - we transfer our ref on the item back to the appropriate queue
228 * - don't wake another thread up as we're awake already
229 */
230 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
231 list_add_tail(&work->link, &vslow_work_queue);
232 else
233 list_add_tail(&work->link, &slow_work_queue);
234 spin_unlock_irq(&slow_work_queue_lock);
235 return true;
236}
237
238/**
239 * slow_work_enqueue - Schedule a slow work item for processing
240 * @work: The work item to queue
241 *
242 * Schedule a slow work item for processing. If the item is already undergoing
243 * execution, this guarantees not to re-enter the execution routine until the
244 * first execution finishes.
245 *
246 * The item is pinned by this function as it retains a reference to it, managed
247 * through the item operations. The item is unpinned once it has been
248 * executed.
249 *
250 * An item may hog the thread that is running it for a relatively large amount
251 * of time, sufficient, for example, to perform several lookup, mkdir, create
252 * and setxattr operations. It may sleep on I/O and may sleep to obtain locks.
253 *
254 * Conversely, if a number of items are awaiting processing, it may take some
255 * time before any given item is given attention. The number of threads in the
256 * pool may be increased to deal with demand, but only up to a limit.
257 *
258 * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
259 * the very slow queue, from which only a portion of the threads will be
260 * allowed to pick items to execute. This ensures that very slow items won't
261 * overly block ones that are just ordinarily slow.
262 *
263 * Returns 0 if successful, -EAGAIN if not.
264 */
265int slow_work_enqueue(struct slow_work *work)
266{
267 unsigned long flags;
268
269 BUG_ON(slow_work_user_count <= 0);
270 BUG_ON(!work);
271 BUG_ON(!work->ops);
272 BUG_ON(!work->ops->get_ref);
273
274 /* when honouring an enqueue request, we only promise that we will run
275 * the work function in the future; we do not promise to run it once
276 * per enqueue request
277 *
278 * we use the PENDING bit to merge together repeat requests without
279 * having to disable IRQs and take the spinlock, whilst still
280 * maintaining our promise
281 */
282 if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
283 spin_lock_irqsave(&slow_work_queue_lock, flags);
284
285 /* we promise that we will not attempt to execute the work
286 * function in more than one thread simultaneously
287 *
288 * this, however, leaves us with a problem if we're asked to
289 * enqueue the work whilst someone is executing the work
290 * function as simply queueing the work immediately means that
291 * another thread may try executing it whilst it is already
292 * under execution
293 *
294 * to deal with this, we set the ENQ_DEFERRED bit instead of
295 * enqueueing, and the thread currently executing the work
296 * function will enqueue the work item when the work function
297 * returns and it has cleared the EXECUTING bit
298 */
299 if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
300 set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
301 } else {
302 if (work->ops->get_ref(work) < 0)
303 goto cant_get_ref;
304 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
305 list_add_tail(&work->link, &vslow_work_queue);
306 else
307 list_add_tail(&work->link, &slow_work_queue);
308 wake_up(&slow_work_thread_wq);
309 }
310
311 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
312 }
313 return 0;
314
315cant_get_ref:
316 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
317 return -EAGAIN;
318}
319EXPORT_SYMBOL(slow_work_enqueue);
320
321/*
322 * Worker thread culling algorithm
323 */
324static bool slow_work_cull_thread(void)
325{
326 unsigned long flags;
327 bool do_cull = false;
328
329 spin_lock_irqsave(&slow_work_queue_lock, flags);
330
331 if (slow_work_cull) {
332 slow_work_cull = false;
333
334 if (list_empty(&slow_work_queue) &&
335 list_empty(&vslow_work_queue) &&
336 atomic_read(&slow_work_thread_count) >
337 slow_work_min_threads) {
338 mod_timer(&slow_work_cull_timer,
339 jiffies + SLOW_WORK_CULL_TIMEOUT);
340 do_cull = true;
341 }
342 }
343
344 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
345 return do_cull;
346}
347
348/*
349 * Determine if there is slow work available for dispatch
350 */
351static inline bool slow_work_available(int vsmax)
352{
353 return !list_empty(&slow_work_queue) ||
354 (!list_empty(&vslow_work_queue) &&
355 atomic_read(&vslow_work_executing_count) < vsmax);
356}
357
358/*
359 * Worker thread dispatcher
360 */
361static int slow_work_thread(void *_data)
362{
363 int vsmax;
364
365 DEFINE_WAIT(wait);
366
367 set_freezable();
368 set_user_nice(current, -5);
369
370 for (;;) {
371 vsmax = vslow_work_proportion;
372 vsmax *= atomic_read(&slow_work_thread_count);
373 vsmax /= 100;
374
375 prepare_to_wait(&slow_work_thread_wq, &wait,
376 TASK_INTERRUPTIBLE);
377 if (!freezing(current) &&
378 !slow_work_threads_should_exit &&
379 !slow_work_available(vsmax) &&
380 !slow_work_cull)
381 schedule();
382 finish_wait(&slow_work_thread_wq, &wait);
383
384 try_to_freeze();
385
386 vsmax = vslow_work_proportion;
387 vsmax *= atomic_read(&slow_work_thread_count);
388 vsmax /= 100;
389
390 if (slow_work_available(vsmax) && slow_work_execute()) {
391 cond_resched();
392 if (list_empty(&slow_work_queue) &&
393 list_empty(&vslow_work_queue) &&
394 atomic_read(&slow_work_thread_count) >
395 slow_work_min_threads)
396 mod_timer(&slow_work_cull_timer,
397 jiffies + SLOW_WORK_CULL_TIMEOUT);
398 continue;
399 }
400
401 if (slow_work_threads_should_exit)
402 break;
403
404 if (slow_work_cull && slow_work_cull_thread())
405 break;
406 }
407
408 if (atomic_dec_and_test(&slow_work_thread_count))
409 complete_and_exit(&slow_work_last_thread_exited, 0);
410 return 0;
411}
412
413/*
414 * Handle thread cull timer expiration
415 */
416static void slow_work_cull_timeout(unsigned long data)
417{
418 slow_work_cull = true;
419 wake_up(&slow_work_thread_wq);
420}
421
422/*
423 * Get a reference on slow work thread starter
424 */
425static int slow_work_new_thread_get_ref(struct slow_work *work)
426{
427 return 0;
428}
429
430/*
431 * Drop a reference on slow work thread starter
432 */
433static void slow_work_new_thread_put_ref(struct slow_work *work)
434{
435}
436
437/*
438 * Start a new slow work thread
439 */
440static void slow_work_new_thread_execute(struct slow_work *work)
441{
442 struct task_struct *p;
443
444 if (slow_work_threads_should_exit)
445 return;
446
447 if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
448 return;
449
450 if (!mutex_trylock(&slow_work_user_lock))
451 return;
452
453 slow_work_may_not_start_new_thread = true;
454 atomic_inc(&slow_work_thread_count);
455 p = kthread_run(slow_work_thread, NULL, "kslowd");
456 if (IS_ERR(p)) {
457 printk(KERN_DEBUG "Slow work thread pool: OOM\n");
458 if (atomic_dec_and_test(&slow_work_thread_count))
459 BUG(); /* we're running on a slow work thread... */
460 mod_timer(&slow_work_oom_timer,
461 jiffies + SLOW_WORK_OOM_TIMEOUT);
462 } else {
463 /* ratelimit the starting of new threads */
464 mod_timer(&slow_work_oom_timer, jiffies + 1);
465 }
466
467 mutex_unlock(&slow_work_user_lock);
468}
469
470static const struct slow_work_ops slow_work_new_thread_ops = {
471 .get_ref = slow_work_new_thread_get_ref,
472 .put_ref = slow_work_new_thread_put_ref,
473 .execute = slow_work_new_thread_execute,
474};
475
476/*
477 * post-OOM new thread start suppression expiration
478 */
479static void slow_work_oom_timeout(unsigned long data)
480{
481 slow_work_may_not_start_new_thread = false;
482}
483
484#ifdef CONFIG_SYSCTL
485/*
486 * Handle adjustment of the minimum number of threads
487 */
488static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
489 struct file *filp, void __user *buffer,
490 size_t *lenp, loff_t *ppos)
491{
492 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
493 int n;
494
495 if (ret == 0) {
496 mutex_lock(&slow_work_user_lock);
497 if (slow_work_user_count > 0) {
498 /* see if we need to start or stop threads */
499 n = atomic_read(&slow_work_thread_count) -
500 slow_work_min_threads;
501
502 if (n < 0 && !slow_work_may_not_start_new_thread)
503 slow_work_enqueue(&slow_work_new_thread);
504 else if (n > 0)
505 mod_timer(&slow_work_cull_timer,
506 jiffies + SLOW_WORK_CULL_TIMEOUT);
507 }
508 mutex_unlock(&slow_work_user_lock);
509 }
510
511 return ret;
512}
513
514/*
515 * Handle adjustment of the maximum number of threads
516 */
517static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
518 struct file *filp, void __user *buffer,
519 size_t *lenp, loff_t *ppos)
520{
521 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
522 int n;
523
524 if (ret == 0) {
525 mutex_lock(&slow_work_user_lock);
526 if (slow_work_user_count > 0) {
527 /* see if we need to stop threads */
528 n = slow_work_max_threads -
529 atomic_read(&slow_work_thread_count);
530
531 if (n < 0)
532 mod_timer(&slow_work_cull_timer,
533 jiffies + SLOW_WORK_CULL_TIMEOUT);
534 }
535 mutex_unlock(&slow_work_user_lock);
536 }
537
538 return ret;
539}
540#endif /* CONFIG_SYSCTL */
541
542/**
543 * slow_work_register_user - Register a user of the facility
544 *
545 * Register a user of the facility, starting up the initial threads if there
546 * aren't any other users at this point. This will return 0 if successful, or
547 * an error if not.
548 */
549int slow_work_register_user(void)
550{
551 struct task_struct *p;
552 int loop;
553
554 mutex_lock(&slow_work_user_lock);
555
556 if (slow_work_user_count == 0) {
557 printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
558 init_completion(&slow_work_last_thread_exited);
559
560 slow_work_threads_should_exit = false;
561 slow_work_init(&slow_work_new_thread,
562 &slow_work_new_thread_ops);
563 slow_work_may_not_start_new_thread = false;
564 slow_work_cull = false;
565
566 /* start the minimum number of threads */
567 for (loop = 0; loop < slow_work_min_threads; loop++) {
568 atomic_inc(&slow_work_thread_count);
569 p = kthread_run(slow_work_thread, NULL, "kslowd");
570 if (IS_ERR(p))
571 goto error;
572 }
573 printk(KERN_NOTICE "Slow work thread pool: Ready\n");
574 }
575
576 slow_work_user_count++;
577 mutex_unlock(&slow_work_user_lock);
578 return 0;
579
580error:
581 if (atomic_dec_and_test(&slow_work_thread_count))
582 complete(&slow_work_last_thread_exited);
583 if (loop > 0) {
584 printk(KERN_ERR "Slow work thread pool:"
585 " Aborting startup on ENOMEM\n");
586 slow_work_threads_should_exit = true;
587 wake_up_all(&slow_work_thread_wq);
588 wait_for_completion(&slow_work_last_thread_exited);
589 printk(KERN_ERR "Slow work thread pool: Aborted\n");
590 }
591 mutex_unlock(&slow_work_user_lock);
592 return PTR_ERR(p);
593}
594EXPORT_SYMBOL(slow_work_register_user);
595
596/**
597 * slow_work_unregister_user - Unregister a user of the facility
598 *
599 * Unregister a user of the facility, killing all the threads if this was the
600 * last one.
601 */
602void slow_work_unregister_user(void)
603{
604 mutex_lock(&slow_work_user_lock);
605
606 BUG_ON(slow_work_user_count <= 0);
607
608 slow_work_user_count--;
609 if (slow_work_user_count == 0) {
610 printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
611 slow_work_threads_should_exit = true;
612 wake_up_all(&slow_work_thread_wq);
613 wait_for_completion(&slow_work_last_thread_exited);
614 printk(KERN_NOTICE "Slow work thread pool:"
615 " Shut down complete\n");
616 }
617
618 del_timer_sync(&slow_work_cull_timer);
619
620 mutex_unlock(&slow_work_user_lock);
621}
622EXPORT_SYMBOL(slow_work_unregister_user);
623
624/*
625 * Initialise the slow work facility
626 */
627static int __init init_slow_work(void)
628{
629 unsigned nr_cpus = num_possible_cpus();
630
631 if (slow_work_max_threads < nr_cpus)
632 slow_work_max_threads = nr_cpus;
633#ifdef CONFIG_SYSCTL
634 if (slow_work_max_max_threads < nr_cpus * 2)
635 slow_work_max_max_threads = nr_cpus * 2;
636#endif
637 return 0;
638}
639
640subsys_initcall(init_slow_work);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ec4543dfc06..82350f8f04f6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -48,6 +48,7 @@
48#include <linux/acpi.h> 48#include <linux/acpi.h>
49#include <linux/reboot.h> 49#include <linux/reboot.h>
50#include <linux/ftrace.h> 50#include <linux/ftrace.h>
51#include <linux/slow-work.h>
51 52
52#include <asm/uaccess.h> 53#include <asm/uaccess.h>
53#include <asm/processor.h> 54#include <asm/processor.h>
@@ -897,6 +898,14 @@ static struct ctl_table kern_table[] = {
897 .proc_handler = &scan_unevictable_handler, 898 .proc_handler = &scan_unevictable_handler,
898 }, 899 },
899#endif 900#endif
901#ifdef CONFIG_SLOW_WORK
902 {
903 .ctl_name = CTL_UNNUMBERED,
904 .procname = "slow-work",
905 .mode = 0555,
906 .child = slow_work_sysctls,
907 },
908#endif
900/* 909/*
901 * NOTE: do not add new entries to this table unless you have read 910 * NOTE: do not add new entries to this table unless you have read
902 * Documentation/sysctl/ctl_unnumbered.txt 911 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/mm/filemap.c b/mm/filemap.c
index 126d3973b3d1..fc11974f2bee 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -565,6 +565,24 @@ void wait_on_page_bit(struct page *page, int bit_nr)
565EXPORT_SYMBOL(wait_on_page_bit); 565EXPORT_SYMBOL(wait_on_page_bit);
566 566
567/** 567/**
568 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
569 * @page - Page defining the wait queue of interest
570 * @waiter - Waiter to add to the queue
571 *
572 * Add an arbitrary @waiter to the wait queue for the nominated @page.
573 */
574void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
575{
576 wait_queue_head_t *q = page_waitqueue(page);
577 unsigned long flags;
578
579 spin_lock_irqsave(&q->lock, flags);
580 __add_wait_queue(q, waiter);
581 spin_unlock_irqrestore(&q->lock, flags);
582}
583EXPORT_SYMBOL_GPL(add_page_wait_queue);
584
585/**
568 * unlock_page - unlock a locked page 586 * unlock_page - unlock a locked page
569 * @page: the page 587 * @page: the page
570 * 588 *
@@ -2463,6 +2481,9 @@ EXPORT_SYMBOL(generic_file_aio_write);
2463 * (presumably at page->private). If the release was successful, return `1'. 2481 * (presumably at page->private). If the release was successful, return `1'.
2464 * Otherwise return zero. 2482 * Otherwise return zero.
2465 * 2483 *
2484 * This may also be called if PG_fscache is set on a page, indicating that the
2485 * page is known to the local caching routines.
2486 *
2466 * The @gfp_mask argument specifies whether I/O may be performed to release 2487 * The @gfp_mask argument specifies whether I/O may be performed to release
2467 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). 2488 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS).
2468 * 2489 *
diff --git a/mm/migrate.c b/mm/migrate.c
index a9eff3f092f6..068655d8f883 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -250,7 +250,7 @@ out:
250 * The number of remaining references must be: 250 * The number of remaining references must be:
251 * 1 for anonymous pages without a mapping 251 * 1 for anonymous pages without a mapping
252 * 2 for pages with a mapping 252 * 2 for pages with a mapping
253 * 3 for pages with a mapping and PagePrivate set. 253 * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
254 */ 254 */
255static int migrate_page_move_mapping(struct address_space *mapping, 255static int migrate_page_move_mapping(struct address_space *mapping,
256 struct page *newpage, struct page *page) 256 struct page *newpage, struct page *page)
@@ -270,7 +270,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
270 pslot = radix_tree_lookup_slot(&mapping->page_tree, 270 pslot = radix_tree_lookup_slot(&mapping->page_tree,
271 page_index(page)); 271 page_index(page));
272 272
273 expected_count = 2 + !!PagePrivate(page); 273 expected_count = 2 + !!page_has_private(page);
274 if (page_count(page) != expected_count || 274 if (page_count(page) != expected_count ||
275 (struct page *)radix_tree_deref_slot(pslot) != page) { 275 (struct page *)radix_tree_deref_slot(pslot) != page) {
276 spin_unlock_irq(&mapping->tree_lock); 276 spin_unlock_irq(&mapping->tree_lock);
@@ -386,7 +386,7 @@ EXPORT_SYMBOL(fail_migrate_page);
386 386
387/* 387/*
388 * Common logic to directly migrate a single page suitable for 388 * Common logic to directly migrate a single page suitable for
389 * pages that do not use PagePrivate. 389 * pages that do not use PagePrivate/PagePrivate2.
390 * 390 *
391 * Pages are locked upon entry and exit. 391 * Pages are locked upon entry and exit.
392 */ 392 */
@@ -522,7 +522,7 @@ static int fallback_migrate_page(struct address_space *mapping,
522 * Buffers may be managed in a filesystem specific way. 522 * Buffers may be managed in a filesystem specific way.
523 * We must have no buffers or drop them. 523 * We must have no buffers or drop them.
524 */ 524 */
525 if (PagePrivate(page) && 525 if (page_has_private(page) &&
526 !try_to_release_page(page, GFP_KERNEL)) 526 !try_to_release_page(page, GFP_KERNEL))
527 return -EAGAIN; 527 return -EAGAIN;
528 528
@@ -655,7 +655,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
655 * free the metadata, so the page can be freed. 655 * free the metadata, so the page can be freed.
656 */ 656 */
657 if (!page->mapping) { 657 if (!page->mapping) {
658 if (!PageAnon(page) && PagePrivate(page)) { 658 if (!PageAnon(page) && page_has_private(page)) {
659 /* 659 /*
660 * Go direct to try_to_free_buffers() here because 660 * Go direct to try_to_free_buffers() here because
661 * a) that's what try_to_release_page() would do anyway 661 * a) that's what try_to_release_page() would do anyway
diff --git a/mm/readahead.c b/mm/readahead.c
index 9ce303d4b810..133b6d525513 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -31,6 +31,42 @@ EXPORT_SYMBOL_GPL(file_ra_state_init);
31 31
32#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) 32#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
33 33
34/*
35 * see if a page needs releasing upon read_cache_pages() failure
36 * - the caller of read_cache_pages() may have set PG_private or PG_fscache
37 * before calling, such as the NFS fs marking pages that are cached locally
38 * on disk, thus we need to give the fs a chance to clean up in the event of
39 * an error
40 */
41static void read_cache_pages_invalidate_page(struct address_space *mapping,
42 struct page *page)
43{
44 if (page_has_private(page)) {
45 if (!trylock_page(page))
46 BUG();
47 page->mapping = mapping;
48 do_invalidatepage(page, 0);
49 page->mapping = NULL;
50 unlock_page(page);
51 }
52 page_cache_release(page);
53}
54
55/*
56 * release a list of pages, invalidating them first if need be
57 */
58static void read_cache_pages_invalidate_pages(struct address_space *mapping,
59 struct list_head *pages)
60{
61 struct page *victim;
62
63 while (!list_empty(pages)) {
64 victim = list_to_page(pages);
65 list_del(&victim->lru);
66 read_cache_pages_invalidate_page(mapping, victim);
67 }
68}
69
34/** 70/**
35 * read_cache_pages - populate an address space with some pages & start reads against them 71 * read_cache_pages - populate an address space with some pages & start reads against them
36 * @mapping: the address_space 72 * @mapping: the address_space
@@ -52,14 +88,14 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
52 list_del(&page->lru); 88 list_del(&page->lru);
53 if (add_to_page_cache_lru(page, mapping, 89 if (add_to_page_cache_lru(page, mapping,
54 page->index, GFP_KERNEL)) { 90 page->index, GFP_KERNEL)) {
55 page_cache_release(page); 91 read_cache_pages_invalidate_page(mapping, page);
56 continue; 92 continue;
57 } 93 }
58 page_cache_release(page); 94 page_cache_release(page);
59 95
60 ret = filler(data, page); 96 ret = filler(data, page);
61 if (unlikely(ret)) { 97 if (unlikely(ret)) {
62 put_pages_list(pages); 98 read_cache_pages_invalidate_pages(mapping, pages);
63 break; 99 break;
64 } 100 }
65 task_io_account_read(PAGE_CACHE_SIZE); 101 task_io_account_read(PAGE_CACHE_SIZE);
diff --git a/mm/swap.c b/mm/swap.c
index 6e83084c1f6c..bede23ce64ea 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -448,8 +448,8 @@ void pagevec_strip(struct pagevec *pvec)
448 for (i = 0; i < pagevec_count(pvec); i++) { 448 for (i = 0; i < pagevec_count(pvec); i++) {
449 struct page *page = pvec->pages[i]; 449 struct page *page = pvec->pages[i];
450 450
451 if (PagePrivate(page) && trylock_page(page)) { 451 if (page_has_private(page) && trylock_page(page)) {
452 if (PagePrivate(page)) 452 if (page_has_private(page))
453 try_to_release_page(page, 0); 453 try_to_release_page(page, 0);
454 unlock_page(page); 454 unlock_page(page);
455 } 455 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 1229211104f8..55206fab7b99 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -50,7 +50,7 @@ void do_invalidatepage(struct page *page, unsigned long offset)
50static inline void truncate_partial_page(struct page *page, unsigned partial) 50static inline void truncate_partial_page(struct page *page, unsigned partial)
51{ 51{
52 zero_user_segment(page, partial, PAGE_CACHE_SIZE); 52 zero_user_segment(page, partial, PAGE_CACHE_SIZE);
53 if (PagePrivate(page)) 53 if (page_has_private(page))
54 do_invalidatepage(page, partial); 54 do_invalidatepage(page, partial);
55} 55}
56 56
@@ -99,7 +99,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
99 if (page->mapping != mapping) 99 if (page->mapping != mapping)
100 return; 100 return;
101 101
102 if (PagePrivate(page)) 102 if (page_has_private(page))
103 do_invalidatepage(page, 0); 103 do_invalidatepage(page, 0);
104 104
105 cancel_dirty_page(page, PAGE_CACHE_SIZE); 105 cancel_dirty_page(page, PAGE_CACHE_SIZE);
@@ -126,7 +126,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
126 if (page->mapping != mapping) 126 if (page->mapping != mapping)
127 return 0; 127 return 0;
128 128
129 if (PagePrivate(page) && !try_to_release_page(page, 0)) 129 if (page_has_private(page) && !try_to_release_page(page, 0))
130 return 0; 130 return 0;
131 131
132 clear_page_mlock(page); 132 clear_page_mlock(page);
@@ -348,7 +348,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
348 if (page->mapping != mapping) 348 if (page->mapping != mapping)
349 return 0; 349 return 0;
350 350
351 if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) 351 if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
352 return 0; 352 return 0;
353 353
354 spin_lock_irq(&mapping->tree_lock); 354 spin_lock_irq(&mapping->tree_lock);
@@ -356,7 +356,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
356 goto failed; 356 goto failed;
357 357
358 clear_page_mlock(page); 358 clear_page_mlock(page);
359 BUG_ON(PagePrivate(page)); 359 BUG_ON(page_has_private(page));
360 __remove_from_page_cache(page); 360 __remove_from_page_cache(page);
361 spin_unlock_irq(&mapping->tree_lock); 361 spin_unlock_irq(&mapping->tree_lock);
362 page_cache_release(page); /* pagecache ref */ 362 page_cache_release(page); /* pagecache ref */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 06e72693b458..425244988bb2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -283,7 +283,7 @@ static inline int page_mapping_inuse(struct page *page)
283 283
284static inline int is_page_cache_freeable(struct page *page) 284static inline int is_page_cache_freeable(struct page *page)
285{ 285{
286 return page_count(page) - !!PagePrivate(page) == 2; 286 return page_count(page) - !!page_has_private(page) == 2;
287} 287}
288 288
289static int may_write_to_queue(struct backing_dev_info *bdi) 289static int may_write_to_queue(struct backing_dev_info *bdi)
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
367 * Some data journaling orphaned pages can have 367 * Some data journaling orphaned pages can have
368 * page->mapping == NULL while being dirty with clean buffers. 368 * page->mapping == NULL while being dirty with clean buffers.
369 */ 369 */
370 if (PagePrivate(page)) { 370 if (page_has_private(page)) {
371 if (try_to_free_buffers(page)) { 371 if (try_to_free_buffers(page)) {
372 ClearPageDirty(page); 372 ClearPageDirty(page);
373 printk("%s: orphaned page\n", __func__); 373 printk("%s: orphaned page\n", __func__);
@@ -727,7 +727,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
727 * process address space (page_count == 1) it can be freed. 727 * process address space (page_count == 1) it can be freed.
728 * Otherwise, leave the page on the LRU so it is swappable. 728 * Otherwise, leave the page on the LRU so it is swappable.
729 */ 729 */
730 if (PagePrivate(page)) { 730 if (page_has_private(page)) {
731 if (!try_to_release_page(page, sc->gfp_mask)) 731 if (!try_to_release_page(page, sc->gfp_mask))
732 goto activate_locked; 732 goto activate_locked;
733 if (!mapping && page_count(page) == 1) { 733 if (!mapping && page_count(page) == 1) {
diff --git a/security/security.c b/security/security.c
index 206e53844d2f..5284255c5cdf 100644
--- a/security/security.c
+++ b/security/security.c
@@ -445,6 +445,7 @@ int security_inode_create(struct inode *dir, struct dentry *dentry, int mode)
445 return 0; 445 return 0;
446 return security_ops->inode_create(dir, dentry, mode); 446 return security_ops->inode_create(dir, dentry, mode);
447} 447}
448EXPORT_SYMBOL_GPL(security_inode_create);
448 449
449int security_inode_link(struct dentry *old_dentry, struct inode *dir, 450int security_inode_link(struct dentry *old_dentry, struct inode *dir,
450 struct dentry *new_dentry) 451 struct dentry *new_dentry)
@@ -475,6 +476,7 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode)
475 return 0; 476 return 0;
476 return security_ops->inode_mkdir(dir, dentry, mode); 477 return security_ops->inode_mkdir(dir, dentry, mode);
477} 478}
479EXPORT_SYMBOL_GPL(security_inode_mkdir);
478 480
479int security_inode_rmdir(struct inode *dir, struct dentry *dentry) 481int security_inode_rmdir(struct inode *dir, struct dentry *dentry)
480{ 482{