diff options
82 files changed, 15414 insertions, 390 deletions
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt new file mode 100644 index 000000000000..382d52cdaf2d --- /dev/null +++ b/Documentation/filesystems/caching/backend-api.txt | |||
@@ -0,0 +1,658 @@ | |||
1 | ========================== | ||
2 | FS-CACHE CACHE BACKEND API | ||
3 | ========================== | ||
4 | |||
5 | The FS-Cache system provides an API by which actual caches can be supplied to | ||
6 | FS-Cache for it to then serve out to network filesystems and other interested | ||
7 | parties. | ||
8 | |||
9 | This API is declared in <linux/fscache-cache.h>. | ||
10 | |||
11 | |||
12 | ==================================== | ||
13 | INITIALISING AND REGISTERING A CACHE | ||
14 | ==================================== | ||
15 | |||
16 | To start off, a cache definition must be initialised and registered for each | ||
17 | cache the backend wants to make available. For instance, CacheFS does this in | ||
18 | the fill_super() operation on mounting. | ||
19 | |||
20 | The cache definition (struct fscache_cache) should be initialised by calling: | ||
21 | |||
22 | void fscache_init_cache(struct fscache_cache *cache, | ||
23 | struct fscache_cache_ops *ops, | ||
24 | const char *idfmt, | ||
25 | ...); | ||
26 | |||
27 | Where: | ||
28 | |||
29 | (*) "cache" is a pointer to the cache definition; | ||
30 | |||
31 | (*) "ops" is a pointer to the table of operations that the backend supports on | ||
32 | this cache; and | ||
33 | |||
34 | (*) "idfmt" is a format and printf-style arguments for constructing a label | ||
35 | for the cache. | ||
36 | |||
37 | |||
38 | The cache should then be registered with FS-Cache by passing a pointer to the | ||
39 | previously initialised cache definition to: | ||
40 | |||
41 | int fscache_add_cache(struct fscache_cache *cache, | ||
42 | struct fscache_object *fsdef, | ||
43 | const char *tagname); | ||
44 | |||
45 | Two extra arguments should also be supplied: | ||
46 | |||
47 | (*) "fsdef" which should point to the object representation for the FS-Cache | ||
48 | master index in this cache. Netfs primary index entries will be created | ||
49 | here. FS-Cache keeps the caller's reference to the index object if | ||
50 | successful and will release it upon withdrawal of the cache. | ||
51 | |||
52 | (*) "tagname" which, if given, should be a text string naming this cache. If | ||
53 | this is NULL, the identifier will be used instead. For CacheFS, the | ||
54 | identifier is set to name the underlying block device and the tag can be | ||
55 | supplied by mount. | ||
56 | |||
57 | This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag | ||
58 | is already in use. 0 will be returned on success. | ||
59 | |||
60 | |||
61 | ===================== | ||
62 | UNREGISTERING A CACHE | ||
63 | ===================== | ||
64 | |||
65 | A cache can be withdrawn from the system by calling this function with a | ||
66 | pointer to the cache definition: | ||
67 | |||
68 | void fscache_withdraw_cache(struct fscache_cache *cache); | ||
69 | |||
70 | In CacheFS's case, this is called by put_super(). | ||
71 | |||
72 | |||
73 | ======== | ||
74 | SECURITY | ||
75 | ======== | ||
76 | |||
77 | The cache methods are executed in one of two contexts: | ||
78 | |||
79 | (1) that of the userspace process that issued the netfs operation that caused | ||
80 | the cache method to be invoked, or | ||
81 | |||
82 | (2) that of one of the processes in the FS-Cache thread pool. | ||
83 | |||
84 | In either case, this may not be an appropriate context in which to access the | ||
85 | cache. | ||
86 | |||
87 | The calling process's fsuid, fsgid and SELinux security identities may need to | ||
88 | be masqueraded for the duration of the cache driver's access to the cache. | ||
89 | This is left to the cache to handle; FS-Cache makes no effort in this regard. | ||
90 | |||
91 | |||
92 | =================================== | ||
93 | CONTROL AND STATISTICS PRESENTATION | ||
94 | =================================== | ||
95 | |||
96 | The cache may present data to the outside world through FS-Cache's interfaces | ||
97 | in sysfs and procfs - the former for control and the latter for statistics. | ||
98 | |||
99 | A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS | ||
100 | is enabled. This is accessible through the kobject struct fscache_cache::kobj | ||
101 | and is for use by the cache as it sees fit. | ||
102 | |||
103 | |||
104 | ======================== | ||
105 | RELEVANT DATA STRUCTURES | ||
106 | ======================== | ||
107 | |||
108 | (*) Index/Data file FS-Cache representation cookie: | ||
109 | |||
110 | struct fscache_cookie { | ||
111 | struct fscache_object_def *def; | ||
112 | struct fscache_netfs *netfs; | ||
113 | void *netfs_data; | ||
114 | ... | ||
115 | }; | ||
116 | |||
117 | The fields that might be of use to the backend describe the object | ||
118 | definition, the netfs definition and the netfs's data for this cookie. | ||
119 | The object definition contains functions supplied by the netfs for loading | ||
120 | and matching index entries; these are required to provide some of the | ||
121 | cache operations. | ||
122 | |||
123 | |||
124 | (*) In-cache object representation: | ||
125 | |||
126 | struct fscache_object { | ||
127 | int debug_id; | ||
128 | enum { | ||
129 | FSCACHE_OBJECT_RECYCLING, | ||
130 | ... | ||
131 | } state; | ||
132 | spinlock_t lock; | ||
133 | struct fscache_cache *cache; | ||
134 | struct fscache_cookie *cookie; | ||
135 | ... | ||
136 | }; | ||
137 | |||
138 | Structures of this type should be allocated by the cache backend and | ||
139 | passed to FS-Cache when requested by the appropriate cache operation. In | ||
140 | the case of CacheFS, they're embedded in CacheFS's internal object | ||
141 | structures. | ||
142 | |||
143 | The debug_id is a simple integer that can be used in debugging messages | ||
144 | that refer to a particular object. In such a case it should be printed | ||
145 | using "OBJ%x" to be consistent with FS-Cache. | ||
146 | |||
147 | Each object contains a pointer to the cookie that represents the object it | ||
148 | is backing. An object should be retired when put_object() is called if it is | ||
149 | in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be | ||
150 | initialised by calling fscache_object_init(object). | ||
151 | |||
152 | |||
153 | (*) FS-Cache operation record: | ||
154 | |||
155 | struct fscache_operation { | ||
156 | atomic_t usage; | ||
157 | struct fscache_object *object; | ||
158 | unsigned long flags; | ||
159 | #define FSCACHE_OP_EXCLUSIVE | ||
160 | void (*processor)(struct fscache_operation *op); | ||
161 | void (*release)(struct fscache_operation *op); | ||
162 | ... | ||
163 | }; | ||
164 | |||
165 | FS-Cache has a pool of threads that it uses to give CPU time to the | ||
166 | various asynchronous operations that need to be done as part of driving | ||
167 | the cache. These are represented by the above structure. The processor | ||
168 | method is called to give the op CPU time, and the release method to get | ||
169 | rid of it when its usage count reaches 0. | ||
170 | |||
171 | An operation can be made exclusive upon an object by setting the | ||
172 | appropriate flag before enqueuing it with fscache_enqueue_operation(). If | ||
173 | an operation needs more processing time, it should be enqueued again. | ||
174 | |||
175 | |||
176 | (*) FS-Cache retrieval operation record: | ||
177 | |||
178 | struct fscache_retrieval { | ||
179 | struct fscache_operation op; | ||
180 | struct address_space *mapping; | ||
181 | struct list_head *to_do; | ||
182 | ... | ||
183 | }; | ||
184 | |||
185 | A structure of this type is allocated by FS-Cache to record retrieval and | ||
186 | allocation requests made by the netfs. This struct is then passed to the | ||
187 | backend to do the operation. The backend may get extra refs to it by | ||
188 | calling fscache_get_retrieval() and refs may be discarded by calling | ||
189 | fscache_put_retrieval(). | ||
190 | |||
191 | A retrieval operation can be used by the backend to do retrieval work. To | ||
192 | do this, the retrieval->op.processor method pointer should be set | ||
193 | appropriately by the backend and fscache_enqueue_retrieval() called to | ||
194 | submit it to the thread pool. CacheFiles, for example, uses this to queue | ||
195 | page examination when it detects PG_lock being cleared. | ||
196 | |||
197 | The to_do field is an empty list available for the cache backend to use as | ||
198 | it sees fit. | ||
199 | |||
200 | |||
201 | (*) FS-Cache storage operation record: | ||
202 | |||
203 | struct fscache_storage { | ||
204 | struct fscache_operation op; | ||
205 | pgoff_t store_limit; | ||
206 | ... | ||
207 | }; | ||
208 | |||
209 | A structure of this type is allocated by FS-Cache to record outstanding | ||
210 | writes to be made. FS-Cache itself enqueues this operation and invokes | ||
211 | the write_page() method on the object at appropriate times to effect | ||
212 | storage. | ||
213 | |||
214 | |||
215 | ================ | ||
216 | CACHE OPERATIONS | ||
217 | ================ | ||
218 | |||
219 | The cache backend provides FS-Cache with a table of operations that can be | ||
220 | performed on the denizens of the cache. These are held in a structure of type: | ||
221 | |||
222 | struct fscache_cache_ops | ||
223 | |||
224 | (*) Name of cache provider [mandatory]: | ||
225 | |||
226 | const char *name | ||
227 | |||
228 | This isn't strictly an operation, but should be pointed at a string naming | ||
229 | the backend. | ||
230 | |||
231 | |||
232 | (*) Allocate a new object [mandatory]: | ||
233 | |||
234 | struct fscache_object *(*alloc_object)(struct fscache_cache *cache, | ||
235 | struct fscache_cookie *cookie) | ||
236 | |||
237 | This method is used to allocate a cache object representation to back a | ||
238 | cookie in a particular cache. fscache_object_init() should be called on | ||
239 | the object to initialise it prior to returning. | ||
240 | |||
241 | This function may also be used to parse the index key to be used for | ||
242 | multiple lookup calls to turn it into a more convenient form. FS-Cache | ||
243 | will call the lookup_complete() method to allow the cache to release the | ||
244 | form once lookup is complete or aborted. | ||
245 | |||
246 | |||
247 | (*) Look up and create object [mandatory]: | ||
248 | |||
249 | void (*lookup_object)(struct fscache_object *object) | ||
250 | |||
251 | This method is used to look up an object, given that the object is already | ||
252 | allocated and attached to the cookie. This should instantiate that object | ||
253 | in the cache if it can. | ||
254 | |||
255 | The method should call fscache_object_lookup_negative() as soon as | ||
256 | possible if it determines the object doesn't exist in the cache. If the | ||
257 | object is found to exist and the netfs indicates that it is valid then | ||
258 | fscache_obtained_object() should be called once the object is in a | ||
259 | position to have data stored in it. Similarly, fscache_obtained_object() | ||
260 | should also be called once a non-present object has been created. | ||
261 | |||
262 | If a lookup error occurs, fscache_object_lookup_error() should be called | ||
263 | to abort the lookup of that object. | ||
264 | |||
265 | |||
266 | (*) Release lookup data [mandatory]: | ||
267 | |||
268 | void (*lookup_complete)(struct fscache_object *object) | ||
269 | |||
270 | This method is called to ask the cache to release any resources it was | ||
271 | using to perform a lookup. | ||
272 | |||
273 | |||
274 | (*) Increment object refcount [mandatory]: | ||
275 | |||
276 | struct fscache_object *(*grab_object)(struct fscache_object *object) | ||
277 | |||
278 | This method is called to increment the reference count on an object. It | ||
279 | may fail (for instance if the cache is being withdrawn) by returning NULL. | ||
280 | It should return the object pointer if successful. | ||
281 | |||
282 | |||
283 | (*) Lock/Unlock object [mandatory]: | ||
284 | |||
285 | void (*lock_object)(struct fscache_object *object) | ||
286 | void (*unlock_object)(struct fscache_object *object) | ||
287 | |||
288 | These methods are used to exclusively lock an object. It must be possible | ||
289 | to schedule with the lock held, so a spinlock isn't sufficient. | ||
290 | |||
291 | |||
292 | (*) Pin/Unpin object [optional]: | ||
293 | |||
294 | int (*pin_object)(struct fscache_object *object) | ||
295 | void (*unpin_object)(struct fscache_object *object) | ||
296 | |||
297 | These methods are used to pin an object into the cache. Once pinned an | ||
298 | object cannot be reclaimed to make space. Return -ENOSPC if there's not | ||
299 | enough space in the cache to permit this. | ||
300 | |||
301 | |||
302 | (*) Update object [mandatory]: | ||
303 | |||
304 | int (*update_object)(struct fscache_object *object) | ||
305 | |||
306 | This is called to update the index entry for the specified object. The | ||
307 | new information should be in object->cookie->netfs_data. This can be | ||
308 | obtained by calling object->cookie->def->get_aux()/get_attr(). | ||
309 | |||
310 | |||
311 | (*) Discard object [mandatory]: | ||
312 | |||
313 | void (*drop_object)(struct fscache_object *object) | ||
314 | |||
315 | This method is called to indicate that an object has been unbound from its | ||
316 | cookie, and that the cache should release the object's resources and | ||
317 | retire it if it's in state FSCACHE_OBJECT_RECYCLING. | ||
318 | |||
319 | This method should not attempt to release any references held by the | ||
320 | caller. The caller will invoke the put_object() method as appropriate. | ||
321 | |||
322 | |||
323 | (*) Release object reference [mandatory]: | ||
324 | |||
325 | void (*put_object)(struct fscache_object *object) | ||
326 | |||
327 | This method is used to discard a reference to an object. The object may | ||
328 | be freed when all the references to it are released. | ||
329 | |||
330 | |||
331 | (*) Synchronise a cache [mandatory]: | ||
332 | |||
333 | void (*sync)(struct fscache_cache *cache) | ||
334 | |||
335 | This is called to ask the backend to synchronise a cache with its backing | ||
336 | device. | ||
337 | |||
338 | |||
339 | (*) Dissociate a cache [mandatory]: | ||
340 | |||
341 | void (*dissociate_pages)(struct fscache_cache *cache) | ||
342 | |||
343 | This is called to ask a cache to perform any page dissociations as part of | ||
344 | cache withdrawal. | ||
345 | |||
346 | |||
347 | (*) Notification that the attributes on a netfs file changed [mandatory]: | ||
348 | |||
349 | int (*attr_changed)(struct fscache_object *object); | ||
350 | |||
351 | This is called to indicate to the cache that certain attributes on a netfs | ||
352 | file have changed (for example the maximum size a file may reach). The | ||
353 | cache can read these from the netfs by calling the cookie's get_attr() | ||
354 | method. | ||
355 | |||
356 | The cache may use the file size information to reserve space on the cache. | ||
357 | It should also call fscache_set_store_limit() to indicate to FS-Cache the | ||
358 | highest byte it's willing to store for an object. | ||
359 | |||
360 | This method may return -ve if an error occurred or the cache object cannot | ||
361 | be expanded. In such a case, the object will be withdrawn from service. | ||
362 | |||
363 | This operation is run asynchronously from FS-Cache's thread pool, and | ||
364 | storage and retrieval operations from the netfs are excluded during the | ||
365 | execution of this operation. | ||
366 | |||
367 | |||
368 | (*) Reserve cache space for an object's data [optional]: | ||
369 | |||
370 | int (*reserve_space)(struct fscache_object *object, loff_t size); | ||
371 | |||
372 | This is called to request that cache space be reserved to hold the data | ||
373 | for an object and the metadata used to track it. Zero size should be | ||
374 | taken as a request to cancel a reservation. | ||
375 | |||
376 | This should return 0 if successful, -ENOSPC if there isn't enough space | ||
377 | available, or -ENOMEM or -EIO on other errors. | ||
378 | |||
379 | The reservation may exceed the current size of the object, thus permitting | ||
380 | future expansion. If the amount of space consumed by an object would | ||
381 | exceed the reservation, it's permitted to refuse requests to allocate | ||
382 | pages, but not required. An object may be pruned down to its reservation | ||
383 | size if larger than that already. | ||
384 | |||
385 | |||
386 | (*) Request page be read from cache [mandatory]: | ||
387 | |||
388 | int (*read_or_alloc_page)(struct fscache_retrieval *op, | ||
389 | struct page *page, | ||
390 | gfp_t gfp) | ||
391 | |||
392 | This is called to attempt to read a netfs page from the cache, or to | ||
393 | reserve a backing block if not. FS-Cache will have done as much checking | ||
394 | as it can before calling, but most of the work belongs to the backend. | ||
395 | |||
396 | If there's no page in the cache, then -ENODATA should be returned if the | ||
397 | backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it | ||
398 | didn't. | ||
399 | |||
400 | If there is suitable data in the cache, then a read operation should be | ||
401 | queued and 0 returned. When the read finishes, fscache_end_io() should be | ||
402 | called. | ||
403 | |||
404 | fscache_mark_pages_cached() should be called for the page if any cache | ||
405 | metadata is retained. This will indicate to the netfs that the page needs | ||
406 | explicit uncaching. This operation takes a pagevec, thus allowing several | ||
407 | pages to be marked at once. | ||
408 | |||
409 | The retrieval record pointed to by op should be retained for each page | ||
410 | queued and released when I/O on the page has been formally ended. | ||
411 | fscache_get/put_retrieval() are available for this purpose. | ||
412 | |||
413 | The retrieval record may be used to get CPU time via the FS-Cache thread | ||
414 | pool. If this is desired, the op->op.processor should be set to point to | ||
415 | the appropriate processing routine, and fscache_enqueue_retrieval() should | ||
416 | be called at an appropriate point to request CPU time. For instance, the | ||
417 | retrieval routine could be enqueued upon the completion of a disk read. | ||
418 | The to_do field in the retrieval record is provided to aid in this. | ||
419 | |||
420 | If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS | ||
421 | returned if possible or fscache_end_io() called with a suitable error | ||
422 | code. | ||
423 | |||
424 | |||
425 | (*) Request pages be read from cache [mandatory]: | ||
426 | |||
427 | int (*read_or_alloc_pages)(struct fscache_retrieval *op, | ||
428 | struct list_head *pages, | ||
429 | unsigned *nr_pages, | ||
430 | gfp_t gfp) | ||
431 | |||
432 | This is like the read_or_alloc_page() method, except it is handed a list | ||
433 | of pages instead of one page. Any pages on which a read operation is | ||
434 | started must be added to the page cache for the specified mapping and also | ||
435 | to the LRU. Such pages must also be removed from the pages list and | ||
436 | *nr_pages decremented per page. | ||
437 | |||
438 | If there was an error such as -ENOMEM, then that should be returned; else | ||
439 | if one or more pages couldn't be read or allocated, then -ENOBUFS should | ||
440 | be returned; else if one or more pages couldn't be read, then -ENODATA | ||
441 | should be returned. If all the pages are dispatched then 0 should be | ||
442 | returned. | ||
443 | |||
444 | |||
445 | (*) Request page be allocated in the cache [mandatory]: | ||
446 | |||
447 | int (*allocate_page)(struct fscache_retrieval *op, | ||
448 | struct page *page, | ||
449 | gfp_t gfp) | ||
450 | |||
451 | This is like the read_or_alloc_page() method, except that it shouldn't | ||
452 | read from the cache, even if there's data there that could be retrieved. | ||
453 | It should, however, set up any internal metadata required such that | ||
454 | the write_page() method can write to the cache. | ||
455 | |||
456 | If there's no backing block available, then -ENOBUFS should be returned | ||
457 | (or -ENOMEM if there were other problems). If a block is successfully | ||
458 | allocated, then the netfs page should be marked and 0 returned. | ||
459 | |||
460 | |||
461 | (*) Request pages be allocated in the cache [mandatory]: | ||
462 | |||
463 | int (*allocate_pages)(struct fscache_retrieval *op, | ||
464 | struct list_head *pages, | ||
465 | unsigned *nr_pages, | ||
466 | gfp_t gfp) | ||
467 | |||
468 | This is a multiple page version of the allocate_page() method. pages and | ||
469 | nr_pages should be treated as for the read_or_alloc_pages() method. | ||
470 | |||
471 | |||
472 | (*) Request page be written to cache [mandatory]: | ||
473 | |||
474 | int (*write_page)(struct fscache_storage *op, | ||
475 | struct page *page); | ||
476 | |||
477 | This is called to write from a page on which there was a previously | ||
478 | successful read_or_alloc_page() call or similar. FS-Cache filters out | ||
479 | pages that don't have mappings. | ||
480 | |||
481 | This method is called asynchronously from the FS-Cache thread pool. It is | ||
482 | not required to actually store anything, provided -ENODATA is then | ||
483 | returned to the next read of this page. | ||
484 | |||
485 | If an error occurred, then a negative error code should be returned, | ||
486 | otherwise zero should be returned. FS-Cache will take appropriate action | ||
487 | in response to an error, such as withdrawing this object. | ||
488 | |||
489 | If this method returns success then FS-Cache will inform the netfs | ||
490 | appropriately. | ||
491 | |||
492 | |||
493 | (*) Discard retained per-page metadata [mandatory]: | ||
494 | |||
495 | void (*uncache_page)(struct fscache_object *object, struct page *page) | ||
496 | |||
497 | This is called when a netfs page is being evicted from the pagecache. The | ||
498 | cache backend should tear down any internal representation or tracking it | ||
499 | maintains for this page. | ||
500 | |||
501 | |||
502 | ================== | ||
503 | FS-CACHE UTILITIES | ||
504 | ================== | ||
505 | |||
506 | FS-Cache provides some utilities that a cache backend may make use of: | ||
507 | |||
508 | (*) Note occurrence of an I/O error in a cache: | ||
509 | |||
510 | void fscache_io_error(struct fscache_cache *cache) | ||
511 | |||
512 | This tells FS-Cache that an I/O error occurred in the cache. After this | ||
513 | has been called, only resource dissociation operations (object and page | ||
514 | release) will be passed from the netfs to the cache backend for the | ||
515 | specified cache. | ||
516 | |||
517 | This does not actually withdraw the cache. That must be done separately. | ||
518 | |||
519 | |||
520 | (*) Invoke the retrieval I/O completion function: | ||
521 | |||
522 | void fscache_end_io(struct fscache_retrieval *op, struct page *page, | ||
523 | int error); | ||
524 | |||
525 | This is called to note the end of an attempt to retrieve a page. The | ||
526 | error value should be 0 if successful and an error otherwise. | ||
527 | |||
528 | |||
529 | (*) Set highest store limit: | ||
530 | |||
531 | void fscache_set_store_limit(struct fscache_object *object, | ||
532 | loff_t i_size); | ||
533 | |||
534 | This sets the limit FS-Cache imposes on the highest byte it's willing to | ||
535 | try and store for a netfs. Any page over this limit is automatically | ||
536 | rejected by fscache_read_or_alloc_page() and co with -ENOBUFS. | ||
537 | |||
538 | |||
539 | (*) Mark pages as being cached: | ||
540 | |||
541 | void fscache_mark_pages_cached(struct fscache_retrieval *op, | ||
542 | struct pagevec *pagevec); | ||
543 | |||
544 | This marks a set of pages as being cached. After this has been called, | ||
545 | the netfs must call fscache_uncache_page() to unmark the pages. | ||
546 | |||
547 | |||
548 | (*) Perform coherency check on an object: | ||
549 | |||
550 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | ||
551 | const void *data, | ||
552 | uint16_t datalen); | ||
553 | |||
554 | This asks the netfs to perform a coherency check on an object that has | ||
555 | just been looked up. The cookie attached to the object will determine the | ||
556 | netfs to use. data and datalen should specify where the auxiliary data | ||
557 | retrieved from the cache can be found. | ||
558 | |||
559 | One of three values will be returned: | ||
560 | |||
561 | (*) FSCACHE_CHECKAUX_OKAY | ||
562 | |||
563 | The coherency data indicates the object is valid as is. | ||
564 | |||
565 | (*) FSCACHE_CHECKAUX_NEEDS_UPDATE | ||
566 | |||
567 | The coherency data needs updating, but otherwise the object is | ||
568 | valid. | ||
569 | |||
570 | (*) FSCACHE_CHECKAUX_OBSOLETE | ||
571 | |||
572 | The coherency data indicates that the object is obsolete and should | ||
573 | be discarded. | ||
574 | |||
575 | |||
576 | (*) Initialise a freshly allocated object: | ||
577 | |||
578 | void fscache_object_init(struct fscache_object *object); | ||
579 | |||
580 | This initialises all the fields in an object representation. | ||
581 | |||
582 | |||
583 | (*) Indicate the destruction of an object: | ||
584 | |||
585 | void fscache_object_destroyed(struct fscache_cache *cache); | ||
586 | |||
587 | This must be called to inform FS-Cache that an object that belonged to a | ||
588 | cache has been destroyed and deallocated. This will allow continuation | ||
589 | of the cache withdrawal process when it is stopped pending destruction of | ||
590 | all the objects. | ||
591 | |||
592 | |||
593 | (*) Indicate negative lookup on an object: | ||
594 | |||
595 | void fscache_object_lookup_negative(struct fscache_object *object); | ||
596 | |||
597 | This is called to indicate to FS-Cache that a lookup process for an object | ||
598 | found a negative result. | ||
599 | |||
600 | This changes the state of an object to permit reads pending on lookup | ||
601 | completion to go off and start fetching data from the netfs server as it's | ||
602 | known at this point that there can't be any data in the cache. | ||
603 | |||
604 | This may be called multiple times on an object. Only the first call is | ||
605 | significant - all subsequent calls are ignored. | ||
606 | |||
607 | |||
608 | (*) Indicate an object has been obtained: | ||
609 | |||
610 | void fscache_obtained_object(struct fscache_object *object); | ||
611 | |||
612 | This is called to indicate to FS-Cache that a lookup process for an object | ||
613 | produced a positive result, or that an object was created. This should | ||
614 | only be called once for any particular object. | ||
615 | |||
616 | This changes the state of an object to indicate: | ||
617 | |||
618 | (1) if no call to fscache_object_lookup_negative() has been made on | ||
619 | this object, that there may be data available, and that reads can | ||
620 | now go and look for it; and | ||
621 | |||
622 | (2) that writes may now proceed against this object. | ||
623 | |||
624 | |||
625 | (*) Indicate that object lookup failed: | ||
626 | |||
627 | void fscache_object_lookup_error(struct fscache_object *object); | ||
628 | |||
629 | This marks an object as having encountered a fatal error (usually EIO) | ||
630 | and causes it to move into a state whereby it will be withdrawn as soon | ||
631 | as possible. | ||
632 | |||
633 | |||
634 | (*) Get and release references on a retrieval record: | ||
635 | |||
636 | void fscache_get_retrieval(struct fscache_retrieval *op); | ||
637 | void fscache_put_retrieval(struct fscache_retrieval *op); | ||
638 | |||
639 | These two functions are used to retain a retrieval record whilst doing | ||
640 | asynchronous data retrieval and block allocation. | ||
641 | |||
642 | |||
643 | (*) Enqueue a retrieval record for processing: | ||
644 | |||
645 | void fscache_enqueue_retrieval(struct fscache_retrieval *op); | ||
646 | |||
647 | This enqueues a retrieval record for processing by the FS-Cache thread | ||
648 | pool. One of the threads in the pool will invoke the retrieval record's | ||
649 | op->op.processor callback function. This function may be called from | ||
650 | within the callback function. | ||
651 | |||
652 | |||
653 | (*) List of object state names: | ||
654 | |||
655 | const char *fscache_object_states[]; | ||
656 | |||
657 | For debugging purposes, this may be used to turn the state that an object | ||
658 | is in into a text string for display purposes. | ||
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt new file mode 100644 index 000000000000..c78a49b7bba6 --- /dev/null +++ b/Documentation/filesystems/caching/cachefiles.txt | |||
@@ -0,0 +1,501 @@ | |||
1 | =============================================== | ||
2 | CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM | ||
3 | =============================================== | ||
4 | |||
5 | Contents: | ||
6 | |||
7 | (*) Overview. | ||
8 | |||
9 | (*) Requirements. | ||
10 | |||
11 | (*) Configuration. | ||
12 | |||
13 | (*) Starting the cache. | ||
14 | |||
15 | (*) Things to avoid. | ||
16 | |||
17 | (*) Cache culling. | ||
18 | |||
19 | (*) Cache structure. | ||
20 | |||
21 | (*) Security model and SELinux. | ||
22 | |||
23 | (*) A note on security. | ||
24 | |||
25 | (*) Statistical information. | ||
26 | |||
27 | (*) Debugging. | ||
28 | |||
29 | |||
30 | ======== | ||
31 | OVERVIEW | ||
32 | ======== | ||
33 | |||
34 | CacheFiles is a caching backend that's meant to use as a cache a directory on | ||
35 | an already mounted filesystem of a local type (such as Ext3). | ||
36 | |||
37 | CacheFiles uses a userspace daemon to do some of the cache management - such as | ||
38 | reaping stale nodes and culling. This is called cachefilesd and lives in | ||
39 | /sbin. | ||
40 | |||
41 | The filesystem and data integrity of the cache are only as good as those of the | ||
42 | filesystem providing the backing services. Note that CacheFiles does not | ||
43 | attempt to journal anything since the journalling interfaces of the various | ||
44 | filesystems are very specific in nature. | ||
45 | |||
46 | CacheFiles creates a misc character device - "/dev/cachefiles" - that is used | ||
47 | to communicate with the daemon. Only one thing may have this open at once, | ||
48 | and whilst it is open, a cache is at least partially in existence. The daemon | ||
49 | opens this and sends commands down it to control the cache. | ||
50 | |||
51 | CacheFiles is currently limited to a single cache. | ||
52 | |||
53 | CacheFiles attempts to maintain at least a certain percentage of free space on | ||
54 | the filesystem, shrinking the cache by culling the objects it contains to make | ||
55 | space if necessary - see the "Cache Culling" section. This means it can be | ||
56 | placed on the same medium as a live set of data, and will expand to make use of | ||
57 | spare space and automatically contract when the set of data requires more | ||
58 | space. | ||
59 | |||
60 | |||
61 | ============ | ||
62 | REQUIREMENTS | ||
63 | ============ | ||
64 | |||
65 | The use of CacheFiles and its daemon requires the following features to be | ||
66 | available in the system and in the cache filesystem: | ||
67 | |||
68 | - dnotify. | ||
69 | |||
70 | - extended attributes (xattrs). | ||
71 | |||
72 | - openat() and friends. | ||
73 | |||
74 | - bmap() support on files in the filesystem (FIBMAP ioctl). | ||
75 | |||
76 | - The use of bmap() to detect a partial page at the end of the file. | ||
77 | |||
78 | It is strongly recommended that the "dir_index" option is enabled on Ext3 | ||
79 | filesystems being used as a cache. | ||
80 | |||
81 | |||
82 | ============= | ||
83 | CONFIGURATION | ||
84 | ============= | ||
85 | |||
86 | The cache is configured by a script in /etc/cachefilesd.conf. These commands | ||
87 | set up the cache ready for use. The following script commands are available: | ||
88 | |||
89 | (*) brun <N>% | ||
90 | (*) bcull <N>% | ||
91 | (*) bstop <N>% | ||
92 | (*) frun <N>% | ||
93 | (*) fcull <N>% | ||
94 | (*) fstop <N>% | ||
95 | |||
96 | Configure the culling limits. Optional. See the section on culling. | ||
97 | The defaults are 7% (run), 5% (cull) and 1% (stop) respectively. | ||
98 | |||
99 | The commands beginning with a 'b' are file space (block) limits, those | ||
100 | beginning with an 'f' are file count limits. | ||
101 | |||
102 | (*) dir <path> | ||
103 | |||
104 | Specify the directory containing the root of the cache. Mandatory. | ||
105 | |||
106 | (*) tag <name> | ||
107 | |||
108 | Specify a tag to FS-Cache to use in distinguishing multiple caches. | ||
109 | Optional. The default is "CacheFiles". | ||
110 | |||
111 | (*) debug <mask> | ||
112 | |||
113 | Specify a numeric bitmask to control debugging in the kernel module. | ||
114 | Optional. The default is zero (all off). The following values can be | ||
115 | OR'd into the mask to collect various information: | ||
116 | |||
117 | 1 Turn on trace of function entry (_enter() macros) | ||
118 | 2 Turn on trace of function exit (_leave() macros) | ||
119 | 4 Turn on trace of internal debug points (_debug()) | ||
120 | |||
121 | This mask can also be set through sysfs, eg: | ||
122 | |||
123 | echo 5 >/sys/module/cachefiles/parameters/debug | ||
124 | |||
125 | |||
126 | ================== | ||
127 | STARTING THE CACHE | ||
128 | ================== | ||
129 | |||
130 | The cache is started by running the daemon. The daemon opens the cache device, | ||
131 | configures the cache and tells it to begin caching. At that point the cache | ||
132 | binds to fscache and the cache becomes live. | ||
133 | |||
134 | The daemon is run as follows: | ||
135 | |||
136 | /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>] | ||
137 | |||
138 | The flags are: | ||
139 | |||
140 | (*) -d | ||
141 | |||
142 | Increase the debugging level. This can be specified multiple times and | ||
143 | is cumulative with itself. | ||
144 | |||
145 | (*) -s | ||
146 | |||
147 | Send messages to stderr instead of syslog. | ||
148 | |||
149 | (*) -n | ||
150 | |||
151 | Don't daemonise and go into background. | ||
152 | |||
153 | (*) -f <configfile> | ||
154 | |||
155 | Use an alternative configuration file rather than the default one. | ||
156 | |||
157 | |||
158 | =============== | ||
159 | THINGS TO AVOID | ||
160 | =============== | ||
161 | |||
162 | Do not mount other things within the cache as this will cause problems. The | ||
163 | kernel module contains its own very cut-down path walking facility that ignores | ||
164 | mountpoints, but the daemon can't avoid them. | ||
165 | |||
166 | Do not create, rename or unlink files and directories in the cache whilst the | ||
167 | cache is active, as this may cause the state to become uncertain. | ||
168 | |||
169 | Renaming files in the cache might make objects appear to be other objects (the | ||
170 | filename is part of the lookup key). | ||
171 | |||
172 | Do not change or remove the extended attributes attached to cache files by the | ||
173 | cache as this will cause the cache state management to get confused. | ||
174 | |||
175 | Do not create files or directories in the cache, lest the cache get confused or | ||
176 | serve incorrect data. | ||
177 | |||
178 | Do not chmod files in the cache. The module creates things with minimal | ||
179 | permissions to prevent random users being able to access them directly. | ||
180 | |||
181 | |||
182 | ============= | ||
183 | CACHE CULLING | ||
184 | ============= | ||
185 | |||
186 | The cache may need culling occasionally to make space. This involves | ||
187 | discarding objects from the cache that have been used less recently than | ||
188 | anything else. Culling is based on the access time of data objects. Empty | ||
189 | directories are culled if not in use. | ||
190 | |||
191 | Cache culling is done on the basis of the percentage of blocks and the | ||
192 | percentage of files available in the underlying filesystem. There are six | ||
193 | "limits": | ||
194 | |||
195 | (*) brun | ||
196 | (*) frun | ||
197 | |||
198 | If the amount of free space and the number of available files in the cache | ||
199 | rises above both these limits, then culling is turned off. | ||
200 | |||
201 | (*) bcull | ||
202 | (*) fcull | ||
203 | |||
204 | If the amount of available space or the number of available files in the | ||
205 | cache falls below either of these limits, then culling is started. | ||
206 | |||
207 | (*) bstop | ||
208 | (*) fstop | ||
209 | |||
210 | If the amount of available space or the number of available files in the | ||
211 | cache falls below either of these limits, then no further allocation of | ||
212 | disk space or files is permitted until culling has raised things above | ||
213 | these limits again. | ||
214 | |||
215 | These must be configured thusly: | ||
216 | |||
217 | 0 <= bstop < bcull < brun < 100 | ||
218 | 0 <= fstop < fcull < frun < 100 | ||
219 | |||
220 | Note that these are percentages of available space and available files, and do | ||
221 | _not_ appear as 100 minus the percentage displayed by the "df" program. | ||
222 | |||
223 | The userspace daemon scans the cache to build up a table of cullable objects. | ||
224 | These are then culled in least recently used order. A new scan of the cache is | ||
225 | started as soon as space is made in the table. Objects will be skipped if | ||
226 | their atimes have changed or if the kernel module says it is still using them. | ||
227 | |||
228 | |||
229 | =============== | ||
230 | CACHE STRUCTURE | ||
231 | =============== | ||
232 | |||
233 | The CacheFiles module will create two directories in the directory it was | ||
234 | given: | ||
235 | |||
236 | (*) cache/ | ||
237 | |||
238 | (*) graveyard/ | ||
239 | |||
240 | The active cache objects all reside in the first directory. The CacheFiles | ||
241 | kernel module moves any retired or culled objects that it can't simply unlink | ||
242 | to the graveyard from which the daemon will actually delete them. | ||
243 | |||
244 | The daemon uses dnotify to monitor the graveyard directory, and will delete | ||
245 | anything that appears therein. | ||
246 | |||
247 | |||
248 | The module represents index objects as directories with the filename "I..." or | ||
249 | "J...". Note that the "cache/" directory is itself a special index. | ||
250 | |||
251 | Data objects are represented as files if they have no children, or directories | ||
252 | if they do. Their filenames all begin "D..." or "E...". If represented as a | ||
253 | directory, data objects will have a file in the directory called "data" that | ||
254 | actually holds the data. | ||
255 | |||
256 | Special objects are similar to data objects, except their filenames begin | ||
257 | "S..." or "T...". | ||
258 | |||
259 | |||
260 | If an object has children, then it will be represented as a directory. | ||
261 | Immediately in the representative directory are a collection of directories | ||
262 | named for hash values of the child object keys with an '@' prepended. Into | ||
263 | this directory, if possible, will be placed the representations of the child | ||
264 | objects: | ||
265 | |||
266 | INDEX INDEX INDEX DATA FILES | ||
267 | ========= ========== ================================= ================ | ||
268 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400 | ||
269 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry | ||
270 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry | ||
271 | cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry | ||
272 | |||
273 | |||
274 | If the key is so long that it exceeds NAME_MAX with the decorations added on to | ||
275 | it, then it will be cut into pieces, the first few of which will be used to | ||
276 | make a nest of directories, and the last one of which will be the objects | ||
277 | inside the last directory. The names of the intermediate directories will have | ||
278 | '+' prepended: | ||
279 | |||
280 | J1223/@23/+xy...z/+kl...m/Epqr | ||
281 | |||
282 | |||
283 | Note that keys are raw data, and not only may they exceed NAME_MAX in size, | ||
284 | they may also contain things like '/' and NUL characters, and so they may not | ||
285 | be suitable for turning directly into a filename. | ||
286 | |||
287 | To handle this, CacheFiles will use a suitably printable filename directly and | ||
288 | "base-64" encode ones that aren't directly suitable. The two versions of | ||
289 | object filenames indicate the encoding: | ||
290 | |||
291 | OBJECT TYPE PRINTABLE ENCODED | ||
292 | =============== =============== =============== | ||
293 | Index "I..." "J..." | ||
294 | Data "D..." "E..." | ||
295 | Special "S..." "T..." | ||
296 | |||
297 | Intermediate directories are always "@" or "+" as appropriate. | ||
298 | |||
299 | |||
300 | Each object in the cache has an extended attribute label that holds the object | ||
301 | type ID (required to distinguish special objects) and the auxiliary data from | ||
302 | the netfs. The latter is used to detect stale objects in the cache and update | ||
303 | or retire them. | ||
304 | |||
305 | |||
306 | Note that CacheFiles will erase from the cache any file it doesn't recognise or | ||
307 | any file of an incorrect type (such as a FIFO file or a device file). | ||
308 | |||
309 | |||
310 | ========================== | ||
311 | SECURITY MODEL AND SELINUX | ||
312 | ========================== | ||
313 | |||
314 | CacheFiles is implemented to deal properly with the LSM security features of | ||
315 | the Linux kernel and the SELinux facility. | ||
316 | |||
317 | One of the problems that CacheFiles faces is that it is generally acting on | ||
318 | behalf of a process, and running in that process's context, and that includes a | ||
319 | security context that is not appropriate for accessing the cache - either | ||
320 | because the files in the cache are inaccessible to that process, or because if | ||
321 | the process creates a file in the cache, that file may be inaccessible to other | ||
322 | processes. | ||
323 | |||
324 | The way CacheFiles works is to temporarily change the security context (fsuid, | ||
325 | fsgid and actor security label) that the process acts as - without changing the | ||
326 | security context of the process when it is the target of an operation performed by | ||
327 | some other process (so signalling and suchlike still work correctly). | ||
328 | |||
329 | |||
330 | When the CacheFiles module is asked to bind to its cache, it: | ||
331 | |||
332 | (1) Finds the security label attached to the root cache directory and uses | ||
333 | that as the security label with which it will create files. By default, | ||
334 | this is: | ||
335 | |||
336 | cachefiles_var_t | ||
337 | |||
338 | (2) Finds the security label of the process which issued the bind request | ||
339 | (presumed to be the cachefilesd daemon), which by default will be: | ||
340 | |||
341 | cachefilesd_t | ||
342 | |||
343 | and asks LSM to supply a security ID as which it should act given the | ||
344 | daemon's label. By default, this will be: | ||
345 | |||
346 | cachefiles_kernel_t | ||
347 | |||
348 | SELinux transitions the daemon's security ID to the module's security ID | ||
349 | based on a rule of this form in the policy: | ||
350 | |||
351 | type_transition <daemon's-ID> kernel_t : process <module's-ID>; | ||
352 | |||
353 | For instance: | ||
354 | |||
355 | type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t; | ||
356 | |||
357 | |||
358 | The module's security ID gives it permission to create, move and remove files | ||
359 | and directories in the cache, to find and access directories and files in the | ||
360 | cache, to set and access extended attributes on cache objects, and to read and | ||
361 | write files in the cache. | ||
362 | |||
363 | The daemon's security ID gives it only a very restricted set of permissions: it | ||
364 | may scan directories, stat files and erase files and directories. It may | ||
365 | not read or write files in the cache, and so it is precluded from accessing the | ||
366 | data cached therein; nor is it permitted to create new files in the cache. | ||
367 | |||
368 | |||
369 | There are policy source files available in: | ||
370 | |||
371 | http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2 | ||
372 | |||
373 | and later versions. In that tarball, see the files: | ||
374 | |||
375 | cachefilesd.te | ||
376 | cachefilesd.fc | ||
377 | cachefilesd.if | ||
378 | |||
379 | They are built and installed directly by the RPM. | ||
380 | |||
381 | If a non-RPM based system is being used, then copy the above files to their own | ||
382 | directory and run: | ||
383 | |||
384 | make -f /usr/share/selinux/devel/Makefile | ||
385 | semodule -i cachefilesd.pp | ||
386 | |||
387 | You will need checkpolicy and selinux-policy-devel installed prior to the | ||
388 | build. | ||
389 | |||
390 | |||
391 | By default, the cache is located in /var/fscache, but if it is desirable that | ||
392 | it should be elsewhere, then either the above policy files must be altered, or | ||
393 | an auxiliary policy must be installed to label the alternate location of the | ||
394 | cache. | ||
395 | |||
396 | For instructions on how to add an auxiliary policy to enable the cache to be | ||
397 | located elsewhere when SELinux is in enforcing mode, please see: | ||
398 | |||
399 | /usr/share/doc/cachefilesd-*/move-cache.txt | ||
400 | |||
401 | when the cachefilesd rpm is installed; alternatively, the document can be found | ||
402 | in the sources. | ||
403 | |||
404 | |||
405 | ================== | ||
406 | A NOTE ON SECURITY | ||
407 | ================== | ||
408 | |||
409 | CacheFiles makes use of the split security in the task_struct. It allocates | ||
410 | its own task_security structure, and redirects current->act_as to point to it | ||
411 | when it acts on behalf of another process, in that process's context. | ||
412 | |||
413 | The reason it does this is that it calls vfs_mkdir() and suchlike rather than | ||
414 | bypassing security and calling inode ops directly. Therefore the VFS and LSM | ||
415 | may deny the CacheFiles access to the cache data because under some | ||
416 | circumstances the caching code is running in the security context of whatever | ||
417 | process issued the original syscall on the netfs. | ||
418 | |||
419 | Furthermore, should CacheFiles create a file or directory, the security | ||
420 | parameters with which that object is created (UID, GID, security label) would be | ||
421 | derived from that process that issued the system call, thus potentially | ||
422 | preventing other processes from accessing the cache - including CacheFiles's | ||
423 | cache management daemon (cachefilesd). | ||
424 | |||
425 | What is required is to temporarily override the security of the process that | ||
426 | issued the system call. We can't, however, just do an in-place change of the | ||
427 | security data as that affects the process as an object, not just as a subject. | ||
428 | This means it may lose signals or ptrace events for example, and affects what | ||
429 | the process looks like in /proc. | ||
430 | |||
431 | So CacheFiles makes use of a logical split in the security between the | ||
432 | objective security (task->sec) and the subjective security (task->act_as). The | ||
433 | objective security holds the intrinsic security properties of a process and is | ||
434 | never overridden. This is what appears in /proc, and is what is used when a | ||
435 | process is the target of an operation by some other process (SIGKILL for | ||
436 | example). | ||
437 | |||
438 | The subjective security holds the active security properties of a process, and | ||
439 | may be overridden. This is not seen externally, and is used when a process | ||
440 | acts upon another object, for example SIGKILLing another process or opening a | ||
441 | file. | ||
442 | |||
443 | LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request | ||
444 | for CacheFiles to run in a context of a specific security label, or to create | ||
445 | files and directories with another security label. | ||
446 | |||
447 | |||
448 | ======================= | ||
449 | STATISTICAL INFORMATION | ||
450 | ======================= | ||
451 | |||
452 | If FS-Cache is compiled with the following option enabled: | ||
453 | |||
454 | CONFIG_CACHEFILES_HISTOGRAM=y | ||
455 | |||
456 | then it will gather certain statistics and display them through a proc file. | ||
457 | |||
458 | (*) /proc/fs/cachefiles/histogram | ||
459 | |||
460 | cat /proc/fs/cachefiles/histogram | ||
461 | JIFS SECS LOOKUPS MKDIRS CREATES | ||
462 | ===== ===== ========= ========= ========= | ||
463 | |||
464 | This shows the breakdown of the number of times each amount of time | ||
465 | between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The | ||
466 | columns are as follows: | ||
467 | |||
468 | COLUMN TIME MEASUREMENT | ||
469 | ======= ======================================================= | ||
470 | LOOKUPS Length of time to perform a lookup on the backing fs | ||
471 | MKDIRS Length of time to perform a mkdir on the backing fs | ||
472 | CREATES Length of time to perform a create on the backing fs | ||
473 | |||
474 | Each row shows the number of events that took a particular range of times. | ||
475 | Each step is 1 jiffy in size. The JIFS column indicates the particular | ||
476 | jiffy range covered, and the SECS field the equivalent number of seconds. | ||
477 | |||
478 | |||
479 | ========= | ||
480 | DEBUGGING | ||
481 | ========= | ||
482 | |||
483 | If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime | ||
484 | debugging enabled by adjusting the value in: | ||
485 | |||
486 | /sys/module/cachefiles/parameters/debug | ||
487 | |||
488 | This is a bitmask of debugging streams to enable: | ||
489 | |||
490 | BIT VALUE STREAM POINT | ||
491 | ======= ======= =============================== ======================= | ||
492 | 0 1 General Function entry trace | ||
493 | 1 2 Function exit trace | ||
494 | 2 4 General | ||
495 | |||
496 | The appropriate set of values should be OR'd together and the result written to | ||
497 | the control file. For example: | ||
498 | |||
499 | echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug | ||
500 | |||
501 | will turn on all function entry debugging. | ||
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt new file mode 100644 index 000000000000..9e94b9491d89 --- /dev/null +++ b/Documentation/filesystems/caching/fscache.txt | |||
@@ -0,0 +1,333 @@ | |||
1 | ========================== | ||
2 | General Filesystem Caching | ||
3 | ========================== | ||
4 | |||
5 | ======== | ||
6 | OVERVIEW | ||
7 | ======== | ||
8 | |||
9 | This facility is a general purpose cache for network filesystems, though it | ||
10 | could be used for caching other things such as ISO9660 filesystems too. | ||
11 | |||
12 | FS-Cache mediates between cache backends (such as CacheFS) and network | ||
13 | filesystems: | ||
14 | |||
15 | +---------+ | ||
16 | | | +--------------+ | ||
17 | | NFS |--+ | | | ||
18 | | | | +-->| CacheFS | | ||
19 | +---------+ | +----------+ | | /dev/hda5 | | ||
20 | | | | | +--------------+ | ||
21 | +---------+ +-->| | | | ||
22 | | | | |--+ | ||
23 | | AFS |----->| FS-Cache | | ||
24 | | | | |--+ | ||
25 | +---------+ +-->| | | | ||
26 | | | | | +--------------+ | ||
27 | +---------+ | +----------+ | | | | ||
28 | | | | +-->| CacheFiles | | ||
29 | | ISOFS |--+ | /var/cache | | ||
30 | | | +--------------+ | ||
31 | +---------+ | ||
32 | |||
33 | Or to look at it another way, FS-Cache is a module that provides a caching | ||
34 | facility to a network filesystem such that the cache is transparent to the | ||
35 | user: | ||
36 | |||
37 | +---------+ | ||
38 | | | | ||
39 | | Server | | ||
40 | | | | ||
41 | +---------+ | ||
42 | | NETWORK | ||
43 | ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
44 | | | ||
45 | | +----------+ | ||
46 | V | | | ||
47 | +---------+ | | | ||
48 | | | | | | ||
49 | | NFS |----->| FS-Cache | | ||
50 | | | | |--+ | ||
51 | +---------+ | | | +--------------+ +--------------+ | ||
52 | | | | | | | | | | ||
53 | V +----------+ +-->| CacheFiles |-->| Ext3 | | ||
54 | +---------+ | /var/cache | | /dev/sda6 | | ||
55 | | | +--------------+ +--------------+ | ||
56 | | VFS | ^ ^ | ||
57 | | | | | | ||
58 | +---------+ +--------------+ | | ||
59 | | KERNEL SPACE | | | ||
60 | ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~ | ||
61 | | USER SPACE | | | ||
62 | V | | | ||
63 | +---------+ +--------------+ | ||
64 | | | | | | ||
65 | | Process | | cachefilesd | | ||
66 | | | | | | ||
67 | +---------+ +--------------+ | ||
68 | |||
69 | |||
70 | FS-Cache does not follow the idea of completely loading every netfs file | ||
71 | opened in its entirety into a cache before permitting it to be accessed and | ||
72 | then serving the pages out of that cache rather than the netfs inode because: | ||
73 | |||
74 | (1) It must be practical to operate without a cache. | ||
75 | |||
76 | (2) The size of any accessible file must not be limited to the size of the | ||
77 | cache. | ||
78 | |||
79 | (3) The combined size of all opened files (this includes mapped libraries) | ||
80 | must not be limited to the size of the cache. | ||
81 | |||
82 | (4) The user should not be forced to download an entire file just to do a | ||
83 | one-off access of a small portion of it (such as might be done with the | ||
84 | "file" program). | ||
85 | |||
86 | It instead serves the cache out in PAGE_SIZE chunks as and when requested by | ||
87 | the netfs('s) using it. | ||
88 | |||
89 | |||
90 | FS-Cache provides the following facilities: | ||
91 | |||
92 | (1) More than one cache can be used at once. Caches can be selected | ||
93 | explicitly by use of tags. | ||
94 | |||
95 | (2) Caches can be added / removed at any time. | ||
96 | |||
97 | (3) The netfs is provided with an interface that allows either party to | ||
98 | withdraw caching facilities from a file (required for (2)). | ||
99 | |||
100 | (4) The interface to the netfs returns as few errors as possible, preferring | ||
101 | rather to let the netfs remain oblivious. | ||
102 | |||
103 | (5) Cookies are used to represent indices, files and other objects to the | ||
104 | netfs. The simplest cookie is just a NULL pointer - indicating nothing | ||
105 | cached there. | ||
106 | |||
107 | (6) The netfs is allowed to propose - dynamically - any index hierarchy it | ||
108 | desires, though it must be aware that the index search function is | ||
109 | recursive, stack space is limited, and indices can only be children of | ||
110 | indices. | ||
111 | |||
112 | (7) Data I/O is done direct to and from the netfs's pages. The netfs | ||
113 | indicates that page A is at index B of the data-file represented by cookie | ||
114 | C, and that it should be read or written. The cache backend may or may | ||
115 | not start I/O on that page, but if it does, a netfs callback will be | ||
116 | invoked to indicate completion. The I/O may be either synchronous or | ||
117 | asynchronous. | ||
118 | |||
119 | (8) Cookies can be "retired" upon release. At this point FS-Cache will mark | ||
120 | them as obsolete and the index hierarchy rooted at that point will get | ||
121 | recycled. | ||
122 | |||
123 | (9) The netfs provides a "match" function for index searches. In addition to | ||
124 | saying whether a match was made or not, this can also specify that an | ||
125 | entry should be updated or deleted. | ||
126 | |||
127 | (10) As much as possible is done asynchronously. | ||
128 | |||
129 | |||
130 | FS-Cache maintains a virtual indexing tree in which all indices, files, objects | ||
131 | and pages are kept. Bits of this tree may actually reside in one or more | ||
132 | caches. | ||
133 | |||
134 | FSDEF | ||
135 | | | ||
136 | +------------------------------------+ | ||
137 | | | | ||
138 | NFS AFS | ||
139 | | | | ||
140 | +--------------------------+ +-----------+ | ||
141 | | | | | | ||
142 | homedir mirror afs.org redhat.com | ||
143 | | | | | ||
144 | +------------+ +---------------+ +----------+ | ||
145 | | | | | | | | ||
146 | 00001 00002 00007 00125 vol00001 vol00002 | ||
147 | | | | | | | ||
148 | +---+---+ +-----+ +---+ +------+------+ +-----+----+ | ||
149 | | | | | | | | | | | | | | | ||
150 | PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak | ||
151 | | | | ||
152 | PG0 +-------+ | ||
153 | | | | ||
154 | 00001 00003 | ||
155 | | | ||
156 | +---+---+ | ||
157 | | | | | ||
158 | PG0 PG1 PG2 | ||
159 | |||
160 | In the example above, you can see two netfs's being backed: NFS and AFS. These | ||
161 | have different index hierarchies: | ||
162 | |||
163 | (*) The NFS primary index contains per-server indices. Each server index is | ||
164 | indexed by NFS file handles to get data file objects. Each data file | ||
165 | object can have an array of pages, but may also have further child | ||
166 | objects, such as extended attributes and directory entries. Extended | ||
167 | attribute objects themselves have page-array contents. | ||
168 | |||
169 | (*) The AFS primary index contains per-cell indices. Each cell index contains | ||
170 | per-logical-volume indices. Each volume index contains up to three | ||
171 | indices for the read-write, read-only and backup mirrors of those volumes. | ||
172 | Each of these contains vnode data file objects, each of which contains an | ||
173 | array of pages. | ||
174 | |||
175 | The very top index is the FS-Cache master index in which individual netfs's | ||
176 | have entries. | ||
177 | |||
178 | Any index object may reside in more than one cache, provided it only has index | ||
179 | children. Any index with non-index object children will be assumed to only | ||
180 | reside in one cache. | ||
181 | |||
182 | |||
183 | The netfs API to FS-Cache can be found in: | ||
184 | |||
185 | Documentation/filesystems/caching/netfs-api.txt | ||
186 | |||
187 | The cache backend API to FS-Cache can be found in: | ||
188 | |||
189 | Documentation/filesystems/caching/backend-api.txt | ||
190 | |||
191 | A description of the internal representations and object state machine can be | ||
192 | found in: | ||
193 | |||
194 | Documentation/filesystems/caching/object.txt | ||
195 | |||
196 | |||
197 | ======================= | ||
198 | STATISTICAL INFORMATION | ||
199 | ======================= | ||
200 | |||
201 | If FS-Cache is compiled with the following options enabled: | ||
202 | |||
203 | CONFIG_FSCACHE_STATS=y | ||
204 | CONFIG_FSCACHE_HISTOGRAM=y | ||
205 | |||
206 | then it will gather certain statistics and display them through a number of | ||
207 | proc files. | ||
208 | |||
209 | (*) /proc/fs/fscache/stats | ||
210 | |||
211 | This shows counts of a number of events that can happen in FS-Cache: | ||
212 | |||
213 | CLASS EVENT MEANING | ||
214 | ======= ======= ======================================================= | ||
215 | Cookies idx=N Number of index cookies allocated | ||
216 | dat=N Number of data storage cookies allocated | ||
217 | spc=N Number of special cookies allocated | ||
218 | Objects alc=N Number of objects allocated | ||
219 | nal=N Number of object allocation failures | ||
220 | avl=N Number of objects that reached the available state | ||
221 | ded=N Number of objects that reached the dead state | ||
222 | ChkAux non=N Number of objects that didn't have a coherency check | ||
223 | ok=N Number of objects that passed a coherency check | ||
224 | upd=N Number of objects that needed a coherency data update | ||
225 | obs=N Number of objects that were declared obsolete | ||
226 | Pages mrk=N Number of pages marked as being cached | ||
227 | unc=N Number of uncache page requests seen | ||
228 | Acquire n=N Number of acquire cookie requests seen | ||
229 | nul=N Number of acq reqs given a NULL parent | ||
230 | noc=N Number of acq reqs rejected due to no cache available | ||
231 | ok=N Number of acq reqs succeeded | ||
232 | nbf=N Number of acq reqs rejected due to error | ||
233 | oom=N Number of acq reqs failed on ENOMEM | ||
234 | Lookups n=N Number of lookup calls made on cache backends | ||
235 | neg=N Number of negative lookups made | ||
236 | pos=N Number of positive lookups made | ||
237 | crt=N Number of objects created by lookup | ||
238 | Updates n=N Number of update cookie requests seen | ||
239 | nul=N Number of upd reqs given a NULL parent | ||
240 | run=N Number of upd reqs granted CPU time | ||
241 | Relinqs n=N Number of relinquish cookie requests seen | ||
242 | nul=N Number of rlq reqs given a NULL parent | ||
243 | wcr=N Number of rlq reqs waited on completion of creation | ||
244 | AttrChg n=N Number of attribute changed requests seen | ||
245 | ok=N Number of attr changed requests queued | ||
246 | nbf=N Number of attr changed rejected -ENOBUFS | ||
247 | oom=N Number of attr changed failed -ENOMEM | ||
248 | run=N Number of attr changed ops given CPU time | ||
249 | Allocs n=N Number of allocation requests seen | ||
250 | ok=N Number of successful alloc reqs | ||
251 | wt=N Number of alloc reqs that waited on lookup completion | ||
252 | nbf=N Number of alloc reqs rejected -ENOBUFS | ||
253 | ops=N Number of alloc reqs submitted | ||
254 | owt=N Number of alloc reqs waited for CPU time | ||
255 | Retrvls n=N Number of retrieval (read) requests seen | ||
256 | ok=N Number of successful retr reqs | ||
257 | wt=N Number of retr reqs that waited on lookup completion | ||
258 | nod=N Number of retr reqs returned -ENODATA | ||
259 | nbf=N Number of retr reqs rejected -ENOBUFS | ||
260 | int=N Number of retr reqs aborted -ERESTARTSYS | ||
261 | oom=N Number of retr reqs failed -ENOMEM | ||
262 | ops=N Number of retr reqs submitted | ||
263 | owt=N Number of retr reqs waited for CPU time | ||
264 | Stores n=N Number of storage (write) requests seen | ||
265 | ok=N Number of successful store reqs | ||
266 | agn=N Number of store reqs on a page already pending storage | ||
267 | nbf=N Number of store reqs rejected -ENOBUFS | ||
268 | oom=N Number of store reqs failed -ENOMEM | ||
269 | ops=N Number of store reqs submitted | ||
270 | run=N Number of store reqs granted CPU time | ||
271 | Ops pend=N Number of times async ops added to pending queues | ||
272 | run=N Number of times async ops given CPU time | ||
273 | enq=N Number of times async ops queued for processing | ||
274 | dfr=N Number of async ops queued for deferred release | ||
275 | rel=N Number of async ops released | ||
276 | gc=N Number of deferred-release async ops garbage collected | ||
277 | |||
278 | |||
279 | (*) /proc/fs/fscache/histogram | ||
280 | |||
281 | cat /proc/fs/fscache/histogram | ||
282 | JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS | ||
283 | ===== ===== ========= ========= ========= ========= ========= | ||
284 | |||
285 | This shows the breakdown of the number of times each amount of time | ||
286 | between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The | ||
287 | columns are as follows: | ||
288 | |||
289 | COLUMN TIME MEASUREMENT | ||
290 | ======= ======================================================= | ||
291 | OBJ INST Length of time to instantiate an object | ||
292 | OP RUNS Length of time a call to process an operation took | ||
293 | OBJ RUNS Length of time a call to process an object event took | ||
294 | RETRV DLY Time between requesting a read and lookup completing | ||
295 | RETRIEVLS Time between beginning and end of a retrieval | ||
296 | |||
297 | Each row shows the number of events that took a particular range of times. | ||
298 | Each step is 1 jiffy in size. The JIFS column indicates the particular | ||
299 | jiffy range covered, and the SECS field the equivalent number of seconds. | ||
300 | |||
301 | |||
302 | ========= | ||
303 | DEBUGGING | ||
304 | ========= | ||
305 | |||
306 | If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime | ||
307 | debugging enabled by adjusting the value in: | ||
308 | |||
309 | /sys/module/fscache/parameters/debug | ||
310 | |||
311 | This is a bitmask of debugging streams to enable: | ||
312 | |||
313 | BIT VALUE STREAM POINT | ||
314 | ======= ======= =============================== ======================= | ||
315 | 0 1 Cache management Function entry trace | ||
316 | 1 2 Function exit trace | ||
317 | 2 4 General | ||
318 | 3 8 Cookie management Function entry trace | ||
319 | 4 16 Function exit trace | ||
320 | 5 32 General | ||
321 | 6 64 Page handling Function entry trace | ||
322 | 7 128 Function exit trace | ||
323 | 8 256 General | ||
324 | 9 512 Operation management Function entry trace | ||
325 | 10 1024 Function exit trace | ||
326 | 11 2048 General | ||
327 | |||
328 | The appropriate set of values should be OR'd together and the result written to | ||
329 | the control file. For example: | ||
330 | |||
331 | echo $((1|8|64)) >/sys/module/fscache/parameters/debug | ||
332 | |||
333 | will turn on all function entry debugging. | ||
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt new file mode 100644 index 000000000000..4db125b3a5c6 --- /dev/null +++ b/Documentation/filesystems/caching/netfs-api.txt | |||
@@ -0,0 +1,778 @@ | |||
1 | =============================== | ||
2 | FS-CACHE NETWORK FILESYSTEM API | ||
3 | =============================== | ||
4 | |||
5 | There's an API by which a network filesystem can make use of the FS-Cache | ||
6 | facilities. This is based around a number of principles: | ||
7 | |||
8 | (1) Caches can store a number of different object types. There are two main | ||
9 | object types: indices and files. The first is a special type used by | ||
10 | FS-Cache to make finding objects faster and to make retiring of groups of | ||
11 | objects easier. | ||
12 | |||
13 | (2) Every index, file or other object is represented by a cookie. This cookie | ||
14 | may or may not have anything associated with it, but the netfs doesn't | ||
15 | need to care. | ||
16 | |||
17 | (3) Barring the top-level index (one entry per cached netfs), the index | ||
18 | hierarchy for each netfs is structured according to the whim of the netfs. | ||
19 | |||
20 | This API is declared in <linux/fscache.h>. | ||
21 | |||
22 | This document contains the following sections: | ||
23 | |||
24 | (1) Network filesystem definition | ||
25 | (2) Index definition | ||
26 | (3) Object definition | ||
27 | (4) Network filesystem (un)registration | ||
28 | (5) Cache tag lookup | ||
29 | (6) Index registration | ||
30 | (7) Data file registration | ||
31 | (8) Miscellaneous object registration | ||
32 | (9) Setting the data file size | ||
33 | (10) Page alloc/read/write | ||
34 | (11) Page uncaching | ||
35 | (12) Index and data file update | ||
36 | (13) Miscellaneous cookie operations | ||
37 | (14) Cookie unregistration | ||
38 | (15) Index and data file invalidation | ||
39 | (16) FS-Cache specific page flags. | ||
40 | |||
41 | |||
42 | ============================= | ||
43 | NETWORK FILESYSTEM DEFINITION | ||
44 | ============================= | ||
45 | |||
46 | FS-Cache needs a description of the network filesystem. This is specified | ||
47 | using a record of the following structure: | ||
48 | |||
49 | struct fscache_netfs { | ||
50 | uint32_t version; | ||
51 | const char *name; | ||
52 | struct fscache_cookie *primary_index; | ||
53 | ... | ||
54 | }; | ||
55 | |||
56 | The first two fields should be filled in before registration, and the third | ||
57 | will be filled in by the registration function; any other fields should just be | ||
58 | ignored and are for internal use only. | ||
59 | |||
60 | The fields are: | ||
61 | |||
62 | (1) The name of the netfs (used as the key in the toplevel index). | ||
63 | |||
64 | (2) The version of the netfs (if the name matches but the version doesn't, the | ||
65 | entire in-cache hierarchy for this netfs will be scrapped and begun | ||
66 | afresh). | ||
67 | |||
68 | (3) The cookie representing the primary index will be allocated according to | ||
69 | another parameter passed into the registration function. | ||
70 | |||
71 | For example, kAFS (linux/fs/afs/) uses the following definitions to describe | ||
72 | itself: | ||
73 | |||
74 | struct fscache_netfs afs_cache_netfs = { | ||
75 | .version = 0, | ||
76 | .name = "afs", | ||
77 | }; | ||
78 | |||
79 | |||
80 | ================ | ||
81 | INDEX DEFINITION | ||
82 | ================ | ||
83 | |||
84 | Indices are used for two purposes: | ||
85 | |||
86 | (1) To aid the finding of a file based on a series of keys (such as AFS's | ||
87 | "cell", "volume ID", "vnode ID"). | ||
88 | |||
89 | (2) To make it easier to discard a subset of all the files cached based around | ||
90 | a particular key - for instance to mirror the removal of an AFS volume. | ||
91 | |||
92 | However, since it's unlikely that any two netfs's are going to want to define | ||
93 | their index hierarchies in quite the same way, FS-Cache tries to impose as few | ||
94 | restraints as possible on how an index is structured and where it is placed in | ||
95 | the tree. The netfs can even mix indices and data files at the same level, but | ||
96 | it's not recommended. | ||
97 | |||
98 | Each index entry consists of a key of indeterminate length plus some auxiliary | ||
99 | data, also of indeterminate length. | ||
100 | |||
101 | There are some limits on indices: | ||
102 | |||
103 | (1) Any index containing non-index objects should be restricted to a single | ||
104 | cache. Any such objects created within an index will be created in the | ||
105 | first cache only. The cache in which an index is created can be | ||
106 | controlled by cache tags (see below). | ||
107 | |||
108 | (2) The entry data must be atomically journallable, so it is limited to about | ||
109 | 400 bytes at present. At least 400 bytes will be available. | ||
110 | |||
111 | (3) The depth of the index tree should be judged with care as the search | ||
112 | function is recursive. Too many layers will run the kernel out of stack. | ||
113 | |||
114 | |||
115 | ================= | ||
116 | OBJECT DEFINITION | ||
117 | ================= | ||
118 | |||
119 | To define an object, a structure of the following type should be filled out: | ||
120 | |||
121 | struct fscache_cookie_def | ||
122 | { | ||
123 | uint8_t name[16]; | ||
124 | uint8_t type; | ||
125 | |||
126 | struct fscache_cache_tag *(*select_cache)( | ||
127 | const void *parent_netfs_data, | ||
128 | const void *cookie_netfs_data); | ||
129 | |||
130 | uint16_t (*get_key)(const void *cookie_netfs_data, | ||
131 | void *buffer, | ||
132 | uint16_t bufmax); | ||
133 | |||
134 | void (*get_attr)(const void *cookie_netfs_data, | ||
135 | uint64_t *size); | ||
136 | |||
137 | uint16_t (*get_aux)(const void *cookie_netfs_data, | ||
138 | void *buffer, | ||
139 | uint16_t bufmax); | ||
140 | |||
141 | enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, | ||
142 | const void *data, | ||
143 | uint16_t datalen); | ||
144 | |||
145 | void (*get_context)(void *cookie_netfs_data, void *context); | ||
146 | |||
147 | void (*put_context)(void *cookie_netfs_data, void *context); | ||
148 | |||
149 | void (*mark_pages_cached)(void *cookie_netfs_data, | ||
150 | struct address_space *mapping, | ||
151 | struct pagevec *cached_pvec); | ||
152 | |||
153 | void (*now_uncached)(void *cookie_netfs_data); | ||
154 | }; | ||
155 | |||
156 | This has the following fields: | ||
157 | |||
158 | (1) The type of the object [mandatory]. | ||
159 | |||
160 | This is one of the following values: | ||
161 | |||
162 | (*) FSCACHE_COOKIE_TYPE_INDEX | ||
163 | |||
164 | This defines an index, which is a special FS-Cache type. | ||
165 | |||
166 | (*) FSCACHE_COOKIE_TYPE_DATAFILE | ||
167 | |||
168 | This defines an ordinary data file. | ||
169 | |||
170 | (*) Any other value between 2 and 255 | ||
171 | |||
172 | This defines an extraordinary object such as an XATTR. | ||
173 | |||
174 | (2) The name of the object type (NUL terminated unless all 16 chars are used) | ||
175 | [optional]. | ||
176 | |||
177 | (3) A function to select the cache in which to store an index [optional]. | ||
178 | |||
179 | This function is invoked when an index needs to be instantiated in a cache | ||
180 | during the instantiation of a non-index object. Only the immediate index | ||
181 | parent for the non-index object will be queried. Any indices above that | ||
182 | in the hierarchy may be stored in multiple caches. This function does not | ||
183 | need to be supplied for any non-index object or any index that will only | ||
184 | have index children. | ||
185 | |||
186 | If this function is not supplied or if it returns NULL then the first | ||
187 | cache in the parent's list will be chosen, or failing that, the first | ||
188 | cache in the master list. | ||
189 | |||
190 | (4) A function to retrieve an object's key from the netfs [mandatory]. | ||
191 | |||
192 | This function will be called with the netfs data that was passed to the | ||
193 | cookie acquisition function and the maximum length of key data that it may | ||
194 | provide. It should write the required key data into the given buffer and | ||
195 | return the quantity it wrote. | ||
196 | |||
197 | (5) A function to retrieve attribute data from the netfs [optional]. | ||
198 | |||
199 | This function will be called with the netfs data that was passed to the | ||
200 | cookie acquisition function. It should return the size of the file if | ||
201 | this is a data file. The size may be used to govern how much cache must | ||
202 | be reserved for this file in the cache. | ||
203 | |||
204 | If the function is absent, a file size of 0 is assumed. | ||
205 | |||
206 | (6) A function to retrieve auxiliary data from the netfs [optional]. | ||
207 | |||
208 | This function will be called with the netfs data that was passed to the | ||
209 | cookie acquisition function and the maximum length of auxiliary data that | ||
210 | it may provide. It should write the auxiliary data into the given buffer | ||
211 | and return the quantity it wrote. | ||
212 | |||
213 | If this function is absent, the auxiliary data length will be set to 0. | ||
214 | |||
215 | The length of the auxiliary data buffer may be dependent on the key | ||
216 | length. A netfs mustn't rely on being able to provide more than 400 bytes | ||
217 | for both. | ||
218 | |||
219 | (7) A function to check the auxiliary data [optional]. | ||
220 | |||
221 | This function will be called to check that a match found in the cache for | ||
222 | this object is valid. For instance with AFS it could check the auxiliary | ||
223 | data against the data version number returned by the server to determine | ||
224 | whether the index entry in a cache is still valid. | ||
225 | |||
226 | If this function is absent, it will be assumed that matching objects in a | ||
227 | cache are always valid. | ||
228 | |||
229 | If present, the function should return one of the following values: | ||
230 | |||
231 | (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is | ||
232 | (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update | ||
233 | (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted | ||
234 | |||
235 | This function can also be used to extract data from the auxiliary data in | ||
236 | the cache and copy it into the netfs's structures. | ||
237 | |||
238 | (8) A pair of functions to manage contexts for the completion callback | ||
239 | [optional]. | ||
240 | |||
241 | The cache read/write functions are passed a context which is then passed | ||
242 | to the I/O completion callback function. To ensure this context remains | ||
243 | valid until after the I/O completion is called, two functions may be | ||
244 | provided: one to get an extra reference on the context, and one to drop a | ||
245 | reference to it. | ||
246 | |||
247 | If the context is not used or is a type of object that won't go out of | ||
248 | scope, then these functions are not required. These functions are not | ||
249 | required for indices as indices may not contain data. These functions may | ||
250 | be called in interrupt context and so may not sleep. | ||
251 | |||
252 | (9) A function to mark a page as retaining cache metadata [optional]. | ||
253 | |||
254 | This is called by the cache to indicate that it is retaining in-memory | ||
255 | information for this page and that the netfs should uncache the page when | ||
256 | it has finished. This does not indicate whether there's data on the disk | ||
257 | or not. Note that several pages at once may be presented for marking. | ||
258 | |||
259 | The PG_fscache bit is set on the pages before this function would be | ||
260 | called, so the function need not be provided if this is sufficient. | ||
261 | |||
262 | This function is not required for indices as they're not permitted data. | ||
263 | |||
264 | (10) A function to unmark all the pages retaining cache metadata [mandatory]. | ||
265 | |||
266 | This is called by FS-Cache to indicate that a backing store is being | ||
267 | unbound from a cookie and that all the marks on the pages should be | ||
268 | cleared to prevent confusion. Note that the cache will have torn down all | ||
269 | its tracking information so that the pages don't need to be explicitly | ||
270 | uncached. | ||
271 | |||
272 | This function is not required for indices as they're not permitted data. | ||
273 | |||
274 | |||
275 | =================================== | ||
276 | NETWORK FILESYSTEM (UN)REGISTRATION | ||
277 | =================================== | ||
278 | |||
279 | The first step is to declare the network filesystem to the cache. This also | ||
280 | involves specifying the layout of the primary index (for AFS, this would be the | ||
281 | "cell" level). | ||
282 | |||
283 | The registration function is: | ||
284 | |||
285 | int fscache_register_netfs(struct fscache_netfs *netfs); | ||
286 | |||
287 | It just takes a pointer to the netfs definition. It returns 0 or an error as | ||
288 | appropriate. | ||
289 | |||
290 | For kAFS, registration is done as follows: | ||
291 | |||
292 | ret = fscache_register_netfs(&afs_cache_netfs); | ||
293 | |||
294 | The last step is, of course, unregistration: | ||
295 | |||
296 | void fscache_unregister_netfs(struct fscache_netfs *netfs); | ||
297 | |||
298 | |||
299 | ================ | ||
300 | CACHE TAG LOOKUP | ||
301 | ================ | ||
302 | |||
303 | FS-Cache permits the use of more than one cache. To permit particular index | ||
304 | subtrees to be bound to particular caches, the second step is to look up cache | ||
305 | representation tags. This step is optional; it can be left entirely up to | ||
306 | FS-Cache as to which cache should be used. The problem with doing that is that | ||
307 | FS-Cache will always pick the first cache that was registered. | ||
308 | |||
309 | To get the representation for a named tag: | ||
310 | |||
311 | struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name); | ||
312 | |||
313 | This takes a text string as the name and returns a representation of a tag. It | ||
314 | will never return an error. It may return a dummy tag, however, if it runs out | ||
315 | of memory; this will inhibit caching with this tag. | ||
316 | |||
317 | Any representation so obtained must be released by passing it to this function: | ||
318 | |||
319 | void fscache_release_cache_tag(struct fscache_cache_tag *tag); | ||
320 | |||
321 | The tag will be retrieved by FS-Cache when it calls the object definition | ||
322 | operation select_cache(). | ||
323 | |||
324 | |||
325 | ================== | ||
326 | INDEX REGISTRATION | ||
327 | ================== | ||
328 | |||
329 | The third step is to inform FS-Cache about part of an index hierarchy that can | ||
330 | be used to locate files. This is done by requesting a cookie for each index in | ||
331 | the path to the file: | ||
332 | |||
333 | struct fscache_cookie * | ||
334 | fscache_acquire_cookie(struct fscache_cookie *parent, | ||
335 | const struct fscache_cookie_def *def, | ||
336 | void *netfs_data); | ||
337 | |||
338 | This function creates an index entry in the index represented by parent, | ||
339 | filling in the index entry by calling the operations pointed to by def. | ||
340 | |||
341 | Note that this function never returns an error - all errors are handled | ||
342 | internally. It may, however, return NULL to indicate no cookie. It is quite | ||
343 | acceptable to pass this token back to this function as the parent to another | ||
344 | acquisition (or even to the relinquish cookie, read page and write page | ||
345 | functions - see below). | ||
346 | |||
347 | Note also that no indices are actually created in a cache until a non-index | ||
348 | object needs to be created somewhere down the hierarchy. Furthermore, an index | ||
349 | may be created in several different caches independently at different times. | ||
350 | This is all handled transparently, and the netfs doesn't see any of it. | ||
351 | |||
352 | For example, with AFS, a cell would be added to the primary index. This index | ||
353 | entry would have a dependent inode containing a volume location index for the | ||
354 | volume mappings within this cell: | ||
355 | |||
356 | cell->cache = | ||
357 | fscache_acquire_cookie(afs_cache_netfs.primary_index, | ||
358 | &afs_cell_cache_index_def, | ||
359 | cell); | ||
360 | |||
361 | Then when a volume location was accessed, it would be entered into the cell's | ||
362 | index and an inode would be allocated that acts as a volume type and hash chain | ||
363 | combination: | ||
364 | |||
365 | vlocation->cache = | ||
366 | fscache_acquire_cookie(cell->cache, | ||
367 | &afs_vlocation_cache_index_def, | ||
368 | vlocation); | ||
369 | |||
370 | And then a particular flavour of volume (R/O for example) could be added to | ||
371 | that index, creating another index for vnodes (AFS inode equivalents): | ||
372 | |||
373 | volume->cache = | ||
374 | fscache_acquire_cookie(vlocation->cache, | ||
375 | &afs_volume_cache_index_def, | ||
376 | volume); | ||
377 | |||
378 | |||
379 | ====================== | ||
380 | DATA FILE REGISTRATION | ||
381 | ====================== | ||
382 | |||
383 | The fourth step is to request a data file be created in the cache. This is | ||
384 | identical to index cookie acquisition. The only difference is that the type in | ||
385 | the object definition should be something other than index type. | ||
386 | |||
387 | vnode->cache = | ||
388 | fscache_acquire_cookie(volume->cache, | ||
389 | &afs_vnode_cache_object_def, | ||
390 | vnode); | ||
391 | |||
392 | |||
393 | ================================= | ||
394 | MISCELLANEOUS OBJECT REGISTRATION | ||
395 | ================================= | ||
396 | |||
397 | An optional step is to request an object of miscellaneous type be created in | ||
398 | the cache. This is almost identical to index cookie acquisition. The only | ||
399 | difference is that the type in the object definition should be something other | ||
400 | than index type. Whilst the parent object could be an index, it's more likely | ||
401 | it would be some other type of object such as a data file. | ||
402 | |||
403 | xattr->cache = | ||
404 | fscache_acquire_cookie(vnode->cache, | ||
405 | &afs_xattr_cache_object_def, | ||
406 | xattr); | ||
407 | |||
408 | Miscellaneous objects might be used to store extended attributes or directory | ||
409 | entries for example. | ||
410 | |||
411 | |||
412 | ========================== | ||
413 | SETTING THE DATA FILE SIZE | ||
414 | ========================== | ||
415 | |||
416 | The fifth step is to set the physical attributes of the file, such as its size. | ||
417 | This doesn't automatically reserve any space in the cache, but permits the | ||
418 | cache to adjust its metadata for data tracking appropriately: | ||
419 | |||
420 | int fscache_attr_changed(struct fscache_cookie *cookie); | ||
421 | |||
422 | The cache will return -ENOBUFS if there is no backing cache or if there is no | ||
423 | space to allocate any extra metadata required in the cache. The attributes | ||
424 | will be accessed with the get_attr() cookie definition operation. | ||
425 | |||
426 | Note that attempts to read or write data pages in the cache over this size may | ||
427 | be rebuffed with -ENOBUFS. | ||
428 | |||
429 | This operation schedules an attribute adjustment to happen asynchronously at | ||
430 | some point in the future, and as such, it may happen after the function returns | ||
431 | to the caller. The attribute adjustment excludes read and write operations. | ||
432 | |||
433 | |||
434 | ===================== | ||
435 | PAGE READ/ALLOC/WRITE | ||
436 | ===================== | ||
437 | |||
438 | And the sixth step is to store and retrieve pages in the cache. There are | ||
439 | three functions that are used to do this. | ||
440 | |||
441 | Note: | ||
442 | |||
443 | (1) A page should not be re-read or re-allocated without uncaching it first. | ||
444 | |||
445 | (2) A read or allocated page must be uncached when the netfs page is released | ||
446 | from the pagecache. | ||
447 | |||
448 | (3) A page should only be written to the cache if previously read or allocated. | ||
449 | |||
450 | This permits the cache to maintain its page tracking in proper order. | ||
451 | |||
452 | |||
453 | PAGE READ | ||
454 | --------- | ||
455 | |||
456 | Firstly, the netfs should ask FS-Cache to examine the caches and read the | ||
457 | contents cached for a particular page of a particular file if present, or else | ||
458 | allocate space to store the contents if not: | ||
459 | |||
460 | typedef | ||
461 | void (*fscache_rw_complete_t)(struct page *page, | ||
462 | void *context, | ||
463 | int error); | ||
464 | |||
465 | int fscache_read_or_alloc_page(struct fscache_cookie *cookie, | ||
466 | struct page *page, | ||
467 | fscache_rw_complete_t end_io_func, | ||
468 | void *context, | ||
469 | gfp_t gfp); | ||
470 | |||
471 | The cookie argument must specify a cookie for an object that isn't an index, | ||
472 | the page specified will have the data loaded into it (and is also used to | ||
473 | specify the page number), and the gfp argument is used to control how any | ||
474 | memory allocations made are satisfied. | ||
475 | |||
476 | If the cookie indicates the inode is not cached: | ||
477 | |||
478 | (1) The function will return -ENOBUFS. | ||
479 | |||
480 | Else if there's a copy of the page resident in the cache: | ||
481 | |||
482 | (1) The mark_pages_cached() cookie operation will be called on that page. | ||
483 | |||
484 | (2) The function will submit a request to read the data from the cache's | ||
485 | backing device directly into the page specified. | ||
486 | |||
487 | (3) The function will return 0. | ||
488 | |||
489 | (4) When the read is complete, end_io_func() will be invoked with: | ||
490 | |||
491 | (*) The netfs data supplied when the cookie was created. | ||
492 | |||
493 | (*) The page descriptor. | ||
494 | |||
495 | (*) The context argument passed to the above function. This will be | ||
496 | maintained with the get_context/put_context functions mentioned above. | ||
497 | |||
498 | (*) An argument that's 0 on success or negative for an error code. | ||
499 | |||
500 | If an error occurs, it should be assumed that the page contains no usable | ||
501 | data. | ||
502 | |||
503 | end_io_func() will be called in process context if the read results in | ||
504 | an error, but it might be called in interrupt context if the read is | ||
505 | successful. | ||
506 | |||
507 | Otherwise, if there's not a copy available in cache, but the cache may be able | ||
508 | to store the page: | ||
509 | |||
510 | (1) The mark_pages_cached() cookie operation will be called on that page. | ||
511 | |||
512 | (2) A block may be reserved in the cache and attached to the object at the | ||
513 | appropriate place. | ||
514 | |||
515 | (3) The function will return -ENODATA. | ||
516 | |||
517 | This function may also return -ENOMEM or -EINTR, in which case it won't have | ||
518 | read any data from the cache. | ||
519 | |||
520 | |||
521 | PAGE ALLOCATE | ||
522 | ------------- | ||
523 | |||
524 | Alternatively, if there's not expected to be any data in the cache for a page | ||
525 | because the file has been extended, a block can simply be allocated instead: | ||
526 | |||
527 | int fscache_alloc_page(struct fscache_cookie *cookie, | ||
528 | struct page *page, | ||
529 | gfp_t gfp); | ||
530 | |||
531 | This is similar to the fscache_read_or_alloc_page() function, except that it | ||
532 | never reads from the cache. It will return 0 if a block has been allocated, | ||
533 | rather than -ENODATA as the other would. One or the other must be performed | ||
534 | before writing to the cache. | ||
535 | |||
536 | The mark_pages_cached() cookie operation will be called on the page if | ||
537 | successful. | ||
538 | |||
539 | |||
540 | PAGE WRITE | ||
541 | ---------- | ||
542 | |||
543 | Secondly, if the netfs changes the contents of the page (either due to an | ||
544 | initial download or if a user performs a write), then the page should be | ||
545 | written back to the cache: | ||
546 | |||
547 | int fscache_write_page(struct fscache_cookie *cookie, | ||
548 | struct page *page, | ||
549 | gfp_t gfp); | ||
550 | |||
551 | The cookie argument must specify a data file cookie, the page specified should | ||
552 | contain the data to be written (and is also used to specify the page number), | ||
553 | and the gfp argument is used to control how any memory allocations made are | ||
554 | satisfied. | ||
555 | |||
556 | The page must have first been read or allocated successfully and must not have | ||
557 | been uncached before writing is performed. | ||
558 | |||
559 | If the cookie indicates the inode is not cached then: | ||
560 | |||
561 | (1) The function will return -ENOBUFS. | ||
562 | |||
563 | Else if space can be allocated in the cache to hold this page: | ||
564 | |||
565 | (1) PG_fscache_write will be set on the page. | ||
566 | |||
567 | (2) The function will submit a request to write the data to the cache's backing | ||
568 | device directly from the page specified. | ||
569 | |||
570 | (3) The function will return 0. | ||
571 | |||
572 | (4) When the write is complete PG_fscache_write is cleared on the page and | ||
573 | anyone waiting for that bit will be woken up. | ||
574 | |||
575 | Else if there's no space available in the cache, -ENOBUFS will be returned. It | ||
576 | is also possible for the PG_fscache_write bit to be cleared when no write took | ||
577 | place if unforeseen circumstances arose (such as a disk error). | ||
578 | |||
579 | Writing takes place asynchronously. | ||
580 | |||
581 | |||
582 | MULTIPLE PAGE READ | ||
583 | ------------------ | ||
584 | |||
585 | A facility is provided to read several pages at once, as requested by the | ||
586 | readpages() address space operation: | ||
587 | |||
588 | int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | ||
589 | struct address_space *mapping, | ||
590 | struct list_head *pages, | ||
591 | int *nr_pages, | ||
592 | fscache_rw_complete_t end_io_func, | ||
593 | void *context, | ||
594 | gfp_t gfp); | ||
595 | |||
596 | This works in a similar way to fscache_read_or_alloc_page(), except: | ||
597 | |||
598 | (1) Any page it can retrieve data for is removed from pages and nr_pages and | ||
599 | dispatched for reading to the disk. Reads of adjacent pages on disk may | ||
600 | be merged for greater efficiency. | ||
601 | |||
602 | (2) The mark_pages_cached() cookie operation will be called on several pages | ||
603 | at once if they're being read or allocated. | ||
604 | |||
605 | (3) If there was a general error, then that error will be returned. | ||
606 | |||
607 | Else if some pages couldn't be allocated or read, then -ENOBUFS will be | ||
608 | returned. | ||
609 | |||
610 | Else if some pages couldn't be read but were allocated, then -ENODATA will | ||
611 | be returned. | ||
612 | |||
613 | Otherwise, if all pages had reads dispatched, then 0 will be returned, the | ||
614 | list will be empty and *nr_pages will be 0. | ||
615 | |||
616 | (4) end_io_func will be called once for each page being read as the reads | ||
617 | complete. It will be called in process context if error != 0, but it may | ||
618 | be called in interrupt context if there is no error. | ||
619 | |||
620 | Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude | ||
621 | some of the pages being read and some being allocated. Those pages will have | ||
622 | been marked appropriately and will need uncaching. | ||
623 | |||
624 | |||
625 | ============== | ||
626 | PAGE UNCACHING | ||
627 | ============== | ||
628 | |||
629 | To uncache a page, this function should be called: | ||
630 | |||
631 | void fscache_uncache_page(struct fscache_cookie *cookie, | ||
632 | struct page *page); | ||
633 | |||
634 | This function permits the cache to release any in-memory representation it | ||
635 | might be holding for this netfs page. This function must be called once for | ||
636 | each page on which the read or write page functions above have been called to | ||
637 | make sure the cache's in-memory tracking information gets torn down. | ||
638 | |||
639 | Note that pages can't be explicitly deleted from a data file. The whole | ||
640 | data file must be retired (see the relinquish cookie function below). | ||
641 | |||
642 | Furthermore, note that this does not cancel the asynchronous read or write | ||
643 | operation started by the read/alloc and write functions, so the page | ||
644 | invalidation and release functions must use: | ||
645 | |||
646 | bool fscache_check_page_write(struct fscache_cookie *cookie, | ||
647 | struct page *page); | ||
648 | |||
649 | to see if a page is being written to the cache, and: | ||
650 | |||
651 | void fscache_wait_on_page_write(struct fscache_cookie *cookie, | ||
652 | struct page *page); | ||
653 | |||
654 | to wait for it to finish if it is. | ||
655 | |||
656 | |||
657 | ========================== | ||
658 | INDEX AND DATA FILE UPDATE | ||
659 | ========================== | ||
660 | |||
661 | To request an update of the index data for an index or other object, the | ||
662 | following function should be called: | ||
663 | |||
664 | void fscache_update_cookie(struct fscache_cookie *cookie); | ||
665 | |||
666 | This function will refer back to the netfs_data pointer stored in the cookie by | ||
667 | the acquisition function to obtain the data to write into each revised index | ||
668 | entry. The update method in the parent index definition will be called to | ||
669 | transfer the data. | ||
670 | |||
671 | Note that partial updates may happen automatically at other times, such as when | ||
672 | data blocks are added to a data file object. | ||
673 | |||
674 | |||
675 | =============================== | ||
676 | MISCELLANEOUS COOKIE OPERATIONS | ||
677 | =============================== | ||
678 | |||
679 | There are a number of operations that can be used to control cookies: | ||
680 | |||
681 | (*) Cookie pinning: | ||
682 | |||
683 | int fscache_pin_cookie(struct fscache_cookie *cookie); | ||
684 | void fscache_unpin_cookie(struct fscache_cookie *cookie); | ||
685 | |||
686 | These operations permit data cookies to be pinned into the cache and to | ||
687 | have the pinning removed. They are not permitted on index cookies. | ||
688 | |||
689 | The pinning function will return 0 if successful, -ENOBUFS if the cookie | ||
690 | isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning, | ||
691 | -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or | ||
692 | -EIO if there's any other problem. | ||
693 | |||
694 | (*) Data space reservation: | ||
695 | |||
696 | int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size); | ||
697 | |||
698 | This permits a netfs to request cache space be reserved to store up to the | ||
699 | given amount of a file. It is permitted to ask for more than the current | ||
700 | size of the file to allow for future file expansion. | ||
701 | |||
702 | If size is given as zero then the reservation will be cancelled. | ||
703 | |||
704 | The function will return 0 if successful, -ENOBUFS if the cookie isn't | ||
705 | backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations, | ||
706 | -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or | ||
707 | -EIO if there's any other problem. | ||
708 | |||
709 | Note that this doesn't pin an object in a cache; it can still be culled to | ||
710 | make space if it's not in use. | ||
711 | |||
712 | |||
713 | ===================== | ||
714 | COOKIE UNREGISTRATION | ||
715 | ===================== | ||
716 | |||
717 | To get rid of a cookie, this function should be called. | ||
718 | |||
719 | void fscache_relinquish_cookie(struct fscache_cookie *cookie, | ||
720 | int retire); | ||
721 | |||
722 | If retire is non-zero, then the object will be marked for recycling, and all | ||
723 | copies of it will be removed from all active caches in which it is present. | ||
724 | Not only that but all child objects will also be retired. | ||
725 | |||
726 | If retire is zero, then the object may be available again when next the | ||
727 | acquisition function is called. Retirement here will overrule the pinning on a | ||
728 | cookie. | ||
729 | |||
730 | One very important note - relinquish must NOT be called for a cookie unless all | ||
731 | the cookies for "child" indices, objects and pages have been relinquished | ||
732 | first. | ||
733 | |||
734 | |||
735 | ================================ | ||
736 | INDEX AND DATA FILE INVALIDATION | ||
737 | ================================ | ||
738 | |||
739 | There is no direct way to invalidate an index subtree or a data file. To do | ||
740 | this, the caller should relinquish and retire the cookie they have, and then | ||
741 | acquire a new one. | ||
742 | |||
743 | |||
744 | =========================== | ||
745 | FS-CACHE SPECIFIC PAGE FLAG | ||
746 | =========================== | ||
747 | |||
748 | FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is | ||
749 | given the alternative name PG_fscache. | ||
750 | |||
751 | PG_fscache is used to indicate that the page is known by the cache, and that | ||
752 | the cache must be informed if the page is going to go away. It's an indication | ||
753 | to the netfs that the cache has an interest in this page, where an interest may | ||
754 | be a pointer to it, resources allocated or reserved for it, or I/O in progress | ||
755 | upon it. | ||
756 | |||
757 | The netfs can use this information in methods such as releasepage() to | ||
758 | determine whether it needs to uncache a page or update it. | ||
759 | |||
760 | Furthermore, if this bit is set, releasepage() and invalidatepage() operations | ||
761 | will be called on a page to get rid of it, even if PG_private is not set. This | ||
762 | allows caching to be attempted on a page before read_cache_pages() is called | ||
763 | after fscache_read_or_alloc_pages() as the former will try and release pages it | ||
764 | was given under certain circumstances. | ||
765 | |||
766 | This bit does not overlap with bits such as PG_private. This means that FS-Cache | ||
767 | can be used with a filesystem that uses the block buffering code. | ||
768 | |||
769 | There are a number of operations defined on this flag: | ||
770 | |||
771 | int PageFsCache(struct page *page); | ||
772 | void SetPageFsCache(struct page *page); | ||
773 | void ClearPageFsCache(struct page *page); | ||
774 | int TestSetPageFsCache(struct page *page); | ||
775 | int TestClearPageFsCache(struct page *page); | ||
776 | |||
777 | These functions are bit test, bit set, bit clear, bit test and set and bit | ||
778 | test and clear operations on PG_fscache. | ||
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt new file mode 100644 index 000000000000..e8b0a35d8fe5 --- /dev/null +++ b/Documentation/filesystems/caching/object.txt | |||
@@ -0,0 +1,313 @@ | |||
1 | ==================================================== | ||
2 | IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT | ||
3 | ==================================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | Contents: | ||
8 | |||
9 | (*) Representation | ||
10 | |||
11 | (*) Object management state machine. | ||
12 | |||
13 | - Provision of cpu time. | ||
14 | - Locking simplification. | ||
15 | |||
16 | (*) The set of states. | ||
17 | |||
18 | (*) The set of events. | ||
19 | |||
20 | |||
21 | ============== | ||
22 | REPRESENTATION | ||
23 | ============== | ||
24 | |||
25 | FS-Cache maintains an in-kernel representation of each object that a netfs is | ||
26 | currently interested in. Such objects are represented by the fscache_cookie | ||
27 | struct and are referred to as cookies. | ||
28 | |||
29 | FS-Cache also maintains a separate in-kernel representation of the objects that | ||
30 | a cache backend is currently actively caching. Such objects are represented by | ||
31 | the fscache_object struct. The cache backends allocate these upon request, and | ||
32 | are expected to embed them in their own representations. These are referred to | ||
33 | as objects. | ||
34 | |||
35 | There is a 1:N relationship between cookies and objects. A cookie may be | ||
36 | represented by multiple objects - an index may exist in more than one cache - | ||
37 | or even by no objects (it may not be cached). | ||
38 | |||
39 | Furthermore, both cookies and objects are hierarchical. The two hierarchies | ||
40 | correspond, but the cookies tree is a superset of the union of the object trees | ||
41 | of multiple caches: | ||
42 | |||
43 | NETFS INDEX TREE : CACHE 1 : CACHE 2 | ||
44 | : : | ||
45 | : +-----------+ : | ||
46 | +----------->| IObject | : | ||
47 | +-----------+ | : +-----------+ : | ||
48 | | ICookie |-------+ : | : | ||
49 | +-----------+ | : | : +-----------+ | ||
50 | | +------------------------------>| IObject | | ||
51 | | : | : +-----------+ | ||
52 | | : V : | | ||
53 | | : +-----------+ : | | ||
54 | V +----------->| IObject | : | | ||
55 | +-----------+ | : +-----------+ : | | ||
56 | | ICookie |-------+ : | : V | ||
57 | +-----------+ | : | : +-----------+ | ||
58 | | +------------------------------>| IObject | | ||
59 | +-----+-----+ : | : +-----------+ | ||
60 | | | : | : | | ||
61 | V | : V : | | ||
62 | +-----------+ | : +-----------+ : | | ||
63 | | ICookie |------------------------->| IObject | : | | ||
64 | +-----------+ | : +-----------+ : | | ||
65 | | V : | : V | ||
66 | | +-----------+ : | : +-----------+ | ||
67 | | | ICookie |-------------------------------->| IObject | | ||
68 | | +-----------+ : | : +-----------+ | ||
69 | V | : V : | | ||
70 | +-----------+ | : +-----------+ : | | ||
71 | | DCookie |------------------------->| DObject | : | | ||
72 | +-----------+ | : +-----------+ : | | ||
73 | | : : | | ||
74 | +-------+-------+ : : | | ||
75 | | | : : | | ||
76 | V V : : V | ||
77 | +-----------+ +-----------+ : : +-----------+ | ||
78 | | DCookie | | DCookie |------------------------>| DObject | | ||
79 | +-----------+ +-----------+ : : +-----------+ | ||
80 | : : | ||
81 | |||
82 | In the above illustration, ICookie and IObject represent indices and DCookie | ||
83 | and DObject represent data storage objects. Indices may have representation in | ||
84 | multiple caches, but currently, non-index objects may not. Objects of any type | ||
85 | may also be entirely unrepresented. | ||
86 | |||
87 | As far as the netfs API goes, the netfs is only actually permitted to see | ||
88 | pointers to the cookies. The cookies themselves and any objects attached to | ||
89 | those cookies are hidden from it. | ||
90 | |||
91 | |||
92 | =============================== | ||
93 | OBJECT MANAGEMENT STATE MACHINE | ||
94 | =============================== | ||
95 | |||
96 | Within FS-Cache, each active object is managed by its own individual state | ||
97 | machine. The state for an object is kept in the fscache_object struct, in | ||
98 | object->state. A cookie may point to a set of objects that are in different | ||
99 | states. | ||
100 | |||
101 | Each state has an action associated with it that is invoked when the machine | ||
102 | wakes up in that state. There are four logical sets of states: | ||
103 | |||
104 | (1) Preparation: states that wait for the parent objects to become ready. The | ||
105 | representations are hierarchical, and it is expected that an object must | ||
106 | be created or accessed with respect to its parent object. | ||
107 | |||
108 | (2) Initialisation: states that perform lookups in the cache and validate | ||
109 | what's found and that create on disk any missing metadata. | ||
110 | |||
111 | (3) Normal running: states that allow netfs operations on objects to proceed | ||
112 | and that update the state of objects. | ||
113 | |||
114 | (4) Termination: states that detach objects from their netfs cookies, that | ||
115 | delete objects from disk, that handle disk and system errors and that free | ||
116 | up in-memory resources. | ||
117 | |||
118 | |||
119 | In most cases, transitioning between states is in response to signalled events. | ||
120 | When a state has finished processing, it will usually set the mask of events in | ||
121 | which it is interested (object->event_mask) and relinquish the worker thread. | ||
122 | Then when an event is raised (by calling fscache_raise_event()), if the event | ||
123 | is not masked, the object will be queued for processing (by calling | ||
124 | fscache_enqueue_object()). | ||
125 | |||
126 | |||
127 | PROVISION OF CPU TIME | ||
128 | --------------------- | ||
129 | |||
130 | The work to be done by the various states is given CPU time by the threads of | ||
131 | the slow work facility (see Documentation/slow-work.txt). This is used in | ||
132 | preference to the workqueue facility because: | ||
133 | |||
134 | (1) Threads may be completely occupied for very long periods of time by a | ||
135 | particular work item. These state actions may be doing sequences of | ||
136 | synchronous, journalled disk accesses (lookup, mkdir, create, setxattr, | ||
137 | getxattr, truncate, unlink, rmdir, rename). | ||
138 | |||
139 | (2) Threads may do little actual work, but may rather spend a lot of time | ||
140 | sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded | ||
141 | workqueues don't necessarily have the right numbers of threads. | ||
142 | |||
143 | |||
144 | LOCKING SIMPLIFICATION | ||
145 | ---------------------- | ||
146 | |||
147 | Because only one worker thread may be operating on any particular object's | ||
148 | state machine at once, this simplifies the locking, particularly with respect | ||
149 | to disconnecting the netfs's representation of a cache object (fscache_cookie) | ||
150 | from the cache backend's representation (fscache_object) - which may be | ||
151 | requested from either end. | ||
152 | |||
153 | |||
154 | ================= | ||
155 | THE SET OF STATES | ||
156 | ================= | ||
157 | |||
158 | The object state machine has a set of states that it can be in. There are | ||
159 | preparation states in which the object sets itself up and waits for its parent | ||
160 | object to transit to a state that allows access to its children: | ||
161 | |||
162 | (1) State FSCACHE_OBJECT_INIT. | ||
163 | |||
164 | Initialise the object and wait for the parent object to become active. In | ||
165 | the cache, it is expected that it will not be possible to look an object | ||
166 | up from the parent object, until that parent object itself has been looked | ||
167 | up. | ||
168 | |||
169 | There are initialisation states in which the object sets itself up and accesses | ||
170 | disk for the object metadata: | ||
171 | |||
172 | (2) State FSCACHE_OBJECT_LOOKING_UP. | ||
173 | |||
174 | Look up the object on disk, using the parent as a starting point. | ||
175 | FS-Cache expects the cache backend to probe the cache to see whether this | ||
176 | object is represented there, and if it is, to see if it's valid (coherency | ||
177 | management). | ||
178 | |||
179 | The cache should call fscache_object_lookup_negative() to indicate lookup | ||
180 | failure for whatever reason, and should call fscache_obtained_object() to | ||
181 | indicate success. | ||
182 | |||
183 | At the completion of lookup, FS-Cache will let the netfs go ahead with | ||
184 | read operations, no matter whether the file is yet cached. If not yet | ||
185 | cached, read operations will be immediately rejected with ENODATA until | ||
186 | the first known page is uncached - as up to that point there can be no data | ||
187 | to be read out of the cache for that file that isn't currently also held | ||
188 | in the pagecache. | ||
189 | |||
190 | (3) State FSCACHE_OBJECT_CREATING. | ||
191 | |||
192 | Create an object on disk, using the parent as a starting point. This | ||
193 | happens if the lookup failed to find the object, or if the object's | ||
194 | coherency data indicated what's on disk is out of date. In this state, | ||
195 | FS-Cache expects the cache to create the object on disk. | ||
196 | |||
197 | The cache should call fscache_obtained_object() if creation completes | ||
198 | successfully, fscache_object_lookup_negative() otherwise. | ||
199 | |||
200 | At the completion of creation, FS-Cache will start processing write | ||
201 | operations the netfs has queued for an object. If creation failed, the | ||
202 | write ops will be transparently discarded, and nothing recorded in the | ||
203 | cache. | ||
204 | |||
205 | There are some normal running states in which the object spends its time | ||
206 | servicing netfs requests: | ||
207 | |||
208 | (4) State FSCACHE_OBJECT_AVAILABLE. | ||
209 | |||
210 | A transient state in which pending operations are started, child objects | ||
211 | are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary | ||
212 | lookup data is freed. | ||
213 | |||
214 | (5) State FSCACHE_OBJECT_ACTIVE. | ||
215 | |||
216 | The normal running state. In this state, requests the netfs makes will be | ||
217 | passed on to the cache. | ||
218 | |||
219 | (6) State FSCACHE_OBJECT_UPDATING. | ||
220 | |||
221 | The state machine comes here to update the object in the cache from the | ||
222 | netfs's records. This involves updating the auxiliary data that is used | ||
223 | to maintain coherency. | ||
224 | |||
225 | And there are terminal states in which an object cleans itself up, deallocates | ||
226 | memory and potentially deletes stuff from disk: | ||
227 | |||
228 | (7) State FSCACHE_OBJECT_LC_DYING. | ||
229 | |||
230 | The object comes here if it is dying because of a lookup or creation | ||
231 | error. This would be due to a disk error or system error of some sort. | ||
232 | Temporary data is cleaned up, and the parent is released. | ||
233 | |||
234 | (8) State FSCACHE_OBJECT_DYING. | ||
235 | |||
236 | The object comes here if it is dying due to an error, because its parent | ||
237 | cookie has been relinquished by the netfs or because the cache is being | ||
238 | withdrawn. | ||
239 | |||
240 | Any child objects waiting on this one are given CPU time so that they too | ||
241 | can destroy themselves. This object waits for all its children to go away | ||
242 | before advancing to the next state. | ||
243 | |||
244 | (9) State FSCACHE_OBJECT_ABORT_INIT. | ||
245 | |||
246 | The object comes to this state if it was waiting on its parent in | ||
247 | FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself | ||
248 | so that the parent may proceed from the FSCACHE_OBJECT_DYING state. | ||
249 | |||
250 | (10) State FSCACHE_OBJECT_RELEASING. | ||
251 | (11) State FSCACHE_OBJECT_RECYCLING. | ||
252 | |||
253 | The object comes to one of these two states when dying once it is rid of | ||
254 | all its children, if it is dying because the netfs relinquished its | ||
255 | cookie. In the first state, the cached data is expected to persist, and | ||
256 | in the second it will be deleted. | ||
257 | |||
258 | (12) State FSCACHE_OBJECT_WITHDRAWING. | ||
259 | |||
260 | The object transits to this state if the cache decides it wants to | ||
261 | withdraw the object from service, perhaps to make space, but also due to | ||
262 | error or just because the whole cache is being withdrawn. | ||
263 | |||
264 | (13) State FSCACHE_OBJECT_DEAD. | ||
265 | |||
266 | The object transits to this state when the in-memory object record is | ||
267 | ready to be deleted. The object processor shouldn't ever see an object in | ||
268 | this state. | ||
269 | |||
270 | |||
271 | THE SET OF EVENTS | ||
272 | ----------------- | ||
273 | |||
274 | There are a number of events that can be raised to an object state machine: | ||
275 | |||
276 | (*) FSCACHE_OBJECT_EV_UPDATE | ||
277 | |||
278 | The netfs requested that an object be updated. The state machine will ask | ||
279 | the cache backend to update the object, and the cache backend will ask the | ||
280 | netfs for details of the change through its cookie definition ops. | ||
281 | |||
282 | (*) FSCACHE_OBJECT_EV_CLEARED | ||
283 | |||
284 | This is signalled in two circumstances: | ||
285 | |||
286 | (a) when an object's last child object is dropped and | ||
287 | |||
288 | (b) when the last operation outstanding on an object is completed. | ||
289 | |||
290 | This is used to proceed from the dying state. | ||
291 | |||
292 | (*) FSCACHE_OBJECT_EV_ERROR | ||
293 | |||
294 | This is signalled when an I/O error occurs during the processing of some | ||
295 | object. | ||
296 | |||
297 | (*) FSCACHE_OBJECT_EV_RELEASE | ||
298 | (*) FSCACHE_OBJECT_EV_RETIRE | ||
299 | |||
300 | These are signalled when the netfs relinquishes a cookie it was using. | ||
301 | The event selected depends on whether the netfs asks for the backing | ||
302 | object to be retired (deleted) or retained. | ||
303 | |||
304 | (*) FSCACHE_OBJECT_EV_WITHDRAW | ||
305 | |||
306 | This is signalled when the cache backend wants to withdraw an object. | ||
307 | This means that the object will have to be detached from the netfs's | ||
308 | cookie. | ||
309 | |||
310 | Because the withdrawing and releasing/retiring events are all handled by the object | ||
311 | state machine, it doesn't matter if there's a collision with both ends trying | ||
312 | to sever the connection at the same time. The state machine can just pick | ||
313 | which one it wants to honour, and that effects the other. | ||
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt new file mode 100644 index 000000000000..b6b070c57cbf --- /dev/null +++ b/Documentation/filesystems/caching/operations.txt | |||
@@ -0,0 +1,213 @@ | |||
1 | ================================ | ||
2 | ASYNCHRONOUS OPERATIONS HANDLING | ||
3 | ================================ | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | Contents: | ||
8 | |||
9 | (*) Overview. | ||
10 | |||
11 | (*) Operation record initialisation. | ||
12 | |||
13 | (*) Parameters. | ||
14 | |||
15 | (*) Procedure. | ||
16 | |||
17 | (*) Asynchronous callback. | ||
18 | |||
19 | |||
20 | ======== | ||
21 | OVERVIEW | ||
22 | ======== | ||
23 | |||
24 | FS-Cache has an asynchronous operations handling facility that it uses for its | ||
25 | data storage and retrieval routines. Its operations are represented by | ||
26 | fscache_operation structs, though these are usually embedded into some other | ||
27 | structure. | ||
28 | |||
29 | This facility is available to and expected to be used by the cache backends, | ||
30 | and FS-Cache will create operations and pass them off to the appropriate cache | ||
31 | backend for completion. | ||
32 | |||
33 | To make use of this facility, <linux/fscache-cache.h> should be #included. | ||
34 | |||
35 | |||
36 | =============================== | ||
37 | OPERATION RECORD INITIALISATION | ||
38 | =============================== | ||
39 | |||
40 | An operation is recorded in an fscache_operation struct: | ||
41 | |||
42 | struct fscache_operation { | ||
43 | union { | ||
44 | struct work_struct fast_work; | ||
45 | struct slow_work slow_work; | ||
46 | }; | ||
47 | unsigned long flags; | ||
48 | fscache_operation_processor_t processor; | ||
49 | ... | ||
50 | }; | ||
51 | |||
52 | Someone wanting to issue an operation should allocate something with this | ||
53 | struct embedded in it. They should initialise it by calling: | ||
54 | |||
55 | void fscache_operation_init(struct fscache_operation *op, | ||
56 | fscache_operation_release_t release); | ||
57 | |||
58 | with the operation to be initialised and the release function to use. | ||
59 | |||
60 | The op->flags parameter should be set to indicate the CPU time provision and | ||
61 | the exclusivity (see the Parameters section). | ||
62 | |||
63 | The op->fast_work, op->slow_work and op->processor members should be set as | ||
64 | appropriate for the CPU time provision (see the Parameters section). | ||
65 | |||
66 | FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the | ||
67 | operation and waited for afterwards. | ||
68 | |||
69 | |||
70 | ========== | ||
71 | PARAMETERS | ||
72 | ========== | ||
73 | |||
74 | There are a number of parameters that can be set in the operation record's flag | ||
75 | parameter. There are three options for the provision of CPU time in these | ||
76 | operations: | ||
77 | |||
78 | (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread | ||
79 | may decide it wants to handle an operation itself without deferring it to | ||
80 | another thread. | ||
81 | |||
82 | This is, for example, used in read operations for calling readpages() on | ||
83 | the backing filesystem in CacheFiles. Although readpages() does an | ||
84 | asynchronous data fetch, the determination of whether pages exist is done | ||
85 | synchronously - and the netfs does not proceed until this has been | ||
86 | determined. | ||
87 | |||
88 | If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags | ||
89 | before submitting the operation, and the operating thread must wait for it | ||
90 | to be cleared before proceeding: | ||
91 | |||
92 | wait_on_bit(&op->flags, FSCACHE_OP_WAITING, | ||
93 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
94 | |||
95 | |||
96 | (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it | ||
97 | will be given to keventd to process. Such an operation is not permitted | ||
98 | to sleep on I/O. | ||
99 | |||
100 | This is, for example, used by CacheFiles to copy data from a backing fs | ||
101 | page to a netfs page after the backing fs has read the page in. | ||
102 | |||
103 | If this option is used, op->fast_work and op->processor must be | ||
104 | initialised before submitting the operation: | ||
105 | |||
106 | INIT_WORK(&op->fast_work, do_some_work); | ||
107 | |||
108 | |||
109 | (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it | ||
110 | will be given to the slow work facility to process. Such an operation is | ||
111 | permitted to sleep on I/O. | ||
112 | |||
113 | This is, for example, used by FS-Cache to handle background writes of | ||
114 | pages that have just been fetched from a remote server. | ||
115 | |||
116 | If this option is used, op->slow_work and op->processor must be | ||
117 | initialised before submitting the operation: | ||
118 | |||
119 | fscache_operation_init_slow(op, processor) | ||
120 | |||
121 | |||
122 | Furthermore, operations may be one of two types: | ||
123 | |||
124 | (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in | ||
125 | conjunction with any other operation on the object being operated upon. | ||
126 | |||
127 | An example of this is the attribute change operation, in which the file | ||
128 | being written to may need truncation. | ||
129 | |||
130 | (2) Shareable. Operations of this type may be running simultaneously. It's | ||
131 | up to the operation implementation to prevent interference between other | ||
132 | operations running at the same time. | ||
133 | |||
134 | |||
135 | ========= | ||
136 | PROCEDURE | ||
137 | ========= | ||
138 | |||
139 | Operations are used through the following procedure: | ||
140 | |||
141 | (1) The submitting thread must allocate the operation and initialise it | ||
142 | itself. Normally this would be part of a more specific structure with the | ||
143 | generic op embedded within. | ||
144 | |||
145 | (2) The submitting thread must then submit the operation for processing using | ||
146 | one of the following two functions: | ||
147 | |||
148 | int fscache_submit_op(struct fscache_object *object, | ||
149 | struct fscache_operation *op); | ||
150 | |||
151 | int fscache_submit_exclusive_op(struct fscache_object *object, | ||
152 | struct fscache_operation *op); | ||
153 | |||
154 | The first function should be used to submit non-exclusive ops and the | ||
155 | second to submit exclusive ones. The caller must still set the | ||
156 | FSCACHE_OP_EXCLUSIVE flag. | ||
157 | |||
158 | If successful, both functions will assign the operation to the specified | ||
159 | object and return 0. -ENOBUFS will be returned if the object specified is | ||
160 | permanently unavailable. | ||
161 | |||
162 | The operation manager will defer operations on an object that is still | ||
163 | undergoing lookup or creation. The operation will also be deferred if an | ||
164 | operation of conflicting exclusivity is in progress on the object. | ||
165 | |||
166 | If the operation is asynchronous, the manager will retain a reference to | ||
167 | it, so the caller should put their reference to it by passing it to: | ||
168 | |||
169 | void fscache_put_operation(struct fscache_operation *op); | ||
170 | |||
171 | (3) If the submitting thread wants to do the work itself, and has marked the | ||
172 | operation with FSCACHE_OP_MYTHREAD, then it should monitor | ||
173 | FSCACHE_OP_WAITING as described above and check the state of the object if | ||
174 | necessary (the object might have died whilst the thread was waiting). | ||
175 | |||
176 | When it has finished doing its processing, it should call | ||
177 | fscache_put_operation() on it. | ||
178 | |||
179 | (4) The operation holds an effective lock upon the object, preventing other | ||
180 | exclusive ops conflicting until it is released. The operation can be | ||
181 | enqueued for further immediate asynchronous processing by adjusting the | ||
182 | CPU time provisioning option if necessary, eg: | ||
183 | |||
184 | op->flags &= ~FSCACHE_OP_TYPE; | ||
185 | op->flags |= FSCACHE_OP_FAST; | ||
186 | |||
187 | and calling: | ||
188 | |||
189 | void fscache_enqueue_operation(struct fscache_operation *op); | ||
190 | |||
191 | This can be used to allow other things to have use of the worker thread | ||
192 | pools. | ||
193 | |||
194 | |||
195 | ===================== | ||
196 | ASYNCHRONOUS CALLBACK | ||
197 | ===================== | ||
198 | |||
199 | When used in asynchronous mode, the worker thread pool will invoke the | ||
200 | processor method with a pointer to the operation. This should then get at the | ||
201 | container struct by using container_of(): | ||
202 | |||
203 | static void fscache_write_op(struct fscache_operation *_op) | ||
204 | { | ||
205 | struct fscache_storage *op = | ||
206 | container_of(_op, struct fscache_storage, op); | ||
207 | ... | ||
208 | } | ||
209 | |||
210 | The caller holds a reference on the operation, and will invoke | ||
211 | fscache_put_operation() when the processor function returns. The processor | ||
212 | function is at liberty to call fscache_enqueue_operation() or to take extra | ||
213 | references. | ||
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt new file mode 100644 index 000000000000..ebc50f808ea4 --- /dev/null +++ b/Documentation/slow-work.txt | |||
@@ -0,0 +1,174 @@ | |||
1 | ==================================== | ||
2 | SLOW WORK ITEM EXECUTION THREAD POOL | ||
3 | ==================================== | ||
4 | |||
5 | By: David Howells <dhowells@redhat.com> | ||
6 | |||
7 | The slow work item execution thread pool is a pool of threads for performing | ||
8 | things that take a relatively long time, such as making mkdir calls. | ||
9 | Typically, when processing something, these items will spend a lot of time | ||
10 | blocking a thread on I/O, thus making that thread unavailable for doing other | ||
11 | work. | ||
12 | |||
13 | The standard workqueue model is unsuitable for this class of work item as that | ||
14 | limits the owner to a single thread or a single thread per CPU. For some | ||
15 | tasks, however, more threads - or fewer - are required. | ||
16 | |||
17 | There is just one pool per system. It contains no threads unless something | ||
18 | wants to use it - and that something must register its interest first. When | ||
19 | the pool is active, the number of threads it contains is dynamic, varying | ||
20 | between a maximum and minimum setting, depending on the load. | ||
21 | |||
22 | |||
23 | ==================== | ||
24 | CLASSES OF WORK ITEM | ||
25 | ==================== | ||
26 | |||
27 | This pool supports two classes of work items: | ||
28 | |||
29 | (*) Slow work items. | ||
30 | |||
31 | (*) Very slow work items. | ||
32 | |||
33 | The former are expected to finish much quicker than the latter. | ||
34 | |||
35 | An operation of the very slow class may do a batch combination of several | ||
36 | lookups, mkdirs, and a create for instance. | ||
37 | |||
38 | An operation of the ordinarily slow class may, for example, write stuff or | ||
39 | expand files, provided the time taken to do so isn't too long. | ||
40 | |||
41 | Operations of both types may sleep during execution, thus tying up the thread | ||
42 | loaned to them. | ||
43 | |||
44 | |||
45 | THREAD-TO-CLASS ALLOCATION | ||
46 | -------------------------- | ||
47 | |||
48 | Not all the threads in the pool are available to work on very slow work items. | ||
49 | The number will be between one and one fewer than the number of active threads. | ||
50 | This is configurable (see the "Pool Configuration" section). | ||
51 | |||
52 | All the threads are available to work on ordinarily slow work items, but a | ||
53 | percentage of the threads will prefer to work on very slow work items. | ||
54 | |||
55 | The configuration ensures that at least one thread will be available to work on | ||
56 | very slow work items, and at least one thread will be available that won't work | ||
57 | on very slow work items at all. | ||
58 | |||
59 | |||
60 | ===================== | ||
61 | USING SLOW WORK ITEMS | ||
62 | ===================== | ||
63 | |||
64 | Firstly, a module or subsystem wanting to make use of slow work items must | ||
65 | register its interest: | ||
66 | |||
67 | int ret = slow_work_register_user(); | ||
68 | |||
69 | This will return 0 if successful, or a -ve error upon failure. | ||
70 | |||
71 | |||
72 | Slow work items may then be set up by: | ||
73 | |||
74 | (1) Declaring a slow_work struct type variable: | ||
75 | |||
76 | #include <linux/slow-work.h> | ||
77 | |||
78 | struct slow_work myitem; | ||
79 | |||
80 | (2) Declaring the operations to be used for this item: | ||
81 | |||
82 | struct slow_work_ops myitem_ops = { | ||
83 | .get_ref = myitem_get_ref, | ||
84 | .put_ref = myitem_put_ref, | ||
85 | .execute = myitem_execute, | ||
86 | }; | ||
87 | |||
88 | [*] For a description of the ops, see section "Item Operations". | ||
89 | |||
90 | (3) Initialising the item: | ||
91 | |||
92 | slow_work_init(&myitem, &myitem_ops); | ||
93 | |||
94 | or: | ||
95 | |||
96 | vslow_work_init(&myitem, &myitem_ops); | ||
97 | |||
98 | depending on its class. | ||
99 | |||
100 | A suitably set up work item can then be enqueued for processing: | ||
101 | |||
102 | int ret = slow_work_enqueue(&myitem); | ||
103 | |||
104 | This will return a -ve error if the thread pool is unable to gain a reference | ||
105 | on the item, 0 otherwise. | ||
106 | |||
107 | |||
108 | The items are reference counted, so there ought to be no need for a flush | ||
109 | operation. When all a module's slow work items have been processed, and the | ||
110 | module has no further interest in the facility, it should unregister its | ||
111 | interest: | ||
112 | |||
113 | slow_work_unregister_user(); | ||
114 | |||
115 | |||
116 | =============== | ||
117 | ITEM OPERATIONS | ||
118 | =============== | ||
119 | |||
120 | Each work item requires a table of operations of type struct slow_work_ops. | ||
121 | All members are required: | ||
122 | |||
123 | (*) Get a reference on an item: | ||
124 | |||
125 | int (*get_ref)(struct slow_work *work); | ||
126 | |||
127 | This allows the thread pool to attempt to pin an item by getting a | ||
128 | reference on it. This function should return 0 if the reference was | ||
129 | granted, or a -ve error otherwise. If an error is returned, | ||
130 | slow_work_enqueue() will fail. | ||
131 | |||
132 | The reference is held whilst the item is queued and whilst it is being | ||
133 | executed. The item may then be requeued with the same reference held, or | ||
134 | the reference will be released. | ||
135 | |||
136 | (*) Release a reference on an item: | ||
137 | |||
138 | void (*put_ref)(struct slow_work *work); | ||
139 | |||
140 | This allows the thread pool to unpin an item by releasing the reference on | ||
141 | it. The thread pool will not touch the item again once this has been | ||
142 | called. | ||
143 | |||
144 | (*) Execute an item: | ||
145 | |||
146 | void (*execute)(struct slow_work *work); | ||
147 | |||
148 | This should perform the work required of the item. It may sleep, it may | ||
149 | perform disk I/O and it may wait for locks. | ||
150 | |||
151 | |||
152 | ================== | ||
153 | POOL CONFIGURATION | ||
154 | ================== | ||
155 | |||
156 | The slow-work thread pool has a number of configurables: | ||
157 | |||
158 | (*) /proc/sys/kernel/slow-work/min-threads | ||
159 | |||
160 | The minimum number of threads that should be in the pool whilst it is in | ||
161 | use. This may be anywhere between 2 and max-threads. | ||
162 | |||
163 | (*) /proc/sys/kernel/slow-work/max-threads | ||
164 | |||
165 | The maximum number of threads that should be in the pool. This may be | ||
166 | anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater. | ||
167 | |||
168 | (*) /proc/sys/kernel/slow-work/vslow-percentage | ||
169 | |||
170 | The percentage of active threads in the pool that may be used to execute | ||
171 | very slow work items. This may be between 1 and 99. The resultant number | ||
172 | is bounded to between 1 and one fewer than the number of active threads. | ||
173 | This ensures there is always at least one thread that can process very | ||
174 | slow work items, and always at least one thread that won't. | ||
diff --git a/fs/Kconfig b/fs/Kconfig index ae3b34a2ea69..86b203fc3c56 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -66,6 +66,13 @@ config GENERIC_ACL | |||
66 | bool | 66 | bool |
67 | select FS_POSIX_ACL | 67 | select FS_POSIX_ACL |
68 | 68 | ||
69 | menu "Caches" | ||
70 | |||
71 | source "fs/fscache/Kconfig" | ||
72 | source "fs/cachefiles/Kconfig" | ||
73 | |||
74 | endmenu | ||
75 | |||
69 | if BLOCK | 76 | if BLOCK |
70 | menu "CD-ROM/DVD Filesystems" | 77 | menu "CD-ROM/DVD Filesystems" |
71 | 78 | ||
diff --git a/fs/Makefile b/fs/Makefile index 15f73014a208..70b2aed87133 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -63,6 +63,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o | |||
63 | obj-$(CONFIG_DLM) += dlm/ | 63 | obj-$(CONFIG_DLM) += dlm/ |
64 | 64 | ||
65 | # Do not add any filesystems before this line | 65 | # Do not add any filesystems before this line |
66 | obj-$(CONFIG_FSCACHE) += fscache/ | ||
66 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 67 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
67 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 68 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
68 | obj-$(CONFIG_EXT2_FS) += ext2/ | 69 | obj-$(CONFIG_EXT2_FS) += ext2/ |
@@ -116,6 +117,7 @@ obj-$(CONFIG_AFS_FS) += afs/ | |||
116 | obj-$(CONFIG_BEFS_FS) += befs/ | 117 | obj-$(CONFIG_BEFS_FS) += befs/ |
117 | obj-$(CONFIG_HOSTFS) += hostfs/ | 118 | obj-$(CONFIG_HOSTFS) += hostfs/ |
118 | obj-$(CONFIG_HPPFS) += hppfs/ | 119 | obj-$(CONFIG_HPPFS) += hppfs/ |
120 | obj-$(CONFIG_CACHEFILES) += cachefiles/ | ||
119 | obj-$(CONFIG_DEBUG_FS) += debugfs/ | 121 | obj-$(CONFIG_DEBUG_FS) += debugfs/ |
120 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ | 122 | obj-$(CONFIG_OCFS2_FS) += ocfs2/ |
121 | obj-$(CONFIG_BTRFS_FS) += btrfs/ | 123 | obj-$(CONFIG_BTRFS_FS) += btrfs/ |
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig index e7b522fe15e1..5c4e61d3c772 100644 --- a/fs/afs/Kconfig +++ b/fs/afs/Kconfig | |||
@@ -19,3 +19,11 @@ config AFS_DEBUG | |||
19 | See <file:Documentation/filesystems/afs.txt> for more information. | 19 | See <file:Documentation/filesystems/afs.txt> for more information. |
20 | 20 | ||
21 | If unsure, say N. | 21 | If unsure, say N. |
22 | |||
23 | config AFS_FSCACHE | ||
24 | bool "Provide AFS client caching support (EXPERIMENTAL)" | ||
25 | depends on EXPERIMENTAL | ||
26 | depends on AFS_FS=m && FSCACHE || AFS_FS=y && FSCACHE=y | ||
27 | help | ||
28 | Say Y here if you want AFS data to be cached locally on disk through | ||
29 | the generic filesystem cache manager.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile index a66671082cfb..4f64b95d57bd 100644 --- a/fs/afs/Makefile +++ b/fs/afs/Makefile | |||
@@ -2,7 +2,10 @@ | |||
2 | # Makefile for Red Hat Linux AFS client. | 2 | # Makefile for Red Hat Linux AFS client. |
3 | # | 3 | # |
4 | 4 | ||
5 | afs-cache-$(CONFIG_AFS_FSCACHE) := cache.o | ||
6 | |||
5 | kafs-objs := \ | 7 | kafs-objs := \ |
8 | $(afs-cache-y) \ | ||
6 | callback.o \ | 9 | callback.o \ |
7 | cell.o \ | 10 | cell.o \ |
8 | cmservice.o \ | 11 | cmservice.o \ |
diff --git a/fs/afs/cache.c b/fs/afs/cache.c index de0d7de69edc..e2b1d3f16519 100644 --- a/fs/afs/cache.c +++ b/fs/afs/cache.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS caching stuff | 1 | /* AFS caching stuff |
2 | * | 2 | * |
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -9,248 +9,395 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifdef AFS_CACHING_SUPPORT | 12 | #include <linux/slab.h> |
13 | static cachefs_match_val_t afs_cell_cache_match(void *target, | 13 | #include <linux/sched.h> |
14 | const void *entry); | 14 | #include "internal.h" |
15 | static void afs_cell_cache_update(void *source, void *entry); | 15 | |
16 | 16 | static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, | |
17 | struct cachefs_index_def afs_cache_cell_index_def = { | 17 | void *buffer, uint16_t buflen); |
18 | .name = "cell_ix", | 18 | static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, |
19 | .data_size = sizeof(struct afs_cache_cell), | 19 | void *buffer, uint16_t buflen); |
20 | .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, | 20 | static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, |
21 | .match = afs_cell_cache_match, | 21 | const void *buffer, |
22 | .update = afs_cell_cache_update, | 22 | uint16_t buflen); |
23 | |||
24 | static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, | ||
25 | void *buffer, uint16_t buflen); | ||
26 | static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, | ||
27 | void *buffer, uint16_t buflen); | ||
28 | static enum fscache_checkaux afs_vlocation_cache_check_aux( | ||
29 | void *cookie_netfs_data, const void *buffer, uint16_t buflen); | ||
30 | |||
31 | static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, | ||
32 | void *buffer, uint16_t buflen); | ||
33 | |||
34 | static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, | ||
35 | void *buffer, uint16_t buflen); | ||
36 | static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, | ||
37 | uint64_t *size); | ||
38 | static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, | ||
39 | void *buffer, uint16_t buflen); | ||
40 | static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, | ||
41 | const void *buffer, | ||
42 | uint16_t buflen); | ||
43 | static void afs_vnode_cache_now_uncached(void *cookie_netfs_data); | ||
44 | |||
45 | struct fscache_netfs afs_cache_netfs = { | ||
46 | .name = "afs", | ||
47 | .version = 0, | ||
48 | }; | ||
49 | |||
50 | struct fscache_cookie_def afs_cell_cache_index_def = { | ||
51 | .name = "AFS.cell", | ||
52 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
53 | .get_key = afs_cell_cache_get_key, | ||
54 | .get_aux = afs_cell_cache_get_aux, | ||
55 | .check_aux = afs_cell_cache_check_aux, | ||
56 | }; | ||
57 | |||
58 | struct fscache_cookie_def afs_vlocation_cache_index_def = { | ||
59 | .name = "AFS.vldb", | ||
60 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
61 | .get_key = afs_vlocation_cache_get_key, | ||
62 | .get_aux = afs_vlocation_cache_get_aux, | ||
63 | .check_aux = afs_vlocation_cache_check_aux, | ||
64 | }; | ||
65 | |||
66 | struct fscache_cookie_def afs_volume_cache_index_def = { | ||
67 | .name = "AFS.volume", | ||
68 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
69 | .get_key = afs_volume_cache_get_key, | ||
70 | }; | ||
71 | |||
72 | struct fscache_cookie_def afs_vnode_cache_index_def = { | ||
73 | .name = "AFS.vnode", | ||
74 | .type = FSCACHE_COOKIE_TYPE_DATAFILE, | ||
75 | .get_key = afs_vnode_cache_get_key, | ||
76 | .get_attr = afs_vnode_cache_get_attr, | ||
77 | .get_aux = afs_vnode_cache_get_aux, | ||
78 | .check_aux = afs_vnode_cache_check_aux, | ||
79 | .now_uncached = afs_vnode_cache_now_uncached, | ||
23 | }; | 80 | }; |
24 | #endif | ||
25 | 81 | ||
26 | /* | 82 | /* |
27 | * match a cell record obtained from the cache | 83 | * set the key for the index entry |
28 | */ | 84 | */ |
29 | #ifdef AFS_CACHING_SUPPORT | 85 | static uint16_t afs_cell_cache_get_key(const void *cookie_netfs_data, |
30 | static cachefs_match_val_t afs_cell_cache_match(void *target, | 86 | void *buffer, uint16_t bufmax) |
31 | const void *entry) | ||
32 | { | 87 | { |
33 | const struct afs_cache_cell *ccell = entry; | 88 | const struct afs_cell *cell = cookie_netfs_data; |
34 | struct afs_cell *cell = target; | 89 | uint16_t klen; |
35 | 90 | ||
36 | _enter("{%s},{%s}", ccell->name, cell->name); | 91 | _enter("%p,%p,%u", cell, buffer, bufmax); |
37 | 92 | ||
38 | if (strncmp(ccell->name, cell->name, sizeof(ccell->name)) == 0) { | 93 | klen = strlen(cell->name); |
39 | _leave(" = SUCCESS"); | 94 | if (klen > bufmax) |
40 | return CACHEFS_MATCH_SUCCESS; | 95 | return 0; |
41 | } | ||
42 | 96 | ||
43 | _leave(" = FAILED"); | 97 | memcpy(buffer, cell->name, klen); |
44 | return CACHEFS_MATCH_FAILED; | 98 | return klen; |
45 | } | 99 | } |
46 | #endif | ||
47 | 100 | ||
48 | /* | 101 | /* |
49 | * update a cell record in the cache | 102 | * provide new auxiliary cache data |
50 | */ | 103 | */ |
51 | #ifdef AFS_CACHING_SUPPORT | 104 | static uint16_t afs_cell_cache_get_aux(const void *cookie_netfs_data, |
52 | static void afs_cell_cache_update(void *source, void *entry) | 105 | void *buffer, uint16_t bufmax) |
53 | { | 106 | { |
54 | struct afs_cache_cell *ccell = entry; | 107 | const struct afs_cell *cell = cookie_netfs_data; |
55 | struct afs_cell *cell = source; | 108 | uint16_t dlen; |
56 | 109 | ||
57 | _enter("%p,%p", source, entry); | 110 | _enter("%p,%p,%u", cell, buffer, bufmax); |
58 | 111 | ||
59 | strncpy(ccell->name, cell->name, sizeof(ccell->name)); | 112 | dlen = cell->vl_naddrs * sizeof(cell->vl_addrs[0]); |
113 | dlen = min(dlen, bufmax); | ||
114 | dlen &= ~(sizeof(cell->vl_addrs[0]) - 1); | ||
60 | 115 | ||
61 | memcpy(ccell->vl_servers, | 116 | memcpy(buffer, cell->vl_addrs, dlen); |
62 | cell->vl_addrs, | 117 | return dlen; |
63 | min(sizeof(ccell->vl_servers), sizeof(cell->vl_addrs))); | 118 | } |
64 | 119 | ||
120 | /* | ||
121 | * check that the auxiliary data indicates that the entry is still valid | ||
122 | */ | ||
123 | static enum fscache_checkaux afs_cell_cache_check_aux(void *cookie_netfs_data, | ||
124 | const void *buffer, | ||
125 | uint16_t buflen) | ||
126 | { | ||
127 | _leave(" = OKAY"); | ||
128 | return FSCACHE_CHECKAUX_OKAY; | ||
65 | } | 129 | } |
66 | #endif | ||
67 | |||
68 | #ifdef AFS_CACHING_SUPPORT | ||
69 | static cachefs_match_val_t afs_vlocation_cache_match(void *target, | ||
70 | const void *entry); | ||
71 | static void afs_vlocation_cache_update(void *source, void *entry); | ||
72 | |||
73 | struct cachefs_index_def afs_vlocation_cache_index_def = { | ||
74 | .name = "vldb", | ||
75 | .data_size = sizeof(struct afs_cache_vlocation), | ||
76 | .keys[0] = { CACHEFS_INDEX_KEYS_ASCIIZ, 64 }, | ||
77 | .match = afs_vlocation_cache_match, | ||
78 | .update = afs_vlocation_cache_update, | ||
79 | }; | ||
80 | #endif | ||
81 | 130 | ||
131 | /*****************************************************************************/ | ||
82 | /* | 132 | /* |
83 | * match a VLDB record stored in the cache | 133 | * set the key for the index entry |
84 | * - may also load target from entry | ||
85 | */ | 134 | */ |
86 | #ifdef AFS_CACHING_SUPPORT | 135 | static uint16_t afs_vlocation_cache_get_key(const void *cookie_netfs_data, |
87 | static cachefs_match_val_t afs_vlocation_cache_match(void *target, | 136 | void *buffer, uint16_t bufmax) |
88 | const void *entry) | ||
89 | { | 137 | { |
90 | const struct afs_cache_vlocation *vldb = entry; | 138 | const struct afs_vlocation *vlocation = cookie_netfs_data; |
91 | struct afs_vlocation *vlocation = target; | 139 | uint16_t klen; |
140 | |||
141 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); | ||
142 | |||
143 | klen = strnlen(vlocation->vldb.name, sizeof(vlocation->vldb.name)); | ||
144 | if (klen > bufmax) | ||
145 | return 0; | ||
92 | 146 | ||
93 | _enter("{%s},{%s}", vlocation->vldb.name, vldb->name); | 147 | memcpy(buffer, vlocation->vldb.name, klen); |
94 | 148 | ||
95 | if (strncmp(vlocation->vldb.name, vldb->name, sizeof(vldb->name)) == 0 | 149 | _leave(" = %u", klen); |
96 | ) { | 150 | return klen; |
97 | if (!vlocation->valid || | 151 | } |
98 | vlocation->vldb.rtime == vldb->rtime | 152 | |
153 | /* | ||
154 | * provide new auxiliary cache data | ||
155 | */ | ||
156 | static uint16_t afs_vlocation_cache_get_aux(const void *cookie_netfs_data, | ||
157 | void *buffer, uint16_t bufmax) | ||
158 | { | ||
159 | const struct afs_vlocation *vlocation = cookie_netfs_data; | ||
160 | uint16_t dlen; | ||
161 | |||
162 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, bufmax); | ||
163 | |||
164 | dlen = sizeof(struct afs_cache_vlocation); | ||
165 | dlen -= offsetof(struct afs_cache_vlocation, nservers); | ||
166 | if (dlen > bufmax) | ||
167 | return 0; | ||
168 | |||
169 | memcpy(buffer, (uint8_t *)&vlocation->vldb.nservers, dlen); | ||
170 | |||
171 | _leave(" = %u", dlen); | ||
172 | return dlen; | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * check that the auxiliary data indicates that the entry is still valid | ||
177 | */ | ||
178 | static | ||
179 | enum fscache_checkaux afs_vlocation_cache_check_aux(void *cookie_netfs_data, | ||
180 | const void *buffer, | ||
181 | uint16_t buflen) | ||
182 | { | ||
183 | const struct afs_cache_vlocation *cvldb; | ||
184 | struct afs_vlocation *vlocation = cookie_netfs_data; | ||
185 | uint16_t dlen; | ||
186 | |||
187 | _enter("{%s},%p,%u", vlocation->vldb.name, buffer, buflen); | ||
188 | |||
189 | /* check the size of the data is what we're expecting */ | ||
190 | dlen = sizeof(struct afs_cache_vlocation); | ||
191 | dlen -= offsetof(struct afs_cache_vlocation, nservers); | ||
192 | if (dlen != buflen) | ||
193 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
194 | |||
195 | cvldb = container_of(buffer, struct afs_cache_vlocation, nservers); | ||
196 | |||
197 | /* if what's on disk is more valid than what's in memory, then use the | ||
198 | * VL record from the cache */ | ||
199 | if (!vlocation->valid || vlocation->vldb.rtime == cvldb->rtime) { | ||
200 | memcpy((uint8_t *)&vlocation->vldb.nservers, buffer, dlen); | ||
201 | vlocation->valid = 1; | ||
202 | _leave(" = SUCCESS [c->m]"); | ||
203 | return FSCACHE_CHECKAUX_OKAY; | ||
204 | } | ||
205 | |||
206 | /* need to update the cache if the cached info differs */ | ||
207 | if (memcmp(&vlocation->vldb, buffer, dlen) != 0) { | ||
208 | /* delete if the volume IDs for this name differ */ | ||
209 | if (memcmp(&vlocation->vldb.vid, &cvldb->vid, | ||
210 | sizeof(cvldb->vid)) != 0 | ||
99 | ) { | 211 | ) { |
100 | vlocation->vldb = *vldb; | 212 | _leave(" = OBSOLETE"); |
101 | vlocation->valid = 1; | 213 | return FSCACHE_CHECKAUX_OBSOLETE; |
102 | _leave(" = SUCCESS [c->m]"); | ||
103 | return CACHEFS_MATCH_SUCCESS; | ||
104 | } else if (memcmp(&vlocation->vldb, vldb, sizeof(*vldb)) != 0) { | ||
105 | /* delete if VIDs for this name differ */ | ||
106 | if (memcmp(&vlocation->vldb.vid, | ||
107 | &vldb->vid, | ||
108 | sizeof(vldb->vid)) != 0) { | ||
109 | _leave(" = DELETE"); | ||
110 | return CACHEFS_MATCH_SUCCESS_DELETE; | ||
111 | } | ||
112 | |||
113 | _leave(" = UPDATE"); | ||
114 | return CACHEFS_MATCH_SUCCESS_UPDATE; | ||
115 | } else { | ||
116 | _leave(" = SUCCESS"); | ||
117 | return CACHEFS_MATCH_SUCCESS; | ||
118 | } | 214 | } |
215 | |||
216 | _leave(" = UPDATE"); | ||
217 | return FSCACHE_CHECKAUX_NEEDS_UPDATE; | ||
119 | } | 218 | } |
120 | 219 | ||
121 | _leave(" = FAILED"); | 220 | _leave(" = OKAY"); |
122 | return CACHEFS_MATCH_FAILED; | 221 | return FSCACHE_CHECKAUX_OKAY; |
123 | } | 222 | } |
124 | #endif | ||
125 | 223 | ||
224 | /*****************************************************************************/ | ||
126 | /* | 225 | /* |
127 | * update a VLDB record stored in the cache | 226 | * set the key for the volume index entry |
128 | */ | 227 | */ |
129 | #ifdef AFS_CACHING_SUPPORT | 228 | static uint16_t afs_volume_cache_get_key(const void *cookie_netfs_data, |
130 | static void afs_vlocation_cache_update(void *source, void *entry) | 229 | void *buffer, uint16_t bufmax) |
131 | { | 230 | { |
132 | struct afs_cache_vlocation *vldb = entry; | 231 | const struct afs_volume *volume = cookie_netfs_data; |
133 | struct afs_vlocation *vlocation = source; | 232 | uint16_t klen; |
233 | |||
234 | _enter("{%u},%p,%u", volume->type, buffer, bufmax); | ||
235 | |||
236 | klen = sizeof(volume->type); | ||
237 | if (klen > bufmax) | ||
238 | return 0; | ||
134 | 239 | ||
135 | _enter(""); | 240 | memcpy(buffer, &volume->type, sizeof(volume->type)); |
241 | |||
242 | _leave(" = %u", klen); | ||
243 | return klen; | ||
136 | 244 | ||
137 | *vldb = vlocation->vldb; | ||
138 | } | 245 | } |
139 | #endif | ||
140 | |||
141 | #ifdef AFS_CACHING_SUPPORT | ||
142 | static cachefs_match_val_t afs_volume_cache_match(void *target, | ||
143 | const void *entry); | ||
144 | static void afs_volume_cache_update(void *source, void *entry); | ||
145 | |||
146 | struct cachefs_index_def afs_volume_cache_index_def = { | ||
147 | .name = "volume", | ||
148 | .data_size = sizeof(struct afs_cache_vhash), | ||
149 | .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 1 }, | ||
150 | .keys[1] = { CACHEFS_INDEX_KEYS_BIN, 1 }, | ||
151 | .match = afs_volume_cache_match, | ||
152 | .update = afs_volume_cache_update, | ||
153 | }; | ||
154 | #endif | ||
155 | 246 | ||
247 | /*****************************************************************************/ | ||
156 | /* | 248 | /* |
157 | * match a volume hash record stored in the cache | 249 | * set the key for the index entry |
158 | */ | 250 | */ |
159 | #ifdef AFS_CACHING_SUPPORT | 251 | static uint16_t afs_vnode_cache_get_key(const void *cookie_netfs_data, |
160 | static cachefs_match_val_t afs_volume_cache_match(void *target, | 252 | void *buffer, uint16_t bufmax) |
161 | const void *entry) | ||
162 | { | 253 | { |
163 | const struct afs_cache_vhash *vhash = entry; | 254 | const struct afs_vnode *vnode = cookie_netfs_data; |
164 | struct afs_volume *volume = target; | 255 | uint16_t klen; |
165 | 256 | ||
166 | _enter("{%u},{%u}", volume->type, vhash->vtype); | 257 | _enter("{%x,%x,%llx},%p,%u", |
258 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, | ||
259 | buffer, bufmax); | ||
167 | 260 | ||
168 | if (volume->type == vhash->vtype) { | 261 | klen = sizeof(vnode->fid.vnode); |
169 | _leave(" = SUCCESS"); | 262 | if (klen > bufmax) |
170 | return CACHEFS_MATCH_SUCCESS; | 263 | return 0; |
171 | } | 264 | |
265 | memcpy(buffer, &vnode->fid.vnode, sizeof(vnode->fid.vnode)); | ||
172 | 266 | ||
173 | _leave(" = FAILED"); | 267 | _leave(" = %u", klen); |
174 | return CACHEFS_MATCH_FAILED; | 268 | return klen; |
175 | } | 269 | } |
176 | #endif | ||
177 | 270 | ||
178 | /* | 271 | /* |
179 | * update a volume hash record stored in the cache | 272 | * provide updated file attributes |
180 | */ | 273 | */ |
181 | #ifdef AFS_CACHING_SUPPORT | 274 | static void afs_vnode_cache_get_attr(const void *cookie_netfs_data, |
182 | static void afs_volume_cache_update(void *source, void *entry) | 275 | uint64_t *size) |
183 | { | 276 | { |
184 | struct afs_cache_vhash *vhash = entry; | 277 | const struct afs_vnode *vnode = cookie_netfs_data; |
185 | struct afs_volume *volume = source; | ||
186 | 278 | ||
187 | _enter(""); | 279 | _enter("{%x,%x,%llx},", |
280 | vnode->fid.vnode, vnode->fid.unique, | ||
281 | vnode->status.data_version); | ||
188 | 282 | ||
189 | vhash->vtype = volume->type; | 283 | *size = vnode->status.size; |
190 | } | 284 | } |
191 | #endif | ||
192 | |||
193 | #ifdef AFS_CACHING_SUPPORT | ||
194 | static cachefs_match_val_t afs_vnode_cache_match(void *target, | ||
195 | const void *entry); | ||
196 | static void afs_vnode_cache_update(void *source, void *entry); | ||
197 | |||
198 | struct cachefs_index_def afs_vnode_cache_index_def = { | ||
199 | .name = "vnode", | ||
200 | .data_size = sizeof(struct afs_cache_vnode), | ||
201 | .keys[0] = { CACHEFS_INDEX_KEYS_BIN, 4 }, | ||
202 | .match = afs_vnode_cache_match, | ||
203 | .update = afs_vnode_cache_update, | ||
204 | }; | ||
205 | #endif | ||
206 | 285 | ||
207 | /* | 286 | /* |
208 | * match a vnode record stored in the cache | 287 | * provide new auxiliary cache data |
288 | */ | ||
289 | static uint16_t afs_vnode_cache_get_aux(const void *cookie_netfs_data, | ||
290 | void *buffer, uint16_t bufmax) | ||
291 | { | ||
292 | const struct afs_vnode *vnode = cookie_netfs_data; | ||
293 | uint16_t dlen; | ||
294 | |||
295 | _enter("{%x,%x,%Lx},%p,%u", | ||
296 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, | ||
297 | buffer, bufmax); | ||
298 | |||
299 | dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); | ||
300 | if (dlen > bufmax) | ||
301 | return 0; | ||
302 | |||
303 | memcpy(buffer, &vnode->fid.unique, sizeof(vnode->fid.unique)); | ||
304 | buffer += sizeof(vnode->fid.unique); | ||
305 | memcpy(buffer, &vnode->status.data_version, | ||
306 | sizeof(vnode->status.data_version)); | ||
307 | |||
308 | _leave(" = %u", dlen); | ||
309 | return dlen; | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * check that the auxilliary data indicates that the entry is still valid | ||
209 | */ | 314 | */ |
210 | #ifdef AFS_CACHING_SUPPORT | 315 | static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data, |
211 | static cachefs_match_val_t afs_vnode_cache_match(void *target, | 316 | const void *buffer, |
212 | const void *entry) | 317 | uint16_t buflen) |
213 | { | 318 | { |
214 | const struct afs_cache_vnode *cvnode = entry; | 319 | struct afs_vnode *vnode = cookie_netfs_data; |
215 | struct afs_vnode *vnode = target; | 320 | uint16_t dlen; |
216 | 321 | ||
217 | _enter("{%x,%x,%Lx},{%x,%x,%Lx}", | 322 | _enter("{%x,%x,%llx},%p,%u", |
218 | vnode->fid.vnode, | 323 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version, |
219 | vnode->fid.unique, | 324 | buffer, buflen); |
220 | vnode->status.version, | 325 | |
221 | cvnode->vnode_id, | 326 | /* check the size of the data is what we're expecting */ |
222 | cvnode->vnode_unique, | 327 | dlen = sizeof(vnode->fid.unique) + sizeof(vnode->status.data_version); |
223 | cvnode->data_version); | 328 | if (dlen != buflen) { |
224 | 329 | _leave(" = OBSOLETE [len %hx != %hx]", dlen, buflen); | |
225 | if (vnode->fid.vnode != cvnode->vnode_id) { | 330 | return FSCACHE_CHECKAUX_OBSOLETE; |
226 | _leave(" = FAILED"); | ||
227 | return CACHEFS_MATCH_FAILED; | ||
228 | } | 331 | } |
229 | 332 | ||
230 | if (vnode->fid.unique != cvnode->vnode_unique || | 333 | if (memcmp(buffer, |
231 | vnode->status.version != cvnode->data_version) { | 334 | &vnode->fid.unique, |
232 | _leave(" = DELETE"); | 335 | sizeof(vnode->fid.unique) |
233 | return CACHEFS_MATCH_SUCCESS_DELETE; | 336 | ) != 0) { |
337 | unsigned unique; | ||
338 | |||
339 | memcpy(&unique, buffer, sizeof(unique)); | ||
340 | |||
341 | _leave(" = OBSOLETE [uniq %x != %x]", | ||
342 | unique, vnode->fid.unique); | ||
343 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
344 | } | ||
345 | |||
346 | if (memcmp(buffer + sizeof(vnode->fid.unique), | ||
347 | &vnode->status.data_version, | ||
348 | sizeof(vnode->status.data_version) | ||
349 | ) != 0) { | ||
350 | afs_dataversion_t version; | ||
351 | |||
352 | memcpy(&version, buffer + sizeof(vnode->fid.unique), | ||
353 | sizeof(version)); | ||
354 | |||
355 | _leave(" = OBSOLETE [vers %llx != %llx]", | ||
356 | version, vnode->status.data_version); | ||
357 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
234 | } | 358 | } |
235 | 359 | ||
236 | _leave(" = SUCCESS"); | 360 | _leave(" = SUCCESS"); |
237 | return CACHEFS_MATCH_SUCCESS; | 361 | return FSCACHE_CHECKAUX_OKAY; |
238 | } | 362 | } |
239 | #endif | ||
240 | 363 | ||
241 | /* | 364 | /* |
242 | * update a vnode record stored in the cache | 365 | * indication that the cookie is no longer cached |
366 | * - this function is called when the backing store currently caching a cookie | ||
367 | * is removed | ||
368 | * - the netfs should use this to clean up any markers indicating cached pages | ||
369 | * - this is mandatory for any object that may have data | ||
243 | */ | 370 | */ |
244 | #ifdef AFS_CACHING_SUPPORT | 371 | static void afs_vnode_cache_now_uncached(void *cookie_netfs_data) |
245 | static void afs_vnode_cache_update(void *source, void *entry) | ||
246 | { | 372 | { |
247 | struct afs_cache_vnode *cvnode = entry; | 373 | struct afs_vnode *vnode = cookie_netfs_data; |
248 | struct afs_vnode *vnode = source; | 374 | struct pagevec pvec; |
375 | pgoff_t first; | ||
376 | int loop, nr_pages; | ||
377 | |||
378 | _enter("{%x,%x,%Lx}", | ||
379 | vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version); | ||
380 | |||
381 | pagevec_init(&pvec, 0); | ||
382 | first = 0; | ||
383 | |||
384 | for (;;) { | ||
385 | /* grab a bunch of pages to clean */ | ||
386 | nr_pages = pagevec_lookup(&pvec, vnode->vfs_inode.i_mapping, | ||
387 | first, | ||
388 | PAGEVEC_SIZE - pagevec_count(&pvec)); | ||
389 | if (!nr_pages) | ||
390 | break; | ||
249 | 391 | ||
250 | _enter(""); | 392 | for (loop = 0; loop < nr_pages; loop++) |
393 | ClearPageFsCache(pvec.pages[loop]); | ||
394 | |||
395 | first = pvec.pages[nr_pages - 1]->index + 1; | ||
396 | |||
397 | pvec.nr = nr_pages; | ||
398 | pagevec_release(&pvec); | ||
399 | cond_resched(); | ||
400 | } | ||
251 | 401 | ||
252 | cvnode->vnode_id = vnode->fid.vnode; | 402 | _leave(""); |
253 | cvnode->vnode_unique = vnode->fid.unique; | ||
254 | cvnode->data_version = vnode->status.version; | ||
255 | } | 403 | } |
256 | #endif | ||
diff --git a/fs/afs/cache.h b/fs/afs/cache.h index 36a3642cf90e..5c4f6b499e90 100644 --- a/fs/afs/cache.h +++ b/fs/afs/cache.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS local cache management interface | 1 | /* AFS local cache management interface |
2 | * | 2 | * |
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -9,15 +9,4 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #ifndef AFS_CACHE_H | 12 | #include <linux/fscache.h> |
13 | #define AFS_CACHE_H | ||
14 | |||
15 | #undef AFS_CACHING_SUPPORT | ||
16 | |||
17 | #include <linux/mm.h> | ||
18 | #ifdef AFS_CACHING_SUPPORT | ||
19 | #include <linux/cachefs.h> | ||
20 | #endif | ||
21 | #include "types.h" | ||
22 | |||
23 | #endif /* AFS_CACHE_H */ | ||
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 5e1df14e16b1..e19c13f059ed 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c | |||
@@ -147,12 +147,11 @@ struct afs_cell *afs_cell_create(const char *name, char *vllist) | |||
147 | if (ret < 0) | 147 | if (ret < 0) |
148 | goto error; | 148 | goto error; |
149 | 149 | ||
150 | #ifdef AFS_CACHING_SUPPORT | 150 | #ifdef CONFIG_AFS_FSCACHE |
151 | /* put it up for caching */ | 151 | /* put it up for caching (this never returns an error) */ |
152 | cachefs_acquire_cookie(afs_cache_netfs.primary_index, | 152 | cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, |
153 | &afs_vlocation_cache_index_def, | 153 | &afs_cell_cache_index_def, |
154 | cell, | 154 | cell); |
155 | &cell->cache); | ||
156 | #endif | 155 | #endif |
157 | 156 | ||
158 | /* add to the cell lists */ | 157 | /* add to the cell lists */ |
@@ -362,10 +361,9 @@ static void afs_cell_destroy(struct afs_cell *cell) | |||
362 | list_del_init(&cell->proc_link); | 361 | list_del_init(&cell->proc_link); |
363 | up_write(&afs_proc_cells_sem); | 362 | up_write(&afs_proc_cells_sem); |
364 | 363 | ||
365 | #ifdef AFS_CACHING_SUPPORT | 364 | #ifdef CONFIG_AFS_FSCACHE |
366 | cachefs_relinquish_cookie(cell->cache, 0); | 365 | fscache_relinquish_cookie(cell->cache, 0); |
367 | #endif | 366 | #endif |
368 | |||
369 | key_put(cell->anonymous_key); | 367 | key_put(cell->anonymous_key); |
370 | kfree(cell); | 368 | kfree(cell); |
371 | 369 | ||
diff --git a/fs/afs/file.c b/fs/afs/file.c index a3901769a96c..7a1d942ef68d 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -23,6 +23,9 @@ static void afs_invalidatepage(struct page *page, unsigned long offset); | |||
23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); | 23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); |
24 | static int afs_launder_page(struct page *page); | 24 | static int afs_launder_page(struct page *page); |
25 | 25 | ||
26 | static int afs_readpages(struct file *filp, struct address_space *mapping, | ||
27 | struct list_head *pages, unsigned nr_pages); | ||
28 | |||
26 | const struct file_operations afs_file_operations = { | 29 | const struct file_operations afs_file_operations = { |
27 | .open = afs_open, | 30 | .open = afs_open, |
28 | .release = afs_release, | 31 | .release = afs_release, |
@@ -46,6 +49,7 @@ const struct inode_operations afs_file_inode_operations = { | |||
46 | 49 | ||
47 | const struct address_space_operations afs_fs_aops = { | 50 | const struct address_space_operations afs_fs_aops = { |
48 | .readpage = afs_readpage, | 51 | .readpage = afs_readpage, |
52 | .readpages = afs_readpages, | ||
49 | .set_page_dirty = afs_set_page_dirty, | 53 | .set_page_dirty = afs_set_page_dirty, |
50 | .launder_page = afs_launder_page, | 54 | .launder_page = afs_launder_page, |
51 | .releasepage = afs_releasepage, | 55 | .releasepage = afs_releasepage, |
@@ -101,37 +105,18 @@ int afs_release(struct inode *inode, struct file *file) | |||
101 | /* | 105 | /* |
102 | * deal with notification that a page was read from the cache | 106 | * deal with notification that a page was read from the cache |
103 | */ | 107 | */ |
104 | #ifdef AFS_CACHING_SUPPORT | 108 | static void afs_file_readpage_read_complete(struct page *page, |
105 | static void afs_readpage_read_complete(void *cookie_data, | 109 | void *data, |
106 | struct page *page, | 110 | int error) |
107 | void *data, | ||
108 | int error) | ||
109 | { | 111 | { |
110 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | 112 | _enter("%p,%p,%d", page, data, error); |
111 | 113 | ||
112 | if (error) | 114 | /* if the read completes with an error, we just unlock the page and let |
113 | SetPageError(page); | 115 | * the VM reissue the readpage */ |
114 | else | 116 | if (!error) |
115 | SetPageUptodate(page); | 117 | SetPageUptodate(page); |
116 | unlock_page(page); | 118 | unlock_page(page); |
117 | |||
118 | } | 119 | } |
119 | #endif | ||
120 | |||
121 | /* | ||
122 | * deal with notification that a page was written to the cache | ||
123 | */ | ||
124 | #ifdef AFS_CACHING_SUPPORT | ||
125 | static void afs_readpage_write_complete(void *cookie_data, | ||
126 | struct page *page, | ||
127 | void *data, | ||
128 | int error) | ||
129 | { | ||
130 | _enter("%p,%p,%p,%d", cookie_data, page, data, error); | ||
131 | |||
132 | unlock_page(page); | ||
133 | } | ||
134 | #endif | ||
135 | 120 | ||
136 | /* | 121 | /* |
137 | * AFS read page from file, directory or symlink | 122 | * AFS read page from file, directory or symlink |
@@ -161,9 +146,9 @@ static int afs_readpage(struct file *file, struct page *page) | |||
161 | if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) | 146 | if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) |
162 | goto error; | 147 | goto error; |
163 | 148 | ||
164 | #ifdef AFS_CACHING_SUPPORT | ||
165 | /* is it cached? */ | 149 | /* is it cached? */ |
166 | ret = cachefs_read_or_alloc_page(vnode->cache, | 150 | #ifdef CONFIG_AFS_FSCACHE |
151 | ret = fscache_read_or_alloc_page(vnode->cache, | ||
167 | page, | 152 | page, |
168 | afs_file_readpage_read_complete, | 153 | afs_file_readpage_read_complete, |
169 | NULL, | 154 | NULL, |
@@ -171,20 +156,21 @@ static int afs_readpage(struct file *file, struct page *page) | |||
171 | #else | 156 | #else |
172 | ret = -ENOBUFS; | 157 | ret = -ENOBUFS; |
173 | #endif | 158 | #endif |
174 | |||
175 | switch (ret) { | 159 | switch (ret) { |
176 | /* read BIO submitted and wb-journal entry found */ | ||
177 | case 1: | ||
178 | BUG(); // TODO - handle wb-journal match | ||
179 | |||
180 | /* read BIO submitted (page in cache) */ | 160 | /* read BIO submitted (page in cache) */ |
181 | case 0: | 161 | case 0: |
182 | break; | 162 | break; |
183 | 163 | ||
184 | /* no page available in cache */ | 164 | /* page not yet cached */ |
185 | case -ENOBUFS: | ||
186 | case -ENODATA: | 165 | case -ENODATA: |
166 | _debug("cache said ENODATA"); | ||
167 | goto go_on; | ||
168 | |||
169 | /* page will not be cached */ | ||
170 | case -ENOBUFS: | ||
171 | _debug("cache said ENOBUFS"); | ||
187 | default: | 172 | default: |
173 | go_on: | ||
188 | offset = page->index << PAGE_CACHE_SHIFT; | 174 | offset = page->index << PAGE_CACHE_SHIFT; |
189 | len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); | 175 | len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE); |
190 | 176 | ||
@@ -198,27 +184,25 @@ static int afs_readpage(struct file *file, struct page *page) | |||
198 | set_bit(AFS_VNODE_DELETED, &vnode->flags); | 184 | set_bit(AFS_VNODE_DELETED, &vnode->flags); |
199 | ret = -ESTALE; | 185 | ret = -ESTALE; |
200 | } | 186 | } |
201 | #ifdef AFS_CACHING_SUPPORT | 187 | |
202 | cachefs_uncache_page(vnode->cache, page); | 188 | #ifdef CONFIG_AFS_FSCACHE |
189 | fscache_uncache_page(vnode->cache, page); | ||
203 | #endif | 190 | #endif |
191 | BUG_ON(PageFsCache(page)); | ||
204 | goto error; | 192 | goto error; |
205 | } | 193 | } |
206 | 194 | ||
207 | SetPageUptodate(page); | 195 | SetPageUptodate(page); |
208 | 196 | ||
209 | #ifdef AFS_CACHING_SUPPORT | 197 | /* send the page to the cache */ |
210 | if (cachefs_write_page(vnode->cache, | 198 | #ifdef CONFIG_AFS_FSCACHE |
211 | page, | 199 | if (PageFsCache(page) && |
212 | afs_file_readpage_write_complete, | 200 | fscache_write_page(vnode->cache, page, GFP_KERNEL) != 0) { |
213 | NULL, | 201 | fscache_uncache_page(vnode->cache, page); |
214 | GFP_KERNEL) != 0 | 202 | BUG_ON(PageFsCache(page)); |
215 | ) { | ||
216 | cachefs_uncache_page(vnode->cache, page); | ||
217 | unlock_page(page); | ||
218 | } | 203 | } |
219 | #else | ||
220 | unlock_page(page); | ||
221 | #endif | 204 | #endif |
205 | unlock_page(page); | ||
222 | } | 206 | } |
223 | 207 | ||
224 | _leave(" = 0"); | 208 | _leave(" = 0"); |
@@ -232,34 +216,59 @@ error: | |||
232 | } | 216 | } |
233 | 217 | ||
234 | /* | 218 | /* |
235 | * invalidate part or all of a page | 219 | * read a set of pages |
236 | */ | 220 | */ |
237 | static void afs_invalidatepage(struct page *page, unsigned long offset) | 221 | static int afs_readpages(struct file *file, struct address_space *mapping, |
222 | struct list_head *pages, unsigned nr_pages) | ||
238 | { | 223 | { |
239 | int ret = 1; | 224 | struct afs_vnode *vnode; |
225 | int ret = 0; | ||
240 | 226 | ||
241 | _enter("{%lu},%lu", page->index, offset); | 227 | _enter(",{%lu},,%d", mapping->host->i_ino, nr_pages); |
242 | 228 | ||
243 | BUG_ON(!PageLocked(page)); | 229 | vnode = AFS_FS_I(mapping->host); |
230 | if (vnode->flags & AFS_VNODE_DELETED) { | ||
231 | _leave(" = -ESTALE"); | ||
232 | return -ESTALE; | ||
233 | } | ||
244 | 234 | ||
245 | if (PagePrivate(page)) { | 235 | /* attempt to read as many of the pages as possible */ |
246 | /* We release buffers only if the entire page is being | 236 | #ifdef CONFIG_AFS_FSCACHE |
247 | * invalidated. | 237 | ret = fscache_read_or_alloc_pages(vnode->cache, |
248 | * The get_block cached value has been unconditionally | 238 | mapping, |
249 | * invalidated, so real IO is not possible anymore. | 239 | pages, |
250 | */ | 240 | &nr_pages, |
251 | if (offset == 0) { | 241 | afs_file_readpage_read_complete, |
252 | BUG_ON(!PageLocked(page)); | 242 | NULL, |
253 | 243 | mapping_gfp_mask(mapping)); | |
254 | ret = 0; | 244 | #else |
255 | if (!PageWriteback(page)) | 245 | ret = -ENOBUFS; |
256 | ret = page->mapping->a_ops->releasepage(page, | 246 | #endif |
257 | 0); | 247 | |
258 | /* possibly should BUG_ON(!ret); - neilb */ | 248 | switch (ret) { |
259 | } | 249 | /* all pages are being read from the cache */ |
250 | case 0: | ||
251 | BUG_ON(!list_empty(pages)); | ||
252 | BUG_ON(nr_pages != 0); | ||
253 | _leave(" = 0 [reading all]"); | ||
254 | return 0; | ||
255 | |||
256 | /* there were pages that couldn't be read from the cache */ | ||
257 | case -ENODATA: | ||
258 | case -ENOBUFS: | ||
259 | break; | ||
260 | |||
261 | /* other error */ | ||
262 | default: | ||
263 | _leave(" = %d", ret); | ||
264 | return ret; | ||
260 | } | 265 | } |
261 | 266 | ||
262 | _leave(" = %d", ret); | 267 | /* load the missing pages from the network */ |
268 | ret = read_cache_pages(mapping, pages, (void *) afs_readpage, file); | ||
269 | |||
270 | _leave(" = %d [netting]", ret); | ||
271 | return ret; | ||
263 | } | 272 | } |
264 | 273 | ||
265 | /* | 274 | /* |
@@ -273,25 +282,82 @@ static int afs_launder_page(struct page *page) | |||
273 | } | 282 | } |
274 | 283 | ||
275 | /* | 284 | /* |
276 | * release a page and cleanup its private data | 285 | * invalidate part or all of a page |
286 | * - release a page and clean up its private data if offset is 0 (indicating | ||
287 | * the entire page) | ||
288 | */ | ||
289 | static void afs_invalidatepage(struct page *page, unsigned long offset) | ||
290 | { | ||
291 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | ||
292 | |||
293 | _enter("{%lu},%lu", page->index, offset); | ||
294 | |||
295 | BUG_ON(!PageLocked(page)); | ||
296 | |||
297 | /* we clean up only if the entire page is being invalidated */ | ||
298 | if (offset == 0) { | ||
299 | #ifdef CONFIG_AFS_FSCACHE | ||
300 | if (PageFsCache(page)) { | ||
301 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | ||
302 | fscache_wait_on_page_write(vnode->cache, page); | ||
303 | fscache_uncache_page(vnode->cache, page); | ||
304 | ClearPageFsCache(page); | ||
305 | } | ||
306 | #endif | ||
307 | |||
308 | if (PagePrivate(page)) { | ||
309 | if (wb && !PageWriteback(page)) { | ||
310 | set_page_private(page, 0); | ||
311 | afs_put_writeback(wb); | ||
312 | } | ||
313 | |||
314 | if (!page_private(page)) | ||
315 | ClearPagePrivate(page); | ||
316 | } | ||
317 | } | ||
318 | |||
319 | _leave(""); | ||
320 | } | ||
321 | |||
322 | /* | ||
323 | * release a page and clean up its private state if it's not busy | ||
324 | * - return true if the page can now be released, false if not | ||
277 | */ | 325 | */ |
278 | static int afs_releasepage(struct page *page, gfp_t gfp_flags) | 326 | static int afs_releasepage(struct page *page, gfp_t gfp_flags) |
279 | { | 327 | { |
328 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | ||
280 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | 329 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); |
281 | struct afs_writeback *wb; | ||
282 | 330 | ||
283 | _enter("{{%x:%u}[%lu],%lx},%x", | 331 | _enter("{{%x:%u}[%lu],%lx},%x", |
284 | vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, | 332 | vnode->fid.vid, vnode->fid.vnode, page->index, page->flags, |
285 | gfp_flags); | 333 | gfp_flags); |
286 | 334 | ||
335 | /* deny if page is being written to the cache and the caller hasn't | ||
336 | * elected to wait */ | ||
337 | #ifdef CONFIG_AFS_FSCACHE | ||
338 | if (PageFsCache(page)) { | ||
339 | if (fscache_check_page_write(vnode->cache, page)) { | ||
340 | if (!(gfp_flags & __GFP_WAIT)) { | ||
341 | _leave(" = F [cache busy]"); | ||
342 | return 0; | ||
343 | } | ||
344 | fscache_wait_on_page_write(vnode->cache, page); | ||
345 | } | ||
346 | |||
347 | fscache_uncache_page(vnode->cache, page); | ||
348 | ClearPageFsCache(page); | ||
349 | } | ||
350 | #endif | ||
351 | |||
287 | if (PagePrivate(page)) { | 352 | if (PagePrivate(page)) { |
288 | wb = (struct afs_writeback *) page_private(page); | 353 | if (wb) { |
289 | ASSERT(wb != NULL); | 354 | set_page_private(page, 0); |
290 | set_page_private(page, 0); | 355 | afs_put_writeback(wb); |
356 | } | ||
291 | ClearPagePrivate(page); | 357 | ClearPagePrivate(page); |
292 | afs_put_writeback(wb); | ||
293 | } | 358 | } |
294 | 359 | ||
295 | _leave(" = 0"); | 360 | /* indicate that the page can be released */ |
296 | return 0; | 361 | _leave(" = T"); |
362 | return 1; | ||
297 | } | 363 | } |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index bb47217f6a18..c048f0658751 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -61,6 +61,11 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) | |||
61 | return -EBADMSG; | 61 | return -EBADMSG; |
62 | } | 62 | } |
63 | 63 | ||
64 | #ifdef CONFIG_AFS_FSCACHE | ||
65 | if (vnode->status.size != inode->i_size) | ||
66 | fscache_attr_changed(vnode->cache); | ||
67 | #endif | ||
68 | |||
64 | inode->i_nlink = vnode->status.nlink; | 69 | inode->i_nlink = vnode->status.nlink; |
65 | inode->i_uid = vnode->status.owner; | 70 | inode->i_uid = vnode->status.owner; |
66 | inode->i_gid = 0; | 71 | inode->i_gid = 0; |
@@ -149,15 +154,6 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
149 | return inode; | 154 | return inode; |
150 | } | 155 | } |
151 | 156 | ||
152 | #ifdef AFS_CACHING_SUPPORT | ||
153 | /* set up caching before reading the status, as fetch-status reads the | ||
154 | * first page of symlinks to see if they're really mntpts */ | ||
155 | cachefs_acquire_cookie(vnode->volume->cache, | ||
156 | NULL, | ||
157 | vnode, | ||
158 | &vnode->cache); | ||
159 | #endif | ||
160 | |||
161 | if (!status) { | 157 | if (!status) { |
162 | /* it's a remotely extant inode */ | 158 | /* it's a remotely extant inode */ |
163 | set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); | 159 | set_bit(AFS_VNODE_CB_BROKEN, &vnode->flags); |
@@ -183,6 +179,15 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
183 | } | 179 | } |
184 | } | 180 | } |
185 | 181 | ||
182 | /* set up caching before mapping the status, as map-status reads the | ||
183 | * first page of symlinks to see if they're really mountpoints */ | ||
184 | inode->i_size = vnode->status.size; | ||
185 | #ifdef CONFIG_AFS_FSCACHE | ||
186 | vnode->cache = fscache_acquire_cookie(vnode->volume->cache, | ||
187 | &afs_vnode_cache_index_def, | ||
188 | vnode); | ||
189 | #endif | ||
190 | |||
186 | ret = afs_inode_map_status(vnode, key); | 191 | ret = afs_inode_map_status(vnode, key); |
187 | if (ret < 0) | 192 | if (ret < 0) |
188 | goto bad_inode; | 193 | goto bad_inode; |
@@ -196,6 +201,10 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, | |||
196 | 201 | ||
197 | /* failure */ | 202 | /* failure */ |
198 | bad_inode: | 203 | bad_inode: |
204 | #ifdef CONFIG_AFS_FSCACHE | ||
205 | fscache_relinquish_cookie(vnode->cache, 0); | ||
206 | vnode->cache = NULL; | ||
207 | #endif | ||
199 | iget_failed(inode); | 208 | iget_failed(inode); |
200 | _leave(" = %d [bad]", ret); | 209 | _leave(" = %d [bad]", ret); |
201 | return ERR_PTR(ret); | 210 | return ERR_PTR(ret); |
@@ -340,8 +349,8 @@ void afs_clear_inode(struct inode *inode) | |||
340 | ASSERT(list_empty(&vnode->writebacks)); | 349 | ASSERT(list_empty(&vnode->writebacks)); |
341 | ASSERT(!vnode->cb_promised); | 350 | ASSERT(!vnode->cb_promised); |
342 | 351 | ||
343 | #ifdef AFS_CACHING_SUPPORT | 352 | #ifdef CONFIG_AFS_FSCACHE |
344 | cachefs_relinquish_cookie(vnode->cache, 0); | 353 | fscache_relinquish_cookie(vnode->cache, 0); |
345 | vnode->cache = NULL; | 354 | vnode->cache = NULL; |
346 | #endif | 355 | #endif |
347 | 356 | ||
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 67f259d99cd6..106be66dafd2 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include "afs.h" | 22 | #include "afs.h" |
23 | #include "afs_vl.h" | 23 | #include "afs_vl.h" |
24 | #include "cache.h" | ||
24 | 25 | ||
25 | #define AFS_CELL_MAX_ADDRS 15 | 26 | #define AFS_CELL_MAX_ADDRS 15 |
26 | 27 | ||
@@ -193,8 +194,8 @@ struct afs_cell { | |||
193 | struct key *anonymous_key; /* anonymous user key for this cell */ | 194 | struct key *anonymous_key; /* anonymous user key for this cell */ |
194 | struct list_head proc_link; /* /proc cell list link */ | 195 | struct list_head proc_link; /* /proc cell list link */ |
195 | struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ | 196 | struct proc_dir_entry *proc_dir; /* /proc dir for this cell */ |
196 | #ifdef AFS_CACHING_SUPPORT | 197 | #ifdef CONFIG_AFS_FSCACHE |
197 | struct cachefs_cookie *cache; /* caching cookie */ | 198 | struct fscache_cookie *cache; /* caching cookie */ |
198 | #endif | 199 | #endif |
199 | 200 | ||
200 | /* server record management */ | 201 | /* server record management */ |
@@ -249,8 +250,8 @@ struct afs_vlocation { | |||
249 | struct list_head grave; /* link in master graveyard list */ | 250 | struct list_head grave; /* link in master graveyard list */ |
250 | struct list_head update; /* link in master update list */ | 251 | struct list_head update; /* link in master update list */ |
251 | struct afs_cell *cell; /* cell to which volume belongs */ | 252 | struct afs_cell *cell; /* cell to which volume belongs */ |
252 | #ifdef AFS_CACHING_SUPPORT | 253 | #ifdef CONFIG_AFS_FSCACHE |
253 | struct cachefs_cookie *cache; /* caching cookie */ | 254 | struct fscache_cookie *cache; /* caching cookie */ |
254 | #endif | 255 | #endif |
255 | struct afs_cache_vlocation vldb; /* volume information DB record */ | 256 | struct afs_cache_vlocation vldb; /* volume information DB record */ |
256 | struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ | 257 | struct afs_volume *vols[3]; /* volume access record pointer (index by type) */ |
@@ -302,8 +303,8 @@ struct afs_volume { | |||
302 | atomic_t usage; | 303 | atomic_t usage; |
303 | struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ | 304 | struct afs_cell *cell; /* cell to which belongs (unrefd ptr) */ |
304 | struct afs_vlocation *vlocation; /* volume location */ | 305 | struct afs_vlocation *vlocation; /* volume location */ |
305 | #ifdef AFS_CACHING_SUPPORT | 306 | #ifdef CONFIG_AFS_FSCACHE |
306 | struct cachefs_cookie *cache; /* caching cookie */ | 307 | struct fscache_cookie *cache; /* caching cookie */ |
307 | #endif | 308 | #endif |
308 | afs_volid_t vid; /* volume ID */ | 309 | afs_volid_t vid; /* volume ID */ |
309 | afs_voltype_t type; /* type of volume */ | 310 | afs_voltype_t type; /* type of volume */ |
@@ -333,8 +334,8 @@ struct afs_vnode { | |||
333 | struct afs_server *server; /* server currently supplying this file */ | 334 | struct afs_server *server; /* server currently supplying this file */ |
334 | struct afs_fid fid; /* the file identifier for this inode */ | 335 | struct afs_fid fid; /* the file identifier for this inode */ |
335 | struct afs_file_status status; /* AFS status info for this file */ | 336 | struct afs_file_status status; /* AFS status info for this file */ |
336 | #ifdef AFS_CACHING_SUPPORT | 337 | #ifdef CONFIG_AFS_FSCACHE |
337 | struct cachefs_cookie *cache; /* caching cookie */ | 338 | struct fscache_cookie *cache; /* caching cookie */ |
338 | #endif | 339 | #endif |
339 | struct afs_permits *permits; /* cache of permits so far obtained */ | 340 | struct afs_permits *permits; /* cache of permits so far obtained */ |
340 | struct mutex permits_lock; /* lock for altering permits list */ | 341 | struct mutex permits_lock; /* lock for altering permits list */ |
@@ -428,6 +429,22 @@ struct afs_uuid { | |||
428 | 429 | ||
429 | /*****************************************************************************/ | 430 | /*****************************************************************************/ |
430 | /* | 431 | /* |
432 | * cache.c | ||
433 | */ | ||
434 | #ifdef CONFIG_AFS_FSCACHE | ||
435 | extern struct fscache_netfs afs_cache_netfs; | ||
436 | extern struct fscache_cookie_def afs_cell_cache_index_def; | ||
437 | extern struct fscache_cookie_def afs_vlocation_cache_index_def; | ||
438 | extern struct fscache_cookie_def afs_volume_cache_index_def; | ||
439 | extern struct fscache_cookie_def afs_vnode_cache_index_def; | ||
440 | #else | ||
441 | #define afs_cell_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
442 | #define afs_vlocation_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
443 | #define afs_volume_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
444 | #define afs_vnode_cache_index_def (*(struct fscache_cookie_def *) NULL) | ||
445 | #endif | ||
446 | |||
447 | /* | ||
431 | * callback.c | 448 | * callback.c |
432 | */ | 449 | */ |
433 | extern void afs_init_callback_state(struct afs_server *); | 450 | extern void afs_init_callback_state(struct afs_server *); |
@@ -446,9 +463,6 @@ extern void afs_callback_update_kill(void); | |||
446 | */ | 463 | */ |
447 | extern struct rw_semaphore afs_proc_cells_sem; | 464 | extern struct rw_semaphore afs_proc_cells_sem; |
448 | extern struct list_head afs_proc_cells; | 465 | extern struct list_head afs_proc_cells; |
449 | #ifdef AFS_CACHING_SUPPORT | ||
450 | extern struct cachefs_index_def afs_cache_cell_index_def; | ||
451 | #endif | ||
452 | 466 | ||
453 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) | 467 | #define afs_get_cell(C) do { atomic_inc(&(C)->usage); } while(0) |
454 | extern int afs_cell_init(char *); | 468 | extern int afs_cell_init(char *); |
@@ -554,9 +568,6 @@ extern void afs_clear_inode(struct inode *); | |||
554 | * main.c | 568 | * main.c |
555 | */ | 569 | */ |
556 | extern struct afs_uuid afs_uuid; | 570 | extern struct afs_uuid afs_uuid; |
557 | #ifdef AFS_CACHING_SUPPORT | ||
558 | extern struct cachefs_netfs afs_cache_netfs; | ||
559 | #endif | ||
560 | 571 | ||
561 | /* | 572 | /* |
562 | * misc.c | 573 | * misc.c |
@@ -637,10 +648,6 @@ extern int afs_get_MAC_address(u8 *, size_t); | |||
637 | /* | 648 | /* |
638 | * vlclient.c | 649 | * vlclient.c |
639 | */ | 650 | */ |
640 | #ifdef AFS_CACHING_SUPPORT | ||
641 | extern struct cachefs_index_def afs_vlocation_cache_index_def; | ||
642 | #endif | ||
643 | |||
644 | extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, | 651 | extern int afs_vl_get_entry_by_name(struct in_addr *, struct key *, |
645 | const char *, struct afs_cache_vlocation *, | 652 | const char *, struct afs_cache_vlocation *, |
646 | const struct afs_wait_mode *); | 653 | const struct afs_wait_mode *); |
@@ -664,12 +671,6 @@ extern void afs_vlocation_purge(void); | |||
664 | /* | 671 | /* |
665 | * vnode.c | 672 | * vnode.c |
666 | */ | 673 | */ |
667 | #ifdef AFS_CACHING_SUPPORT | ||
668 | extern struct cachefs_index_def afs_vnode_cache_index_def; | ||
669 | #endif | ||
670 | |||
671 | extern struct afs_timer_ops afs_vnode_cb_timed_out_ops; | ||
672 | |||
673 | static inline struct afs_vnode *AFS_FS_I(struct inode *inode) | 674 | static inline struct afs_vnode *AFS_FS_I(struct inode *inode) |
674 | { | 675 | { |
675 | return container_of(inode, struct afs_vnode, vfs_inode); | 676 | return container_of(inode, struct afs_vnode, vfs_inode); |
@@ -711,10 +712,6 @@ extern int afs_vnode_release_lock(struct afs_vnode *, struct key *); | |||
711 | /* | 712 | /* |
712 | * volume.c | 713 | * volume.c |
713 | */ | 714 | */ |
714 | #ifdef AFS_CACHING_SUPPORT | ||
715 | extern struct cachefs_index_def afs_volume_cache_index_def; | ||
716 | #endif | ||
717 | |||
718 | #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) | 715 | #define afs_get_volume(V) do { atomic_inc(&(V)->usage); } while(0) |
719 | 716 | ||
720 | extern void afs_put_volume(struct afs_volume *); | 717 | extern void afs_put_volume(struct afs_volume *); |
diff --git a/fs/afs/main.c b/fs/afs/main.c index 2d3e5d4fb9f7..66d54d348c55 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* AFS client file system | 1 | /* AFS client file system |
2 | * | 2 | * |
3 | * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or | 6 | * This program is free software; you can redistribute it and/or |
@@ -29,18 +29,6 @@ static char *rootcell; | |||
29 | module_param(rootcell, charp, 0); | 29 | module_param(rootcell, charp, 0); |
30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); | 30 | MODULE_PARM_DESC(rootcell, "root AFS cell name and VL server IP addr list"); |
31 | 31 | ||
32 | #ifdef AFS_CACHING_SUPPORT | ||
33 | static struct cachefs_netfs_operations afs_cache_ops = { | ||
34 | .get_page_cookie = afs_cache_get_page_cookie, | ||
35 | }; | ||
36 | |||
37 | struct cachefs_netfs afs_cache_netfs = { | ||
38 | .name = "afs", | ||
39 | .version = 0, | ||
40 | .ops = &afs_cache_ops, | ||
41 | }; | ||
42 | #endif | ||
43 | |||
44 | struct afs_uuid afs_uuid; | 32 | struct afs_uuid afs_uuid; |
45 | 33 | ||
46 | /* | 34 | /* |
@@ -104,10 +92,9 @@ static int __init afs_init(void) | |||
104 | if (ret < 0) | 92 | if (ret < 0) |
105 | return ret; | 93 | return ret; |
106 | 94 | ||
107 | #ifdef AFS_CACHING_SUPPORT | 95 | #ifdef CONFIG_AFS_FSCACHE |
108 | /* we want to be able to cache */ | 96 | /* we want to be able to cache */ |
109 | ret = cachefs_register_netfs(&afs_cache_netfs, | 97 | ret = fscache_register_netfs(&afs_cache_netfs); |
110 | &afs_cache_cell_index_def); | ||
111 | if (ret < 0) | 98 | if (ret < 0) |
112 | goto error_cache; | 99 | goto error_cache; |
113 | #endif | 100 | #endif |
@@ -142,8 +129,8 @@ error_fs: | |||
142 | error_open_socket: | 129 | error_open_socket: |
143 | error_vl_update_init: | 130 | error_vl_update_init: |
144 | error_cell_init: | 131 | error_cell_init: |
145 | #ifdef AFS_CACHING_SUPPORT | 132 | #ifdef CONFIG_AFS_FSCACHE |
146 | cachefs_unregister_netfs(&afs_cache_netfs); | 133 | fscache_unregister_netfs(&afs_cache_netfs); |
147 | error_cache: | 134 | error_cache: |
148 | #endif | 135 | #endif |
149 | afs_callback_update_kill(); | 136 | afs_callback_update_kill(); |
@@ -175,8 +162,8 @@ static void __exit afs_exit(void) | |||
175 | afs_vlocation_purge(); | 162 | afs_vlocation_purge(); |
176 | flush_scheduled_work(); | 163 | flush_scheduled_work(); |
177 | afs_cell_purge(); | 164 | afs_cell_purge(); |
178 | #ifdef AFS_CACHING_SUPPORT | 165 | #ifdef CONFIG_AFS_FSCACHE |
179 | cachefs_unregister_netfs(&afs_cache_netfs); | 166 | fscache_unregister_netfs(&afs_cache_netfs); |
180 | #endif | 167 | #endif |
181 | afs_proc_cleanup(); | 168 | afs_proc_cleanup(); |
182 | rcu_barrier(); | 169 | rcu_barrier(); |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 78db4953a800..2b9e2d03a390 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -173,9 +173,9 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) | |||
173 | if (PageError(page)) | 173 | if (PageError(page)) |
174 | goto error; | 174 | goto error; |
175 | 175 | ||
176 | buf = kmap(page); | 176 | buf = kmap_atomic(page, KM_USER0); |
177 | memcpy(devname, buf, size); | 177 | memcpy(devname, buf, size); |
178 | kunmap(page); | 178 | kunmap_atomic(buf, KM_USER0); |
179 | page_cache_release(page); | 179 | page_cache_release(page); |
180 | page = NULL; | 180 | page = NULL; |
181 | 181 | ||
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 849fc3160cb5..ec2a7431e458 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c | |||
@@ -281,9 +281,8 @@ static void afs_vlocation_apply_update(struct afs_vlocation *vl, | |||
281 | 281 | ||
282 | vl->vldb = *vldb; | 282 | vl->vldb = *vldb; |
283 | 283 | ||
284 | #ifdef AFS_CACHING_SUPPORT | 284 | #ifdef CONFIG_AFS_FSCACHE |
285 | /* update volume entry in local cache */ | 285 | fscache_update_cookie(vl->cache); |
286 | cachefs_update_cookie(vl->cache); | ||
287 | #endif | 286 | #endif |
288 | } | 287 | } |
289 | 288 | ||
@@ -304,11 +303,9 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, | |||
304 | memset(&vldb, 0, sizeof(vldb)); | 303 | memset(&vldb, 0, sizeof(vldb)); |
305 | 304 | ||
306 | /* see if we have an in-cache copy (will set vl->valid if there is) */ | 305 | /* see if we have an in-cache copy (will set vl->valid if there is) */ |
307 | #ifdef AFS_CACHING_SUPPORT | 306 | #ifdef CONFIG_AFS_FSCACHE |
308 | cachefs_acquire_cookie(cell->cache, | 307 | vl->cache = fscache_acquire_cookie(vl->cell->cache, |
309 | &afs_volume_cache_index_def, | 308 | &afs_vlocation_cache_index_def, vl); |
310 | vlocation, | ||
311 | &vl->cache); | ||
312 | #endif | 309 | #endif |
313 | 310 | ||
314 | if (vl->valid) { | 311 | if (vl->valid) { |
@@ -420,6 +417,11 @@ fill_in_record: | |||
420 | spin_unlock(&vl->lock); | 417 | spin_unlock(&vl->lock); |
421 | wake_up(&vl->waitq); | 418 | wake_up(&vl->waitq); |
422 | 419 | ||
420 | /* update volume entry in local cache */ | ||
421 | #ifdef CONFIG_AFS_FSCACHE | ||
422 | fscache_update_cookie(vl->cache); | ||
423 | #endif | ||
424 | |||
423 | /* schedule for regular updates */ | 425 | /* schedule for regular updates */ |
424 | afs_vlocation_queue_for_updates(vl); | 426 | afs_vlocation_queue_for_updates(vl); |
425 | goto success; | 427 | goto success; |
@@ -465,7 +467,7 @@ found_in_memory: | |||
465 | spin_unlock(&vl->lock); | 467 | spin_unlock(&vl->lock); |
466 | 468 | ||
467 | success: | 469 | success: |
468 | _leave(" = %p",vl); | 470 | _leave(" = %p", vl); |
469 | return vl; | 471 | return vl; |
470 | 472 | ||
471 | error_abandon: | 473 | error_abandon: |
@@ -523,10 +525,9 @@ static void afs_vlocation_destroy(struct afs_vlocation *vl) | |||
523 | { | 525 | { |
524 | _enter("%p", vl); | 526 | _enter("%p", vl); |
525 | 527 | ||
526 | #ifdef AFS_CACHING_SUPPORT | 528 | #ifdef CONFIG_AFS_FSCACHE |
527 | cachefs_relinquish_cookie(vl->cache, 0); | 529 | fscache_relinquish_cookie(vl->cache, 0); |
528 | #endif | 530 | #endif |
529 | |||
530 | afs_put_cell(vl->cell); | 531 | afs_put_cell(vl->cell); |
531 | kfree(vl); | 532 | kfree(vl); |
532 | } | 533 | } |
diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 8bab0e3437f9..a353e69e2391 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c | |||
@@ -124,13 +124,11 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) | |||
124 | } | 124 | } |
125 | 125 | ||
126 | /* attach the cache and volume location */ | 126 | /* attach the cache and volume location */ |
127 | #ifdef AFS_CACHING_SUPPORT | 127 | #ifdef CONFIG_AFS_FSCACHE |
128 | cachefs_acquire_cookie(vlocation->cache, | 128 | volume->cache = fscache_acquire_cookie(vlocation->cache, |
129 | &afs_vnode_cache_index_def, | 129 | &afs_volume_cache_index_def, |
130 | volume, | 130 | volume); |
131 | &volume->cache); | ||
132 | #endif | 131 | #endif |
133 | |||
134 | afs_get_vlocation(vlocation); | 132 | afs_get_vlocation(vlocation); |
135 | volume->vlocation = vlocation; | 133 | volume->vlocation = vlocation; |
136 | 134 | ||
@@ -194,8 +192,8 @@ void afs_put_volume(struct afs_volume *volume) | |||
194 | up_write(&vlocation->cell->vl_sem); | 192 | up_write(&vlocation->cell->vl_sem); |
195 | 193 | ||
196 | /* finish cleaning up the volume */ | 194 | /* finish cleaning up the volume */ |
197 | #ifdef AFS_CACHING_SUPPORT | 195 | #ifdef CONFIG_AFS_FSCACHE |
198 | cachefs_relinquish_cookie(volume->cache, 0); | 196 | fscache_relinquish_cookie(volume->cache, 0); |
199 | #endif | 197 | #endif |
200 | afs_put_vlocation(vlocation); | 198 | afs_put_vlocation(vlocation); |
201 | 199 | ||
diff --git a/fs/afs/write.c b/fs/afs/write.c index 3fb36d433621..c2e7a7ff0080 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -780,3 +780,24 @@ int afs_fsync(struct file *file, struct dentry *dentry, int datasync) | |||
780 | _leave(" = %d", ret); | 780 | _leave(" = %d", ret); |
781 | return ret; | 781 | return ret; |
782 | } | 782 | } |
783 | |||
784 | /* | ||
785 | * notification that a previously read-only page is about to become writable | ||
786 | * - if it returns an error, the caller will deliver a bus error signal | ||
787 | */ | ||
788 | int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
789 | { | ||
790 | struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); | ||
791 | |||
792 | _enter("{{%x:%u}},{%lx}", | ||
793 | vnode->fid.vid, vnode->fid.vnode, page->index); | ||
794 | |||
795 | /* wait for the page to be written to the cache before we allow it to | ||
796 | * be modified */ | ||
797 | #ifdef CONFIG_AFS_FSCACHE | ||
798 | fscache_wait_on_page_write(vnode->cache, page); | ||
799 | #endif | ||
800 | |||
801 | _leave(" = 0"); | ||
802 | return 0; | ||
803 | } | ||
diff --git a/fs/cachefiles/Kconfig b/fs/cachefiles/Kconfig new file mode 100644 index 000000000000..80e9c6167f0b --- /dev/null +++ b/fs/cachefiles/Kconfig | |||
@@ -0,0 +1,39 @@ | |||
1 | |||
2 | config CACHEFILES | ||
3 | tristate "Filesystem caching on files" | ||
4 | depends on FSCACHE && BLOCK | ||
5 | help | ||
6 | This permits use of a mounted filesystem as a cache for other | ||
7 | filesystems - primarily networking filesystems - thus allowing fast | ||
8 | local disk to enhance the speed of slower devices. | ||
9 | |||
10 | See Documentation/filesystems/caching/cachefiles.txt for more | ||
11 | information. | ||
12 | |||
13 | config CACHEFILES_DEBUG | ||
14 | bool "Debug CacheFiles" | ||
15 | depends on CACHEFILES | ||
16 | help | ||
17 | This permits debugging to be dynamically enabled in the filesystem | ||
18 | caching on files module. If this is set, the debugging output may be | ||
19 | enabled by setting bits in /sys/module/cachefiles/parameters/debug or | |
20 | by including a debugging specifier in /etc/cachefilesd.conf. | ||
21 | |||
22 | config CACHEFILES_HISTOGRAM | ||
23 | bool "Gather latency information on CacheFiles" | ||
24 | depends on CACHEFILES && PROC_FS | ||
25 | help | ||
26 | |||
27 | This option causes latency information to be gathered on CacheFiles | ||
28 | operation and exported through the file: | |
29 | |||
30 | /proc/fs/cachefiles/histogram | ||
31 | |||
32 | The generation of this histogram adds a certain amount of overhead to | ||
33 | execution as there are a number of points at which data is gathered, | ||
34 | and on a multi-CPU system these may be on cachelines that keep | ||
35 | bouncing between CPUs. On the other hand, the histogram may be | ||
36 | useful for debugging purposes. Saying 'N' here is recommended. | ||
37 | |||
38 | See Documentation/filesystems/caching/cachefiles.txt for more | ||
39 | information. | ||
diff --git a/fs/cachefiles/Makefile b/fs/cachefiles/Makefile new file mode 100644 index 000000000000..32cbab0ffce3 --- /dev/null +++ b/fs/cachefiles/Makefile | |||
@@ -0,0 +1,18 @@ | |||
1 | # | ||
2 | # Makefile for caching in a mounted filesystem | ||
3 | # | ||
4 | |||
5 | cachefiles-y := \ | ||
6 | bind.o \ | ||
7 | daemon.o \ | ||
8 | interface.o \ | ||
9 | key.o \ | ||
10 | main.o \ | ||
11 | namei.o \ | ||
12 | rdwr.o \ | ||
13 | security.o \ | ||
14 | xattr.o | ||
15 | |||
16 | cachefiles-$(CONFIG_CACHEFILES_HISTOGRAM) += proc.o | ||
17 | |||
18 | obj-$(CONFIG_CACHEFILES) := cachefiles.o | ||
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c new file mode 100644 index 000000000000..3797e0077b35 --- /dev/null +++ b/fs/cachefiles/bind.c | |||
@@ -0,0 +1,286 @@ | |||
1 | /* Bind and unbind a cache from the filesystem backing it | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/completion.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/namei.h> | ||
20 | #include <linux/mount.h> | ||
21 | #include <linux/statfs.h> | ||
22 | #include <linux/ctype.h> | ||
23 | #include "internal.h" | ||
24 | |||
25 | static int cachefiles_daemon_add_cache(struct cachefiles_cache *caches); | ||
26 | |||
27 | /* | ||
28 | * bind a directory as a cache | ||
29 | */ | ||
30 | int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) | ||
31 | { | ||
32 | _enter("{%u,%u,%u,%u,%u,%u},%s", | ||
33 | cache->frun_percent, | ||
34 | cache->fcull_percent, | ||
35 | cache->fstop_percent, | ||
36 | cache->brun_percent, | ||
37 | cache->bcull_percent, | ||
38 | cache->bstop_percent, | ||
39 | args); | ||
40 | |||
41 | /* start by checking things over */ | ||
42 | ASSERT(cache->fstop_percent >= 0 && | ||
43 | cache->fstop_percent < cache->fcull_percent && | ||
44 | cache->fcull_percent < cache->frun_percent && | ||
45 | cache->frun_percent < 100); | ||
46 | |||
47 | ASSERT(cache->bstop_percent >= 0 && | ||
48 | cache->bstop_percent < cache->bcull_percent && | ||
49 | cache->bcull_percent < cache->brun_percent && | ||
50 | cache->brun_percent < 100); | ||
51 | |||
52 | if (*args) { | ||
53 | kerror("'bind' command doesn't take an argument"); | ||
54 | return -EINVAL; | ||
55 | } | ||
56 | |||
57 | if (!cache->rootdirname) { | ||
58 | kerror("No cache directory specified"); | ||
59 | return -EINVAL; | ||
60 | } | ||
61 | |||
62 | /* don't permit already bound caches to be re-bound */ | ||
63 | if (test_bit(CACHEFILES_READY, &cache->flags)) { | ||
64 | kerror("Cache already bound"); | ||
65 | return -EBUSY; | ||
66 | } | ||
67 | |||
68 | /* make sure we have a copy of the tag string (the dirname was checked above) */ | |
69 | if (!cache->tag) { | ||
70 | /* the tag string is released by the fops->release() | ||
71 | * function, so we don't release it on error here */ | ||
72 | cache->tag = kstrdup("CacheFiles", GFP_KERNEL); | ||
73 | if (!cache->tag) | ||
74 | return -ENOMEM; | ||
75 | } | ||
76 | |||
77 | /* add the cache */ | ||
78 | return cachefiles_daemon_add_cache(cache); | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * add a cache | ||
83 | */ | ||
84 | static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) | ||
85 | { | ||
86 | struct cachefiles_object *fsdef; | ||
87 | struct nameidata nd; | ||
88 | struct kstatfs stats; | ||
89 | struct dentry *graveyard, *cachedir, *root; | ||
90 | const struct cred *saved_cred; | ||
91 | int ret; | ||
92 | |||
93 | _enter(""); | ||
94 | |||
95 | /* we want to work under the module's security ID */ | ||
96 | ret = cachefiles_get_security_ID(cache); | ||
97 | if (ret < 0) | ||
98 | return ret; | ||
99 | |||
100 | cachefiles_begin_secure(cache, &saved_cred); | ||
101 | |||
102 | /* allocate the root index object */ | ||
103 | ret = -ENOMEM; | ||
104 | |||
105 | fsdef = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); | ||
106 | if (!fsdef) | ||
107 | goto error_root_object; | ||
108 | |||
109 | ASSERTCMP(fsdef->backer, ==, NULL); | ||
110 | |||
111 | atomic_set(&fsdef->usage, 1); | ||
112 | fsdef->type = FSCACHE_COOKIE_TYPE_INDEX; | ||
113 | |||
114 | _debug("- fsdef %p", fsdef); | ||
115 | |||
116 | /* look up the directory at the root of the cache */ | ||
117 | memset(&nd, 0, sizeof(nd)); | ||
118 | |||
119 | ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd); | ||
120 | if (ret < 0) | ||
121 | goto error_open_root; | ||
122 | |||
123 | cache->mnt = mntget(nd.path.mnt); | ||
124 | root = dget(nd.path.dentry); | ||
125 | path_put(&nd.path); | ||
126 | |||
127 | /* check parameters */ | ||
128 | ret = -EOPNOTSUPP; | ||
129 | if (!root->d_inode || | ||
130 | !root->d_inode->i_op || | ||
131 | !root->d_inode->i_op->lookup || | ||
132 | !root->d_inode->i_op->mkdir || | ||
133 | !root->d_inode->i_op->setxattr || | ||
134 | !root->d_inode->i_op->getxattr || | ||
135 | !root->d_sb || | ||
136 | !root->d_sb->s_op || | ||
137 | !root->d_sb->s_op->statfs || | ||
138 | !root->d_sb->s_op->sync_fs) | ||
139 | goto error_unsupported; | ||
140 | |||
141 | ret = -EROFS; | ||
142 | if (root->d_sb->s_flags & MS_RDONLY) | ||
143 | goto error_unsupported; | ||
144 | |||
145 | /* determine the security of the on-disk cache as this governs | |
146 | * the security ID of the files we create */ | |
147 | ret = cachefiles_determine_cache_security(cache, root, &saved_cred); | ||
148 | if (ret < 0) | ||
149 | goto error_unsupported; | ||
150 | |||
151 | /* get the cache size and blocksize */ | ||
152 | ret = vfs_statfs(root, &stats); | ||
153 | if (ret < 0) | ||
154 | goto error_unsupported; | ||
155 | |||
156 | ret = -ERANGE; | ||
157 | if (stats.f_bsize <= 0) | ||
158 | goto error_unsupported; | ||
159 | |||
160 | ret = -EOPNOTSUPP; | ||
161 | if (stats.f_bsize > PAGE_SIZE) | ||
162 | goto error_unsupported; | ||
163 | |||
164 | cache->bsize = stats.f_bsize; | ||
165 | cache->bshift = 0; | ||
166 | if (stats.f_bsize < PAGE_SIZE) | ||
167 | cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); | ||
168 | |||
169 | _debug("blksize %u (shift %u)", | ||
170 | cache->bsize, cache->bshift); | ||
171 | |||
172 | _debug("size %llu, avail %llu", | ||
173 | (unsigned long long) stats.f_blocks, | ||
174 | (unsigned long long) stats.f_bavail); | ||
175 | |||
176 | /* set up caching limits */ | ||
177 | do_div(stats.f_files, 100); | ||
178 | cache->fstop = stats.f_files * cache->fstop_percent; | ||
179 | cache->fcull = stats.f_files * cache->fcull_percent; | ||
180 | cache->frun = stats.f_files * cache->frun_percent; | ||
181 | |||
182 | _debug("limits {%llu,%llu,%llu} files", | ||
183 | (unsigned long long) cache->frun, | ||
184 | (unsigned long long) cache->fcull, | ||
185 | (unsigned long long) cache->fstop); | ||
186 | |||
187 | stats.f_blocks >>= cache->bshift; | ||
188 | do_div(stats.f_blocks, 100); | ||
189 | cache->bstop = stats.f_blocks * cache->bstop_percent; | ||
190 | cache->bcull = stats.f_blocks * cache->bcull_percent; | ||
191 | cache->brun = stats.f_blocks * cache->brun_percent; | ||
192 | |||
193 | _debug("limits {%llu,%llu,%llu} blocks", | ||
194 | (unsigned long long) cache->brun, | ||
195 | (unsigned long long) cache->bcull, | ||
196 | (unsigned long long) cache->bstop); | ||
197 | |||
198 | /* get the cache directory and check its type */ | ||
199 | cachedir = cachefiles_get_directory(cache, root, "cache"); | ||
200 | if (IS_ERR(cachedir)) { | ||
201 | ret = PTR_ERR(cachedir); | ||
202 | goto error_unsupported; | ||
203 | } | ||
204 | |||
205 | fsdef->dentry = cachedir; | ||
206 | fsdef->fscache.cookie = NULL; | ||
207 | |||
208 | ret = cachefiles_check_object_type(fsdef); | ||
209 | if (ret < 0) | ||
210 | goto error_unsupported; | ||
211 | |||
212 | /* get the graveyard directory */ | ||
213 | graveyard = cachefiles_get_directory(cache, root, "graveyard"); | ||
214 | if (IS_ERR(graveyard)) { | ||
215 | ret = PTR_ERR(graveyard); | ||
216 | goto error_unsupported; | ||
217 | } | ||
218 | |||
219 | cache->graveyard = graveyard; | ||
220 | |||
221 | /* publish the cache */ | ||
222 | fscache_init_cache(&cache->cache, | ||
223 | &cachefiles_cache_ops, | ||
224 | "%s", | ||
225 | fsdef->dentry->d_sb->s_id); | ||
226 | |||
227 | fscache_object_init(&fsdef->fscache, NULL, &cache->cache); | ||
228 | |||
229 | ret = fscache_add_cache(&cache->cache, &fsdef->fscache, cache->tag); | ||
230 | if (ret < 0) | ||
231 | goto error_add_cache; | ||
232 | |||
233 | /* done */ | ||
234 | set_bit(CACHEFILES_READY, &cache->flags); | ||
235 | dput(root); | ||
236 | |||
237 | printk(KERN_INFO "CacheFiles:" | ||
238 | " File cache on %s registered\n", | ||
239 | cache->cache.identifier); | ||
240 | |||
241 | /* check how much space the cache has */ | ||
242 | cachefiles_has_space(cache, 0, 0); | ||
243 | cachefiles_end_secure(cache, saved_cred); | ||
244 | return 0; | ||
245 | |||
246 | error_add_cache: | ||
247 | dput(cache->graveyard); | ||
248 | cache->graveyard = NULL; | ||
249 | error_unsupported: | ||
250 | mntput(cache->mnt); | ||
251 | cache->mnt = NULL; | ||
252 | dput(fsdef->dentry); | ||
253 | fsdef->dentry = NULL; | ||
254 | dput(root); | ||
255 | error_open_root: | ||
256 | kmem_cache_free(cachefiles_object_jar, fsdef); | ||
257 | error_root_object: | ||
258 | cachefiles_end_secure(cache, saved_cred); | ||
259 | kerror("Failed to register: %d", ret); | ||
260 | return ret; | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * unbind a cache on fd release | ||
265 | */ | ||
266 | void cachefiles_daemon_unbind(struct cachefiles_cache *cache) | ||
267 | { | ||
268 | _enter(""); | ||
269 | |||
270 | if (test_bit(CACHEFILES_READY, &cache->flags)) { | ||
271 | printk(KERN_INFO "CacheFiles:" | ||
272 | " File cache on %s unregistering\n", | ||
273 | cache->cache.identifier); | ||
274 | |||
275 | fscache_withdraw_cache(&cache->cache); | ||
276 | } | ||
277 | |||
278 | dput(cache->graveyard); | ||
279 | mntput(cache->mnt); | ||
280 | |||
281 | kfree(cache->rootdirname); | ||
282 | kfree(cache->secctx); | ||
283 | kfree(cache->tag); | ||
284 | |||
285 | _leave(""); | ||
286 | } | ||
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c new file mode 100644 index 000000000000..4618516dd994 --- /dev/null +++ b/fs/cachefiles/daemon.c | |||
@@ -0,0 +1,755 @@ | |||
1 | /* Daemon interface | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/completion.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/fs.h> | ||
18 | #include <linux/file.h> | ||
19 | #include <linux/namei.h> | ||
20 | #include <linux/poll.h> | ||
21 | #include <linux/mount.h> | ||
22 | #include <linux/statfs.h> | ||
23 | #include <linux/ctype.h> | ||
24 | #include <linux/fs_struct.h> | ||
25 | #include "internal.h" | ||
26 | |||
27 | static int cachefiles_daemon_open(struct inode *, struct file *); | ||
28 | static int cachefiles_daemon_release(struct inode *, struct file *); | ||
29 | static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, | ||
30 | loff_t *); | ||
31 | static ssize_t cachefiles_daemon_write(struct file *, const char __user *, | ||
32 | size_t, loff_t *); | ||
33 | static unsigned int cachefiles_daemon_poll(struct file *, | ||
34 | struct poll_table_struct *); | ||
35 | static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); | ||
36 | static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); | ||
37 | static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *); | ||
38 | static int cachefiles_daemon_brun(struct cachefiles_cache *, char *); | ||
39 | static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *); | ||
40 | static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *); | ||
41 | static int cachefiles_daemon_cull(struct cachefiles_cache *, char *); | ||
42 | static int cachefiles_daemon_debug(struct cachefiles_cache *, char *); | ||
43 | static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); | ||
44 | static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); | ||
45 | static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); | ||
46 | static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); | ||
47 | |||
48 | static unsigned long cachefiles_open; | ||
49 | |||
50 | const struct file_operations cachefiles_daemon_fops = { | ||
51 | .owner = THIS_MODULE, | ||
52 | .open = cachefiles_daemon_open, | ||
53 | .release = cachefiles_daemon_release, | ||
54 | .read = cachefiles_daemon_read, | ||
55 | .write = cachefiles_daemon_write, | ||
56 | .poll = cachefiles_daemon_poll, | ||
57 | }; | ||
58 | |||
59 | struct cachefiles_daemon_cmd { | ||
60 | char name[8]; | ||
61 | int (*handler)(struct cachefiles_cache *cache, char *args); | ||
62 | }; | ||
63 | |||
64 | static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { | ||
65 | { "bind", cachefiles_daemon_bind }, | ||
66 | { "brun", cachefiles_daemon_brun }, | ||
67 | { "bcull", cachefiles_daemon_bcull }, | ||
68 | { "bstop", cachefiles_daemon_bstop }, | ||
69 | { "cull", cachefiles_daemon_cull }, | ||
70 | { "debug", cachefiles_daemon_debug }, | ||
71 | { "dir", cachefiles_daemon_dir }, | ||
72 | { "frun", cachefiles_daemon_frun }, | ||
73 | { "fcull", cachefiles_daemon_fcull }, | ||
74 | { "fstop", cachefiles_daemon_fstop }, | ||
75 | { "inuse", cachefiles_daemon_inuse }, | ||
76 | { "secctx", cachefiles_daemon_secctx }, | ||
77 | { "tag", cachefiles_daemon_tag }, | ||
78 | { "", NULL } | ||
79 | }; | ||
80 | |||
81 | |||
82 | /* | ||
83 | * do various checks | ||
84 | */ | ||
85 | static int cachefiles_daemon_open(struct inode *inode, struct file *file) | ||
86 | { | ||
87 | struct cachefiles_cache *cache; | ||
88 | |||
89 | _enter(""); | ||
90 | |||
91 | /* only the superuser may do this */ | ||
92 | if (!capable(CAP_SYS_ADMIN)) | ||
93 | return -EPERM; | ||
94 | |||
95 | /* the cachefiles device may only be open once at a time */ | ||
96 | if (xchg(&cachefiles_open, 1) == 1) | ||
97 | return -EBUSY; | ||
98 | |||
99 | /* allocate a cache record */ | ||
100 | cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); | ||
101 | if (!cache) { | ||
102 | cachefiles_open = 0; | ||
103 | return -ENOMEM; | ||
104 | } | ||
105 | |||
106 | mutex_init(&cache->daemon_mutex); | ||
107 | cache->active_nodes = RB_ROOT; | ||
108 | rwlock_init(&cache->active_lock); | ||
109 | init_waitqueue_head(&cache->daemon_pollwq); | ||
110 | |||
111 | /* set default caching limits | ||
112 | * - limit at 1% free space and/or free files | ||
113 | * - cull below 5% free space and/or free files | ||
114 | * - cease culling above 7% free space and/or free files | ||
115 | */ | ||
116 | cache->frun_percent = 7; | ||
117 | cache->fcull_percent = 5; | ||
118 | cache->fstop_percent = 1; | ||
119 | cache->brun_percent = 7; | ||
120 | cache->bcull_percent = 5; | ||
121 | cache->bstop_percent = 1; | ||
122 | |||
123 | file->private_data = cache; | ||
124 | cache->cachefilesd = file; | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * release a cache | ||
130 | */ | ||
131 | static int cachefiles_daemon_release(struct inode *inode, struct file *file) | ||
132 | { | ||
133 | struct cachefiles_cache *cache = file->private_data; | ||
134 | |||
135 | _enter(""); | ||
136 | |||
137 | ASSERT(cache); | ||
138 | |||
139 | set_bit(CACHEFILES_DEAD, &cache->flags); | ||
140 | |||
141 | cachefiles_daemon_unbind(cache); | ||
142 | |||
143 | ASSERT(!cache->active_nodes.rb_node); | ||
144 | |||
145 | /* clean up the control file interface */ | ||
146 | cache->cachefilesd = NULL; | ||
147 | file->private_data = NULL; | ||
148 | cachefiles_open = 0; | ||
149 | |||
150 | kfree(cache); | ||
151 | |||
152 | _leave(""); | ||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * read the cache state | ||
158 | */ | ||
159 | static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, | ||
160 | size_t buflen, loff_t *pos) | ||
161 | { | ||
162 | struct cachefiles_cache *cache = file->private_data; | ||
163 | char buffer[256]; | ||
164 | int n; | ||
165 | |||
166 | //_enter(",,%zu,", buflen); | ||
167 | |||
168 | if (!test_bit(CACHEFILES_READY, &cache->flags)) | ||
169 | return 0; | ||
170 | |||
171 | /* check how much space the cache has */ | ||
172 | cachefiles_has_space(cache, 0, 0); | ||
173 | |||
174 | /* summarise */ | ||
175 | clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); | ||
176 | |||
177 | n = snprintf(buffer, sizeof(buffer), | ||
178 | "cull=%c" | ||
179 | " frun=%llx" | ||
180 | " fcull=%llx" | ||
181 | " fstop=%llx" | ||
182 | " brun=%llx" | ||
183 | " bcull=%llx" | ||
184 | " bstop=%llx", | ||
185 | test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', | ||
186 | (unsigned long long) cache->frun, | ||
187 | (unsigned long long) cache->fcull, | ||
188 | (unsigned long long) cache->fstop, | ||
189 | (unsigned long long) cache->brun, | ||
190 | (unsigned long long) cache->bcull, | ||
191 | (unsigned long long) cache->bstop | ||
192 | ); | ||
193 | |||
194 | if (n > buflen) | ||
195 | return -EMSGSIZE; | ||
196 | |||
197 | if (copy_to_user(_buffer, buffer, n) != 0) | ||
198 | return -EFAULT; | ||
199 | |||
200 | return n; | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * command the cache | ||
205 | */ | ||
206 | static ssize_t cachefiles_daemon_write(struct file *file, | ||
207 | const char __user *_data, | ||
208 | size_t datalen, | ||
209 | loff_t *pos) | ||
210 | { | ||
211 | const struct cachefiles_daemon_cmd *cmd; | ||
212 | struct cachefiles_cache *cache = file->private_data; | ||
213 | ssize_t ret; | ||
214 | char *data, *args, *cp; | ||
215 | |||
216 | //_enter(",,%zu,", datalen); | ||
217 | |||
218 | ASSERT(cache); | ||
219 | |||
220 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) | ||
221 | return -EIO; | ||
222 | |||
223 | if (datalen < 0 || datalen > PAGE_SIZE - 1) | ||
224 | return -EOPNOTSUPP; | ||
225 | |||
226 | /* drag the command string into the kernel so we can parse it */ | ||
227 | data = kmalloc(datalen + 1, GFP_KERNEL); | ||
228 | if (!data) | ||
229 | return -ENOMEM; | ||
230 | |||
231 | ret = -EFAULT; | ||
232 | if (copy_from_user(data, _data, datalen) != 0) | ||
233 | goto error; | ||
234 | |||
235 | data[datalen] = '\0'; | ||
236 | |||
237 | ret = -EINVAL; | ||
238 | if (memchr(data, '\0', datalen)) | ||
239 | goto error; | ||
240 | |||
241 | /* strip any newline */ | ||
242 | cp = memchr(data, '\n', datalen); | ||
243 | if (cp) { | ||
244 | if (cp == data) | ||
245 | goto error; | ||
246 | |||
247 | *cp = '\0'; | ||
248 | } | ||
249 | |||
250 | /* parse the command */ | ||
251 | ret = -EOPNOTSUPP; | ||
252 | |||
253 | for (args = data; *args; args++) | ||
254 | if (isspace(*args)) | ||
255 | break; | ||
256 | if (*args) { | ||
257 | if (args == data) | ||
258 | goto error; | ||
259 | *args = '\0'; | ||
260 | for (args++; isspace(*args); args++) | ||
261 | continue; | ||
262 | } | ||
263 | |||
264 | /* run the appropriate command handler */ | ||
265 | for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++) | ||
266 | if (strcmp(cmd->name, data) == 0) | ||
267 | goto found_command; | ||
268 | |||
269 | error: | ||
270 | kfree(data); | ||
271 | //_leave(" = %zd", ret); | ||
272 | return ret; | ||
273 | |||
274 | found_command: | ||
275 | mutex_lock(&cache->daemon_mutex); | ||
276 | |||
277 | ret = -EIO; | ||
278 | if (!test_bit(CACHEFILES_DEAD, &cache->flags)) | ||
279 | ret = cmd->handler(cache, args); | ||
280 | |||
281 | mutex_unlock(&cache->daemon_mutex); | ||
282 | |||
283 | if (ret == 0) | ||
284 | ret = datalen; | ||
285 | goto error; | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * poll for culling state | ||
290 | * - use POLLIN to indicate a state change and POLLOUT to indicate culling state | |
291 | */ | ||
292 | static unsigned int cachefiles_daemon_poll(struct file *file, | ||
293 | struct poll_table_struct *poll) | ||
294 | { | ||
295 | struct cachefiles_cache *cache = file->private_data; | ||
296 | unsigned int mask; | ||
297 | |||
298 | poll_wait(file, &cache->daemon_pollwq, poll); | ||
299 | mask = 0; | ||
300 | |||
301 | if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) | ||
302 | mask |= POLLIN; | ||
303 | |||
304 | if (test_bit(CACHEFILES_CULLING, &cache->flags)) | ||
305 | mask |= POLLOUT; | ||
306 | |||
307 | return mask; | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * give a range error for cache space constraints | ||
312 | * - can be tail-called | ||
313 | */ | ||
314 | static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, | ||
315 | char *args) | ||
316 | { | ||
317 | kerror("Free space limits must be in range" | ||
318 | " 0%%<=stop<cull<run<100%%"); | ||
319 | |||
320 | return -EINVAL; | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * set the percentage of files at which to stop culling | ||
325 | * - command: "frun <N>%" | ||
326 | */ | ||
327 | static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) | ||
328 | { | ||
329 | unsigned long frun; | ||
330 | |||
331 | _enter(",%s", args); | ||
332 | |||
333 | if (!*args) | ||
334 | return -EINVAL; | ||
335 | |||
336 | frun = simple_strtoul(args, &args, 10); | ||
337 | if (args[0] != '%' || args[1] != '\0') | ||
338 | return -EINVAL; | ||
339 | |||
340 | if (frun <= cache->fcull_percent || frun >= 100) | ||
341 | return cachefiles_daemon_range_error(cache, args); | ||
342 | |||
343 | cache->frun_percent = frun; | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * set the percentage of files at which to start culling | ||
349 | * - command: "fcull <N>%" | ||
350 | */ | ||
351 | static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) | ||
352 | { | ||
353 | unsigned long fcull; | ||
354 | |||
355 | _enter(",%s", args); | ||
356 | |||
357 | if (!*args) | ||
358 | return -EINVAL; | ||
359 | |||
360 | fcull = simple_strtoul(args, &args, 10); | ||
361 | if (args[0] != '%' || args[1] != '\0') | ||
362 | return -EINVAL; | ||
363 | |||
364 | if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent) | ||
365 | return cachefiles_daemon_range_error(cache, args); | ||
366 | |||
367 | cache->fcull_percent = fcull; | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * set the percentage of files at which to stop allocating | ||
373 | * - command: "fstop <N>%" | ||
374 | */ | ||
375 | static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) | ||
376 | { | ||
377 | unsigned long fstop; | ||
378 | |||
379 | _enter(",%s", args); | ||
380 | |||
381 | if (!*args) | ||
382 | return -EINVAL; | ||
383 | |||
384 | fstop = simple_strtoul(args, &args, 10); | ||
385 | if (args[0] != '%' || args[1] != '\0') | ||
386 | return -EINVAL; | ||
387 | |||
388 | if (fstop < 0 || fstop >= cache->fcull_percent) | ||
389 | return cachefiles_daemon_range_error(cache, args); | ||
390 | |||
391 | cache->fstop_percent = fstop; | ||
392 | return 0; | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * set the percentage of blocks at which to stop culling | ||
397 | * - command: "brun <N>%" | ||
398 | */ | ||
399 | static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) | ||
400 | { | ||
401 | unsigned long brun; | ||
402 | |||
403 | _enter(",%s", args); | ||
404 | |||
405 | if (!*args) | ||
406 | return -EINVAL; | ||
407 | |||
408 | brun = simple_strtoul(args, &args, 10); | ||
409 | if (args[0] != '%' || args[1] != '\0') | ||
410 | return -EINVAL; | ||
411 | |||
412 | if (brun <= cache->bcull_percent || brun >= 100) | ||
413 | return cachefiles_daemon_range_error(cache, args); | ||
414 | |||
415 | cache->brun_percent = brun; | ||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | /* | ||
420 | * set the percentage of blocks at which to start culling | ||
421 | * - command: "bcull <N>%" | ||
422 | */ | ||
423 | static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) | ||
424 | { | ||
425 | unsigned long bcull; | ||
426 | |||
427 | _enter(",%s", args); | ||
428 | |||
429 | if (!*args) | ||
430 | return -EINVAL; | ||
431 | |||
432 | bcull = simple_strtoul(args, &args, 10); | ||
433 | if (args[0] != '%' || args[1] != '\0') | ||
434 | return -EINVAL; | ||
435 | |||
436 | if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) | ||
437 | return cachefiles_daemon_range_error(cache, args); | ||
438 | |||
439 | cache->bcull_percent = bcull; | ||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * set the percentage of blocks at which to stop allocating | ||
445 | * - command: "bstop <N>%" | ||
446 | */ | ||
447 | static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) | ||
448 | { | ||
449 | unsigned long bstop; | ||
450 | |||
451 | _enter(",%s", args); | ||
452 | |||
453 | if (!*args) | ||
454 | return -EINVAL; | ||
455 | |||
456 | bstop = simple_strtoul(args, &args, 10); | ||
457 | if (args[0] != '%' || args[1] != '\0') | ||
458 | return -EINVAL; | ||
459 | |||
460 | if (bstop < 0 || bstop >= cache->bcull_percent) | ||
461 | return cachefiles_daemon_range_error(cache, args); | ||
462 | |||
463 | cache->bstop_percent = bstop; | ||
464 | return 0; | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * set the cache directory | ||
469 | * - command: "dir <name>" | ||
470 | */ | ||
471 | static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) | ||
472 | { | ||
473 | char *dir; | ||
474 | |||
475 | _enter(",%s", args); | ||
476 | |||
477 | if (!*args) { | ||
478 | kerror("Empty directory specified"); | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | if (cache->rootdirname) { | ||
483 | kerror("Second cache directory specified"); | ||
484 | return -EEXIST; | ||
485 | } | ||
486 | |||
487 | dir = kstrdup(args, GFP_KERNEL); | ||
488 | if (!dir) | ||
489 | return -ENOMEM; | ||
490 | |||
491 | cache->rootdirname = dir; | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * set the cache security context | ||
497 | * - command: "secctx <ctx>" | ||
498 | */ | ||
499 | static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) | ||
500 | { | ||
501 | char *secctx; | ||
502 | |||
503 | _enter(",%s", args); | ||
504 | |||
505 | if (!*args) { | ||
506 | kerror("Empty security context specified"); | ||
507 | return -EINVAL; | ||
508 | } | ||
509 | |||
510 | if (cache->secctx) { | ||
511 | kerror("Second security context specified"); | ||
512 | return -EINVAL; | ||
513 | } | ||
514 | |||
515 | secctx = kstrdup(args, GFP_KERNEL); | ||
516 | if (!secctx) | ||
517 | return -ENOMEM; | ||
518 | |||
519 | cache->secctx = secctx; | ||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * set the cache tag | ||
525 | * - command: "tag <name>" | ||
526 | */ | ||
527 | static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) | ||
528 | { | ||
529 | char *tag; | ||
530 | |||
531 | _enter(",%s", args); | ||
532 | |||
533 | if (!*args) { | ||
534 | kerror("Empty tag specified"); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | if (cache->tag) | ||
539 | return -EEXIST; | ||
540 | |||
541 | tag = kstrdup(args, GFP_KERNEL); | ||
542 | if (!tag) | ||
543 | return -ENOMEM; | ||
544 | |||
545 | cache->tag = tag; | ||
546 | return 0; | ||
547 | } | ||
548 | |||
549 | /* | ||
550 | * request a node in the cache be culled from the current working directory | ||
551 | * - command: "cull <name>" | ||
552 | */ | ||
553 | static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) | ||
554 | { | ||
555 | struct fs_struct *fs; | ||
556 | struct dentry *dir; | ||
557 | const struct cred *saved_cred; | ||
558 | int ret; | ||
559 | |||
560 | _enter(",%s", args); | ||
561 | |||
562 | if (strchr(args, '/')) | ||
563 | goto inval; | ||
564 | |||
565 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | ||
566 | kerror("cull applied to unready cache"); | ||
567 | return -EIO; | ||
568 | } | ||
569 | |||
570 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | ||
571 | kerror("cull applied to dead cache"); | ||
572 | return -EIO; | ||
573 | } | ||
574 | |||
575 | /* extract the directory dentry from the cwd */ | ||
576 | fs = current->fs; | ||
577 | read_lock(&fs->lock); | ||
578 | dir = dget(fs->pwd.dentry); | ||
579 | read_unlock(&fs->lock); | ||
580 | |||
581 | if (!S_ISDIR(dir->d_inode->i_mode)) | ||
582 | goto notdir; | ||
583 | |||
584 | cachefiles_begin_secure(cache, &saved_cred); | ||
585 | ret = cachefiles_cull(cache, dir, args); | ||
586 | cachefiles_end_secure(cache, saved_cred); | ||
587 | |||
588 | dput(dir); | ||
589 | _leave(" = %d", ret); | ||
590 | return ret; | ||
591 | |||
592 | notdir: | ||
593 | dput(dir); | ||
594 | kerror("cull command requires dirfd to be a directory"); | ||
595 | return -ENOTDIR; | ||
596 | |||
597 | inval: | ||
598 | kerror("cull command requires dirfd and filename"); | ||
599 | return -EINVAL; | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * set debugging mode | ||
604 | * - command: "debug <mask>" | ||
605 | */ | ||
606 | static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) | ||
607 | { | ||
608 | unsigned long mask; | ||
609 | |||
610 | _enter(",%s", args); | ||
611 | |||
612 | mask = simple_strtoul(args, &args, 0); | ||
613 | if (args[0] != '\0') | ||
614 | goto inval; | ||
615 | |||
616 | cachefiles_debug = mask; | ||
617 | _leave(" = 0"); | ||
618 | return 0; | ||
619 | |||
620 | inval: | ||
621 | kerror("debug command requires mask"); | ||
622 | return -EINVAL; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * find out whether an object in the current working directory is in use or not | ||
627 | * - command: "inuse <name>" | ||
628 | */ | ||
629 | static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) | ||
630 | { | ||
631 | struct fs_struct *fs; | ||
632 | struct dentry *dir; | ||
633 | const struct cred *saved_cred; | ||
634 | int ret; | ||
635 | |||
636 | //_enter(",%s", args); | ||
637 | |||
638 | if (strchr(args, '/')) | ||
639 | goto inval; | ||
640 | |||
641 | if (!test_bit(CACHEFILES_READY, &cache->flags)) { | ||
642 | kerror("inuse applied to unready cache"); | ||
643 | return -EIO; | ||
644 | } | ||
645 | |||
646 | if (test_bit(CACHEFILES_DEAD, &cache->flags)) { | ||
647 | kerror("inuse applied to dead cache"); | ||
648 | return -EIO; | ||
649 | } | ||
650 | |||
651 | /* extract the directory dentry from the cwd */ | ||
652 | fs = current->fs; | ||
653 | read_lock(&fs->lock); | ||
654 | dir = dget(fs->pwd.dentry); | ||
655 | read_unlock(&fs->lock); | ||
656 | |||
657 | if (!S_ISDIR(dir->d_inode->i_mode)) | ||
658 | goto notdir; | ||
659 | |||
660 | cachefiles_begin_secure(cache, &saved_cred); | ||
661 | ret = cachefiles_check_in_use(cache, dir, args); | ||
662 | cachefiles_end_secure(cache, saved_cred); | ||
663 | |||
664 | dput(dir); | ||
665 | //_leave(" = %d", ret); | ||
666 | return ret; | ||
667 | |||
668 | notdir: | ||
669 | dput(dir); | ||
670 | kerror("inuse command requires dirfd to be a directory"); | ||
671 | return -ENOTDIR; | ||
672 | |||
673 | inval: | ||
674 | kerror("inuse command requires dirfd and filename"); | ||
675 | return -EINVAL; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * see if we have space for a number of pages and/or a number of files in the | ||
680 | * cache | ||
681 | */ | ||
682 | int cachefiles_has_space(struct cachefiles_cache *cache, | ||
683 | unsigned fnr, unsigned bnr) | ||
684 | { | ||
685 | struct kstatfs stats; | ||
686 | int ret; | ||
687 | |||
688 | //_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u", | ||
689 | // (unsigned long long) cache->frun, | ||
690 | // (unsigned long long) cache->fcull, | ||
691 | // (unsigned long long) cache->fstop, | ||
692 | // (unsigned long long) cache->brun, | ||
693 | // (unsigned long long) cache->bcull, | ||
694 | // (unsigned long long) cache->bstop, | ||
695 | // fnr, bnr); | ||
696 | |||
697 | /* find out how many pages of blockdev are available */ | ||
698 | memset(&stats, 0, sizeof(stats)); | ||
699 | |||
700 | ret = vfs_statfs(cache->mnt->mnt_root, &stats); | ||
701 | if (ret < 0) { | ||
702 | if (ret == -EIO) | ||
703 | cachefiles_io_error(cache, "statfs failed"); | ||
704 | _leave(" = %d", ret); | ||
705 | return ret; | ||
706 | } | ||
707 | |||
708 | stats.f_bavail >>= cache->bshift; | ||
709 | |||
710 | //_debug("avail %llu,%llu", | ||
711 | // (unsigned long long) stats.f_ffree, | ||
712 | // (unsigned long long) stats.f_bavail); | ||
713 | |||
714 | /* see if there is sufficient space */ | ||
715 | if (stats.f_ffree > fnr) | ||
716 | stats.f_ffree -= fnr; | ||
717 | else | ||
718 | stats.f_ffree = 0; | ||
719 | |||
720 | if (stats.f_bavail > bnr) | ||
721 | stats.f_bavail -= bnr; | ||
722 | else | ||
723 | stats.f_bavail = 0; | ||
724 | |||
725 | ret = -ENOBUFS; | ||
726 | if (stats.f_ffree < cache->fstop || | ||
727 | stats.f_bavail < cache->bstop) | ||
728 | goto begin_cull; | ||
729 | |||
730 | ret = 0; | ||
731 | if (stats.f_ffree < cache->fcull || | ||
732 | stats.f_bavail < cache->bcull) | ||
733 | goto begin_cull; | ||
734 | |||
735 | if (test_bit(CACHEFILES_CULLING, &cache->flags) && | ||
736 | stats.f_ffree >= cache->frun && | ||
737 | stats.f_bavail >= cache->brun && | ||
738 | test_and_clear_bit(CACHEFILES_CULLING, &cache->flags) | ||
739 | ) { | ||
740 | _debug("cease culling"); | ||
741 | cachefiles_state_changed(cache); | ||
742 | } | ||
743 | |||
744 | //_leave(" = 0"); | ||
745 | return 0; | ||
746 | |||
747 | begin_cull: | ||
748 | if (!test_and_set_bit(CACHEFILES_CULLING, &cache->flags)) { | ||
749 | _debug("### CULL CACHE ###"); | ||
750 | cachefiles_state_changed(cache); | ||
751 | } | ||
752 | |||
753 | _leave(" = %d", ret); | ||
754 | return ret; | ||
755 | } | ||
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c new file mode 100644 index 000000000000..1e962348d111 --- /dev/null +++ b/fs/cachefiles/interface.c | |||
@@ -0,0 +1,449 @@ | |||
1 | /* FS-Cache interface to CacheFiles | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mount.h> | ||
13 | #include <linux/buffer_head.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) /* struct page at the tail (head->prev) of an lru-linked list; not referenced anywhere else in this file */ | ||
17 | |||
18 | struct cachefiles_lookup_data { /* lookup state built by cachefiles_alloc_object() and consumed by cachefiles_lookup_object() */ | ||
19 | struct cachefiles_xattr *auxdata; /* auxiliary data (kmalloc'd; kfree'd on lookup completion or final put) */ | ||
20 | char *key; /* key path returned by cachefiles_cook_key() (kfree'd together with auxdata) */ | ||
21 | }; | ||
22 | |||
23 | static int cachefiles_attr_changed(struct fscache_object *_object); | ||
24 | |||
25 | /* | ||
26 | * allocate an object record for a cookie lookup and prepare the lookup data | ||
27 | */ | ||
28 | static struct fscache_object *cachefiles_alloc_object( | ||
29 | struct fscache_cache *_cache, | ||
30 | struct fscache_cookie *cookie) | ||
31 | { | ||
32 | struct cachefiles_lookup_data *lookup_data; | ||
33 | struct cachefiles_object *object; | ||
34 | struct cachefiles_cache *cache; | ||
35 | struct cachefiles_xattr *auxdata; | ||
36 | unsigned keylen, auxlen; | ||
37 | void *buffer; | ||
38 | char *key; | ||
39 | |||
40 | cache = container_of(_cache, struct cachefiles_cache, cache); | ||
41 | |||
42 | _enter("{%s},%p,", cache->cache.identifier, cookie); | ||
43 | |||
44 | lookup_data = kmalloc(sizeof(*lookup_data), GFP_KERNEL); | ||
45 | if (!lookup_data) | ||
46 | goto nomem_lookup_data; | ||
47 | |||
48 | /* create a new object record and a temporary leaf image */ | ||
49 | object = kmem_cache_alloc(cachefiles_object_jar, GFP_KERNEL); | ||
50 | if (!object) | ||
51 | goto nomem_object; | ||
52 | |||
53 | ASSERTCMP(object->backer, ==, NULL); | ||
54 | |||
55 | BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
56 | atomic_set(&object->usage, 1); | ||
57 | |||
58 | fscache_object_init(&object->fscache, cookie, &cache->cache); | ||
59 | |||
60 | object->type = cookie->def->type; | ||
61 | |||
62 | /* get hold of the raw key | ||
63 | * - stick the length on the front and leave space on the back for the | ||
64 | * encoder | ||
65 | */ | ||
66 | buffer = kmalloc((2 + 512) + 3, GFP_KERNEL); | ||
67 | if (!buffer) | ||
68 | goto nomem_buffer; | ||
69 | |||
70 | keylen = cookie->def->get_key(cookie->netfs_data, buffer + 2, 512); | ||
71 | ASSERTCMP(keylen, <, 512); | ||
72 | |||
73 | *(uint16_t *)buffer = keylen; | ||
74 | ((char *)buffer)[keylen + 2] = 0; | ||
75 | ((char *)buffer)[keylen + 3] = 0; | ||
76 | ((char *)buffer)[keylen + 4] = 0; | ||
77 | |||
78 | /* turn the raw key into something that can work with as a filename */ | ||
79 | key = cachefiles_cook_key(buffer, keylen + 2, object->type); | ||
80 | if (!key) | ||
81 | goto nomem_key; | ||
82 | |||
83 | /* get hold of the auxiliary data and prepend the object type */ | ||
84 | auxdata = buffer; | ||
85 | auxlen = 0; | ||
86 | if (cookie->def->get_aux) { | ||
87 | auxlen = cookie->def->get_aux(cookie->netfs_data, | ||
88 | auxdata->data, 511); | ||
89 | ASSERTCMP(auxlen, <, 511); | ||
90 | } | ||
91 | |||
92 | auxdata->len = auxlen + 1; | ||
93 | auxdata->type = cookie->def->type; | ||
94 | |||
95 | lookup_data->auxdata = auxdata; | ||
96 | lookup_data->key = key; | ||
97 | object->lookup_data = lookup_data; | ||
98 | |||
99 | _leave(" = %p [%p]", &object->fscache, lookup_data); | ||
100 | return &object->fscache; | ||
101 | |||
102 | nomem_key: | ||
103 | kfree(buffer); | ||
104 | nomem_buffer: | ||
105 | BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
106 | kmem_cache_free(cachefiles_object_jar, object); | ||
107 | fscache_object_destroyed(&cache->cache); | ||
108 | nomem_object: | ||
109 | kfree(lookup_data); | ||
110 | nomem_lookup_data: | ||
111 | _leave(" = -ENOMEM"); | ||
112 | return ERR_PTR(-ENOMEM); | ||
113 | } | ||
114 | |||
115 | /* | ||
116 | * attempt to look up the nominated node in this cache | ||
117 | */ | ||
118 | static void cachefiles_lookup_object(struct fscache_object *_object) | ||
119 | { | ||
120 | struct cachefiles_lookup_data *lookup_data; | ||
121 | struct cachefiles_object *parent, *object; | ||
122 | struct cachefiles_cache *cache; | ||
123 | const struct cred *saved_cred; | ||
124 | int ret; | ||
125 | |||
126 | _enter("{OBJ%x}", _object->debug_id); | ||
127 | |||
128 | cache = container_of(_object->cache, struct cachefiles_cache, cache); | ||
129 | parent = container_of(_object->parent, | ||
130 | struct cachefiles_object, fscache); | ||
131 | object = container_of(_object, struct cachefiles_object, fscache); | ||
132 | lookup_data = object->lookup_data; | ||
133 | |||
134 | ASSERTCMP(lookup_data, !=, NULL); | ||
135 | |||
136 | /* look up the key, creating any missing bits */ | ||
137 | cachefiles_begin_secure(cache, &saved_cred); | ||
138 | ret = cachefiles_walk_to_object(parent, object, | ||
139 | lookup_data->key, | ||
140 | lookup_data->auxdata); | ||
141 | cachefiles_end_secure(cache, saved_cred); | ||
142 | |||
143 | /* polish off by setting the attributes of non-index files */ | ||
144 | if (ret == 0 && | ||
145 | object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) | ||
146 | cachefiles_attr_changed(&object->fscache); | ||
147 | |||
148 | if (ret < 0) { | ||
149 | printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", | ||
150 | ret); | ||
151 | fscache_object_lookup_error(&object->fscache); | ||
152 | } | ||
153 | |||
154 | _leave(" [%d]", ret); | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * indication of lookup completion | ||
159 | */ | ||
160 | static void cachefiles_lookup_complete(struct fscache_object *_object) | ||
161 | { | ||
162 | struct cachefiles_object *object; | ||
163 | |||
164 | object = container_of(_object, struct cachefiles_object, fscache); | ||
165 | |||
166 | _enter("{OBJ%x,%p}", object->fscache.debug_id, object->lookup_data); | ||
167 | |||
168 | if (object->lookup_data) { | ||
169 | kfree(object->lookup_data->key); | ||
170 | kfree(object->lookup_data->auxdata); | ||
171 | kfree(object->lookup_data); | ||
172 | object->lookup_data = NULL; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * increment the usage count on an inode object (may fail if unmounting) | ||
178 | */ | ||
179 | static | ||
180 | struct fscache_object *cachefiles_grab_object(struct fscache_object *_object) | ||
181 | { | ||
182 | struct cachefiles_object *object = | ||
183 | container_of(_object, struct cachefiles_object, fscache); | ||
184 | |||
185 | _enter("{OBJ%x,%d}", _object->debug_id, atomic_read(&object->usage)); | ||
186 | |||
187 | #ifdef CACHEFILES_DEBUG_SLAB | ||
188 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
189 | #endif | ||
190 | |||
191 | atomic_inc(&object->usage); | ||
192 | return &object->fscache; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * update the auxilliary data for an object object on disk | ||
197 | */ | ||
198 | static void cachefiles_update_object(struct fscache_object *_object) | ||
199 | { | ||
200 | struct cachefiles_object *object; | ||
201 | struct cachefiles_xattr *auxdata; | ||
202 | struct cachefiles_cache *cache; | ||
203 | struct fscache_cookie *cookie; | ||
204 | const struct cred *saved_cred; | ||
205 | unsigned auxlen; | ||
206 | |||
207 | _enter("{OBJ%x}", _object->debug_id); | ||
208 | |||
209 | object = container_of(_object, struct cachefiles_object, fscache); | ||
210 | cache = container_of(object->fscache.cache, struct cachefiles_cache, | ||
211 | cache); | ||
212 | cookie = object->fscache.cookie; | ||
213 | |||
214 | if (!cookie->def->get_aux) { | ||
215 | _leave(" [no aux]"); | ||
216 | return; | ||
217 | } | ||
218 | |||
219 | auxdata = kmalloc(2 + 512 + 3, GFP_KERNEL); | ||
220 | if (!auxdata) { | ||
221 | _leave(" [nomem]"); | ||
222 | return; | ||
223 | } | ||
224 | |||
225 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); | ||
226 | ASSERTCMP(auxlen, <, 511); | ||
227 | |||
228 | auxdata->len = auxlen + 1; | ||
229 | auxdata->type = cookie->def->type; | ||
230 | |||
231 | cachefiles_begin_secure(cache, &saved_cred); | ||
232 | cachefiles_update_object_xattr(object, auxdata); | ||
233 | cachefiles_end_secure(cache, saved_cred); | ||
234 | kfree(auxdata); | ||
235 | _leave(""); | ||
236 | } | ||
237 | |||
238 | /* | ||
239 | * discard the resources pinned by an object and effect retirement if | ||
240 | * requested | ||
241 | */ | ||
242 | static void cachefiles_drop_object(struct fscache_object *_object) | ||
243 | { | ||
244 | struct cachefiles_object *object; | ||
245 | struct cachefiles_cache *cache; | ||
246 | const struct cred *saved_cred; | ||
247 | |||
248 | ASSERT(_object); | ||
249 | |||
250 | object = container_of(_object, struct cachefiles_object, fscache); | ||
251 | |||
252 | _enter("{OBJ%x,%d}", | ||
253 | object->fscache.debug_id, atomic_read(&object->usage)); | ||
254 | |||
255 | cache = container_of(object->fscache.cache, | ||
256 | struct cachefiles_cache, cache); | ||
257 | |||
258 | #ifdef CACHEFILES_DEBUG_SLAB | ||
259 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
260 | #endif | ||
261 | |||
262 | /* delete retired objects */ | ||
263 | if (object->fscache.state == FSCACHE_OBJECT_RECYCLING && | ||
264 | _object != cache->cache.fsdef | ||
265 | ) { | ||
266 | _debug("- retire object OBJ%x", object->fscache.debug_id); | ||
267 | cachefiles_begin_secure(cache, &saved_cred); | ||
268 | cachefiles_delete_object(cache, object); | ||
269 | cachefiles_end_secure(cache, saved_cred); | ||
270 | } | ||
271 | |||
272 | /* close the filesystem stuff attached to the object */ | ||
273 | if (object->backer != object->dentry) | ||
274 | dput(object->backer); | ||
275 | object->backer = NULL; | ||
276 | |||
277 | /* note that the object is now inactive */ | ||
278 | if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) { | ||
279 | write_lock(&cache->active_lock); | ||
280 | if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE, | ||
281 | &object->flags)) | ||
282 | BUG(); | ||
283 | rb_erase(&object->active_node, &cache->active_nodes); | ||
284 | wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); | ||
285 | write_unlock(&cache->active_lock); | ||
286 | } | ||
287 | |||
288 | dput(object->dentry); | ||
289 | object->dentry = NULL; | ||
290 | |||
291 | _leave(""); | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * dispose of a reference to an object | ||
296 | */ | ||
297 | static void cachefiles_put_object(struct fscache_object *_object) | ||
298 | { | ||
299 | struct cachefiles_object *object; | ||
300 | struct fscache_cache *cache; | ||
301 | |||
302 | ASSERT(_object); | ||
303 | |||
304 | object = container_of(_object, struct cachefiles_object, fscache); | ||
305 | |||
306 | _enter("{OBJ%x,%d}", | ||
307 | object->fscache.debug_id, atomic_read(&object->usage)); | ||
308 | |||
309 | #ifdef CACHEFILES_DEBUG_SLAB | ||
310 | ASSERT((atomic_read(&object->usage) & 0xffff0000) != 0x6b6b0000); | ||
311 | #endif | ||
312 | |||
313 | ASSERTIFCMP(object->fscache.parent, | ||
314 | object->fscache.parent->n_children, >, 0); | ||
315 | |||
316 | if (atomic_dec_and_test(&object->usage)) { | ||
317 | _debug("- kill object OBJ%x", object->fscache.debug_id); | ||
318 | |||
319 | ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)); | ||
320 | ASSERTCMP(object->fscache.parent, ==, NULL); | ||
321 | ASSERTCMP(object->backer, ==, NULL); | ||
322 | ASSERTCMP(object->dentry, ==, NULL); | ||
323 | ASSERTCMP(object->fscache.n_ops, ==, 0); | ||
324 | ASSERTCMP(object->fscache.n_children, ==, 0); | ||
325 | |||
326 | if (object->lookup_data) { | ||
327 | kfree(object->lookup_data->key); | ||
328 | kfree(object->lookup_data->auxdata); | ||
329 | kfree(object->lookup_data); | ||
330 | object->lookup_data = NULL; | ||
331 | } | ||
332 | |||
333 | cache = object->fscache.cache; | ||
334 | kmem_cache_free(cachefiles_object_jar, object); | ||
335 | fscache_object_destroyed(cache); | ||
336 | } | ||
337 | |||
338 | _leave(""); | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * sync a cache | ||
343 | */ | ||
344 | static void cachefiles_sync_cache(struct fscache_cache *_cache) | ||
345 | { | ||
346 | struct cachefiles_cache *cache; | ||
347 | const struct cred *saved_cred; | ||
348 | int ret; | ||
349 | |||
350 | _enter("%p", _cache); | ||
351 | |||
352 | cache = container_of(_cache, struct cachefiles_cache, cache); | ||
353 | |||
354 | /* make sure all pages pinned by operations on behalf of the netfs are | ||
355 | * written to disc */ | ||
356 | cachefiles_begin_secure(cache, &saved_cred); | ||
357 | ret = fsync_super(cache->mnt->mnt_sb); | ||
358 | cachefiles_end_secure(cache, saved_cred); | ||
359 | |||
360 | if (ret == -EIO) | ||
361 | cachefiles_io_error(cache, | ||
362 | "Attempt to sync backing fs superblock" | ||
363 | " returned error %d", | ||
364 | ret); | ||
365 | } | ||
366 | |||
367 | /* | ||
368 | * notification the attributes on an object have changed | ||
369 | * - called with reads/writes excluded by FS-Cache | ||
370 | */ | ||
371 | static int cachefiles_attr_changed(struct fscache_object *_object) | ||
372 | { | ||
373 | struct cachefiles_object *object; | ||
374 | struct cachefiles_cache *cache; | ||
375 | const struct cred *saved_cred; | ||
376 | struct iattr newattrs; | ||
377 | uint64_t ni_size; | ||
378 | loff_t oi_size; | ||
379 | int ret; | ||
380 | |||
381 | _object->cookie->def->get_attr(_object->cookie->netfs_data, &ni_size); | ||
382 | |||
383 | _enter("{OBJ%x},[%llu]", | ||
384 | _object->debug_id, (unsigned long long) ni_size); | ||
385 | |||
386 | object = container_of(_object, struct cachefiles_object, fscache); | ||
387 | cache = container_of(object->fscache.cache, | ||
388 | struct cachefiles_cache, cache); | ||
389 | |||
390 | if (ni_size == object->i_size) | ||
391 | return 0; | ||
392 | |||
393 | if (!object->backer) | ||
394 | return -ENOBUFS; | ||
395 | |||
396 | ASSERT(S_ISREG(object->backer->d_inode->i_mode)); | ||
397 | |||
398 | fscache_set_store_limit(&object->fscache, ni_size); | ||
399 | |||
400 | oi_size = i_size_read(object->backer->d_inode); | ||
401 | if (oi_size == ni_size) | ||
402 | return 0; | ||
403 | |||
404 | newattrs.ia_size = ni_size; | ||
405 | newattrs.ia_valid = ATTR_SIZE; | ||
406 | |||
407 | cachefiles_begin_secure(cache, &saved_cred); | ||
408 | mutex_lock(&object->backer->d_inode->i_mutex); | ||
409 | ret = notify_change(object->backer, &newattrs); | ||
410 | mutex_unlock(&object->backer->d_inode->i_mutex); | ||
411 | cachefiles_end_secure(cache, saved_cred); | ||
412 | |||
413 | if (ret == -EIO) { | ||
414 | fscache_set_store_limit(&object->fscache, 0); | ||
415 | cachefiles_io_error_obj(object, "Size set failed"); | ||
416 | ret = -ENOBUFS; | ||
417 | } | ||
418 | |||
419 | _leave(" = %d", ret); | ||
420 | return ret; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * dissociate a cache from all the pages it was backing (as written this only emits the entry trace; nothing else is done here) | ||
425 | */ | ||
426 | static void cachefiles_dissociate_pages(struct fscache_cache *cache) | ||
427 | { | ||
428 | _enter(""); | ||
429 | } | ||
430 | |||
431 | const struct fscache_cache_ops cachefiles_cache_ops = { /* table of the operations this backend provides; declared extern in internal.h */ | ||
432 | .name = "cachefiles", | ||
433 | .alloc_object = cachefiles_alloc_object, | ||
434 | .lookup_object = cachefiles_lookup_object, | ||
435 | .lookup_complete = cachefiles_lookup_complete, | ||
436 | .grab_object = cachefiles_grab_object, | ||
437 | .update_object = cachefiles_update_object, | ||
438 | .drop_object = cachefiles_drop_object, | ||
439 | .put_object = cachefiles_put_object, | ||
440 | .sync_cache = cachefiles_sync_cache, | ||
441 | .attr_changed = cachefiles_attr_changed, | ||
442 | .read_or_alloc_page = cachefiles_read_or_alloc_page, /* the page I/O handlers from here to .uncache_page are only declared in internal.h, not defined in this file */ | ||
443 | .read_or_alloc_pages = cachefiles_read_or_alloc_pages, | ||
444 | .allocate_page = cachefiles_allocate_page, | ||
445 | .allocate_pages = cachefiles_allocate_pages, | ||
446 | .write_page = cachefiles_write_page, | ||
447 | .uncache_page = cachefiles_uncache_page, | ||
448 | .dissociate_pages = cachefiles_dissociate_pages, | ||
449 | }; | ||
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h new file mode 100644 index 000000000000..19218e1463d6 --- /dev/null +++ b/fs/cachefiles/internal.h | |||
@@ -0,0 +1,360 @@ | |||
1 | /* General netfs cache on cache files internal defs | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fscache-cache.h> | ||
13 | #include <linux/timer.h> | ||
14 | #include <linux/wait.h> | ||
15 | #include <linux/workqueue.h> | ||
16 | #include <linux/security.h> | ||
17 | |||
18 | struct cachefiles_cache; | ||
19 | struct cachefiles_object; | ||
20 | |||
21 | extern unsigned cachefiles_debug; | ||
22 | #define CACHEFILES_DEBUG_KENTER 1 | ||
23 | #define CACHEFILES_DEBUG_KLEAVE 2 | ||
24 | #define CACHEFILES_DEBUG_KDEBUG 4 | ||
25 | |||
26 | /* | ||
27 | * node records: one per FS-Cache object handled by this backend; the embedded fscache member is what FS-Cache sees and container_of() on it recovers the record | ||
28 | */ | ||
29 | struct cachefiles_object { | ||
30 | struct fscache_object fscache; /* fscache handle */ | ||
31 | struct cachefiles_lookup_data *lookup_data; /* cached lookup data (attached at allocation; freed and cleared on lookup completion or final put) */ | ||
32 | struct dentry *dentry; /* the file/dir representing this object */ | ||
33 | struct dentry *backer; /* backing file (may be the same dentry as ->dentry, in which case it is not put separately) */ | ||
34 | loff_t i_size; /* object size */ | ||
35 | unsigned long flags; /* CACHEFILES_OBJECT_* bit numbers, used with the atomic bitops */ | ||
36 | #define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ | ||
37 | atomic_t usage; /* object usage count (set to 1 at allocation; record freed when it hits 0) */ | ||
38 | uint8_t type; /* object type (copied from cookie->def->type) */ | ||
39 | uint8_t new; /* T if object new */ | ||
40 | spinlock_t work_lock; | ||
41 | struct rb_node active_node; /* link in active tree (dentry is key) */ | ||
42 | }; | ||
43 | |||
44 | extern struct kmem_cache *cachefiles_object_jar; | ||
45 | |||
46 | /* | ||
47 | * Cache files cache definition: one per cache; embeds the FS-Cache record and holds the daemon-set limits and state flags | ||
48 | */ | ||
49 | struct cachefiles_cache { | ||
50 | struct fscache_cache cache; /* FS-Cache record */ | ||
51 | struct vfsmount *mnt; /* mountpoint holding the cache */ | ||
52 | struct dentry *graveyard; /* directory into which dead objects go */ | ||
53 | struct file *cachefilesd; /* manager daemon handle */ | ||
54 | const struct cred *cache_cred; /* security override for accessing cache (what cachefiles_begin_secure() passes to override_creds()) */ | ||
55 | struct mutex daemon_mutex; /* command serialisation mutex */ | ||
56 | wait_queue_head_t daemon_pollwq; /* poll waitqueue for daemon (woken by cachefiles_state_changed()) */ | ||
57 | struct rb_root active_nodes; /* active nodes (can't be culled) */ | ||
58 | rwlock_t active_lock; /* lock for active_nodes */ | ||
59 | atomic_t gravecounter; /* graveyard uniquifier */ | ||
60 | unsigned frun_percent; /* when to stop culling (% files) */ | ||
61 | unsigned fcull_percent; /* when to start culling (% files) */ | ||
62 | unsigned fstop_percent; /* when to stop allocating (% files) */ | ||
63 | unsigned brun_percent; /* when to stop culling (% blocks) */ | ||
64 | unsigned bcull_percent; /* when to start culling (% blocks) */ | ||
65 | unsigned bstop_percent; /* when to stop allocating (% blocks) */ | ||
66 | unsigned bsize; /* cache's block size */ | ||
67 | unsigned bshift; /* right-shift applied to statfs f_bavail in cachefiles_has_space(); originally documented as min(ilog2(PAGE_SIZE / bsize), 0) - NOTE(review): min() cannot be right for a shift count, presumably max() was meant; confirm where bshift is set */ | ||
68 | uint64_t frun; /* when to stop culling */ | ||
69 | uint64_t fcull; /* when to start culling */ | ||
70 | uint64_t fstop; /* when to stop allocating */ | ||
71 | sector_t brun; /* when to stop culling */ | ||
72 | sector_t bcull; /* when to start culling */ | ||
73 | sector_t bstop; /* when to stop allocating */ | ||
74 | unsigned long flags; /* CACHEFILES_* bit numbers below, used with the atomic bitops */ | ||
75 | #define CACHEFILES_READY 0 /* T if cache prepared */ | ||
76 | #define CACHEFILES_DEAD 1 /* T if cache dead */ | ||
77 | #define CACHEFILES_CULLING 2 /* T if cull engaged */ | ||
78 | #define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */ | ||
79 | char *rootdirname; /* name of cache root directory */ | ||
80 | char *secctx; /* LSM security context */ | ||
81 | char *tag; /* cache binding tag */ | ||
82 | }; | ||
83 | |||
84 | /* | ||
85 | * backing file read tracking: one record per netfs page being filled from a backing file page | ||
86 | */ | ||
87 | struct cachefiles_one_read { | ||
88 | wait_queue_t monitor; /* link into monitored waitqueue */ | ||
89 | struct page *back_page; /* backing file page we're waiting for */ | ||
90 | struct page *netfs_page; /* netfs page we're going to fill */ | ||
91 | struct fscache_retrieval *op; /* retrieval op covering this */ | ||
92 | struct list_head op_link; /* link in op's todo list */ | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * backing file write tracking: one record per netfs page to be copied | ||
97 | */ | ||
98 | struct cachefiles_one_write { | ||
99 | struct page *netfs_page; /* netfs page to copy */ | ||
100 | struct cachefiles_object *object; /* NOTE(review): presumably the object being written to - confirm against the users of this struct */ | ||
101 | struct list_head obj_link; /* link in object's lists */ | ||
102 | fscache_rw_complete_t end_io_func; /* NOTE(review): presumably the completion callback, with context below as its argument - confirm */ | ||
103 | void *context; | ||
104 | }; | ||
105 | |||
106 | /* | ||
107 | * auxiliary data xattr buffer: object type byte followed by the netfs's auxiliary data | ||
108 | */ | ||
109 | struct cachefiles_xattr { | ||
110 | uint16_t len; /* set by the callers in interface.c to the aux data length + 1, i.e. it counts the type byte too */ | ||
111 | uint8_t type; /* object type, copied from cookie->def->type */ | ||
112 | uint8_t data[]; /* auxiliary data as filled in by the netfs's get_aux() */ | ||
113 | }; | ||
114 | |||
115 | /* | ||
116 | * note change of state for daemon: set CACHEFILES_STATE_CHANGED in the cache's flags, then wake everything waiting on the daemon poll waitqueue | ||
117 | */ | ||
118 | static inline void cachefiles_state_changed(struct cachefiles_cache *cache) | ||
119 | { | ||
120 | set_bit(CACHEFILES_STATE_CHANGED, &cache->flags); | ||
121 | wake_up_all(&cache->daemon_pollwq); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * cf-bind.c | ||
126 | */ | ||
127 | extern int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args); | ||
128 | extern void cachefiles_daemon_unbind(struct cachefiles_cache *cache); | ||
129 | |||
130 | /* | ||
131 | * cf-daemon.c | ||
132 | */ | ||
133 | extern const struct file_operations cachefiles_daemon_fops; | ||
134 | |||
135 | extern int cachefiles_has_space(struct cachefiles_cache *cache, | ||
136 | unsigned fnr, unsigned bnr); | ||
137 | |||
138 | /* | ||
139 | * interface.c | ||
140 | */ | ||
141 | extern const struct fscache_cache_ops cachefiles_cache_ops; | ||
142 | |||
143 | /* | ||
144 | * cf-key.c | ||
145 | */ | ||
146 | extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type); | ||
147 | |||
148 | /* | ||
149 | * cf-namei.c | ||
150 | */ | ||
151 | extern int cachefiles_delete_object(struct cachefiles_cache *cache, | ||
152 | struct cachefiles_object *object); | ||
153 | extern int cachefiles_walk_to_object(struct cachefiles_object *parent, | ||
154 | struct cachefiles_object *object, | ||
155 | const char *key, | ||
156 | struct cachefiles_xattr *auxdata); | ||
157 | extern struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, | ||
158 | struct dentry *dir, | ||
159 | const char *name); | ||
160 | |||
161 | extern int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, | ||
162 | char *filename); | ||
163 | |||
164 | extern int cachefiles_check_in_use(struct cachefiles_cache *cache, | ||
165 | struct dentry *dir, char *filename); | ||
166 | |||
167 | /* | ||
168 | * cf-proc.c | ||
169 | */ | ||
170 | #ifdef CONFIG_CACHEFILES_HISTOGRAM | ||
171 | extern atomic_t cachefiles_lookup_histogram[HZ]; | ||
172 | extern atomic_t cachefiles_mkdir_histogram[HZ]; | ||
173 | extern atomic_t cachefiles_create_histogram[HZ]; | ||
174 | |||
175 | extern int __init cachefiles_proc_init(void); | ||
176 | extern void cachefiles_proc_cleanup(void); | ||
177 | static inline | ||
178 | void cachefiles_hist(atomic_t histogram[], unsigned long start_jif) | ||
179 | { | ||
180 | unsigned long jif = jiffies - start_jif; | ||
181 | if (jif >= HZ) | ||
182 | jif = HZ - 1; | ||
183 | atomic_inc(&histogram[jif]); | ||
184 | } | ||
185 | |||
186 | #else | ||
187 | #define cachefiles_proc_init() (0) | ||
188 | #define cachefiles_proc_cleanup() do {} while (0) | ||
189 | #define cachefiles_hist(hist, start_jif) do {} while (0) | ||
190 | #endif | ||
191 | |||
192 | /* | ||
193 | * cf-rdwr.c | ||
194 | */ | ||
195 | extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *, | ||
196 | struct page *, gfp_t); | ||
197 | extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *, | ||
198 | struct list_head *, unsigned *, | ||
199 | gfp_t); | ||
200 | extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *, | ||
201 | gfp_t); | ||
202 | extern int cachefiles_allocate_pages(struct fscache_retrieval *, | ||
203 | struct list_head *, unsigned *, gfp_t); | ||
204 | extern int cachefiles_write_page(struct fscache_storage *, struct page *); | ||
205 | extern void cachefiles_uncache_page(struct fscache_object *, struct page *); | ||
206 | |||
207 | /* | ||
208 | * cf-security.c | ||
209 | */ | ||
210 | extern int cachefiles_get_security_ID(struct cachefiles_cache *cache); | ||
211 | extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, | ||
212 | struct dentry *root, | ||
213 | const struct cred **_saved_cred); | ||
214 | |||
215 | static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, | ||
216 | const struct cred **_saved_cred) | ||
217 | { | ||
218 | *_saved_cred = override_creds(cache->cache_cred); | ||
219 | } | ||
220 | |||
/*
 * Undo cachefiles_begin_secure() by passing the saved credentials to
 * revert_creds().
 */
static inline void cachefiles_end_secure(struct cachefiles_cache *cache,
					 const struct cred *saved_cred)
{
	/* the cache argument is not needed here; only the saved creds are used */
	revert_creds(saved_cred);
}
226 | |||
227 | /* | ||
228 | * cf-xattr.c | ||
229 | */ | ||
230 | extern int cachefiles_check_object_type(struct cachefiles_object *object); | ||
231 | extern int cachefiles_set_object_xattr(struct cachefiles_object *object, | ||
232 | struct cachefiles_xattr *auxdata); | ||
233 | extern int cachefiles_update_object_xattr(struct cachefiles_object *object, | ||
234 | struct cachefiles_xattr *auxdata); | ||
235 | extern int cachefiles_check_object_xattr(struct cachefiles_object *object, | ||
236 | struct cachefiles_xattr *auxdata); | ||
237 | extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, | ||
238 | struct dentry *dentry); | ||
239 | |||
240 | |||
/*
 * error handling
 */

/* emit an error-level kernel message prefixed with "CacheFiles: " */
#define kerror(FMT, ...) \
	printk(KERN_ERR "CacheFiles: "FMT"\n", ##__VA_ARGS__)

/*
 * report an I/O error against a cache: log it, pass it on through
 * fscache_io_error() and set CACHEFILES_DEAD in the cache's flags
 */
#define cachefiles_io_error(___cache, FMT, ...)			\
do {								\
	kerror("I/O Error: " FMT, ##__VA_ARGS__);		\
	fscache_io_error(&(___cache)->cache);			\
	set_bit(CACHEFILES_DEAD, &(___cache)->flags);		\
} while (0)

/* as cachefiles_io_error(), but finding the cache from one of its objects */
#define cachefiles_io_error_obj(object, FMT, ...)			\
do {									\
	struct cachefiles_cache *___cache =				\
		container_of((object)->fscache.cache,			\
			     struct cachefiles_cache, cache);		\
									\
	cachefiles_io_error(___cache, FMT, ##__VA_ARGS__);		\
} while (0)
261 | |||
262 | |||
/*
 * debug tracing
 */

/* unconditional debug message, tagged with the current task's command name */
#define dbgprintk(FMT, ...) \
	printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

/*
 * Empty stand-in used when debugging is compiled out.  It is declared
 * printf-like so that the compiler keeps checking the format strings.
 */
static inline __attribute__((format(printf, 1, 2)))
void _dbprintk(const char *fmt, ...)
{
}

/* always-on function entry / function exit / free-form trace messages */
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)


#ifdef __KDEBUG
/* tracing forced on for this compilation unit */
#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)

#elif defined(CONFIG_CACHEFILES_DEBUG)
/* tracing selected at run time by the bits set in cachefiles_debug */
#define _enter(FMT, ...)					\
do {								\
	if (cachefiles_debug & CACHEFILES_DEBUG_KENTER)		\
		kenter(FMT, ##__VA_ARGS__);			\
} while (0)

#define _leave(FMT, ...)					\
do {								\
	if (cachefiles_debug & CACHEFILES_DEBUG_KLEAVE)		\
		kleave(FMT, ##__VA_ARGS__);			\
} while (0)

#define _debug(FMT, ...)					\
do {								\
	if (cachefiles_debug & CACHEFILES_DEBUG_KDEBUG)		\
		kdebug(FMT, ##__VA_ARGS__);			\
} while (0)

#else
/* tracing compiled out: route everything to the empty stand-in */
#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
#endif
310 | |||
#if 1 /* defined(__KDEBUGALL) */

/* if C holds, insist that X holds too; otherwise log and BUG() */
#define ASSERTIF(C, X)							\
do {									\
	if (unlikely((C) && !(X))) {					\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		BUG();							\
	}								\
} while (0)

/*
 * if C holds, insist that "X OP Y" holds too; on failure both operands are
 * logged in hex before BUG()
 */
#define ASSERTIFCMP(C, X, OP, Y)					\
do {									\
	if (unlikely((C) && !((X) OP (Y)))) {				\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "CacheFiles: Assertion failed\n");	\
		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
		       (unsigned long)(X), (unsigned long)(Y));		\
		BUG();							\
	}								\
} while (0)

/* the unconditional forms are the conditional ones with a constant-true C */
#define ASSERT(X)		ASSERTIF(1, X)
#define ASSERTCMP(X, OP, Y)	ASSERTIFCMP(1, X, OP, Y)

#else

/* assertions compiled out: the arguments are not evaluated at all */
#define ASSERT(X)			do {} while (0)
#define ASSERTCMP(X, OP, Y)		do {} while (0)
#define ASSERTIF(C, X)			do {} while (0)
#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)

#endif
diff --git a/fs/cachefiles/key.c b/fs/cachefiles/key.c new file mode 100644 index 000000000000..81b8b2b3a674 --- /dev/null +++ b/fs/cachefiles/key.c | |||
@@ -0,0 +1,159 @@ | |||
1 | /* Key to pathname encoder | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/slab.h> | ||
13 | #include "internal.h" | ||
14 | |||
/*
 * The 64 characters used to render 6-bit values: index 0-9 are the digits,
 * 10-35 the lower-case letters, 36-61 the upper-case letters, and 62/63 are
 * '_' and '-'.  The array is exactly 64 bytes, so there is no trailing NUL.
 */
static const char cachefiles_charmap[64] =
	"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-";
21 | |||
/*
 * Table of byte values that may be copied into a cooked key verbatim: an
 * entry is 1 if the byte is acceptable and 0 if its presence forces the whole
 * key to be encoded instead.
 */
static const char cachefiles_filecharmap[256] = {
	/* we skip space and tab and control chars */
	[33 ... 46] = 1,	/* '!' -> '.' */
	/* we skip '/' as it's significant to pathwalk */
	[48 ... 126] = 1,	/* '0' -> '~' */
	/* DEL (127) is a control char too, so it is left at 0 like the rest */
};
28 | |||
29 | /* | ||
30 | * turn the raw key into something cooked | ||
31 | * - the raw key should include the length in the two bytes at the front | ||
32 | * - the key may be up to 514 bytes in length (including the length word) | ||
33 | * - "base64" encode the strange keys, mapping 3 bytes of raw to four of | ||
34 | * cooked | ||
35 | * - need to cut the cooked key into 252 char lengths (189 raw bytes) | ||
36 | */ | ||
37 | char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type) | ||
38 | { | ||
39 | unsigned char csum, ch; | ||
40 | unsigned int acc; | ||
41 | char *key; | ||
42 | int loop, len, max, seg, mark, print; | ||
43 | |||
44 | _enter(",%d", keylen); | ||
45 | |||
46 | BUG_ON(keylen < 2 || keylen > 514); | ||
47 | |||
48 | csum = raw[0] + raw[1]; | ||
49 | print = 1; | ||
50 | for (loop = 2; loop < keylen; loop++) { | ||
51 | ch = raw[loop]; | ||
52 | csum += ch; | ||
53 | print &= cachefiles_filecharmap[ch]; | ||
54 | } | ||
55 | |||
56 | if (print) { | ||
57 | /* if the path is usable ASCII, then we render it directly */ | ||
58 | max = keylen - 2; | ||
59 | max += 2; /* two base64'd length chars on the front */ | ||
60 | max += 5; /* @checksum/M */ | ||
61 | max += 3 * 2; /* maximum number of segment dividers (".../M") | ||
62 | * is ((514 + 251) / 252) = 3 | ||
63 | */ | ||
64 | max += 1; /* NUL on end */ | ||
65 | } else { | ||
66 | /* calculate the maximum length of the cooked key */ | ||
67 | keylen = (keylen + 2) / 3; | ||
68 | |||
69 | max = keylen * 4; | ||
70 | max += 5; /* @checksum/M */ | ||
71 | max += 3 * 2; /* maximum number of segment dividers (".../M") | ||
72 | * is ((514 + 188) / 189) = 3 | ||
73 | */ | ||
74 | max += 1; /* NUL on end */ | ||
75 | } | ||
76 | |||
77 | max += 1; /* 2nd NUL on end */ | ||
78 | |||
79 | _debug("max: %d", max); | ||
80 | |||
81 | key = kmalloc(max, GFP_KERNEL); | ||
82 | if (!key) | ||
83 | return NULL; | ||
84 | |||
85 | len = 0; | ||
86 | |||
87 | /* build the cooked key */ | ||
88 | sprintf(key, "@%02x%c+", (unsigned) csum, 0); | ||
89 | len = 5; | ||
90 | mark = len - 1; | ||
91 | |||
92 | if (print) { | ||
93 | acc = *(uint16_t *) raw; | ||
94 | raw += 2; | ||
95 | |||
96 | key[len + 1] = cachefiles_charmap[acc & 63]; | ||
97 | acc >>= 6; | ||
98 | key[len] = cachefiles_charmap[acc & 63]; | ||
99 | len += 2; | ||
100 | |||
101 | seg = 250; | ||
102 | for (loop = keylen; loop > 0; loop--) { | ||
103 | if (seg <= 0) { | ||
104 | key[len++] = '\0'; | ||
105 | mark = len; | ||
106 | key[len++] = '+'; | ||
107 | seg = 252; | ||
108 | } | ||
109 | |||
110 | key[len++] = *raw++; | ||
111 | ASSERT(len < max); | ||
112 | } | ||
113 | |||
114 | switch (type) { | ||
115 | case FSCACHE_COOKIE_TYPE_INDEX: type = 'I'; break; | ||
116 | case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'D'; break; | ||
117 | default: type = 'S'; break; | ||
118 | } | ||
119 | } else { | ||
120 | seg = 252; | ||
121 | for (loop = keylen; loop > 0; loop--) { | ||
122 | if (seg <= 0) { | ||
123 | key[len++] = '\0'; | ||
124 | mark = len; | ||
125 | key[len++] = '+'; | ||
126 | seg = 252; | ||
127 | } | ||
128 | |||
129 | acc = *raw++; | ||
130 | acc |= *raw++ << 8; | ||
131 | acc |= *raw++ << 16; | ||
132 | |||
133 | _debug("acc: %06x", acc); | ||
134 | |||
135 | key[len++] = cachefiles_charmap[acc & 63]; | ||
136 | acc >>= 6; | ||
137 | key[len++] = cachefiles_charmap[acc & 63]; | ||
138 | acc >>= 6; | ||
139 | key[len++] = cachefiles_charmap[acc & 63]; | ||
140 | acc >>= 6; | ||
141 | key[len++] = cachefiles_charmap[acc & 63]; | ||
142 | |||
143 | ASSERT(len < max); | ||
144 | } | ||
145 | |||
146 | switch (type) { | ||
147 | case FSCACHE_COOKIE_TYPE_INDEX: type = 'J'; break; | ||
148 | case FSCACHE_COOKIE_TYPE_DATAFILE: type = 'E'; break; | ||
149 | default: type = 'T'; break; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | key[mark] = type; | ||
154 | key[len++] = 0; | ||
155 | key[len] = 0; | ||
156 | |||
157 | _leave(" = %p %d", key, len); | ||
158 | return key; | ||
159 | } | ||
diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c new file mode 100644 index 000000000000..4bfa8cf43bf5 --- /dev/null +++ b/fs/cachefiles/main.c | |||
@@ -0,0 +1,106 @@ | |||
1 | /* Network filesystem caching backend to use cache files on a premounted | ||
2 | * filesystem | ||
3 | * | ||
4 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
5 | * Written by David Howells (dhowells@redhat.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public Licence | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the Licence, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/completion.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/fs.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/mount.h> | ||
22 | #include <linux/statfs.h> | ||
23 | #include <linux/sysctl.h> | ||
24 | #include <linux/miscdevice.h> | ||
25 | #include "internal.h" | ||
26 | |||
27 | unsigned cachefiles_debug; | ||
28 | module_param_named(debug, cachefiles_debug, uint, S_IWUSR | S_IRUGO); | ||
29 | MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); | ||
30 | |||
31 | MODULE_DESCRIPTION("Mounted-filesystem based cache"); | ||
32 | MODULE_AUTHOR("Red Hat, Inc."); | ||
33 | MODULE_LICENSE("GPL"); | ||
34 | |||
35 | struct kmem_cache *cachefiles_object_jar; | ||
36 | |||
37 | static struct miscdevice cachefiles_dev = { | ||
38 | .minor = MISC_DYNAMIC_MINOR, | ||
39 | .name = "cachefiles", | ||
40 | .fops = &cachefiles_daemon_fops, | ||
41 | }; | ||
42 | |||
43 | static void cachefiles_object_init_once(void *_object) | ||
44 | { | ||
45 | struct cachefiles_object *object = _object; | ||
46 | |||
47 | memset(object, 0, sizeof(*object)); | ||
48 | spin_lock_init(&object->work_lock); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * initialise the fs caching module | ||
53 | */ | ||
54 | static int __init cachefiles_init(void) | ||
55 | { | ||
56 | int ret; | ||
57 | |||
58 | ret = misc_register(&cachefiles_dev); | ||
59 | if (ret < 0) | ||
60 | goto error_dev; | ||
61 | |||
62 | /* create an object jar */ | ||
63 | ret = -ENOMEM; | ||
64 | cachefiles_object_jar = | ||
65 | kmem_cache_create("cachefiles_object_jar", | ||
66 | sizeof(struct cachefiles_object), | ||
67 | 0, | ||
68 | SLAB_HWCACHE_ALIGN, | ||
69 | cachefiles_object_init_once); | ||
70 | if (!cachefiles_object_jar) { | ||
71 | printk(KERN_NOTICE | ||
72 | "CacheFiles: Failed to allocate an object jar\n"); | ||
73 | goto error_object_jar; | ||
74 | } | ||
75 | |||
76 | ret = cachefiles_proc_init(); | ||
77 | if (ret < 0) | ||
78 | goto error_proc; | ||
79 | |||
80 | printk(KERN_INFO "CacheFiles: Loaded\n"); | ||
81 | return 0; | ||
82 | |||
83 | error_proc: | ||
84 | kmem_cache_destroy(cachefiles_object_jar); | ||
85 | error_object_jar: | ||
86 | misc_deregister(&cachefiles_dev); | ||
87 | error_dev: | ||
88 | kerror("failed to register: %d", ret); | ||
89 | return ret; | ||
90 | } | ||
91 | |||
92 | fs_initcall(cachefiles_init); | ||
93 | |||
94 | /* | ||
95 | * clean up on module removal | ||
96 | */ | ||
97 | static void __exit cachefiles_exit(void) | ||
98 | { | ||
99 | printk(KERN_INFO "CacheFiles: Unloading\n"); | ||
100 | |||
101 | cachefiles_proc_cleanup(); | ||
102 | kmem_cache_destroy(cachefiles_object_jar); | ||
103 | misc_deregister(&cachefiles_dev); | ||
104 | } | ||
105 | |||
106 | module_exit(cachefiles_exit); | ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c new file mode 100644 index 000000000000..4ce818ae39ea --- /dev/null +++ b/fs/cachefiles/namei.c | |||
@@ -0,0 +1,771 @@ | |||
1 | /* CacheFiles path walking and related routines | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/fsnotify.h> | ||
17 | #include <linux/quotaops.h> | ||
18 | #include <linux/xattr.h> | ||
19 | #include <linux/mount.h> | ||
20 | #include <linux/namei.h> | ||
21 | #include <linux/security.h> | ||
22 | #include "internal.h" | ||
23 | |||
/*
 * Action routine handed to wait_on_bit(): give up the CPU and return 0.
 * The flags pointer is not looked at.
 */
static int cachefiles_wait_bit(void *flags)
{
	schedule();

	return 0;
}
29 | |||
30 | /* | ||
31 | * record the fact that an object is now active | ||
32 | */ | ||
33 | static void cachefiles_mark_object_active(struct cachefiles_cache *cache, | ||
34 | struct cachefiles_object *object) | ||
35 | { | ||
36 | struct cachefiles_object *xobject; | ||
37 | struct rb_node **_p, *_parent = NULL; | ||
38 | struct dentry *dentry; | ||
39 | |||
40 | _enter(",%p", object); | ||
41 | |||
42 | try_again: | ||
43 | write_lock(&cache->active_lock); | ||
44 | |||
45 | if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) | ||
46 | BUG(); | ||
47 | |||
48 | dentry = object->dentry; | ||
49 | _p = &cache->active_nodes.rb_node; | ||
50 | while (*_p) { | ||
51 | _parent = *_p; | ||
52 | xobject = rb_entry(_parent, | ||
53 | struct cachefiles_object, active_node); | ||
54 | |||
55 | ASSERT(xobject != object); | ||
56 | |||
57 | if (xobject->dentry > dentry) | ||
58 | _p = &(*_p)->rb_left; | ||
59 | else if (xobject->dentry < dentry) | ||
60 | _p = &(*_p)->rb_right; | ||
61 | else | ||
62 | goto wait_for_old_object; | ||
63 | } | ||
64 | |||
65 | rb_link_node(&object->active_node, _parent, _p); | ||
66 | rb_insert_color(&object->active_node, &cache->active_nodes); | ||
67 | |||
68 | write_unlock(&cache->active_lock); | ||
69 | _leave(""); | ||
70 | return; | ||
71 | |||
72 | /* an old object from a previous incarnation is hogging the slot - we | ||
73 | * need to wait for it to be destroyed */ | ||
74 | wait_for_old_object: | ||
75 | if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { | ||
76 | printk(KERN_ERR "\n"); | ||
77 | printk(KERN_ERR "CacheFiles: Error:" | ||
78 | " Unexpected object collision\n"); | ||
79 | printk(KERN_ERR "xobject: OBJ%x\n", | ||
80 | xobject->fscache.debug_id); | ||
81 | printk(KERN_ERR "xobjstate=%s\n", | ||
82 | fscache_object_states[xobject->fscache.state]); | ||
83 | printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags); | ||
84 | printk(KERN_ERR "xobjevent=%lx [%lx]\n", | ||
85 | xobject->fscache.events, xobject->fscache.event_mask); | ||
86 | printk(KERN_ERR "xops=%u inp=%u exc=%u\n", | ||
87 | xobject->fscache.n_ops, xobject->fscache.n_in_progress, | ||
88 | xobject->fscache.n_exclusive); | ||
89 | printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n", | ||
90 | xobject->fscache.cookie, | ||
91 | xobject->fscache.cookie->parent, | ||
92 | xobject->fscache.cookie->netfs_data, | ||
93 | xobject->fscache.cookie->flags); | ||
94 | printk(KERN_ERR "xparent=%p\n", | ||
95 | xobject->fscache.parent); | ||
96 | printk(KERN_ERR "object: OBJ%x\n", | ||
97 | object->fscache.debug_id); | ||
98 | printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n", | ||
99 | object->fscache.cookie, | ||
100 | object->fscache.cookie->parent, | ||
101 | object->fscache.cookie->netfs_data, | ||
102 | object->fscache.cookie->flags); | ||
103 | printk(KERN_ERR "parent=%p\n", | ||
104 | object->fscache.parent); | ||
105 | BUG(); | ||
106 | } | ||
107 | atomic_inc(&xobject->usage); | ||
108 | write_unlock(&cache->active_lock); | ||
109 | |||
110 | _debug(">>> wait"); | ||
111 | wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, | ||
112 | cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); | ||
113 | _debug("<<< waited"); | ||
114 | |||
115 | cache->cache.ops->put_object(&xobject->fscache); | ||
116 | goto try_again; | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * delete an object representation from the cache | ||
121 | * - file backed objects are unlinked | ||
122 | * - directory backed objects are stuffed into the graveyard for userspace to | ||
123 | * delete | ||
124 | * - unlocks the directory mutex | ||
125 | */ | ||
126 | static int cachefiles_bury_object(struct cachefiles_cache *cache, | ||
127 | struct dentry *dir, | ||
128 | struct dentry *rep) | ||
129 | { | ||
130 | struct dentry *grave, *trap; | ||
131 | char nbuffer[8 + 8 + 1]; | ||
132 | int ret; | ||
133 | |||
134 | _enter(",'%*.*s','%*.*s'", | ||
135 | dir->d_name.len, dir->d_name.len, dir->d_name.name, | ||
136 | rep->d_name.len, rep->d_name.len, rep->d_name.name); | ||
137 | |||
138 | /* non-directories can just be unlinked */ | ||
139 | if (!S_ISDIR(rep->d_inode->i_mode)) { | ||
140 | _debug("unlink stale object"); | ||
141 | ret = vfs_unlink(dir->d_inode, rep); | ||
142 | |||
143 | mutex_unlock(&dir->d_inode->i_mutex); | ||
144 | |||
145 | if (ret == -EIO) | ||
146 | cachefiles_io_error(cache, "Unlink failed"); | ||
147 | |||
148 | _leave(" = %d", ret); | ||
149 | return ret; | ||
150 | } | ||
151 | |||
152 | /* directories have to be moved to the graveyard */ | ||
153 | _debug("move stale object to graveyard"); | ||
154 | mutex_unlock(&dir->d_inode->i_mutex); | ||
155 | |||
156 | try_again: | ||
157 | /* first step is to make up a grave dentry in the graveyard */ | ||
158 | sprintf(nbuffer, "%08x%08x", | ||
159 | (uint32_t) get_seconds(), | ||
160 | (uint32_t) atomic_inc_return(&cache->gravecounter)); | ||
161 | |||
162 | /* do the multiway lock magic */ | ||
163 | trap = lock_rename(cache->graveyard, dir); | ||
164 | |||
165 | /* do some checks before getting the grave dentry */ | ||
166 | if (rep->d_parent != dir) { | ||
167 | /* the entry was probably culled when we dropped the parent dir | ||
168 | * lock */ | ||
169 | unlock_rename(cache->graveyard, dir); | ||
170 | _leave(" = 0 [culled?]"); | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) { | ||
175 | unlock_rename(cache->graveyard, dir); | ||
176 | cachefiles_io_error(cache, "Graveyard no longer a directory"); | ||
177 | return -EIO; | ||
178 | } | ||
179 | |||
180 | if (trap == rep) { | ||
181 | unlock_rename(cache->graveyard, dir); | ||
182 | cachefiles_io_error(cache, "May not make directory loop"); | ||
183 | return -EIO; | ||
184 | } | ||
185 | |||
186 | if (d_mountpoint(rep)) { | ||
187 | unlock_rename(cache->graveyard, dir); | ||
188 | cachefiles_io_error(cache, "Mountpoint in cache"); | ||
189 | return -EIO; | ||
190 | } | ||
191 | |||
192 | grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer)); | ||
193 | if (IS_ERR(grave)) { | ||
194 | unlock_rename(cache->graveyard, dir); | ||
195 | |||
196 | if (PTR_ERR(grave) == -ENOMEM) { | ||
197 | _leave(" = -ENOMEM"); | ||
198 | return -ENOMEM; | ||
199 | } | ||
200 | |||
201 | cachefiles_io_error(cache, "Lookup error %ld", | ||
202 | PTR_ERR(grave)); | ||
203 | return -EIO; | ||
204 | } | ||
205 | |||
206 | if (grave->d_inode) { | ||
207 | unlock_rename(cache->graveyard, dir); | ||
208 | dput(grave); | ||
209 | grave = NULL; | ||
210 | cond_resched(); | ||
211 | goto try_again; | ||
212 | } | ||
213 | |||
214 | if (d_mountpoint(grave)) { | ||
215 | unlock_rename(cache->graveyard, dir); | ||
216 | dput(grave); | ||
217 | cachefiles_io_error(cache, "Mountpoint in graveyard"); | ||
218 | return -EIO; | ||
219 | } | ||
220 | |||
221 | /* target should not be an ancestor of source */ | ||
222 | if (trap == grave) { | ||
223 | unlock_rename(cache->graveyard, dir); | ||
224 | dput(grave); | ||
225 | cachefiles_io_error(cache, "May not make directory loop"); | ||
226 | return -EIO; | ||
227 | } | ||
228 | |||
229 | /* attempt the rename */ | ||
230 | ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); | ||
231 | if (ret != 0 && ret != -ENOMEM) | ||
232 | cachefiles_io_error(cache, "Rename failed with error %d", ret); | ||
233 | |||
234 | unlock_rename(cache->graveyard, dir); | ||
235 | dput(grave); | ||
236 | _leave(" = 0"); | ||
237 | return 0; | ||
238 | } | ||
239 | |||
240 | /* | ||
241 | * delete an object representation from the cache | ||
242 | */ | ||
243 | int cachefiles_delete_object(struct cachefiles_cache *cache, | ||
244 | struct cachefiles_object *object) | ||
245 | { | ||
246 | struct dentry *dir; | ||
247 | int ret; | ||
248 | |||
249 | _enter(",{%p}", object->dentry); | ||
250 | |||
251 | ASSERT(object->dentry); | ||
252 | ASSERT(object->dentry->d_inode); | ||
253 | ASSERT(object->dentry->d_parent); | ||
254 | |||
255 | dir = dget_parent(object->dentry); | ||
256 | |||
257 | mutex_lock(&dir->d_inode->i_mutex); | ||
258 | ret = cachefiles_bury_object(cache, dir, object->dentry); | ||
259 | |||
260 | dput(dir); | ||
261 | _leave(" = %d", ret); | ||
262 | return ret; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * walk from the parent object to the child object through the backing | ||
267 | * filesystem, creating directories as we go | ||
268 | */ | ||
/*
 * - key is a run of NUL-terminated path components ended by a second NUL;
 *   each component is looked up (and created if absent) inside the previous
 *   one, starting from parent's dentry
 * - intermediate components and index objects are made as directories; any
 *   other terminal object is made as a regular file
 * - a terminal object that already exists is checked against auxdata with
 *   cachefiles_check_object_xattr(); if that says -ESTALE the object is
 *   buried and the lookup of that component is redone
 * - on success the object holds the dentry, has been marked active and 0 is
 *   returned; on failure a negative error is returned, with -ENOSPC reported
 *   as -ENOBUFS
 */
269 | int cachefiles_walk_to_object(struct cachefiles_object *parent, | ||
270 | struct cachefiles_object *object, | ||
271 | const char *key, | ||
272 | struct cachefiles_xattr *auxdata) | ||
273 | { | ||
274 | struct cachefiles_cache *cache; | ||
275 | struct dentry *dir, *next = NULL; | ||
276 | unsigned long start; | ||
277 | const char *name; | ||
278 | int ret, nlen; | ||
279 | |||
280 | _enter("{%p},,%s,", parent->dentry, key); | ||
281 | |||
282 | cache = container_of(parent->fscache.cache, | ||
283 | struct cachefiles_cache, cache); | ||
284 | |||
285 | ASSERT(parent->dentry); | ||
286 | ASSERT(parent->dentry->d_inode); | ||
287 | |||
288 | if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) { | ||
289 | // TODO: convert file to dir | ||
290 | _leave("looking up in none directory"); | ||
291 | return -ENOBUFS; | ||
292 | } | ||
293 | |||
/* dir is the directory currently being searched; we hold a ref on it */
294 | dir = dget(parent->dentry); | ||
295 | |||
296 | advance: | ||
297 | /* attempt to transit the first directory component */ | ||
298 | name = key; | ||
299 | nlen = strlen(key); | ||
300 | |||
301 | /* key ends in a double NUL */ | ||
/* from here on, key == NULL means that name is the final component */
302 | key = key + nlen + 1; | ||
303 | if (!*key) | ||
304 | key = NULL; | ||
305 | |||
306 | lookup_again: | ||
307 | /* search the current directory for the element name */ | ||
308 | _debug("lookup '%s'", name); | ||
309 | |||
310 | mutex_lock(&dir->d_inode->i_mutex); | ||
311 | |||
312 | start = jiffies; | ||
313 | next = lookup_one_len(name, dir, nlen); | ||
314 | cachefiles_hist(cachefiles_lookup_histogram, start); | ||
315 | if (IS_ERR(next)) | ||
316 | goto lookup_error; | ||
317 | |||
318 | _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative"); | ||
319 | |||
/* the terminal object counts as new if nothing exists under its name yet */
320 | if (!key) | ||
321 | object->new = !next->d_inode; | ||
322 | |||
323 | /* if this element of the path doesn't exist, then the lookup phase | ||
324 | * failed, and we can release any readers in the certain knowledge that | ||
325 | * there's nothing for them to actually read */ | ||
326 | if (!next->d_inode) | ||
327 | fscache_object_lookup_negative(&object->fscache); | ||
328 | |||
329 | /* we need to create the object if it's negative */ | ||
330 | if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) { | ||
331 | /* index objects and intervening tree levels must be subdirs */ | ||
332 | if (!next->d_inode) { | ||
333 | ret = cachefiles_has_space(cache, 1, 0); | ||
334 | if (ret < 0) | ||
335 | goto create_error; | ||
336 | |||
337 | start = jiffies; | ||
338 | ret = vfs_mkdir(dir->d_inode, next, 0); | ||
339 | cachefiles_hist(cachefiles_mkdir_histogram, start); | ||
340 | if (ret < 0) | ||
341 | goto create_error; | ||
342 | |||
343 | ASSERT(next->d_inode); | ||
344 | |||
345 | _debug("mkdir -> %p{%p{ino=%lu}}", | ||
346 | next, next->d_inode, next->d_inode->i_ino); | ||
347 | |||
348 | } else if (!S_ISDIR(next->d_inode->i_mode)) { | ||
349 | kerror("inode %lu is not a directory", | ||
350 | next->d_inode->i_ino); | ||
351 | ret = -ENOBUFS; | ||
352 | goto error; | ||
353 | } | ||
354 | |||
355 | } else { | ||
356 | /* non-index objects start out life as files */ | ||
357 | if (!next->d_inode) { | ||
358 | ret = cachefiles_has_space(cache, 1, 0); | ||
359 | if (ret < 0) | ||
360 | goto create_error; | ||
361 | |||
362 | start = jiffies; | ||
363 | ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); | ||
364 | cachefiles_hist(cachefiles_create_histogram, start); | ||
365 | if (ret < 0) | ||
366 | goto create_error; | ||
367 | |||
368 | ASSERT(next->d_inode); | ||
369 | |||
370 | _debug("create -> %p{%p{ino=%lu}}", | ||
371 | next, next->d_inode, next->d_inode->i_ino); | ||
372 | |||
373 | } else if (!S_ISDIR(next->d_inode->i_mode) && | ||
374 | !S_ISREG(next->d_inode->i_mode) | ||
375 | ) { | ||
376 | kerror("inode %lu is not a file or directory", | ||
377 | next->d_inode->i_ino); | ||
378 | ret = -ENOBUFS; | ||
379 | goto error; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | /* process the next component */ | ||
/* the ref held on next is carried over into dir for the next round */
384 | if (key) { | ||
385 | _debug("advance"); | ||
386 | mutex_unlock(&dir->d_inode->i_mutex); | ||
387 | dput(dir); | ||
388 | dir = next; | ||
389 | next = NULL; | ||
390 | goto advance; | ||
391 | } | ||
392 | |||
393 | /* we've found the object we were looking for */ | ||
394 | object->dentry = next; | ||
395 | |||
396 | /* if we've found that the terminal object exists, then we need to | ||
397 | * check its attributes and delete it if it's out of date */ | ||
/* NOTE(review): results other than -ESTALE are not acted upon here */
398 | if (!object->new) { | ||
399 | _debug("validate '%*.*s'", | ||
400 | next->d_name.len, next->d_name.len, next->d_name.name); | ||
401 | |||
402 | ret = cachefiles_check_object_xattr(object, auxdata); | ||
403 | if (ret == -ESTALE) { | ||
404 | /* delete the object (the deleter drops the directory | ||
405 | * mutex) */ | ||
406 | object->dentry = NULL; | ||
407 | |||
408 | ret = cachefiles_bury_object(cache, dir, next); | ||
409 | dput(next); | ||
410 | next = NULL; | ||
411 | |||
412 | if (ret < 0) | ||
413 | goto delete_error; | ||
414 | |||
415 | _debug("redo lookup"); | ||
416 | goto lookup_again; | ||
417 | } | ||
418 | } | ||
419 | |||
420 | /* note that we're now using this object */ | ||
/* (done with the directory mutex still held) */
421 | cachefiles_mark_object_active(cache, object); | ||
422 | |||
423 | mutex_unlock(&dir->d_inode->i_mutex); | ||
424 | dput(dir); | ||
425 | dir = NULL; | ||
426 | |||
427 | _debug("=== OBTAINED_OBJECT ==="); | ||
428 | |||
429 | if (object->new) { | ||
430 | /* attach data to a newly constructed terminal object */ | ||
431 | ret = cachefiles_set_object_xattr(object, auxdata); | ||
432 | if (ret < 0) | ||
433 | goto check_error; | ||
434 | } else { | ||
435 | /* always update the atime on an object we've just looked up | ||
436 | * (this is used to keep track of culling, and atimes are only | ||
437 | * updated by read, write and readdir but not lookup or | ||
438 | * open) */ | ||
439 | touch_atime(cache->mnt, next); | ||
440 | } | ||
441 | |||
442 | /* open a file interface onto a data file */ | ||
/* a regular file is only accepted as the backer if its mapping has a bmap
 * operation; otherwise we fail with -EPERM */
443 | if (object->type != FSCACHE_COOKIE_TYPE_INDEX) { | ||
444 | if (S_ISREG(object->dentry->d_inode->i_mode)) { | ||
445 | const struct address_space_operations *aops; | ||
446 | |||
447 | ret = -EPERM; | ||
448 | aops = object->dentry->d_inode->i_mapping->a_ops; | ||
449 | if (!aops->bmap) | ||
450 | goto check_error; | ||
451 | |||
452 | object->backer = object->dentry; | ||
453 | } else { | ||
454 | BUG(); // TODO: open file in data-class subdir | ||
455 | } | ||
456 | } | ||
457 | |||
458 | object->new = 0; | ||
459 | fscache_obtained_object(&object->fscache); | ||
460 | |||
461 | _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino); | ||
462 | return 0; | ||
463 | |||
/* has_space, mkdir or create failed: the directory mutex is still held and
 * next is still referenced, so unwind through the error label */
464 | create_error: | ||
465 | _debug("create error %d", ret); | ||
466 | if (ret == -EIO) | ||
467 | cachefiles_io_error(cache, "Create/mkdir failed"); | ||
468 | goto error; | ||
469 | |||
/* failure after the object was marked active: take it back out of the active
 * tree, clear the ACTIVE bit, wake anyone waiting on it and drop the object's
 * dentry; dir has already been unlocked and released by this point */
470 | check_error: | ||
471 | _debug("check error %d", ret); | ||
472 | write_lock(&cache->active_lock); | ||
473 | rb_erase(&object->active_node, &cache->active_nodes); | ||
474 | clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); | ||
475 | wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); | ||
476 | write_unlock(&cache->active_lock); | ||
477 | |||
478 | dput(object->dentry); | ||
479 | object->dentry = NULL; | ||
480 | goto error_out; | ||
481 | |||
/* burying failed: the directory mutex was already dropped by the deleter and
 * next already released, so only dir remains to be put */
482 | delete_error: | ||
483 | _debug("delete error %d", ret); | ||
484 | goto error_out2; | ||
485 | |||
486 | lookup_error: | ||
487 | _debug("lookup error %ld", PTR_ERR(next)); | ||
488 | ret = PTR_ERR(next); | ||
489 | if (ret == -EIO) | ||
490 | cachefiles_io_error(cache, "Lookup failed"); | ||
491 | next = NULL; | ||
492 | error: | ||
493 | mutex_unlock(&dir->d_inode->i_mutex); | ||
494 | dput(next); | ||
495 | error_out2: | ||
496 | dput(dir); | ||
497 | error_out: | ||
498 | if (ret == -ENOSPC) | ||
499 | ret = -ENOBUFS; | ||
500 | |||
501 | _leave(" = error %d", -ret); | ||
502 | return ret; | ||
503 | } | ||
504 | |||
505 | /* | ||
506 | * get a subdirectory | ||
507 | */ | ||
508 | struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, | ||
509 | struct dentry *dir, | ||
510 | const char *dirname) | ||
511 | { | ||
512 | struct dentry *subdir; | ||
513 | unsigned long start; | ||
514 | int ret; | ||
515 | |||
516 | _enter(",,%s", dirname); | ||
517 | |||
518 | /* search the current directory for the element name */ | ||
519 | mutex_lock(&dir->d_inode->i_mutex); | ||
520 | |||
521 | start = jiffies; | ||
522 | subdir = lookup_one_len(dirname, dir, strlen(dirname)); | ||
523 | cachefiles_hist(cachefiles_lookup_histogram, start); | ||
524 | if (IS_ERR(subdir)) { | ||
525 | if (PTR_ERR(subdir) == -ENOMEM) | ||
526 | goto nomem_d_alloc; | ||
527 | goto lookup_error; | ||
528 | } | ||
529 | |||
530 | _debug("subdir -> %p %s", | ||
531 | subdir, subdir->d_inode ? "positive" : "negative"); | ||
532 | |||
533 | /* we need to create the subdir if it doesn't exist yet */ | ||
534 | if (!subdir->d_inode) { | ||
535 | ret = cachefiles_has_space(cache, 1, 0); | ||
536 | if (ret < 0) | ||
537 | goto mkdir_error; | ||
538 | |||
539 | _debug("attempt mkdir"); | ||
540 | |||
541 | ret = vfs_mkdir(dir->d_inode, subdir, 0700); | ||
542 | if (ret < 0) | ||
543 | goto mkdir_error; | ||
544 | |||
545 | ASSERT(subdir->d_inode); | ||
546 | |||
547 | _debug("mkdir -> %p{%p{ino=%lu}}", | ||
548 | subdir, | ||
549 | subdir->d_inode, | ||
550 | subdir->d_inode->i_ino); | ||
551 | } | ||
552 | |||
553 | mutex_unlock(&dir->d_inode->i_mutex); | ||
554 | |||
555 | /* we need to make sure the subdir is a directory */ | ||
556 | ASSERT(subdir->d_inode); | ||
557 | |||
558 | if (!S_ISDIR(subdir->d_inode->i_mode)) { | ||
559 | kerror("%s is not a directory", dirname); | ||
560 | ret = -EIO; | ||
561 | goto check_error; | ||
562 | } | ||
563 | |||
564 | ret = -EPERM; | ||
565 | if (!subdir->d_inode->i_op || | ||
566 | !subdir->d_inode->i_op->setxattr || | ||
567 | !subdir->d_inode->i_op->getxattr || | ||
568 | !subdir->d_inode->i_op->lookup || | ||
569 | !subdir->d_inode->i_op->mkdir || | ||
570 | !subdir->d_inode->i_op->create || | ||
571 | !subdir->d_inode->i_op->rename || | ||
572 | !subdir->d_inode->i_op->rmdir || | ||
573 | !subdir->d_inode->i_op->unlink) | ||
574 | goto check_error; | ||
575 | |||
576 | _leave(" = [%lu]", subdir->d_inode->i_ino); | ||
577 | return subdir; | ||
578 | |||
579 | check_error: | ||
580 | dput(subdir); | ||
581 | _leave(" = %d [check]", ret); | ||
582 | return ERR_PTR(ret); | ||
583 | |||
584 | mkdir_error: | ||
585 | mutex_unlock(&dir->d_inode->i_mutex); | ||
586 | dput(subdir); | ||
587 | kerror("mkdir %s failed with error %d", dirname, ret); | ||
588 | return ERR_PTR(ret); | ||
589 | |||
590 | lookup_error: | ||
591 | mutex_unlock(&dir->d_inode->i_mutex); | ||
592 | ret = PTR_ERR(subdir); | ||
593 | kerror("Lookup %s failed with error %d", dirname, ret); | ||
594 | return ERR_PTR(ret); | ||
595 | |||
596 | nomem_d_alloc: | ||
597 | mutex_unlock(&dir->d_inode->i_mutex); | ||
598 | _leave(" = -ENOMEM"); | ||
599 | return ERR_PTR(-ENOMEM); | ||
600 | } | ||
601 | |||
602 | /* | ||
603 | * find out if an object is in use or not | ||
604 | * - if finds object and it's not in use: | ||
605 | * - returns a pointer to the object and a reference on it | ||
606 | * - returns with the directory locked | ||
607 | */ | ||
608 | static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, | ||
609 | struct dentry *dir, | ||
610 | char *filename) | ||
611 | { | ||
612 | struct cachefiles_object *object; | ||
613 | struct rb_node *_n; | ||
614 | struct dentry *victim; | ||
615 | unsigned long start; | ||
616 | int ret; | ||
617 | |||
618 | //_enter(",%*.*s/,%s", | ||
619 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | ||
620 | |||
621 | /* look up the victim */ | ||
622 | mutex_lock_nested(&dir->d_inode->i_mutex, 1); | ||
623 | |||
624 | start = jiffies; | ||
625 | victim = lookup_one_len(filename, dir, strlen(filename)); | ||
626 | cachefiles_hist(cachefiles_lookup_histogram, start); | ||
627 | if (IS_ERR(victim)) | ||
628 | goto lookup_error; | ||
629 | |||
630 | //_debug("victim -> %p %s", | ||
631 | // victim, victim->d_inode ? "positive" : "negative"); | ||
632 | |||
633 | /* if the object is no longer there then we probably retired the object | ||
634 | * at the netfs's request whilst the cull was in progress | ||
635 | */ | ||
636 | if (!victim->d_inode) { | ||
637 | mutex_unlock(&dir->d_inode->i_mutex); | ||
638 | dput(victim); | ||
639 | _leave(" = -ENOENT [absent]"); | ||
640 | return ERR_PTR(-ENOENT); | ||
641 | } | ||
642 | |||
643 | /* check to see if we're using this object */ | ||
644 | read_lock(&cache->active_lock); | ||
645 | |||
646 | _n = cache->active_nodes.rb_node; | ||
647 | |||
648 | while (_n) { | ||
649 | object = rb_entry(_n, struct cachefiles_object, active_node); | ||
650 | |||
651 | if (object->dentry > victim) | ||
652 | _n = _n->rb_left; | ||
653 | else if (object->dentry < victim) | ||
654 | _n = _n->rb_right; | ||
655 | else | ||
656 | goto object_in_use; | ||
657 | } | ||
658 | |||
659 | read_unlock(&cache->active_lock); | ||
660 | |||
661 | //_leave(" = %p", victim); | ||
662 | return victim; | ||
663 | |||
664 | object_in_use: | ||
665 | read_unlock(&cache->active_lock); | ||
666 | mutex_unlock(&dir->d_inode->i_mutex); | ||
667 | dput(victim); | ||
668 | //_leave(" = -EBUSY [in use]"); | ||
669 | return ERR_PTR(-EBUSY); | ||
670 | |||
671 | lookup_error: | ||
672 | mutex_unlock(&dir->d_inode->i_mutex); | ||
673 | ret = PTR_ERR(victim); | ||
674 | if (ret == -ENOENT) { | ||
675 | /* file or dir now absent - probably retired by netfs */ | ||
676 | _leave(" = -ESTALE [absent]"); | ||
677 | return ERR_PTR(-ESTALE); | ||
678 | } | ||
679 | |||
680 | if (ret == -EIO) { | ||
681 | cachefiles_io_error(cache, "Lookup failed"); | ||
682 | } else if (ret != -ENOMEM) { | ||
683 | kerror("Internal error: %d", ret); | ||
684 | ret = -EIO; | ||
685 | } | ||
686 | |||
687 | _leave(" = %d", ret); | ||
688 | return ERR_PTR(ret); | ||
689 | } | ||
690 | |||
691 | /* | ||
692 | * cull an object if it's not in use | ||
693 | * - called only by cache manager daemon | ||
694 | */ | ||
695 | int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir, | ||
696 | char *filename) | ||
697 | { | ||
698 | struct dentry *victim; | ||
699 | int ret; | ||
700 | |||
701 | _enter(",%*.*s/,%s", | ||
702 | dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | ||
703 | |||
704 | victim = cachefiles_check_active(cache, dir, filename); | ||
705 | if (IS_ERR(victim)) | ||
706 | return PTR_ERR(victim); | ||
707 | |||
708 | _debug("victim -> %p %s", | ||
709 | victim, victim->d_inode ? "positive" : "negative"); | ||
710 | |||
711 | /* okay... the victim is not being used so we can cull it | ||
712 | * - start by marking it as stale | ||
713 | */ | ||
714 | _debug("victim is cullable"); | ||
715 | |||
716 | ret = cachefiles_remove_object_xattr(cache, victim); | ||
717 | if (ret < 0) | ||
718 | goto error_unlock; | ||
719 | |||
720 | /* actually remove the victim (drops the dir mutex) */ | ||
721 | _debug("bury"); | ||
722 | |||
723 | ret = cachefiles_bury_object(cache, dir, victim); | ||
724 | if (ret < 0) | ||
725 | goto error; | ||
726 | |||
727 | dput(victim); | ||
728 | _leave(" = 0"); | ||
729 | return 0; | ||
730 | |||
731 | error_unlock: | ||
732 | mutex_unlock(&dir->d_inode->i_mutex); | ||
733 | error: | ||
734 | dput(victim); | ||
735 | if (ret == -ENOENT) { | ||
736 | /* file or dir now absent - probably retired by netfs */ | ||
737 | _leave(" = -ESTALE [absent]"); | ||
738 | return -ESTALE; | ||
739 | } | ||
740 | |||
741 | if (ret != -ENOMEM) { | ||
742 | kerror("Internal error: %d", ret); | ||
743 | ret = -EIO; | ||
744 | } | ||
745 | |||
746 | _leave(" = %d", ret); | ||
747 | return ret; | ||
748 | } | ||
749 | |||
750 | /* | ||
751 | * find out if an object is in use or not | ||
752 | * - called only by cache manager daemon | ||
753 | * - returns -EBUSY or 0 to indicate whether an object is in use or not | ||
754 | */ | ||
755 | int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir, | ||
756 | char *filename) | ||
757 | { | ||
758 | struct dentry *victim; | ||
759 | |||
760 | //_enter(",%*.*s/,%s", | ||
761 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | ||
762 | |||
763 | victim = cachefiles_check_active(cache, dir, filename); | ||
764 | if (IS_ERR(victim)) | ||
765 | return PTR_ERR(victim); | ||
766 | |||
767 | mutex_unlock(&dir->d_inode->i_mutex); | ||
768 | dput(victim); | ||
769 | //_leave(" = 0"); | ||
770 | return 0; | ||
771 | } | ||
diff --git a/fs/cachefiles/proc.c b/fs/cachefiles/proc.c new file mode 100644 index 000000000000..eccd33941199 --- /dev/null +++ b/fs/cachefiles/proc.c | |||
@@ -0,0 +1,134 @@ | |||
1 | /* CacheFiles statistics | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/proc_fs.h> | ||
14 | #include <linux/seq_file.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | atomic_t cachefiles_lookup_histogram[HZ]; | ||
18 | atomic_t cachefiles_mkdir_histogram[HZ]; | ||
19 | atomic_t cachefiles_create_histogram[HZ]; | ||
20 | |||
21 | /* | ||
22 | * display the latency histogram | ||
23 | */ | ||
24 | static int cachefiles_histogram_show(struct seq_file *m, void *v) | ||
25 | { | ||
26 | unsigned long index; | ||
27 | unsigned x, y, z, t; | ||
28 | |||
29 | switch ((unsigned long) v) { | ||
30 | case 1: | ||
31 | seq_puts(m, "JIFS SECS LOOKUPS MKDIRS CREATES\n"); | ||
32 | return 0; | ||
33 | case 2: | ||
34 | seq_puts(m, "===== ===== ========= ========= =========\n"); | ||
35 | return 0; | ||
36 | default: | ||
37 | index = (unsigned long) v - 3; | ||
38 | x = atomic_read(&cachefiles_lookup_histogram[index]); | ||
39 | y = atomic_read(&cachefiles_mkdir_histogram[index]); | ||
40 | z = atomic_read(&cachefiles_create_histogram[index]); | ||
41 | if (x == 0 && y == 0 && z == 0) | ||
42 | return 0; | ||
43 | |||
44 | t = (index * 1000) / HZ; | ||
45 | |||
46 | seq_printf(m, "%4lu 0.%03u %9u %9u %9u\n", index, t, x, y, z); | ||
47 | return 0; | ||
48 | } | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * set up the iterator to start reading from the first line | ||
53 | */ | ||
54 | static void *cachefiles_histogram_start(struct seq_file *m, loff_t *_pos) | ||
55 | { | ||
56 | if ((unsigned long long)*_pos >= HZ + 2) | ||
57 | return NULL; | ||
58 | if (*_pos == 0) | ||
59 | *_pos = 1; | ||
60 | return (void *)(unsigned long) *_pos; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * move to the next line | ||
65 | */ | ||
66 | static void *cachefiles_histogram_next(struct seq_file *m, void *v, loff_t *pos) | ||
67 | { | ||
68 | (*pos)++; | ||
69 | return (unsigned long long)*pos > HZ + 2 ? | ||
70 | NULL : (void *)(unsigned long) *pos; | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * clean up after reading | ||
75 | */ | ||
76 | static void cachefiles_histogram_stop(struct seq_file *m, void *v) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static const struct seq_operations cachefiles_histogram_ops = { | ||
81 | .start = cachefiles_histogram_start, | ||
82 | .stop = cachefiles_histogram_stop, | ||
83 | .next = cachefiles_histogram_next, | ||
84 | .show = cachefiles_histogram_show, | ||
85 | }; | ||
86 | |||
87 | /* | ||
88 | * open "/proc/fs/cachefiles/XXX" which provide statistics summaries | ||
89 | */ | ||
90 | static int cachefiles_histogram_open(struct inode *inode, struct file *file) | ||
91 | { | ||
92 | return seq_open(file, &cachefiles_histogram_ops); | ||
93 | } | ||
94 | |||
95 | static const struct file_operations cachefiles_histogram_fops = { | ||
96 | .owner = THIS_MODULE, | ||
97 | .open = cachefiles_histogram_open, | ||
98 | .read = seq_read, | ||
99 | .llseek = seq_lseek, | ||
100 | .release = seq_release, | ||
101 | }; | ||
102 | |||
103 | /* | ||
104 | * initialise the /proc/fs/cachefiles/ directory | ||
105 | */ | ||
106 | int __init cachefiles_proc_init(void) | ||
107 | { | ||
108 | _enter(""); | ||
109 | |||
110 | if (!proc_mkdir("fs/cachefiles", NULL)) | ||
111 | goto error_dir; | ||
112 | |||
113 | if (!proc_create("fs/cachefiles/histogram", S_IFREG | 0444, NULL, | ||
114 | &cachefiles_histogram_fops)) | ||
115 | goto error_histogram; | ||
116 | |||
117 | _leave(" = 0"); | ||
118 | return 0; | ||
119 | |||
120 | error_histogram: | ||
121 | remove_proc_entry("fs/cachefiles", NULL); | ||
122 | error_dir: | ||
123 | _leave(" = -ENOMEM"); | ||
124 | return -ENOMEM; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * clean up the /proc/fs/cachefiles/ directory | ||
129 | */ | ||
130 | void cachefiles_proc_cleanup(void) | ||
131 | { | ||
132 | remove_proc_entry("fs/cachefiles/histogram", NULL); | ||
133 | remove_proc_entry("fs/cachefiles", NULL); | ||
134 | } | ||
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c new file mode 100644 index 000000000000..a69787e7dd96 --- /dev/null +++ b/fs/cachefiles/rdwr.c | |||
@@ -0,0 +1,879 @@ | |||
1 | /* Storage object read/write | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mount.h> | ||
13 | #include <linux/file.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | /* | ||
17 | * detect wake up events generated by the unlocking of pages in which we're | ||
18 | * interested | ||
19 | * - we use this to detect read completion of backing pages | ||
20 | * - the caller holds the waitqueue lock | ||
21 | */ | ||
22 | static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode, | ||
23 | int sync, void *_key) | ||
24 | { | ||
25 | struct cachefiles_one_read *monitor = | ||
26 | container_of(wait, struct cachefiles_one_read, monitor); | ||
27 | struct cachefiles_object *object; | ||
28 | struct wait_bit_key *key = _key; | ||
29 | struct page *page = wait->private; | ||
30 | |||
31 | ASSERT(key); | ||
32 | |||
33 | _enter("{%lu},%u,%d,{%p,%u}", | ||
34 | monitor->netfs_page->index, mode, sync, | ||
35 | key->flags, key->bit_nr); | ||
36 | |||
37 | if (key->flags != &page->flags || | ||
38 | key->bit_nr != PG_locked) | ||
39 | return 0; | ||
40 | |||
41 | _debug("--- monitor %p %lx ---", page, page->flags); | ||
42 | |||
43 | if (!PageUptodate(page) && !PageError(page)) | ||
44 | dump_stack(); | ||
45 | |||
46 | /* remove from the waitqueue */ | ||
47 | list_del(&wait->task_list); | ||
48 | |||
49 | /* move onto the action list and queue for FS-Cache thread pool */ | ||
50 | ASSERT(monitor->op); | ||
51 | |||
52 | object = container_of(monitor->op->op.object, | ||
53 | struct cachefiles_object, fscache); | ||
54 | |||
55 | spin_lock(&object->work_lock); | ||
56 | list_add_tail(&monitor->op_link, &monitor->op->to_do); | ||
57 | spin_unlock(&object->work_lock); | ||
58 | |||
59 | fscache_enqueue_retrieval(monitor->op); | ||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * copy data from backing pages to netfs pages to complete a read operation | ||
65 | * - driven by FS-Cache's thread pool | ||
66 | */ | ||
67 | static void cachefiles_read_copier(struct fscache_operation *_op) | ||
68 | { | ||
69 | struct cachefiles_one_read *monitor; | ||
70 | struct cachefiles_object *object; | ||
71 | struct fscache_retrieval *op; | ||
72 | struct pagevec pagevec; | ||
73 | int error, max; | ||
74 | |||
75 | op = container_of(_op, struct fscache_retrieval, op); | ||
76 | object = container_of(op->op.object, | ||
77 | struct cachefiles_object, fscache); | ||
78 | |||
79 | _enter("{ino=%lu}", object->backer->d_inode->i_ino); | ||
80 | |||
81 | pagevec_init(&pagevec, 0); | ||
82 | |||
83 | max = 8; | ||
84 | spin_lock_irq(&object->work_lock); | ||
85 | |||
86 | while (!list_empty(&op->to_do)) { | ||
87 | monitor = list_entry(op->to_do.next, | ||
88 | struct cachefiles_one_read, op_link); | ||
89 | list_del(&monitor->op_link); | ||
90 | |||
91 | spin_unlock_irq(&object->work_lock); | ||
92 | |||
93 | _debug("- copy {%lu}", monitor->back_page->index); | ||
94 | |||
95 | error = -EIO; | ||
96 | if (PageUptodate(monitor->back_page)) { | ||
97 | copy_highpage(monitor->netfs_page, monitor->back_page); | ||
98 | |||
99 | pagevec_add(&pagevec, monitor->netfs_page); | ||
100 | fscache_mark_pages_cached(monitor->op, &pagevec); | ||
101 | error = 0; | ||
102 | } | ||
103 | |||
104 | if (error) | ||
105 | cachefiles_io_error_obj( | ||
106 | object, | ||
107 | "Readpage failed on backing file %lx", | ||
108 | (unsigned long) monitor->back_page->flags); | ||
109 | |||
110 | page_cache_release(monitor->back_page); | ||
111 | |||
112 | fscache_end_io(op, monitor->netfs_page, error); | ||
113 | page_cache_release(monitor->netfs_page); | ||
114 | fscache_put_retrieval(op); | ||
115 | kfree(monitor); | ||
116 | |||
117 | /* let the thread pool have some air occasionally */ | ||
118 | max--; | ||
119 | if (max < 0 || need_resched()) { | ||
120 | if (!list_empty(&op->to_do)) | ||
121 | fscache_enqueue_retrieval(op); | ||
122 | _leave(" [maxed out]"); | ||
123 | return; | ||
124 | } | ||
125 | |||
126 | spin_lock_irq(&object->work_lock); | ||
127 | } | ||
128 | |||
129 | spin_unlock_irq(&object->work_lock); | ||
130 | _leave(""); | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * read the corresponding page to the given set from the backing file | ||
135 | * - an uncertain page is simply discarded, to be tried again another time | ||
136 | */ | ||
137 | static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | ||
138 | struct fscache_retrieval *op, | ||
139 | struct page *netpage, | ||
140 | struct pagevec *pagevec) | ||
141 | { | ||
142 | struct cachefiles_one_read *monitor; | ||
143 | struct address_space *bmapping; | ||
144 | struct page *newpage, *backpage; | ||
145 | int ret; | ||
146 | |||
147 | _enter(""); | ||
148 | |||
149 | pagevec_reinit(pagevec); | ||
150 | |||
151 | _debug("read back %p{%lu,%d}", | ||
152 | netpage, netpage->index, page_count(netpage)); | ||
153 | |||
154 | monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); | ||
155 | if (!monitor) | ||
156 | goto nomem; | ||
157 | |||
158 | monitor->netfs_page = netpage; | ||
159 | monitor->op = fscache_get_retrieval(op); | ||
160 | |||
161 | init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter); | ||
162 | |||
163 | /* attempt to get hold of the backing page */ | ||
164 | bmapping = object->backer->d_inode->i_mapping; | ||
165 | newpage = NULL; | ||
166 | |||
167 | for (;;) { | ||
168 | backpage = find_get_page(bmapping, netpage->index); | ||
169 | if (backpage) | ||
170 | goto backing_page_already_present; | ||
171 | |||
172 | if (!newpage) { | ||
173 | newpage = page_cache_alloc_cold(bmapping); | ||
174 | if (!newpage) | ||
175 | goto nomem_monitor; | ||
176 | } | ||
177 | |||
178 | ret = add_to_page_cache(newpage, bmapping, | ||
179 | netpage->index, GFP_KERNEL); | ||
180 | if (ret == 0) | ||
181 | goto installed_new_backing_page; | ||
182 | if (ret != -EEXIST) | ||
183 | goto nomem_page; | ||
184 | } | ||
185 | |||
186 | /* we've installed a new backing page, so now we need to add it | ||
187 | * to the LRU list and start it reading */ | ||
188 | installed_new_backing_page: | ||
189 | _debug("- new %p", newpage); | ||
190 | |||
191 | backpage = newpage; | ||
192 | newpage = NULL; | ||
193 | |||
194 | page_cache_get(backpage); | ||
195 | pagevec_add(pagevec, backpage); | ||
196 | __pagevec_lru_add_file(pagevec); | ||
197 | |||
198 | read_backing_page: | ||
199 | ret = bmapping->a_ops->readpage(NULL, backpage); | ||
200 | if (ret < 0) | ||
201 | goto read_error; | ||
202 | |||
203 | /* set the monitor to transfer the data across */ | ||
204 | monitor_backing_page: | ||
205 | _debug("- monitor add"); | ||
206 | |||
207 | /* install the monitor */ | ||
208 | page_cache_get(monitor->netfs_page); | ||
209 | page_cache_get(backpage); | ||
210 | monitor->back_page = backpage; | ||
211 | monitor->monitor.private = backpage; | ||
212 | add_page_wait_queue(backpage, &monitor->monitor); | ||
213 | monitor = NULL; | ||
214 | |||
215 | /* but the page may have been read before the monitor was installed, so | ||
216 | * the monitor may miss the event - so we have to ensure that we do get | ||
217 | * one in such a case */ | ||
218 | if (trylock_page(backpage)) { | ||
219 | _debug("jumpstart %p {%lx}", backpage, backpage->flags); | ||
220 | unlock_page(backpage); | ||
221 | } | ||
222 | goto success; | ||
223 | |||
224 | /* if the backing page is already present, it can be in one of | ||
225 | * three states: read in progress, read failed or read okay */ | ||
226 | backing_page_already_present: | ||
227 | _debug("- present"); | ||
228 | |||
229 | if (newpage) { | ||
230 | page_cache_release(newpage); | ||
231 | newpage = NULL; | ||
232 | } | ||
233 | |||
234 | if (PageError(backpage)) | ||
235 | goto io_error; | ||
236 | |||
237 | if (PageUptodate(backpage)) | ||
238 | goto backing_page_already_uptodate; | ||
239 | |||
240 | if (!trylock_page(backpage)) | ||
241 | goto monitor_backing_page; | ||
242 | _debug("read %p {%lx}", backpage, backpage->flags); | ||
243 | goto read_backing_page; | ||
244 | |||
245 | /* the backing page is already up to date, attach the netfs | ||
246 | * page to the pagecache and LRU and copy the data across */ | ||
247 | backing_page_already_uptodate: | ||
248 | _debug("- uptodate"); | ||
249 | |||
250 | pagevec_add(pagevec, netpage); | ||
251 | fscache_mark_pages_cached(op, pagevec); | ||
252 | |||
253 | copy_highpage(netpage, backpage); | ||
254 | fscache_end_io(op, netpage, 0); | ||
255 | |||
256 | success: | ||
257 | _debug("success"); | ||
258 | ret = 0; | ||
259 | |||
260 | out: | ||
261 | if (backpage) | ||
262 | page_cache_release(backpage); | ||
263 | if (monitor) { | ||
264 | fscache_put_retrieval(monitor->op); | ||
265 | kfree(monitor); | ||
266 | } | ||
267 | _leave(" = %d", ret); | ||
268 | return ret; | ||
269 | |||
270 | read_error: | ||
271 | _debug("read error %d", ret); | ||
272 | if (ret == -ENOMEM) | ||
273 | goto out; | ||
274 | io_error: | ||
275 | cachefiles_io_error_obj(object, "Page read error on backing file"); | ||
276 | ret = -ENOBUFS; | ||
277 | goto out; | ||
278 | |||
279 | nomem_page: | ||
280 | page_cache_release(newpage); | ||
281 | nomem_monitor: | ||
282 | fscache_put_retrieval(monitor->op); | ||
283 | kfree(monitor); | ||
284 | nomem: | ||
285 | _leave(" = -ENOMEM"); | ||
286 | return -ENOMEM; | ||
287 | } | ||
288 | |||
289 | /* | ||
290 | * read a page from the cache or allocate a block in which to store it | ||
291 | * - cache withdrawal is prevented by the caller | ||
292 | * - returns -EINTR if interrupted | ||
293 | * - returns -ENOMEM if ran out of memory | ||
294 | * - returns -ENOBUFS if no buffers can be made available | ||
295 | * - returns -ENOBUFS if page is beyond EOF | ||
296 | * - if the page is backed by a block in the cache: | ||
297 | * - a read will be started which will call the callback on completion | ||
298 | * - 0 will be returned | ||
299 | * - else if the page is unbacked: | ||
300 | * - the metadata will be retained | ||
301 | * - -ENODATA will be returned | ||
302 | */ | ||
303 | int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, | ||
304 | struct page *page, | ||
305 | gfp_t gfp) | ||
306 | { | ||
307 | struct cachefiles_object *object; | ||
308 | struct cachefiles_cache *cache; | ||
309 | struct pagevec pagevec; | ||
310 | struct inode *inode; | ||
311 | sector_t block0, block; | ||
312 | unsigned shift; | ||
313 | int ret; | ||
314 | |||
315 | object = container_of(op->op.object, | ||
316 | struct cachefiles_object, fscache); | ||
317 | cache = container_of(object->fscache.cache, | ||
318 | struct cachefiles_cache, cache); | ||
319 | |||
320 | _enter("{%p},{%lx},,,", object, page->index); | ||
321 | |||
322 | if (!object->backer) | ||
323 | return -ENOBUFS; | ||
324 | |||
325 | inode = object->backer->d_inode; | ||
326 | ASSERT(S_ISREG(inode->i_mode)); | ||
327 | ASSERT(inode->i_mapping->a_ops->bmap); | ||
328 | ASSERT(inode->i_mapping->a_ops->readpages); | ||
329 | |||
330 | /* calculate the shift required to use bmap */ | ||
331 | if (inode->i_sb->s_blocksize > PAGE_SIZE) | ||
332 | return -ENOBUFS; | ||
333 | |||
334 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | ||
335 | |||
336 | op->op.flags = FSCACHE_OP_FAST; | ||
337 | op->op.processor = cachefiles_read_copier; | ||
338 | |||
339 | pagevec_init(&pagevec, 0); | ||
340 | |||
341 | /* we assume the absence or presence of the first block is a good | ||
342 | * enough indication for the page as a whole | ||
343 | * - TODO: don't use bmap() for this as it is _not_ actually good | ||
344 | * enough for this as it doesn't indicate errors, but it's all we've | ||
345 | * got for the moment | ||
346 | */ | ||
347 | block0 = page->index; | ||
348 | block0 <<= shift; | ||
349 | |||
350 | block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); | ||
351 | _debug("%llx -> %llx", | ||
352 | (unsigned long long) block0, | ||
353 | (unsigned long long) block); | ||
354 | |||
355 | if (block) { | ||
356 | /* submit the apparently valid page to the backing fs to be | ||
357 | * read from disk */ | ||
358 | ret = cachefiles_read_backing_file_one(object, op, page, | ||
359 | &pagevec); | ||
360 | } else if (cachefiles_has_space(cache, 0, 1) == 0) { | ||
361 | /* there's space in the cache we can use */ | ||
362 | pagevec_add(&pagevec, page); | ||
363 | fscache_mark_pages_cached(op, &pagevec); | ||
364 | ret = -ENODATA; | ||
365 | } else { | ||
366 | ret = -ENOBUFS; | ||
367 | } | ||
368 | |||
369 | _leave(" = %d", ret); | ||
370 | return ret; | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * read the corresponding pages to the given set from the backing file | ||
375 | * - any uncertain pages are simply discarded, to be tried again another time | ||
376 | */ | ||
377 | static int cachefiles_read_backing_file(struct cachefiles_object *object, | ||
378 | struct fscache_retrieval *op, | ||
379 | struct list_head *list, | ||
380 | struct pagevec *mark_pvec) | ||
381 | { | ||
382 | struct cachefiles_one_read *monitor = NULL; | ||
383 | struct address_space *bmapping = object->backer->d_inode->i_mapping; | ||
384 | struct pagevec lru_pvec; | ||
385 | struct page *newpage = NULL, *netpage, *_n, *backpage = NULL; | ||
386 | int ret = 0; | ||
387 | |||
388 | _enter(""); | ||
389 | |||
390 | pagevec_init(&lru_pvec, 0); | ||
391 | |||
392 | list_for_each_entry_safe(netpage, _n, list, lru) { | ||
393 | list_del(&netpage->lru); | ||
394 | |||
395 | _debug("read back %p{%lu,%d}", | ||
396 | netpage, netpage->index, page_count(netpage)); | ||
397 | |||
398 | if (!monitor) { | ||
399 | monitor = kzalloc(sizeof(*monitor), GFP_KERNEL); | ||
400 | if (!monitor) | ||
401 | goto nomem; | ||
402 | |||
403 | monitor->op = fscache_get_retrieval(op); | ||
404 | init_waitqueue_func_entry(&monitor->monitor, | ||
405 | cachefiles_read_waiter); | ||
406 | } | ||
407 | |||
408 | for (;;) { | ||
409 | backpage = find_get_page(bmapping, netpage->index); | ||
410 | if (backpage) | ||
411 | goto backing_page_already_present; | ||
412 | |||
413 | if (!newpage) { | ||
414 | newpage = page_cache_alloc_cold(bmapping); | ||
415 | if (!newpage) | ||
416 | goto nomem; | ||
417 | } | ||
418 | |||
419 | ret = add_to_page_cache(newpage, bmapping, | ||
420 | netpage->index, GFP_KERNEL); | ||
421 | if (ret == 0) | ||
422 | goto installed_new_backing_page; | ||
423 | if (ret != -EEXIST) | ||
424 | goto nomem; | ||
425 | } | ||
426 | |||
427 | /* we've installed a new backing page, so now we need to add it | ||
428 | * to the LRU list and start it reading */ | ||
429 | installed_new_backing_page: | ||
430 | _debug("- new %p", newpage); | ||
431 | |||
432 | backpage = newpage; | ||
433 | newpage = NULL; | ||
434 | |||
435 | page_cache_get(backpage); | ||
436 | if (!pagevec_add(&lru_pvec, backpage)) | ||
437 | __pagevec_lru_add_file(&lru_pvec); | ||
438 | |||
439 | reread_backing_page: | ||
440 | ret = bmapping->a_ops->readpage(NULL, backpage); | ||
441 | if (ret < 0) | ||
442 | goto read_error; | ||
443 | |||
444 | /* add the netfs page to the pagecache and LRU, and set the | ||
445 | * monitor to transfer the data across */ | ||
446 | monitor_backing_page: | ||
447 | _debug("- monitor add"); | ||
448 | |||
449 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | ||
450 | GFP_KERNEL); | ||
451 | if (ret < 0) { | ||
452 | if (ret == -EEXIST) { | ||
453 | page_cache_release(netpage); | ||
454 | continue; | ||
455 | } | ||
456 | goto nomem; | ||
457 | } | ||
458 | |||
459 | page_cache_get(netpage); | ||
460 | if (!pagevec_add(&lru_pvec, netpage)) | ||
461 | __pagevec_lru_add_file(&lru_pvec); | ||
462 | |||
463 | /* install a monitor */ | ||
464 | page_cache_get(netpage); | ||
465 | monitor->netfs_page = netpage; | ||
466 | |||
467 | page_cache_get(backpage); | ||
468 | monitor->back_page = backpage; | ||
469 | monitor->monitor.private = backpage; | ||
470 | add_page_wait_queue(backpage, &monitor->monitor); | ||
471 | monitor = NULL; | ||
472 | |||
473 | /* but the page may have been read before the monitor was | ||
474 | * installed, so the monitor may miss the event - so we have to | ||
475 | * ensure that we do get one in such a case */ | ||
476 | if (trylock_page(backpage)) { | ||
477 | _debug("2unlock %p {%lx}", backpage, backpage->flags); | ||
478 | unlock_page(backpage); | ||
479 | } | ||
480 | |||
481 | page_cache_release(backpage); | ||
482 | backpage = NULL; | ||
483 | |||
484 | page_cache_release(netpage); | ||
485 | netpage = NULL; | ||
486 | continue; | ||
487 | |||
488 | /* if the backing page is already present, it can be in one of | ||
489 | * three states: read in progress, read failed or read okay */ | ||
490 | backing_page_already_present: | ||
491 | _debug("- present %p", backpage); | ||
492 | |||
493 | if (PageError(backpage)) | ||
494 | goto io_error; | ||
495 | |||
496 | if (PageUptodate(backpage)) | ||
497 | goto backing_page_already_uptodate; | ||
498 | |||
499 | _debug("- not ready %p{%lx}", backpage, backpage->flags); | ||
500 | |||
501 | if (!trylock_page(backpage)) | ||
502 | goto monitor_backing_page; | ||
503 | |||
504 | if (PageError(backpage)) { | ||
505 | _debug("error %lx", backpage->flags); | ||
506 | unlock_page(backpage); | ||
507 | goto io_error; | ||
508 | } | ||
509 | |||
510 | if (PageUptodate(backpage)) | ||
511 | goto backing_page_already_uptodate_unlock; | ||
512 | |||
513 | /* we've locked a page that's neither up to date nor erroneous, | ||
514 | * so we need to attempt to read it again */ | ||
515 | goto reread_backing_page; | ||
516 | |||
517 | /* the backing page is already up to date, attach the netfs | ||
518 | * page to the pagecache and LRU and copy the data across */ | ||
519 | backing_page_already_uptodate_unlock: | ||
520 | _debug("uptodate %lx", backpage->flags); | ||
521 | unlock_page(backpage); | ||
522 | backing_page_already_uptodate: | ||
523 | _debug("- uptodate"); | ||
524 | |||
525 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | ||
526 | GFP_KERNEL); | ||
527 | if (ret < 0) { | ||
528 | if (ret == -EEXIST) { | ||
529 | page_cache_release(netpage); | ||
530 | continue; | ||
531 | } | ||
532 | goto nomem; | ||
533 | } | ||
534 | |||
535 | copy_highpage(netpage, backpage); | ||
536 | |||
537 | page_cache_release(backpage); | ||
538 | backpage = NULL; | ||
539 | |||
540 | if (!pagevec_add(mark_pvec, netpage)) | ||
541 | fscache_mark_pages_cached(op, mark_pvec); | ||
542 | |||
543 | page_cache_get(netpage); | ||
544 | if (!pagevec_add(&lru_pvec, netpage)) | ||
545 | __pagevec_lru_add_file(&lru_pvec); | ||
546 | |||
547 | fscache_end_io(op, netpage, 0); | ||
548 | page_cache_release(netpage); | ||
549 | netpage = NULL; | ||
550 | continue; | ||
551 | } | ||
552 | |||
553 | netpage = NULL; | ||
554 | |||
555 | _debug("out"); | ||
556 | |||
557 | out: | ||
558 | /* tidy up */ | ||
559 | pagevec_lru_add_file(&lru_pvec); | ||
560 | |||
561 | if (newpage) | ||
562 | page_cache_release(newpage); | ||
563 | if (netpage) | ||
564 | page_cache_release(netpage); | ||
565 | if (backpage) | ||
566 | page_cache_release(backpage); | ||
567 | if (monitor) { | ||
568 | fscache_put_retrieval(op); | ||
569 | kfree(monitor); | ||
570 | } | ||
571 | |||
572 | list_for_each_entry_safe(netpage, _n, list, lru) { | ||
573 | list_del(&netpage->lru); | ||
574 | page_cache_release(netpage); | ||
575 | } | ||
576 | |||
577 | _leave(" = %d", ret); | ||
578 | return ret; | ||
579 | |||
580 | nomem: | ||
581 | _debug("nomem"); | ||
582 | ret = -ENOMEM; | ||
583 | goto out; | ||
584 | |||
585 | read_error: | ||
586 | _debug("read error %d", ret); | ||
587 | if (ret == -ENOMEM) | ||
588 | goto out; | ||
589 | io_error: | ||
590 | cachefiles_io_error_obj(object, "Page read error on backing file"); | ||
591 | ret = -ENOBUFS; | ||
592 | goto out; | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * read a list of pages from the cache or allocate blocks in which to store | ||
597 | * them | ||
598 | */ | ||
599 | int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, | ||
600 | struct list_head *pages, | ||
601 | unsigned *nr_pages, | ||
602 | gfp_t gfp) | ||
603 | { | ||
604 | struct cachefiles_object *object; | ||
605 | struct cachefiles_cache *cache; | ||
606 | struct list_head backpages; | ||
607 | struct pagevec pagevec; | ||
608 | struct inode *inode; | ||
609 | struct page *page, *_n; | ||
610 | unsigned shift, nrbackpages; | ||
611 | int ret, ret2, space; | ||
612 | |||
613 | object = container_of(op->op.object, | ||
614 | struct cachefiles_object, fscache); | ||
615 | cache = container_of(object->fscache.cache, | ||
616 | struct cachefiles_cache, cache); | ||
617 | |||
618 | _enter("{OBJ%x,%d},,%d,,", | ||
619 | object->fscache.debug_id, atomic_read(&op->op.usage), | ||
620 | *nr_pages); | ||
621 | |||
622 | if (!object->backer) | ||
623 | return -ENOBUFS; | ||
624 | |||
625 | space = 1; | ||
626 | if (cachefiles_has_space(cache, 0, *nr_pages) < 0) | ||
627 | space = 0; | ||
628 | |||
629 | inode = object->backer->d_inode; | ||
630 | ASSERT(S_ISREG(inode->i_mode)); | ||
631 | ASSERT(inode->i_mapping->a_ops->bmap); | ||
632 | ASSERT(inode->i_mapping->a_ops->readpages); | ||
633 | |||
634 | /* calculate the shift required to use bmap */ | ||
635 | if (inode->i_sb->s_blocksize > PAGE_SIZE) | ||
636 | return -ENOBUFS; | ||
637 | |||
638 | shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; | ||
639 | |||
640 | pagevec_init(&pagevec, 0); | ||
641 | |||
642 | op->op.flags = FSCACHE_OP_FAST; | ||
643 | op->op.processor = cachefiles_read_copier; | ||
644 | |||
645 | INIT_LIST_HEAD(&backpages); | ||
646 | nrbackpages = 0; | ||
647 | |||
648 | ret = space ? -ENODATA : -ENOBUFS; | ||
649 | list_for_each_entry_safe(page, _n, pages, lru) { | ||
650 | sector_t block0, block; | ||
651 | |||
652 | /* we assume the absence or presence of the first block is a | ||
653 | * good enough indication for the page as a whole | ||
654 | * - TODO: don't use bmap() for this as it is _not_ actually | ||
655 | * good enough for this as it doesn't indicate errors, but | ||
656 | * it's all we've got for the moment | ||
657 | */ | ||
658 | block0 = page->index; | ||
659 | block0 <<= shift; | ||
660 | |||
661 | block = inode->i_mapping->a_ops->bmap(inode->i_mapping, | ||
662 | block0); | ||
663 | _debug("%llx -> %llx", | ||
664 | (unsigned long long) block0, | ||
665 | (unsigned long long) block); | ||
666 | |||
667 | if (block) { | ||
668 | /* we have data - add it to the list to give to the | ||
669 | * backing fs */ | ||
670 | list_move(&page->lru, &backpages); | ||
671 | (*nr_pages)--; | ||
672 | nrbackpages++; | ||
673 | } else if (space && pagevec_add(&pagevec, page) == 0) { | ||
674 | fscache_mark_pages_cached(op, &pagevec); | ||
675 | ret = -ENODATA; | ||
676 | } | ||
677 | } | ||
678 | |||
679 | if (pagevec_count(&pagevec) > 0) | ||
680 | fscache_mark_pages_cached(op, &pagevec); | ||
681 | |||
682 | if (list_empty(pages)) | ||
683 | ret = 0; | ||
684 | |||
685 | /* submit the apparently valid pages to the backing fs to be read from | ||
686 | * disk */ | ||
687 | if (nrbackpages > 0) { | ||
688 | ret2 = cachefiles_read_backing_file(object, op, &backpages, | ||
689 | &pagevec); | ||
690 | if (ret2 == -ENOMEM || ret2 == -EINTR) | ||
691 | ret = ret2; | ||
692 | } | ||
693 | |||
694 | if (pagevec_count(&pagevec) > 0) | ||
695 | fscache_mark_pages_cached(op, &pagevec); | ||
696 | |||
697 | _leave(" = %d [nr=%u%s]", | ||
698 | ret, *nr_pages, list_empty(pages) ? " empty" : ""); | ||
699 | return ret; | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * allocate a block in the cache in which to store a page | ||
704 | * - cache withdrawal is prevented by the caller | ||
705 | * - returns -EINTR if interrupted | ||
706 | * - returns -ENOMEM if ran out of memory | ||
707 | * - returns -ENOBUFS if no buffers can be made available | ||
708 | * - returns -ENOBUFS if page is beyond EOF | ||
709 | * - otherwise: | ||
710 | * - the metadata will be retained | ||
711 | * - 0 will be returned | ||
712 | */ | ||
713 | int cachefiles_allocate_page(struct fscache_retrieval *op, | ||
714 | struct page *page, | ||
715 | gfp_t gfp) | ||
716 | { | ||
717 | struct cachefiles_object *object; | ||
718 | struct cachefiles_cache *cache; | ||
719 | struct pagevec pagevec; | ||
720 | int ret; | ||
721 | |||
722 | object = container_of(op->op.object, | ||
723 | struct cachefiles_object, fscache); | ||
724 | cache = container_of(object->fscache.cache, | ||
725 | struct cachefiles_cache, cache); | ||
726 | |||
727 | _enter("%p,{%lx},", object, page->index); | ||
728 | |||
729 | ret = cachefiles_has_space(cache, 0, 1); | ||
730 | if (ret == 0) { | ||
731 | pagevec_init(&pagevec, 0); | ||
732 | pagevec_add(&pagevec, page); | ||
733 | fscache_mark_pages_cached(op, &pagevec); | ||
734 | } else { | ||
735 | ret = -ENOBUFS; | ||
736 | } | ||
737 | |||
738 | _leave(" = %d", ret); | ||
739 | return ret; | ||
740 | } | ||
741 | |||
742 | /* | ||
743 | * allocate blocks in the cache in which to store a set of pages | ||
744 | * - cache withdrawal is prevented by the caller | ||
745 | * - returns -EINTR if interrupted | ||
746 | * - returns -ENOMEM if ran out of memory | ||
747 | * - returns -ENOBUFS if some buffers couldn't be made available | ||
748 | * - returns -ENOBUFS if some pages are beyond EOF | ||
749 | * - otherwise: | ||
750 | * - -ENODATA will be returned | ||
751 | * - metadata will be retained for any page marked | ||
752 | */ | ||
753 | int cachefiles_allocate_pages(struct fscache_retrieval *op, | ||
754 | struct list_head *pages, | ||
755 | unsigned *nr_pages, | ||
756 | gfp_t gfp) | ||
757 | { | ||
758 | struct cachefiles_object *object; | ||
759 | struct cachefiles_cache *cache; | ||
760 | struct pagevec pagevec; | ||
761 | struct page *page; | ||
762 | int ret; | ||
763 | |||
764 | object = container_of(op->op.object, | ||
765 | struct cachefiles_object, fscache); | ||
766 | cache = container_of(object->fscache.cache, | ||
767 | struct cachefiles_cache, cache); | ||
768 | |||
769 | _enter("%p,,,%d,", object, *nr_pages); | ||
770 | |||
771 | ret = cachefiles_has_space(cache, 0, *nr_pages); | ||
772 | if (ret == 0) { | ||
773 | pagevec_init(&pagevec, 0); | ||
774 | |||
775 | list_for_each_entry(page, pages, lru) { | ||
776 | if (pagevec_add(&pagevec, page) == 0) | ||
777 | fscache_mark_pages_cached(op, &pagevec); | ||
778 | } | ||
779 | |||
780 | if (pagevec_count(&pagevec) > 0) | ||
781 | fscache_mark_pages_cached(op, &pagevec); | ||
782 | ret = -ENODATA; | ||
783 | } else { | ||
784 | ret = -ENOBUFS; | ||
785 | } | ||
786 | |||
787 | _leave(" = %d", ret); | ||
788 | return ret; | ||
789 | } | ||
790 | |||
791 | /* | ||
792 | * request a page be stored in the cache | ||
793 | * - cache withdrawal is prevented by the caller | ||
794 | * - this request may be ignored if there's no cache block available, in which | ||
795 | * case -ENOBUFS will be returned | ||
796 | * - if the op is in progress, 0 will be returned | ||
797 | */ | ||
798 | int cachefiles_write_page(struct fscache_storage *op, struct page *page) | ||
799 | { | ||
800 | struct cachefiles_object *object; | ||
801 | struct cachefiles_cache *cache; | ||
802 | mm_segment_t old_fs; | ||
803 | struct file *file; | ||
804 | loff_t pos; | ||
805 | void *data; | ||
806 | int ret; | ||
807 | |||
808 | ASSERT(op != NULL); | ||
809 | ASSERT(page != NULL); | ||
810 | |||
811 | object = container_of(op->op.object, | ||
812 | struct cachefiles_object, fscache); | ||
813 | |||
814 | _enter("%p,%p{%lx},,,", object, page, page->index); | ||
815 | |||
816 | if (!object->backer) { | ||
817 | _leave(" = -ENOBUFS"); | ||
818 | return -ENOBUFS; | ||
819 | } | ||
820 | |||
821 | ASSERT(S_ISREG(object->backer->d_inode->i_mode)); | ||
822 | |||
823 | cache = container_of(object->fscache.cache, | ||
824 | struct cachefiles_cache, cache); | ||
825 | |||
826 | /* write the page to the backing filesystem and let it store it in its | ||
827 | * own time */ | ||
828 | dget(object->backer); | ||
829 | mntget(cache->mnt); | ||
830 | file = dentry_open(object->backer, cache->mnt, O_RDWR, | ||
831 | cache->cache_cred); | ||
832 | if (IS_ERR(file)) { | ||
833 | ret = PTR_ERR(file); | ||
834 | } else { | ||
835 | ret = -EIO; | ||
836 | if (file->f_op->write) { | ||
837 | pos = (loff_t) page->index << PAGE_SHIFT; | ||
838 | data = kmap(page); | ||
839 | old_fs = get_fs(); | ||
840 | set_fs(KERNEL_DS); | ||
841 | ret = file->f_op->write( | ||
842 | file, (const void __user *) data, PAGE_SIZE, | ||
843 | &pos); | ||
844 | set_fs(old_fs); | ||
845 | kunmap(page); | ||
846 | if (ret != PAGE_SIZE) | ||
847 | ret = -EIO; | ||
848 | } | ||
849 | fput(file); | ||
850 | } | ||
851 | |||
852 | if (ret < 0) { | ||
853 | if (ret == -EIO) | ||
854 | cachefiles_io_error_obj( | ||
855 | object, "Write page to backing file failed"); | ||
856 | ret = -ENOBUFS; | ||
857 | } | ||
858 | |||
859 | _leave(" = %d", ret); | ||
860 | return ret; | ||
861 | } | ||
862 | |||
863 | /* | ||
864 | * detach a backing block from a page | ||
865 | * - cache withdrawal is prevented by the caller | ||
866 | */ | ||
867 | void cachefiles_uncache_page(struct fscache_object *_object, struct page *page) | ||
868 | { | ||
869 | struct cachefiles_object *object; | ||
870 | struct cachefiles_cache *cache; | ||
871 | |||
872 | object = container_of(_object, struct cachefiles_object, fscache); | ||
873 | cache = container_of(object->fscache.cache, | ||
874 | struct cachefiles_cache, cache); | ||
875 | |||
876 | _enter("%p,{%lu}", object, page->index); | ||
877 | |||
878 | spin_unlock(&object->fscache.cookie->lock); | ||
879 | } | ||
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c new file mode 100644 index 000000000000..b5808cdb2232 --- /dev/null +++ b/fs/cachefiles/security.c | |||
@@ -0,0 +1,116 @@ | |||
1 | /* CacheFiles security management | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/cred.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | /* | ||
17 | * determine the security context within which we access the cache from within | ||
18 | * the kernel | ||
19 | */ | ||
20 | int cachefiles_get_security_ID(struct cachefiles_cache *cache) | ||
21 | { | ||
22 | struct cred *new; | ||
23 | int ret; | ||
24 | |||
25 | _enter("{%s}", cache->secctx); | ||
26 | |||
27 | new = prepare_kernel_cred(current); | ||
28 | if (!new) { | ||
29 | ret = -ENOMEM; | ||
30 | goto error; | ||
31 | } | ||
32 | |||
33 | if (cache->secctx) { | ||
34 | ret = set_security_override_from_ctx(new, cache->secctx); | ||
35 | if (ret < 0) { | ||
36 | put_cred(new); | ||
37 | printk(KERN_ERR "CacheFiles:" | ||
38 | " Security denies permission to nominate" | ||
39 | " security context: error %d\n", | ||
40 | ret); | ||
41 | goto error; | ||
42 | } | ||
43 | } | ||
44 | |||
45 | cache->cache_cred = new; | ||
46 | ret = 0; | ||
47 | error: | ||
48 | _leave(" = %d", ret); | ||
49 | return ret; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * see if mkdir and create can be performed in the root directory | ||
54 | */ | ||
55 | static int cachefiles_check_cache_dir(struct cachefiles_cache *cache, | ||
56 | struct dentry *root) | ||
57 | { | ||
58 | int ret; | ||
59 | |||
60 | ret = security_inode_mkdir(root->d_inode, root, 0); | ||
61 | if (ret < 0) { | ||
62 | printk(KERN_ERR "CacheFiles:" | ||
63 | " Security denies permission to make dirs: error %d", | ||
64 | ret); | ||
65 | return ret; | ||
66 | } | ||
67 | |||
68 | ret = security_inode_create(root->d_inode, root, 0); | ||
69 | if (ret < 0) | ||
70 | printk(KERN_ERR "CacheFiles:" | ||
71 | " Security denies permission to create files: error %d", | ||
72 | ret); | ||
73 | |||
74 | return ret; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * check the security details of the on-disk cache | ||
79 | * - must be called with security override in force | ||
80 | */ | ||
81 | int cachefiles_determine_cache_security(struct cachefiles_cache *cache, | ||
82 | struct dentry *root, | ||
83 | const struct cred **_saved_cred) | ||
84 | { | ||
85 | struct cred *new; | ||
86 | int ret; | ||
87 | |||
88 | _enter(""); | ||
89 | |||
90 | /* duplicate the cache creds for COW (the override is currently in | ||
91 | * force, so we can use prepare_creds() to do this) */ | ||
92 | new = prepare_creds(); | ||
93 | if (!new) | ||
94 | return -ENOMEM; | ||
95 | |||
96 | cachefiles_end_secure(cache, *_saved_cred); | ||
97 | |||
98 | /* use the cache root dir's security context as the basis with | ||
99 | * which create files */ | ||
100 | ret = set_create_files_as(new, root->d_inode); | ||
101 | if (ret < 0) { | ||
102 | _leave(" = %d [cfa]", ret); | ||
103 | return ret; | ||
104 | } | ||
105 | |||
106 | put_cred(cache->cache_cred); | ||
107 | cache->cache_cred = new; | ||
108 | |||
109 | cachefiles_begin_secure(cache, _saved_cred); | ||
110 | ret = cachefiles_check_cache_dir(cache, root); | ||
111 | |||
112 | if (ret == -EOPNOTSUPP) | ||
113 | ret = 0; | ||
114 | _leave(" = %d", ret); | ||
115 | return ret; | ||
116 | } | ||
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c new file mode 100644 index 000000000000..f3e7a0bf068b --- /dev/null +++ b/fs/cachefiles/xattr.c | |||
@@ -0,0 +1,291 @@ | |||
1 | /* CacheFiles extended attribute management | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/module.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/file.h> | ||
15 | #include <linux/fs.h> | ||
16 | #include <linux/fsnotify.h> | ||
17 | #include <linux/quotaops.h> | ||
18 | #include <linux/xattr.h> | ||
19 | #include "internal.h" | ||
20 | |||
21 | static const char cachefiles_xattr_cache[] = /* name of the xattr that the functions in this file set, read and remove */ | ||
22 | XATTR_USER_PREFIX "CacheFiles.cache"; | ||
23 | |||
24 | /* | ||
25 | * check the type label on an object | ||
26 | * - done using xattrs | ||
27 | */ | ||
28 | int cachefiles_check_object_type(struct cachefiles_object *object) | ||
29 | { | ||
30 | struct dentry *dentry = object->dentry; | ||
31 | char type[3], xtype[3]; | ||
32 | int ret; | ||
33 | |||
34 | ASSERT(dentry); | ||
35 | ASSERT(dentry->d_inode); | ||
36 | |||
37 | if (!object->fscache.cookie) | ||
38 | strcpy(type, "C3"); | ||
39 | else | ||
40 | snprintf(type, 3, "%02x", object->fscache.cookie->def->type); | ||
41 | |||
42 | _enter("%p{%s}", object, type); | ||
43 | |||
44 | /* attempt to install a type label directly */ | ||
45 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2, | ||
46 | XATTR_CREATE); | ||
47 | if (ret == 0) { | ||
48 | _debug("SET"); /* we succeeded */ | ||
49 | goto error; | ||
50 | } | ||
51 | |||
52 | if (ret != -EEXIST) { | ||
53 | kerror("Can't set xattr on %*.*s [%lu] (err %d)", | ||
54 | dentry->d_name.len, dentry->d_name.len, | ||
55 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
56 | -ret); | ||
57 | goto error; | ||
58 | } | ||
59 | |||
60 | /* read the current type label */ | ||
61 | ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3); | ||
62 | if (ret < 0) { | ||
63 | if (ret == -ERANGE) | ||
64 | goto bad_type_length; | ||
65 | |||
66 | kerror("Can't read xattr on %*.*s [%lu] (err %d)", | ||
67 | dentry->d_name.len, dentry->d_name.len, | ||
68 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
69 | -ret); | ||
70 | goto error; | ||
71 | } | ||
72 | |||
73 | /* check the type is what we're expecting */ | ||
74 | if (ret != 2) | ||
75 | goto bad_type_length; | ||
76 | |||
77 | if (xtype[0] != type[0] || xtype[1] != type[1]) | ||
78 | goto bad_type; | ||
79 | |||
80 | ret = 0; | ||
81 | |||
82 | error: | ||
83 | _leave(" = %d", ret); | ||
84 | return ret; | ||
85 | |||
86 | bad_type_length: | ||
87 | kerror("Cache object %lu type xattr length incorrect", | ||
88 | dentry->d_inode->i_ino); | ||
89 | ret = -EIO; | ||
90 | goto error; | ||
91 | |||
92 | bad_type: | ||
93 | xtype[2] = 0; | ||
94 | kerror("Cache object %*.*s [%lu] type %s not %s", | ||
95 | dentry->d_name.len, dentry->d_name.len, | ||
96 | dentry->d_name.name, dentry->d_inode->i_ino, | ||
97 | xtype, type); | ||
98 | ret = -EIO; | ||
99 | goto error; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * set the state xattr on a cache file | ||
104 | */ | ||
105 | int cachefiles_set_object_xattr(struct cachefiles_object *object, | ||
106 | struct cachefiles_xattr *auxdata) | ||
107 | { | ||
108 | struct dentry *dentry = object->dentry; | ||
109 | int ret; | ||
110 | |||
111 | ASSERT(object->fscache.cookie); | ||
112 | ASSERT(dentry); | ||
113 | |||
114 | _enter("%p,#%d", object, auxdata->len); | ||
115 | |||
116 | /* attempt to install the cache metadata directly */ | ||
117 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | ||
118 | |||
119 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
120 | &auxdata->type, auxdata->len, | ||
121 | XATTR_CREATE); | ||
122 | if (ret < 0 && ret != -ENOMEM) | ||
123 | cachefiles_io_error_obj( | ||
124 | object, | ||
125 | "Failed to set xattr with error %d", ret); | ||
126 | |||
127 | _leave(" = %d", ret); | ||
128 | return ret; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * update the state xattr on a cache file | ||
133 | */ | ||
134 | int cachefiles_update_object_xattr(struct cachefiles_object *object, | ||
135 | struct cachefiles_xattr *auxdata) | ||
136 | { | ||
137 | struct dentry *dentry = object->dentry; | ||
138 | int ret; | ||
139 | |||
140 | ASSERT(object->fscache.cookie); | ||
141 | ASSERT(dentry); | ||
142 | |||
143 | _enter("%p,#%d", object, auxdata->len); | ||
144 | |||
145 | /* attempt to install the cache metadata directly */ | ||
146 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | ||
147 | |||
148 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
149 | &auxdata->type, auxdata->len, | ||
150 | XATTR_REPLACE); | ||
151 | if (ret < 0 && ret != -ENOMEM) | ||
152 | cachefiles_io_error_obj( | ||
153 | object, | ||
154 | "Failed to update xattr with error %d", ret); | ||
155 | |||
156 | _leave(" = %d", ret); | ||
157 | return ret; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * check the state xattr on a cache file | ||
162 | * - return -ESTALE if the object should be deleted | ||
163 | */ | ||
164 | int cachefiles_check_object_xattr(struct cachefiles_object *object, | ||
165 | struct cachefiles_xattr *auxdata) | ||
166 | { | ||
167 | struct cachefiles_xattr *auxbuf; | ||
168 | struct dentry *dentry = object->dentry; | ||
169 | int ret; | ||
170 | |||
171 | _enter("%p,#%d", object, auxdata->len); | ||
172 | |||
173 | ASSERT(dentry); | ||
174 | ASSERT(dentry->d_inode); | ||
175 | |||
176 | auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL); | ||
177 | if (!auxbuf) { | ||
178 | _leave(" = -ENOMEM"); | ||
179 | return -ENOMEM; | ||
180 | } | ||
181 | |||
182 | /* read the current type label */ | ||
183 | ret = vfs_getxattr(dentry, cachefiles_xattr_cache, | ||
184 | &auxbuf->type, 512 + 1); | ||
185 | if (ret < 0) { | ||
186 | if (ret == -ENODATA) | ||
187 | goto stale; /* no attribute - power went off | ||
188 | * mid-cull? */ | ||
189 | |||
190 | if (ret == -ERANGE) | ||
191 | goto bad_type_length; | ||
192 | |||
193 | cachefiles_io_error_obj(object, | ||
194 | "Can't read xattr on %lu (err %d)", | ||
195 | dentry->d_inode->i_ino, -ret); | ||
196 | goto error; | ||
197 | } | ||
198 | |||
199 | /* check the on-disk object */ | ||
200 | if (ret < 1) | ||
201 | goto bad_type_length; | ||
202 | |||
203 | if (auxbuf->type != auxdata->type) | ||
204 | goto stale; | ||
205 | |||
206 | auxbuf->len = ret; | ||
207 | |||
208 | /* consult the netfs */ | ||
209 | if (object->fscache.cookie->def->check_aux) { | ||
210 | enum fscache_checkaux result; | ||
211 | unsigned int dlen; | ||
212 | |||
213 | dlen = auxbuf->len - 1; | ||
214 | |||
215 | _debug("checkaux %s #%u", | ||
216 | object->fscache.cookie->def->name, dlen); | ||
217 | |||
218 | result = fscache_check_aux(&object->fscache, | ||
219 | &auxbuf->data, dlen); | ||
220 | |||
221 | switch (result) { | ||
222 | /* entry okay as is */ | ||
223 | case FSCACHE_CHECKAUX_OKAY: | ||
224 | goto okay; | ||
225 | |||
226 | /* entry requires update */ | ||
227 | case FSCACHE_CHECKAUX_NEEDS_UPDATE: | ||
228 | break; | ||
229 | |||
230 | /* entry requires deletion */ | ||
231 | case FSCACHE_CHECKAUX_OBSOLETE: | ||
232 | goto stale; | ||
233 | |||
234 | default: | ||
235 | BUG(); | ||
236 | } | ||
237 | |||
238 | /* update the current label */ | ||
239 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | ||
240 | &auxdata->type, auxdata->len, | ||
241 | XATTR_REPLACE); | ||
242 | if (ret < 0) { | ||
243 | cachefiles_io_error_obj(object, | ||
244 | "Can't update xattr on %lu" | ||
245 | " (error %d)", | ||
246 | dentry->d_inode->i_ino, -ret); | ||
247 | goto error; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | okay: | ||
252 | ret = 0; | ||
253 | |||
254 | error: | ||
255 | kfree(auxbuf); | ||
256 | _leave(" = %d", ret); | ||
257 | return ret; | ||
258 | |||
259 | bad_type_length: | ||
260 | kerror("Cache object %lu xattr length incorrect", | ||
261 | dentry->d_inode->i_ino); | ||
262 | ret = -EIO; | ||
263 | goto error; | ||
264 | |||
265 | stale: | ||
266 | ret = -ESTALE; | ||
267 | goto error; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * remove the object's xattr to mark it stale | ||
272 | */ | ||
273 | int cachefiles_remove_object_xattr(struct cachefiles_cache *cache, | ||
274 | struct dentry *dentry) | ||
275 | { | ||
276 | int ret; | ||
277 | |||
278 | ret = vfs_removexattr(dentry, cachefiles_xattr_cache); | ||
279 | if (ret < 0) { | ||
280 | if (ret == -ENOENT || ret == -ENODATA) | ||
281 | ret = 0; | ||
282 | else if (ret != -ENOMEM) | ||
283 | cachefiles_io_error(cache, | ||
284 | "Can't remove xattr from %lu" | ||
285 | " (error %d)", | ||
286 | dentry->d_inode->i_ino, -ret); | ||
287 | } | ||
288 | |||
289 | _leave(" = %d", ret); | ||
290 | return ret; | ||
291 | } | ||
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig new file mode 100644 index 000000000000..9bbb8ce7bea0 --- /dev/null +++ b/fs/fscache/Kconfig | |||
@@ -0,0 +1,56 @@ | |||
1 | |||
2 | config FSCACHE | ||
3 | tristate "General filesystem local caching manager" | ||
4 | depends on EXPERIMENTAL | ||
5 | select SLOW_WORK | ||
6 | help | ||
7 | This option enables a generic filesystem caching manager that can be | ||
8 | used by various network and other filesystems to cache data locally. | ||
9 | Different sorts of caches can be plugged in, depending on the | ||
10 | resources available. | ||
11 | |||
12 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
13 | |||
14 | config FSCACHE_STATS | ||
15 | bool "Gather statistical information on local caching" | ||
16 | depends on FSCACHE && PROC_FS | ||
17 | help | ||
18 | This option causes statistical information to be gathered on local | ||
19 | caching and exported through file: | ||
20 | |||
21 | /proc/fs/fscache/stats | ||
22 | |||
23 | The gathering of statistics adds a certain amount of overhead to | ||
24 | execution as there are quite a few stats gathered, and on a | ||
25 | multi-CPU system these may be on cachelines that keep bouncing | ||
26 | between CPUs. On the other hand, the stats are very useful for | ||
27 | debugging purposes. Saying 'Y' here is recommended. | ||
28 | |||
29 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
30 | |||
31 | config FSCACHE_HISTOGRAM | ||
32 | bool "Gather latency information on local caching" | ||
33 | depends on FSCACHE && PROC_FS | ||
34 | help | ||
35 | This option causes latency information to be gathered on local | ||
36 | caching and exported through file: | ||
37 | |||
38 | /proc/fs/fscache/histogram | ||
39 | |||
40 | The generation of this histogram adds a certain amount of overhead to | ||
41 | execution as there are a number of points at which data is gathered, | ||
42 | and on a multi-CPU system these may be on cachelines that keep | ||
43 | bouncing between CPUs. On the other hand, the histogram may be | ||
44 | useful for debugging purposes. Saying 'N' here is recommended. | ||
45 | |||
46 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
47 | |||
48 | config FSCACHE_DEBUG | ||
49 | bool "Debug FS-Cache" | ||
50 | depends on FSCACHE | ||
51 | help | ||
52 | This permits debugging to be dynamically enabled in the local caching | ||
53 | management module. If this is set, the debugging output may be | ||
54 | enabled by setting bits in /sys/module/fscache/parameters/debug. | ||
55 | |||
56 | See Documentation/filesystems/caching/fscache.txt for more information. | ||
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile new file mode 100644 index 000000000000..91571b95aacc --- /dev/null +++ b/fs/fscache/Makefile | |||
@@ -0,0 +1,19 @@ | |||
1 | # | ||
2 | # Makefile for general filesystem caching code | ||
3 | # | ||
4 | |||
5 | fscache-y := \ | ||
6 | cache.o \ | ||
7 | cookie.o \ | ||
8 | fsdef.o \ | ||
9 | main.o \ | ||
10 | netfs.o \ | ||
11 | object.o \ | ||
12 | operation.o \ | ||
13 | page.o | ||
14 | |||
15 | fscache-$(CONFIG_PROC_FS) += proc.o | ||
16 | fscache-$(CONFIG_FSCACHE_STATS) += stats.o | ||
17 | fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o | ||
18 | |||
19 | obj-$(CONFIG_FSCACHE) := fscache.o | ||
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c new file mode 100644 index 000000000000..e21985bbb1fb --- /dev/null +++ b/fs/fscache/cache.c | |||
@@ -0,0 +1,415 @@ | |||
1 | /* FS-Cache cache handling | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | LIST_HEAD(fscache_cache_list); | ||
18 | DECLARE_RWSEM(fscache_addremove_sem); | ||
19 | DECLARE_WAIT_QUEUE_HEAD(fscache_cache_cleared_wq); | ||
20 | EXPORT_SYMBOL(fscache_cache_cleared_wq); | ||
21 | |||
22 | static LIST_HEAD(fscache_cache_tag_list); | ||
23 | |||
24 | /* | ||
25 | * look up a cache tag | ||
26 | */ | ||
27 | struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *name) | ||
28 | { | ||
29 | struct fscache_cache_tag *tag, *xtag; | ||
30 | |||
31 | /* firstly check for the existence of the tag under read lock */ | ||
32 | down_read(&fscache_addremove_sem); | ||
33 | |||
34 | list_for_each_entry(tag, &fscache_cache_tag_list, link) { | ||
35 | if (strcmp(tag->name, name) == 0) { | ||
36 | atomic_inc(&tag->usage); | ||
37 | up_read(&fscache_addremove_sem); | ||
38 | return tag; | ||
39 | } | ||
40 | } | ||
41 | |||
42 | up_read(&fscache_addremove_sem); | ||
43 | |||
44 | /* the tag does not exist - create a candidate */ | ||
45 | xtag = kzalloc(sizeof(*xtag) + strlen(name) + 1, GFP_KERNEL); | ||
46 | if (!xtag) | ||
47 | /* return a dummy tag if out of memory */ | ||
48 | return ERR_PTR(-ENOMEM); | ||
49 | |||
50 | atomic_set(&xtag->usage, 1); | ||
51 | strcpy(xtag->name, name); | ||
52 | |||
53 | /* write lock, search again and add if still not present */ | ||
54 | down_write(&fscache_addremove_sem); | ||
55 | |||
56 | list_for_each_entry(tag, &fscache_cache_tag_list, link) { | ||
57 | if (strcmp(tag->name, name) == 0) { | ||
58 | atomic_inc(&tag->usage); | ||
59 | up_write(&fscache_addremove_sem); | ||
60 | kfree(xtag); | ||
61 | return tag; | ||
62 | } | ||
63 | } | ||
64 | |||
65 | list_add_tail(&xtag->link, &fscache_cache_tag_list); | ||
66 | up_write(&fscache_addremove_sem); | ||
67 | return xtag; | ||
68 | } | ||
69 | |||
70 | /* | ||
71 | * release a reference to a cache tag | ||
72 | */ | ||
73 | void __fscache_release_cache_tag(struct fscache_cache_tag *tag) | ||
74 | { | ||
75 | if (tag != ERR_PTR(-ENOMEM)) { | ||
76 | down_write(&fscache_addremove_sem); | ||
77 | |||
78 | if (atomic_dec_and_test(&tag->usage)) | ||
79 | list_del_init(&tag->link); | ||
80 | else | ||
81 | tag = NULL; | ||
82 | |||
83 | up_write(&fscache_addremove_sem); | ||
84 | |||
85 | kfree(tag); | ||
86 | } | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * select a cache in which to store an object | ||
91 | * - the cache addremove semaphore must be at least read-locked by the caller | ||
92 | * - the object will never be an index | ||
93 | */ | ||
94 | struct fscache_cache *fscache_select_cache_for_object( | ||
95 | struct fscache_cookie *cookie) | ||
96 | { | ||
97 | struct fscache_cache_tag *tag; | ||
98 | struct fscache_object *object; | ||
99 | struct fscache_cache *cache; | ||
100 | |||
101 | _enter(""); | ||
102 | |||
103 | if (list_empty(&fscache_cache_list)) { | ||
104 | _leave(" = NULL [no cache]"); | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | /* we check the parent to determine the cache to use */ | ||
109 | spin_lock(&cookie->lock); | ||
110 | |||
111 | /* the first in the parent's backing list should be the preferred | ||
112 | * cache */ | ||
113 | if (!hlist_empty(&cookie->backing_objects)) { | ||
114 | object = hlist_entry(cookie->backing_objects.first, | ||
115 | struct fscache_object, cookie_link); | ||
116 | |||
117 | cache = object->cache; | ||
118 | if (object->state >= FSCACHE_OBJECT_DYING || | ||
119 | test_bit(FSCACHE_IOERROR, &cache->flags)) | ||
120 | cache = NULL; | ||
121 | |||
122 | spin_unlock(&cookie->lock); | ||
123 | _leave(" = %p [parent]", cache); | ||
124 | return cache; | ||
125 | } | ||
126 | |||
127 | /* the parent is unbacked */ | ||
128 | if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { | ||
129 | /* cookie not an index and is unbacked */ | ||
130 | spin_unlock(&cookie->lock); | ||
131 | _leave(" = NULL [cookie ub,ni]"); | ||
132 | return NULL; | ||
133 | } | ||
134 | |||
135 | spin_unlock(&cookie->lock); | ||
136 | |||
137 | if (!cookie->def->select_cache) | ||
138 | goto no_preference; | ||
139 | |||
140 | /* ask the netfs for its preference */ | ||
141 | tag = cookie->def->select_cache(cookie->parent->netfs_data, | ||
142 | cookie->netfs_data); | ||
143 | if (!tag) | ||
144 | goto no_preference; | ||
145 | |||
146 | if (tag == ERR_PTR(-ENOMEM)) { | ||
147 | _leave(" = NULL [nomem tag]"); | ||
148 | return NULL; | ||
149 | } | ||
150 | |||
151 | if (!tag->cache) { | ||
152 | _leave(" = NULL [unbacked tag]"); | ||
153 | return NULL; | ||
154 | } | ||
155 | |||
156 | if (test_bit(FSCACHE_IOERROR, &tag->cache->flags)) | ||
157 | return NULL; | ||
158 | |||
159 | _leave(" = %p [specific]", tag->cache); | ||
160 | return tag->cache; | ||
161 | |||
162 | no_preference: | ||
163 | /* netfs has no preference - just select first cache */ | ||
164 | cache = list_entry(fscache_cache_list.next, | ||
165 | struct fscache_cache, link); | ||
166 | _leave(" = %p [first]", cache); | ||
167 | return cache; | ||
168 | } | ||
169 | |||
170 | /** | ||
171 | * fscache_init_cache - Initialise a cache record | ||
172 | * @cache: The cache record to be initialised | ||
173 | * @ops: The cache operations to be installed in that record | ||
174 | * @idfmt: Format string to define identifier | ||
175 | * @...: sprintf-style arguments | ||
176 | * | ||
177 | * Initialise a record of a cache and fill in the name. | ||
178 | * | ||
179 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
180 | * description. | ||
181 | */ | ||
182 | void fscache_init_cache(struct fscache_cache *cache, | ||
183 | const struct fscache_cache_ops *ops, | ||
184 | const char *idfmt, | ||
185 | ...) | ||
186 | { | ||
187 | va_list va; | ||
188 | |||
189 | memset(cache, 0, sizeof(*cache)); | ||
190 | |||
191 | cache->ops = ops; | ||
192 | |||
193 | va_start(va, idfmt); | ||
194 | vsnprintf(cache->identifier, sizeof(cache->identifier), idfmt, va); | ||
195 | va_end(va); | ||
196 | |||
197 | INIT_WORK(&cache->op_gc, fscache_operation_gc); | ||
198 | INIT_LIST_HEAD(&cache->link); | ||
199 | INIT_LIST_HEAD(&cache->object_list); | ||
200 | INIT_LIST_HEAD(&cache->op_gc_list); | ||
201 | spin_lock_init(&cache->object_list_lock); | ||
202 | spin_lock_init(&cache->op_gc_list_lock); | ||
203 | } | ||
204 | EXPORT_SYMBOL(fscache_init_cache); | ||
205 | |||
206 | /** | ||
207 | * fscache_add_cache - Declare a cache as being open for business | ||
208 | * @cache: The record describing the cache | ||
209 | * @ifsdef: The record of the cache object describing the top-level index | ||
210 | * @tagname: The tag describing this cache | ||
211 | * | ||
212 | * Add a cache to the system, making it available for netfs's to use. | ||
213 | * | ||
214 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
215 | * description. | ||
216 | */ | ||
217 | int fscache_add_cache(struct fscache_cache *cache, | ||
218 | struct fscache_object *ifsdef, | ||
219 | const char *tagname) | ||
220 | { | ||
221 | struct fscache_cache_tag *tag; | ||
222 | |||
223 | BUG_ON(!cache->ops); | ||
224 | BUG_ON(!ifsdef); | ||
225 | |||
226 | cache->flags = 0; | ||
227 | ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
228 | ifsdef->state = FSCACHE_OBJECT_ACTIVE; | ||
229 | |||
230 | if (!tagname) | ||
231 | tagname = cache->identifier; | ||
232 | |||
233 | BUG_ON(!tagname[0]); | ||
234 | |||
235 | _enter("{%s.%s},,%s", cache->ops->name, cache->identifier, tagname); | ||
236 | |||
237 | /* we use the cache tag to uniquely identify caches */ | ||
238 | tag = __fscache_lookup_cache_tag(tagname); | ||
239 | if (IS_ERR(tag)) | ||
240 | goto nomem; | ||
241 | |||
242 | if (test_and_set_bit(FSCACHE_TAG_RESERVED, &tag->flags)) | ||
243 | goto tag_in_use; | ||
244 | |||
245 | cache->kobj = kobject_create_and_add(tagname, fscache_root); | ||
246 | if (!cache->kobj) | ||
247 | goto error; | ||
248 | |||
249 | ifsdef->cookie = &fscache_fsdef_index; | ||
250 | ifsdef->cache = cache; | ||
251 | cache->fsdef = ifsdef; | ||
252 | |||
253 | down_write(&fscache_addremove_sem); | ||
254 | |||
255 | tag->cache = cache; | ||
256 | cache->tag = tag; | ||
257 | |||
258 | /* add the cache to the list */ | ||
259 | list_add(&cache->link, &fscache_cache_list); | ||
260 | |||
261 | /* add the cache's netfs definition index object to the cache's | ||
262 | * list */ | ||
263 | spin_lock(&cache->object_list_lock); | ||
264 | list_add_tail(&ifsdef->cache_link, &cache->object_list); | ||
265 | spin_unlock(&cache->object_list_lock); | ||
266 | |||
267 | /* add the cache's netfs definition index object to the top level index | ||
268 | * cookie as a known backing object */ | ||
269 | spin_lock(&fscache_fsdef_index.lock); | ||
270 | |||
271 | hlist_add_head(&ifsdef->cookie_link, | ||
272 | &fscache_fsdef_index.backing_objects); | ||
273 | |||
274 | atomic_inc(&fscache_fsdef_index.usage); | ||
275 | |||
276 | /* done */ | ||
277 | spin_unlock(&fscache_fsdef_index.lock); | ||
278 | up_write(&fscache_addremove_sem); | ||
279 | |||
280 | printk(KERN_NOTICE "FS-Cache: Cache \"%s\" added (type %s)\n", | ||
281 | cache->tag->name, cache->ops->name); | ||
282 | kobject_uevent(cache->kobj, KOBJ_ADD); | ||
283 | |||
284 | _leave(" = 0 [%s]", cache->identifier); | ||
285 | return 0; | ||
286 | |||
287 | tag_in_use: | ||
288 | printk(KERN_ERR "FS-Cache: Cache tag '%s' already in use\n", tagname); | ||
289 | __fscache_release_cache_tag(tag); | ||
290 | _leave(" = -EXIST"); | ||
291 | return -EEXIST; | ||
292 | |||
293 | error: | ||
294 | __fscache_release_cache_tag(tag); | ||
295 | _leave(" = -EINVAL"); | ||
296 | return -EINVAL; | ||
297 | |||
298 | nomem: | ||
299 | _leave(" = -ENOMEM"); | ||
300 | return -ENOMEM; | ||
301 | } | ||
302 | EXPORT_SYMBOL(fscache_add_cache); | ||
303 | |||
304 | /** | ||
305 | * fscache_io_error - Note a cache I/O error | ||
306 | * @cache: The record describing the cache | ||
307 | * | ||
308 | * Note that an I/O error occurred in a cache and that it should no longer be | ||
309 | * used for anything. This also reports the error into the kernel log. | ||
310 | * | ||
311 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
312 | * description. | ||
313 | */ | ||
314 | void fscache_io_error(struct fscache_cache *cache) | ||
315 | { | ||
316 | set_bit(FSCACHE_IOERROR, &cache->flags); | ||
317 | |||
318 | printk(KERN_ERR "FS-Cache: Cache %s stopped due to I/O error\n", | ||
319 | cache->ops->name); | ||
320 | } | ||
321 | EXPORT_SYMBOL(fscache_io_error); | ||
322 | |||
323 | /* | ||
324 | * request withdrawal of all the objects in a cache | ||
325 | * - all the objects being withdrawn are moved onto the supplied list | ||
326 | */ | ||
327 | static void fscache_withdraw_all_objects(struct fscache_cache *cache, | ||
328 | struct list_head *dying_objects) | ||
329 | { | ||
330 | struct fscache_object *object; | ||
331 | |||
332 | spin_lock(&cache->object_list_lock); | ||
333 | |||
334 | while (!list_empty(&cache->object_list)) { | ||
335 | object = list_entry(cache->object_list.next, | ||
336 | struct fscache_object, cache_link); | ||
337 | list_move_tail(&object->cache_link, dying_objects); | ||
338 | |||
339 | _debug("withdraw %p", object->cookie); | ||
340 | |||
341 | spin_lock(&object->lock); | ||
342 | spin_unlock(&cache->object_list_lock); | ||
343 | fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW); | ||
344 | spin_unlock(&object->lock); | ||
345 | |||
346 | cond_resched(); | ||
347 | spin_lock(&cache->object_list_lock); | ||
348 | } | ||
349 | |||
350 | spin_unlock(&cache->object_list_lock); | ||
351 | } | ||
352 | |||
353 | /** | ||
354 | * fscache_withdraw_cache - Withdraw a cache from the active service | ||
355 | * @cache: The record describing the cache | ||
356 | * | ||
357 | * Withdraw a cache from service, unbinding all its cache objects from the | ||
358 | * netfs cookies they're currently representing. | ||
359 | * | ||
360 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
361 | * description. | ||
362 | */ | ||
363 | void fscache_withdraw_cache(struct fscache_cache *cache) | ||
364 | { | ||
365 | LIST_HEAD(dying_objects); | ||
366 | |||
367 | _enter(""); | ||
368 | |||
369 | printk(KERN_NOTICE "FS-Cache: Withdrawing cache \"%s\"\n", | ||
370 | cache->tag->name); | ||
371 | |||
372 | /* make the cache unavailable for cookie acquisition */ | ||
373 | if (test_and_set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags)) | ||
374 | BUG(); | ||
375 | |||
376 | down_write(&fscache_addremove_sem); | ||
377 | list_del_init(&cache->link); | ||
378 | cache->tag->cache = NULL; | ||
379 | up_write(&fscache_addremove_sem); | ||
380 | |||
381 | /* make sure all pages pinned by operations on behalf of the netfs are | ||
382 | * written to disk */ | ||
383 | cache->ops->sync_cache(cache); | ||
384 | |||
385 | /* dissociate all the netfs pages backed by this cache from the block | ||
386 | * mappings in the cache */ | ||
387 | cache->ops->dissociate_pages(cache); | ||
388 | |||
389 | /* we now have to destroy all the active objects pertaining to this | ||
390 | * cache - which we do by passing them off to thread pool to be | ||
391 | * disposed of */ | ||
392 | _debug("destroy"); | ||
393 | |||
394 | fscache_withdraw_all_objects(cache, &dying_objects); | ||
395 | |||
396 | /* wait for all extant objects to finish their outstanding operations | ||
397 | * and go away */ | ||
398 | _debug("wait for finish"); | ||
399 | wait_event(fscache_cache_cleared_wq, | ||
400 | atomic_read(&cache->object_count) == 0); | ||
401 | _debug("wait for clearance"); | ||
402 | wait_event(fscache_cache_cleared_wq, | ||
403 | list_empty(&cache->object_list)); | ||
404 | _debug("cleared"); | ||
405 | ASSERT(list_empty(&dying_objects)); | ||
406 | |||
407 | kobject_put(cache->kobj); | ||
408 | |||
409 | clear_bit(FSCACHE_TAG_RESERVED, &cache->tag->flags); | ||
410 | fscache_release_cache_tag(cache->tag); | ||
411 | cache->tag = NULL; | ||
412 | |||
413 | _leave(""); | ||
414 | } | ||
415 | EXPORT_SYMBOL(fscache_withdraw_cache); | ||
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c new file mode 100644 index 000000000000..72fd18f6c71f --- /dev/null +++ b/fs/fscache/cookie.c | |||
@@ -0,0 +1,500 @@ | |||
1 | /* netfs cookie management | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/netfs-api.txt for more information on | ||
12 | * the netfs API. | ||
13 | */ | ||
14 | |||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include "internal.h" | ||
19 | |||
20 | struct kmem_cache *fscache_cookie_jar; | ||
21 | |||
22 | static atomic_t fscache_object_debug_id = ATOMIC_INIT(0); | ||
23 | |||
24 | static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie); | ||
25 | static int fscache_alloc_object(struct fscache_cache *cache, | ||
26 | struct fscache_cookie *cookie); | ||
27 | static int fscache_attach_object(struct fscache_cookie *cookie, | ||
28 | struct fscache_object *object); | ||
29 | |||
30 | /* | ||
31 | * initialise an cookie jar slab element prior to any use | ||
32 | */ | ||
33 | void fscache_cookie_init_once(void *_cookie) | ||
34 | { | ||
35 | struct fscache_cookie *cookie = _cookie; | ||
36 | |||
37 | memset(cookie, 0, sizeof(*cookie)); | ||
38 | spin_lock_init(&cookie->lock); | ||
39 | INIT_HLIST_HEAD(&cookie->backing_objects); | ||
40 | } | ||
41 | |||
42 | /* | ||
43 | * request a cookie to represent an object (index, datafile, xattr, etc) | ||
44 | * - parent specifies the parent object | ||
45 | * - the top level index cookie for each netfs is stored in the fscache_netfs | ||
46 | * struct upon registration | ||
47 | * - def points to the definition | ||
48 | * - the netfs_data will be passed to the functions pointed to in *def | ||
49 | * - all attached caches will be searched to see if they contain this object | ||
50 | * - index objects aren't stored on disk until there's a dependent file that | ||
51 | * needs storing | ||
52 | * - other objects are stored in a selected cache immediately, and all the | ||
53 | * indices forming the path to it are instantiated if necessary | ||
54 | * - we never let on to the netfs about errors | ||
55 | * - we may set a negative cookie pointer, but that's okay | ||
56 | */ | ||
57 | struct fscache_cookie *__fscache_acquire_cookie( | ||
58 | struct fscache_cookie *parent, | ||
59 | const struct fscache_cookie_def *def, | ||
60 | void *netfs_data) | ||
61 | { | ||
62 | struct fscache_cookie *cookie; | ||
63 | |||
64 | BUG_ON(!def); | ||
65 | |||
66 | _enter("{%s},{%s},%p", | ||
67 | parent ? (char *) parent->def->name : "<no-parent>", | ||
68 | def->name, netfs_data); | ||
69 | |||
70 | fscache_stat(&fscache_n_acquires); | ||
71 | |||
72 | /* if there's no parent cookie, then we don't create one here either */ | ||
73 | if (!parent) { | ||
74 | fscache_stat(&fscache_n_acquires_null); | ||
75 | _leave(" [no parent]"); | ||
76 | return NULL; | ||
77 | } | ||
78 | |||
79 | /* validate the definition */ | ||
80 | BUG_ON(!def->get_key); | ||
81 | BUG_ON(!def->name[0]); | ||
82 | |||
83 | BUG_ON(def->type == FSCACHE_COOKIE_TYPE_INDEX && | ||
84 | parent->def->type != FSCACHE_COOKIE_TYPE_INDEX); | ||
85 | |||
86 | /* allocate and initialise a cookie */ | ||
87 | cookie = kmem_cache_alloc(fscache_cookie_jar, GFP_KERNEL); | ||
88 | if (!cookie) { | ||
89 | fscache_stat(&fscache_n_acquires_oom); | ||
90 | _leave(" [ENOMEM]"); | ||
91 | return NULL; | ||
92 | } | ||
93 | |||
94 | atomic_set(&cookie->usage, 1); | ||
95 | atomic_set(&cookie->n_children, 0); | ||
96 | |||
97 | atomic_inc(&parent->usage); | ||
98 | atomic_inc(&parent->n_children); | ||
99 | |||
100 | cookie->def = def; | ||
101 | cookie->parent = parent; | ||
102 | cookie->netfs_data = netfs_data; | ||
103 | cookie->flags = 0; | ||
104 | |||
105 | INIT_RADIX_TREE(&cookie->stores, GFP_NOFS); | ||
106 | |||
107 | switch (cookie->def->type) { | ||
108 | case FSCACHE_COOKIE_TYPE_INDEX: | ||
109 | fscache_stat(&fscache_n_cookie_index); | ||
110 | break; | ||
111 | case FSCACHE_COOKIE_TYPE_DATAFILE: | ||
112 | fscache_stat(&fscache_n_cookie_data); | ||
113 | break; | ||
114 | default: | ||
115 | fscache_stat(&fscache_n_cookie_special); | ||
116 | break; | ||
117 | } | ||
118 | |||
119 | /* if the object is an index then we need do nothing more here - we | ||
120 | * create indices on disk when we need them as an index may exist in | ||
121 | * multiple caches */ | ||
122 | if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { | ||
123 | if (fscache_acquire_non_index_cookie(cookie) < 0) { | ||
124 | atomic_dec(&parent->n_children); | ||
125 | __fscache_cookie_put(cookie); | ||
126 | fscache_stat(&fscache_n_acquires_nobufs); | ||
127 | _leave(" = NULL"); | ||
128 | return NULL; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | fscache_stat(&fscache_n_acquires_ok); | ||
133 | _leave(" = %p", cookie); | ||
134 | return cookie; | ||
135 | } | ||
136 | EXPORT_SYMBOL(__fscache_acquire_cookie); | ||
137 | |||
138 | /* | ||
139 | * acquire a non-index cookie | ||
140 | * - this must make sure the index chain is instantiated and instantiate the | ||
141 | * object representation too | ||
142 | */ | ||
143 | static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) | ||
144 | { | ||
145 | struct fscache_object *object; | ||
146 | struct fscache_cache *cache; | ||
147 | uint64_t i_size; | ||
148 | int ret; | ||
149 | |||
150 | _enter(""); | ||
151 | |||
152 | cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE; | ||
153 | |||
154 | /* now we need to see whether the backing objects for this cookie yet | ||
155 | * exist, if not there'll be nothing to search */ | ||
156 | down_read(&fscache_addremove_sem); | ||
157 | |||
158 | if (list_empty(&fscache_cache_list)) { | ||
159 | up_read(&fscache_addremove_sem); | ||
160 | _leave(" = 0 [no caches]"); | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | /* select a cache in which to store the object */ | ||
165 | cache = fscache_select_cache_for_object(cookie->parent); | ||
166 | if (!cache) { | ||
167 | up_read(&fscache_addremove_sem); | ||
168 | fscache_stat(&fscache_n_acquires_no_cache); | ||
169 | _leave(" = -ENOMEDIUM [no cache]"); | ||
170 | return -ENOMEDIUM; | ||
171 | } | ||
172 | |||
173 | _debug("cache %s", cache->tag->name); | ||
174 | |||
175 | cookie->flags = | ||
176 | (1 << FSCACHE_COOKIE_LOOKING_UP) | | ||
177 | (1 << FSCACHE_COOKIE_CREATING) | | ||
178 | (1 << FSCACHE_COOKIE_NO_DATA_YET); | ||
179 | |||
180 | /* ask the cache to allocate objects for this cookie and its parent | ||
181 | * chain */ | ||
182 | ret = fscache_alloc_object(cache, cookie); | ||
183 | if (ret < 0) { | ||
184 | up_read(&fscache_addremove_sem); | ||
185 | _leave(" = %d", ret); | ||
186 | return ret; | ||
187 | } | ||
188 | |||
189 | /* pass on how big the object we're caching is supposed to be */ | ||
190 | cookie->def->get_attr(cookie->netfs_data, &i_size); | ||
191 | |||
192 | spin_lock(&cookie->lock); | ||
193 | if (hlist_empty(&cookie->backing_objects)) { | ||
194 | spin_unlock(&cookie->lock); | ||
195 | goto unavailable; | ||
196 | } | ||
197 | |||
198 | object = hlist_entry(cookie->backing_objects.first, | ||
199 | struct fscache_object, cookie_link); | ||
200 | |||
201 | fscache_set_store_limit(object, i_size); | ||
202 | |||
203 | /* initiate the process of looking up all the objects in the chain | ||
204 | * (done by fscache_initialise_object()) */ | ||
205 | fscache_enqueue_object(object); | ||
206 | |||
207 | spin_unlock(&cookie->lock); | ||
208 | |||
209 | /* we may be required to wait for lookup to complete at this point */ | ||
210 | if (!fscache_defer_lookup) { | ||
211 | _debug("non-deferred lookup %p", &cookie->flags); | ||
212 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, | ||
213 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
214 | _debug("complete"); | ||
215 | if (test_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags)) | ||
216 | goto unavailable; | ||
217 | } | ||
218 | |||
219 | up_read(&fscache_addremove_sem); | ||
220 | _leave(" = 0 [deferred]"); | ||
221 | return 0; | ||
222 | |||
223 | unavailable: | ||
224 | up_read(&fscache_addremove_sem); | ||
225 | _leave(" = -ENOBUFS"); | ||
226 | return -ENOBUFS; | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * recursively allocate cache object records for a cookie/cache combination | ||
231 | * - caller must be holding the addremove sem | ||
232 | */ | ||
233 | static int fscache_alloc_object(struct fscache_cache *cache, | ||
234 | struct fscache_cookie *cookie) | ||
235 | { | ||
236 | struct fscache_object *object; | ||
237 | struct hlist_node *_n; | ||
238 | int ret; | ||
239 | |||
240 | _enter("%p,%p{%s}", cache, cookie, cookie->def->name); | ||
241 | |||
242 | spin_lock(&cookie->lock); | ||
243 | hlist_for_each_entry(object, _n, &cookie->backing_objects, | ||
244 | cookie_link) { | ||
245 | if (object->cache == cache) | ||
246 | goto object_already_extant; | ||
247 | } | ||
248 | spin_unlock(&cookie->lock); | ||
249 | |||
250 | /* ask the cache to allocate an object (we may end up with duplicate | ||
251 | * objects at this stage, but we sort that out later) */ | ||
252 | object = cache->ops->alloc_object(cache, cookie); | ||
253 | if (IS_ERR(object)) { | ||
254 | fscache_stat(&fscache_n_object_no_alloc); | ||
255 | ret = PTR_ERR(object); | ||
256 | goto error; | ||
257 | } | ||
258 | |||
259 | fscache_stat(&fscache_n_object_alloc); | ||
260 | |||
261 | object->debug_id = atomic_inc_return(&fscache_object_debug_id); | ||
262 | |||
263 | _debug("ALLOC OBJ%x: %s {%lx}", | ||
264 | object->debug_id, cookie->def->name, object->events); | ||
265 | |||
266 | ret = fscache_alloc_object(cache, cookie->parent); | ||
267 | if (ret < 0) | ||
268 | goto error_put; | ||
269 | |||
270 | /* only attach if we managed to allocate all we needed, otherwise | ||
271 | * discard the object we just allocated and instead use the one | ||
272 | * attached to the cookie */ | ||
273 | if (fscache_attach_object(cookie, object) < 0) | ||
274 | cache->ops->put_object(object); | ||
275 | |||
276 | _leave(" = 0"); | ||
277 | return 0; | ||
278 | |||
279 | object_already_extant: | ||
280 | ret = -ENOBUFS; | ||
281 | if (object->state >= FSCACHE_OBJECT_DYING) { | ||
282 | spin_unlock(&cookie->lock); | ||
283 | goto error; | ||
284 | } | ||
285 | spin_unlock(&cookie->lock); | ||
286 | _leave(" = 0 [found]"); | ||
287 | return 0; | ||
288 | |||
289 | error_put: | ||
290 | cache->ops->put_object(object); | ||
291 | error: | ||
292 | _leave(" = %d", ret); | ||
293 | return ret; | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * attach a cache object to a cookie | ||
298 | */ | ||
299 | static int fscache_attach_object(struct fscache_cookie *cookie, | ||
300 | struct fscache_object *object) | ||
301 | { | ||
302 | struct fscache_object *p; | ||
303 | struct fscache_cache *cache = object->cache; | ||
304 | struct hlist_node *_n; | ||
305 | int ret; | ||
306 | |||
307 | _enter("{%s},{OBJ%x}", cookie->def->name, object->debug_id); | ||
308 | |||
309 | spin_lock(&cookie->lock); | ||
310 | |||
311 | /* there may be multiple initial creations of this object, but we only | ||
312 | * want one */ | ||
313 | ret = -EEXIST; | ||
314 | hlist_for_each_entry(p, _n, &cookie->backing_objects, cookie_link) { | ||
315 | if (p->cache == object->cache) { | ||
316 | if (p->state >= FSCACHE_OBJECT_DYING) | ||
317 | ret = -ENOBUFS; | ||
318 | goto cant_attach_object; | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* pin the parent object */ | ||
323 | spin_lock_nested(&cookie->parent->lock, 1); | ||
324 | hlist_for_each_entry(p, _n, &cookie->parent->backing_objects, | ||
325 | cookie_link) { | ||
326 | if (p->cache == object->cache) { | ||
327 | if (p->state >= FSCACHE_OBJECT_DYING) { | ||
328 | ret = -ENOBUFS; | ||
329 | spin_unlock(&cookie->parent->lock); | ||
330 | goto cant_attach_object; | ||
331 | } | ||
332 | object->parent = p; | ||
333 | spin_lock(&p->lock); | ||
334 | p->n_children++; | ||
335 | spin_unlock(&p->lock); | ||
336 | break; | ||
337 | } | ||
338 | } | ||
339 | spin_unlock(&cookie->parent->lock); | ||
340 | |||
341 | /* attach to the cache's object list */ | ||
342 | if (list_empty(&object->cache_link)) { | ||
343 | spin_lock(&cache->object_list_lock); | ||
344 | list_add(&object->cache_link, &cache->object_list); | ||
345 | spin_unlock(&cache->object_list_lock); | ||
346 | } | ||
347 | |||
348 | /* attach to the cookie */ | ||
349 | object->cookie = cookie; | ||
350 | atomic_inc(&cookie->usage); | ||
351 | hlist_add_head(&object->cookie_link, &cookie->backing_objects); | ||
352 | ret = 0; | ||
353 | |||
354 | cant_attach_object: | ||
355 | spin_unlock(&cookie->lock); | ||
356 | _leave(" = %d", ret); | ||
357 | return ret; | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * update the index entries backing a cookie | ||
362 | */ | ||
363 | void __fscache_update_cookie(struct fscache_cookie *cookie) | ||
364 | { | ||
365 | struct fscache_object *object; | ||
366 | struct hlist_node *_p; | ||
367 | |||
368 | fscache_stat(&fscache_n_updates); | ||
369 | |||
370 | if (!cookie) { | ||
371 | fscache_stat(&fscache_n_updates_null); | ||
372 | _leave(" [no cookie]"); | ||
373 | return; | ||
374 | } | ||
375 | |||
376 | _enter("{%s}", cookie->def->name); | ||
377 | |||
378 | BUG_ON(!cookie->def->get_aux); | ||
379 | |||
380 | spin_lock(&cookie->lock); | ||
381 | |||
382 | /* update the index entry on disk in each cache backing this cookie */ | ||
383 | hlist_for_each_entry(object, _p, | ||
384 | &cookie->backing_objects, cookie_link) { | ||
385 | fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); | ||
386 | } | ||
387 | |||
388 | spin_unlock(&cookie->lock); | ||
389 | _leave(""); | ||
390 | } | ||
391 | EXPORT_SYMBOL(__fscache_update_cookie); | ||
392 | |||
393 | /* | ||
394 | * release a cookie back to the cache | ||
395 | * - the object will be marked as recyclable on disk if retire is true | ||
396 | * - all dependents of this cookie must have already been unregistered | ||
397 | * (indices/files/pages) | ||
398 | */ | ||
399 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | ||
400 | { | ||
401 | struct fscache_cache *cache; | ||
402 | struct fscache_object *object; | ||
403 | unsigned long event; | ||
404 | |||
405 | fscache_stat(&fscache_n_relinquishes); | ||
406 | |||
407 | if (!cookie) { | ||
408 | fscache_stat(&fscache_n_relinquishes_null); | ||
409 | _leave(" [no cookie]"); | ||
410 | return; | ||
411 | } | ||
412 | |||
413 | _enter("%p{%s,%p},%d", | ||
414 | cookie, cookie->def->name, cookie->netfs_data, retire); | ||
415 | |||
416 | if (atomic_read(&cookie->n_children) != 0) { | ||
417 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", | ||
418 | cookie->def->name); | ||
419 | BUG(); | ||
420 | } | ||
421 | |||
422 | /* wait for the cookie to finish being instantiated (or to fail) */ | ||
423 | if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) { | ||
424 | fscache_stat(&fscache_n_relinquishes_waitcrt); | ||
425 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING, | ||
426 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
427 | } | ||
428 | |||
429 | event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; | ||
430 | |||
431 | /* detach pointers back to the netfs */ | ||
432 | spin_lock(&cookie->lock); | ||
433 | |||
434 | cookie->netfs_data = NULL; | ||
435 | cookie->def = NULL; | ||
436 | |||
437 | /* break links with all the active objects */ | ||
438 | while (!hlist_empty(&cookie->backing_objects)) { | ||
439 | object = hlist_entry(cookie->backing_objects.first, | ||
440 | struct fscache_object, | ||
441 | cookie_link); | ||
442 | |||
443 | _debug("RELEASE OBJ%x", object->debug_id); | ||
444 | |||
445 | /* detach each cache object from the object cookie */ | ||
446 | spin_lock(&object->lock); | ||
447 | hlist_del_init(&object->cookie_link); | ||
448 | |||
449 | cache = object->cache; | ||
450 | object->cookie = NULL; | ||
451 | fscache_raise_event(object, event); | ||
452 | spin_unlock(&object->lock); | ||
453 | |||
454 | if (atomic_dec_and_test(&cookie->usage)) | ||
455 | /* the cookie refcount shouldn't be reduced to 0 yet */ | ||
456 | BUG(); | ||
457 | } | ||
458 | |||
459 | spin_unlock(&cookie->lock); | ||
460 | |||
461 | if (cookie->parent) { | ||
462 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); | ||
463 | ASSERTCMP(atomic_read(&cookie->parent->n_children), >, 0); | ||
464 | atomic_dec(&cookie->parent->n_children); | ||
465 | } | ||
466 | |||
467 | /* finally dispose of the cookie */ | ||
468 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); | ||
469 | fscache_cookie_put(cookie); | ||
470 | |||
471 | _leave(""); | ||
472 | } | ||
473 | EXPORT_SYMBOL(__fscache_relinquish_cookie); | ||
474 | |||
475 | /* | ||
476 | * destroy a cookie | ||
477 | */ | ||
478 | void __fscache_cookie_put(struct fscache_cookie *cookie) | ||
479 | { | ||
480 | struct fscache_cookie *parent; | ||
481 | |||
482 | _enter("%p", cookie); | ||
483 | |||
484 | for (;;) { | ||
485 | _debug("FREE COOKIE %p", cookie); | ||
486 | parent = cookie->parent; | ||
487 | BUG_ON(!hlist_empty(&cookie->backing_objects)); | ||
488 | kmem_cache_free(fscache_cookie_jar, cookie); | ||
489 | |||
490 | if (!parent) | ||
491 | break; | ||
492 | |||
493 | cookie = parent; | ||
494 | BUG_ON(atomic_read(&cookie->usage) <= 0); | ||
495 | if (!atomic_dec_and_test(&cookie->usage)) | ||
496 | break; | ||
497 | } | ||
498 | |||
499 | _leave(""); | ||
500 | } | ||
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c new file mode 100644 index 000000000000..f5b4baee7352 --- /dev/null +++ b/fs/fscache/fsdef.c | |||
@@ -0,0 +1,144 @@ | |||
1 | /* Filesystem index definition | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include "internal.h" | ||
15 | |||
16 | static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, | ||
17 | void *buffer, uint16_t bufmax); | ||
18 | |||
19 | static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, | ||
20 | void *buffer, uint16_t bufmax); | ||
21 | |||
22 | static | ||
23 | enum fscache_checkaux fscache_fsdef_netfs_check_aux(void *cookie_netfs_data, | ||
24 | const void *data, | ||
25 | uint16_t datalen); | ||
26 | |||
27 | /* | ||
28 | * The root index is owned by FS-Cache itself. | ||
29 | * | ||
30 | * When a netfs requests caching facilities, FS-Cache will, if one doesn't | ||
31 | * already exist, create an entry in the root index with the key being the name | ||
32 | * of the netfs ("AFS" for example), and the auxiliary data holding the index | ||
33 | * structure version supplied by the netfs: | ||
34 | * | ||
35 | * FSDEF | ||
36 | * | | ||
37 | * +-----------+ | ||
38 | * | | | ||
39 | * NFS AFS | ||
40 | * [v=1] [v=1] | ||
41 | * | ||
42 | * If an entry with the appropriate name does already exist, the version is | ||
43 | * compared. If the version is different, the entire subtree from that entry | ||
44 | * will be discarded and a new entry created. | ||
45 | * | ||
46 | * The new entry will be an index, and a cookie referring to it will be passed | ||
47 | * to the netfs. This is then the root handle by which the netfs accesses the | ||
48 | * cache. It can create whatever objects it likes in that index, including | ||
49 | * further indices. | ||
50 | */ | ||
51 | static struct fscache_cookie_def fscache_fsdef_index_def = { | ||
52 | .name = ".FS-Cache", | ||
53 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
54 | }; | ||
55 | |||
56 | struct fscache_cookie fscache_fsdef_index = { | ||
57 | .usage = ATOMIC_INIT(1), | ||
58 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), | ||
59 | .backing_objects = HLIST_HEAD_INIT, | ||
60 | .def = &fscache_fsdef_index_def, | ||
61 | }; | ||
62 | EXPORT_SYMBOL(fscache_fsdef_index); | ||
63 | |||
64 | /* | ||
65 | * Definition of an entry in the root index. Each entry is an index, keyed to | ||
66 | * a specific netfs and only applicable to a particular version of the index | ||
67 | * structure used by that netfs. | ||
68 | */ | ||
69 | struct fscache_cookie_def fscache_fsdef_netfs_def = { | ||
70 | .name = "FSDEF.netfs", | ||
71 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
72 | .get_key = fscache_fsdef_netfs_get_key, | ||
73 | .get_aux = fscache_fsdef_netfs_get_aux, | ||
74 | .check_aux = fscache_fsdef_netfs_check_aux, | ||
75 | }; | ||
76 | |||
77 | /* | ||
78 | * get the key data for an FSDEF index record - this is the name of the netfs | ||
79 | * for which this entry is created | ||
80 | */ | ||
81 | static uint16_t fscache_fsdef_netfs_get_key(const void *cookie_netfs_data, | ||
82 | void *buffer, uint16_t bufmax) | ||
83 | { | ||
84 | const struct fscache_netfs *netfs = cookie_netfs_data; | ||
85 | unsigned klen; | ||
86 | |||
87 | _enter("{%s.%u},", netfs->name, netfs->version); | ||
88 | |||
89 | klen = strlen(netfs->name); | ||
90 | if (klen > bufmax) | ||
91 | return 0; | ||
92 | |||
93 | memcpy(buffer, netfs->name, klen); | ||
94 | return klen; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * get the auxiliary data for an FSDEF index record - this is the index | ||
99 | * structure version number of the netfs for which this version is created | ||
100 | */ | ||
101 | static uint16_t fscache_fsdef_netfs_get_aux(const void *cookie_netfs_data, | ||
102 | void *buffer, uint16_t bufmax) | ||
103 | { | ||
104 | const struct fscache_netfs *netfs = cookie_netfs_data; | ||
105 | unsigned dlen; | ||
106 | |||
107 | _enter("{%s.%u},", netfs->name, netfs->version); | ||
108 | |||
109 | dlen = sizeof(uint32_t); | ||
110 | if (dlen > bufmax) | ||
111 | return 0; | ||
112 | |||
113 | memcpy(buffer, &netfs->version, dlen); | ||
114 | return dlen; | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * check that the index structure version number stored in the auxiliary data | ||
119 | * matches the one the netfs gave us | ||
120 | */ | ||
121 | static enum fscache_checkaux fscache_fsdef_netfs_check_aux( | ||
122 | void *cookie_netfs_data, | ||
123 | const void *data, | ||
124 | uint16_t datalen) | ||
125 | { | ||
126 | struct fscache_netfs *netfs = cookie_netfs_data; | ||
127 | uint32_t version; | ||
128 | |||
129 | _enter("{%s},,%hu", netfs->name, datalen); | ||
130 | |||
131 | if (datalen != sizeof(version)) { | ||
132 | _leave(" = OBSOLETE [dl=%d v=%zu]", datalen, sizeof(version)); | ||
133 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
134 | } | ||
135 | |||
136 | memcpy(&version, data, sizeof(version)); | ||
137 | if (version != netfs->version) { | ||
138 | _leave(" = OBSOLETE [ver=%x net=%x]", version, netfs->version); | ||
139 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
140 | } | ||
141 | |||
142 | _leave(" = OKAY"); | ||
143 | return FSCACHE_CHECKAUX_OKAY; | ||
144 | } | ||
diff --git a/fs/fscache/histogram.c b/fs/fscache/histogram.c new file mode 100644 index 000000000000..bad496748a59 --- /dev/null +++ b/fs/fscache/histogram.c | |||
@@ -0,0 +1,109 @@ | |||
1 | /* FS-Cache latency histogram | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL THREAD | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | atomic_t fscache_obj_instantiate_histogram[HZ]; | ||
19 | atomic_t fscache_objs_histogram[HZ]; | ||
20 | atomic_t fscache_ops_histogram[HZ]; | ||
21 | atomic_t fscache_retrieval_delay_histogram[HZ]; | ||
22 | atomic_t fscache_retrieval_histogram[HZ]; | ||
23 | |||
24 | /* | ||
25 | * display the time-taken histogram | ||
26 | */ | ||
27 | static int fscache_histogram_show(struct seq_file *m, void *v) | ||
28 | { | ||
29 | unsigned long index; | ||
30 | unsigned n[5], t; | ||
31 | |||
32 | switch ((unsigned long) v) { | ||
33 | case 1: | ||
34 | seq_puts(m, "JIFS SECS OBJ INST OP RUNS OBJ RUNS " | ||
35 | " RETRV DLY RETRIEVLS\n"); | ||
36 | return 0; | ||
37 | case 2: | ||
38 | seq_puts(m, "===== ===== ========= ========= =========" | ||
39 | " ========= =========\n"); | ||
40 | return 0; | ||
41 | default: | ||
42 | index = (unsigned long) v - 3; | ||
43 | n[0] = atomic_read(&fscache_obj_instantiate_histogram[index]); | ||
44 | n[1] = atomic_read(&fscache_ops_histogram[index]); | ||
45 | n[2] = atomic_read(&fscache_objs_histogram[index]); | ||
46 | n[3] = atomic_read(&fscache_retrieval_delay_histogram[index]); | ||
47 | n[4] = atomic_read(&fscache_retrieval_histogram[index]); | ||
48 | if (!(n[0] | n[1] | n[2] | n[3] | n[4])) | ||
49 | return 0; | ||
50 | |||
51 | t = (index * 1000) / HZ; | ||
52 | |||
53 | seq_printf(m, "%4lu 0.%03u %9u %9u %9u %9u %9u\n", | ||
54 | index, t, n[0], n[1], n[2], n[3], n[4]); | ||
55 | return 0; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * set up the iterator to start reading from the first line | ||
61 | */ | ||
62 | static void *fscache_histogram_start(struct seq_file *m, loff_t *_pos) | ||
63 | { | ||
64 | if ((unsigned long long)*_pos >= HZ + 2) | ||
65 | return NULL; | ||
66 | if (*_pos == 0) | ||
67 | *_pos = 1; | ||
68 | return (void *)(unsigned long) *_pos; | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * move to the next line | ||
73 | */ | ||
74 | static void *fscache_histogram_next(struct seq_file *m, void *v, loff_t *pos) | ||
75 | { | ||
76 | (*pos)++; | ||
77 | return (unsigned long long)*pos > HZ + 2 ? | ||
78 | NULL : (void *)(unsigned long) *pos; | ||
79 | } | ||
80 | |||
/*
 * Finish an iteration pass.  The iterator holds no locks or allocations, so
 * there is nothing to undo.
 */
static void fscache_histogram_stop(struct seq_file *m, void *v)
{
	/* nothing to release */
}
87 | |||
88 | static const struct seq_operations fscache_histogram_ops = { | ||
89 | .start = fscache_histogram_start, | ||
90 | .stop = fscache_histogram_stop, | ||
91 | .next = fscache_histogram_next, | ||
92 | .show = fscache_histogram_show, | ||
93 | }; | ||
94 | |||
95 | /* | ||
96 | * open "/proc/fs/fscache/histogram" to provide latency data | ||
97 | */ | ||
98 | static int fscache_histogram_open(struct inode *inode, struct file *file) | ||
99 | { | ||
100 | return seq_open(file, &fscache_histogram_ops); | ||
101 | } | ||
102 | |||
103 | const struct file_operations fscache_histogram_fops = { | ||
104 | .owner = THIS_MODULE, | ||
105 | .open = fscache_histogram_open, | ||
106 | .read = seq_read, | ||
107 | .llseek = seq_lseek, | ||
108 | .release = seq_release, | ||
109 | }; | ||
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h new file mode 100644 index 000000000000..e0cbd16f6dc9 --- /dev/null +++ b/fs/fscache/internal.h | |||
@@ -0,0 +1,380 @@ | |||
1 | /* Internal definitions for FS-Cache | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | /* | ||
13 | * Lock order, in the order in which multiple locks should be obtained: | ||
14 | * - fscache_addremove_sem | ||
15 | * - cookie->lock | ||
16 | * - cookie->parent->lock | ||
17 | * - cache->object_list_lock | ||
18 | * - object->lock | ||
19 | * - object->parent->lock | ||
20 | * - fscache_thread_lock | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/fscache-cache.h> | ||
25 | #include <linux/sched.h> | ||
26 | |||
27 | #define FSCACHE_MIN_THREADS 4 | ||
28 | #define FSCACHE_MAX_THREADS 32 | ||
29 | |||
30 | /* | ||
31 | * fsc-cache.c | ||
32 | */ | ||
33 | extern struct list_head fscache_cache_list; | ||
34 | extern struct rw_semaphore fscache_addremove_sem; | ||
35 | |||
36 | extern struct fscache_cache *fscache_select_cache_for_object( | ||
37 | struct fscache_cookie *); | ||
38 | |||
39 | /* | ||
40 | * fsc-cookie.c | ||
41 | */ | ||
42 | extern struct kmem_cache *fscache_cookie_jar; | ||
43 | |||
44 | extern void fscache_cookie_init_once(void *); | ||
45 | extern void __fscache_cookie_put(struct fscache_cookie *); | ||
46 | |||
47 | /* | ||
48 | * fsc-fsdef.c | ||
49 | */ | ||
50 | extern struct fscache_cookie fscache_fsdef_index; | ||
51 | extern struct fscache_cookie_def fscache_fsdef_netfs_def; | ||
52 | |||
53 | /* | ||
54 | * fsc-histogram.c | ||
55 | */ | ||
56 | #ifdef CONFIG_FSCACHE_HISTOGRAM | ||
57 | extern atomic_t fscache_obj_instantiate_histogram[HZ]; | ||
58 | extern atomic_t fscache_objs_histogram[HZ]; | ||
59 | extern atomic_t fscache_ops_histogram[HZ]; | ||
60 | extern atomic_t fscache_retrieval_delay_histogram[HZ]; | ||
61 | extern atomic_t fscache_retrieval_histogram[HZ]; | ||
62 | |||
63 | static inline void fscache_hist(atomic_t histogram[], unsigned long start_jif) | ||
64 | { | ||
65 | unsigned long jif = jiffies - start_jif; | ||
66 | if (jif >= HZ) | ||
67 | jif = HZ - 1; | ||
68 | atomic_inc(&histogram[jif]); | ||
69 | } | ||
70 | |||
71 | extern const struct file_operations fscache_histogram_fops; | ||
72 | |||
73 | #else | ||
74 | #define fscache_hist(hist, start_jif) do {} while (0) | ||
75 | #endif | ||
76 | |||
77 | /* | ||
78 | * fsc-main.c | ||
79 | */ | ||
80 | extern unsigned fscache_defer_lookup; | ||
81 | extern unsigned fscache_defer_create; | ||
82 | extern unsigned fscache_debug; | ||
83 | extern struct kobject *fscache_root; | ||
84 | |||
85 | extern int fscache_wait_bit(void *); | ||
86 | extern int fscache_wait_bit_interruptible(void *); | ||
87 | |||
88 | /* | ||
89 | * fsc-object.c | ||
90 | */ | ||
91 | extern void fscache_withdrawing_object(struct fscache_cache *, | ||
92 | struct fscache_object *); | ||
93 | extern void fscache_enqueue_object(struct fscache_object *); | ||
94 | |||
95 | /* | ||
96 | * fsc-operation.c | ||
97 | */ | ||
98 | extern int fscache_submit_exclusive_op(struct fscache_object *, | ||
99 | struct fscache_operation *); | ||
100 | extern int fscache_submit_op(struct fscache_object *, | ||
101 | struct fscache_operation *); | ||
102 | extern void fscache_abort_object(struct fscache_object *); | ||
103 | extern void fscache_start_operations(struct fscache_object *); | ||
104 | extern void fscache_operation_gc(struct work_struct *); | ||
105 | |||
106 | /* | ||
107 | * fsc-proc.c | ||
108 | */ | ||
109 | #ifdef CONFIG_PROC_FS | ||
110 | extern int __init fscache_proc_init(void); | ||
111 | extern void fscache_proc_cleanup(void); | ||
112 | #else | ||
113 | #define fscache_proc_init() (0) | ||
114 | #define fscache_proc_cleanup() do {} while (0) | ||
115 | #endif | ||
116 | |||
117 | /* | ||
118 | * fsc-stats.c | ||
119 | */ | ||
120 | #ifdef CONFIG_FSCACHE_STATS | ||
121 | extern atomic_t fscache_n_ops_processed[FSCACHE_MAX_THREADS]; | ||
122 | extern atomic_t fscache_n_objs_processed[FSCACHE_MAX_THREADS]; | ||
123 | |||
124 | extern atomic_t fscache_n_op_pend; | ||
125 | extern atomic_t fscache_n_op_run; | ||
126 | extern atomic_t fscache_n_op_enqueue; | ||
127 | extern atomic_t fscache_n_op_deferred_release; | ||
128 | extern atomic_t fscache_n_op_release; | ||
129 | extern atomic_t fscache_n_op_gc; | ||
130 | |||
131 | extern atomic_t fscache_n_attr_changed; | ||
132 | extern atomic_t fscache_n_attr_changed_ok; | ||
133 | extern atomic_t fscache_n_attr_changed_nobufs; | ||
134 | extern atomic_t fscache_n_attr_changed_nomem; | ||
135 | extern atomic_t fscache_n_attr_changed_calls; | ||
136 | |||
137 | extern atomic_t fscache_n_allocs; | ||
138 | extern atomic_t fscache_n_allocs_ok; | ||
139 | extern atomic_t fscache_n_allocs_wait; | ||
140 | extern atomic_t fscache_n_allocs_nobufs; | ||
141 | extern atomic_t fscache_n_alloc_ops; | ||
142 | extern atomic_t fscache_n_alloc_op_waits; | ||
143 | |||
144 | extern atomic_t fscache_n_retrievals; | ||
145 | extern atomic_t fscache_n_retrievals_ok; | ||
146 | extern atomic_t fscache_n_retrievals_wait; | ||
147 | extern atomic_t fscache_n_retrievals_nodata; | ||
148 | extern atomic_t fscache_n_retrievals_nobufs; | ||
149 | extern atomic_t fscache_n_retrievals_intr; | ||
150 | extern atomic_t fscache_n_retrievals_nomem; | ||
151 | extern atomic_t fscache_n_retrieval_ops; | ||
152 | extern atomic_t fscache_n_retrieval_op_waits; | ||
153 | |||
154 | extern atomic_t fscache_n_stores; | ||
155 | extern atomic_t fscache_n_stores_ok; | ||
156 | extern atomic_t fscache_n_stores_again; | ||
157 | extern atomic_t fscache_n_stores_nobufs; | ||
158 | extern atomic_t fscache_n_stores_oom; | ||
159 | extern atomic_t fscache_n_store_ops; | ||
160 | extern atomic_t fscache_n_store_calls; | ||
161 | |||
162 | extern atomic_t fscache_n_marks; | ||
163 | extern atomic_t fscache_n_uncaches; | ||
164 | |||
165 | extern atomic_t fscache_n_acquires; | ||
166 | extern atomic_t fscache_n_acquires_null; | ||
167 | extern atomic_t fscache_n_acquires_no_cache; | ||
168 | extern atomic_t fscache_n_acquires_ok; | ||
169 | extern atomic_t fscache_n_acquires_nobufs; | ||
170 | extern atomic_t fscache_n_acquires_oom; | ||
171 | |||
172 | extern atomic_t fscache_n_updates; | ||
173 | extern atomic_t fscache_n_updates_null; | ||
174 | extern atomic_t fscache_n_updates_run; | ||
175 | |||
176 | extern atomic_t fscache_n_relinquishes; | ||
177 | extern atomic_t fscache_n_relinquishes_null; | ||
178 | extern atomic_t fscache_n_relinquishes_waitcrt; | ||
179 | |||
180 | extern atomic_t fscache_n_cookie_index; | ||
181 | extern atomic_t fscache_n_cookie_data; | ||
182 | extern atomic_t fscache_n_cookie_special; | ||
183 | |||
184 | extern atomic_t fscache_n_object_alloc; | ||
185 | extern atomic_t fscache_n_object_no_alloc; | ||
186 | extern atomic_t fscache_n_object_lookups; | ||
187 | extern atomic_t fscache_n_object_lookups_negative; | ||
188 | extern atomic_t fscache_n_object_lookups_positive; | ||
189 | extern atomic_t fscache_n_object_created; | ||
190 | extern atomic_t fscache_n_object_avail; | ||
191 | extern atomic_t fscache_n_object_dead; | ||
192 | |||
193 | extern atomic_t fscache_n_checkaux_none; | ||
194 | extern atomic_t fscache_n_checkaux_okay; | ||
195 | extern atomic_t fscache_n_checkaux_update; | ||
196 | extern atomic_t fscache_n_checkaux_obsolete; | ||
197 | |||
198 | static inline void fscache_stat(atomic_t *stat) | ||
199 | { | ||
200 | atomic_inc(stat); | ||
201 | } | ||
202 | |||
203 | extern const struct file_operations fscache_stats_fops; | ||
204 | #else | ||
205 | |||
206 | #define fscache_stat(stat) do {} while (0) | ||
207 | #endif | ||
208 | |||
209 | /* | ||
210 | * raise an event on an object | ||
211 | * - if the event is not masked for that object, then the object is | ||
212 | * queued for attention by the thread pool. | ||
213 | */ | ||
214 | static inline void fscache_raise_event(struct fscache_object *object, | ||
215 | unsigned event) | ||
216 | { | ||
217 | if (!test_and_set_bit(event, &object->events) && | ||
218 | test_bit(event, &object->event_mask)) | ||
219 | fscache_enqueue_object(object); | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * drop a reference to a cookie | ||
224 | */ | ||
225 | static inline void fscache_cookie_put(struct fscache_cookie *cookie) | ||
226 | { | ||
227 | BUG_ON(atomic_read(&cookie->usage) <= 0); | ||
228 | if (atomic_dec_and_test(&cookie->usage)) | ||
229 | __fscache_cookie_put(cookie); | ||
230 | } | ||
231 | |||
232 | /* | ||
233 | * get an extra reference to a netfs retrieval context | ||
234 | */ | ||
235 | static inline | ||
236 | void *fscache_get_context(struct fscache_cookie *cookie, void *context) | ||
237 | { | ||
238 | if (cookie->def->get_context) | ||
239 | cookie->def->get_context(cookie->netfs_data, context); | ||
240 | return context; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * release a reference to a netfs retrieval context | ||
245 | */ | ||
246 | static inline | ||
247 | void fscache_put_context(struct fscache_cookie *cookie, void *context) | ||
248 | { | ||
249 | if (cookie->def->put_context) | ||
250 | cookie->def->put_context(cookie->netfs_data, context); | ||
251 | } | ||
252 | |||
253 | /*****************************************************************************/ | ||
/*
 * debug tracing
 *
 * The k*() macros always print; the _*() macros are the ones used in the
 * code and are selected below according to the build configuration.
 */

/* print a KERN_DEBUG line prefixed with the current task's command name */
#define dbgprintk(FMT, ...) \
	printk(KERN_DEBUG "[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

/* make sure we maintain the format strings, even when debugging is disabled */
static inline __attribute__((format(printf, 1, 2)))
void _dbprintk(const char *fmt, ...)
{
}

/* unconditional function entry / function exit / free-form trace lines */
#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)

/* compiled to nothing, but still format-checked through _dbprintk() */
#define kjournal(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)

#ifdef __KDEBUG
/* forced on at compile time: every trace point prints */
#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)

#elif defined(CONFIG_FSCACHE_DEBUG)
/* runtime selectable: each trace point is gated by __do_kdebug() */
#define _enter(FMT, ...)			\
do {						\
	if (__do_kdebug(ENTER))			\
		kenter(FMT, ##__VA_ARGS__);	\
} while (0)

#define _leave(FMT, ...)			\
do {						\
	if (__do_kdebug(LEAVE))			\
		kleave(FMT, ##__VA_ARGS__);	\
} while (0)

#define _debug(FMT, ...)			\
do {						\
	if (__do_kdebug(DEBUG))			\
		kdebug(FMT, ##__VA_ARGS__);	\
} while (0)

#else
/* disabled: arguments are only passed to the empty _dbprintk() */
#define _enter(FMT, ...) _dbprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define _leave(FMT, ...) _dbprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) _dbprintk(FMT, ##__VA_ARGS__)
#endif
301 | |||
/*
 * determine whether a particular optional debugging point should be logged
 * - we need to go through three steps to persuade cpp to correctly join the
 *   shorthand in FSCACHE_DEBUG_LEVEL with its prefix
 * - each level owns a group of three bits in fscache_debug (level number
 *   times three), one bit per point type: enter, leave and debug
 */
#define ____do_kdebug(LEVEL, POINT) \
	unlikely((fscache_debug & \
		  (FSCACHE_POINT_##POINT << (FSCACHE_DEBUG_ ## LEVEL * 3))))
#define ___do_kdebug(LEVEL, POINT) \
	____do_kdebug(LEVEL, POINT)
#define __do_kdebug(POINT) \
	___do_kdebug(FSCACHE_DEBUG_LEVEL, POINT)

/*
 * level numbers, selected per source file through FSCACHE_DEBUG_LEVEL
 * NOTE(review): histogram.c selects level THREAD, for which no
 * FSCACHE_DEBUG_THREAD is defined in this header - confirm it is either
 * defined elsewhere or never expanded
 */
#define FSCACHE_DEBUG_CACHE	0
#define FSCACHE_DEBUG_COOKIE	1
#define FSCACHE_DEBUG_PAGE	2
#define FSCACHE_DEBUG_OPERATION	3

/* point-type bits within a level's group */
#define FSCACHE_POINT_ENTER	1
#define FSCACHE_POINT_LEAVE	2
#define FSCACHE_POINT_DEBUG	4

/* files that do not choose a level get the CACHE level */
#ifndef FSCACHE_DEBUG_LEVEL
#define FSCACHE_DEBUG_LEVEL CACHE
#endif
327 | |||
/*
 * assertions
 *
 * ASSERTIF() and ASSERTIFCMP() only check their condition when the guard C is
 * true; ASSERT() and ASSERTCMP() are the same checks with the guard fixed at
 * 1.  The *CMP forms additionally print both operands in hex before BUG().
 * Note that the operands of the *CMP forms are evaluated a second time when
 * the failure message is printed.
 */
#if 1 /* defined(__KDEBUGALL) */

#define ASSERTIF(C, X)							\
do {									\
	if (unlikely((C) && !(X))) {					\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "FS-Cache: Assertion failed\n");	\
		BUG();							\
	}								\
} while (0)

#define ASSERTIFCMP(C, X, OP, Y)					\
do {									\
	if (unlikely((C) && !((X) OP (Y)))) {				\
		printk(KERN_ERR "\n");					\
		printk(KERN_ERR "FS-Cache: Assertion failed\n");	\
		printk(KERN_ERR "%lx " #OP " %lx is false\n",		\
		       (unsigned long)(X), (unsigned long)(Y));		\
		BUG();							\
	}								\
} while (0)

/* unconditional forms: the guarded checks with the guard always true */
#define ASSERT(X)		ASSERTIF(1, X)
#define ASSERTCMP(X, OP, Y)	ASSERTIFCMP(1, X, OP, Y)

#else

#define ASSERT(X)			do {} while (0)
#define ASSERTCMP(X, OP, Y)		do {} while (0)
#define ASSERTIF(C, X)			do {} while (0)
#define ASSERTIFCMP(C, X, OP, Y)	do {} while (0)

#endif /* assert or not */
diff --git a/fs/fscache/main.c b/fs/fscache/main.c new file mode 100644 index 000000000000..4de41b597499 --- /dev/null +++ b/fs/fscache/main.c | |||
@@ -0,0 +1,124 @@ | |||
1 | /* General filesystem local caching manager | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL CACHE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/completion.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include "internal.h" | ||
19 | |||
20 | MODULE_DESCRIPTION("FS Cache Manager"); | ||
21 | MODULE_AUTHOR("Red Hat, Inc."); | ||
22 | MODULE_LICENSE("GPL"); | ||
23 | |||
24 | unsigned fscache_defer_lookup = 1; | ||
25 | module_param_named(defer_lookup, fscache_defer_lookup, uint, | ||
26 | S_IWUSR | S_IRUGO); | ||
27 | MODULE_PARM_DESC(fscache_defer_lookup, | ||
28 | "Defer cookie lookup to background thread"); | ||
29 | |||
30 | unsigned fscache_defer_create = 1; | ||
31 | module_param_named(defer_create, fscache_defer_create, uint, | ||
32 | S_IWUSR | S_IRUGO); | ||
33 | MODULE_PARM_DESC(fscache_defer_create, | ||
34 | "Defer cookie creation to background thread"); | ||
35 | |||
36 | unsigned fscache_debug; | ||
37 | module_param_named(debug, fscache_debug, uint, | ||
38 | S_IWUSR | S_IRUGO); | ||
39 | MODULE_PARM_DESC(fscache_debug, | ||
40 | "FS-Cache debugging mask"); | ||
41 | |||
42 | struct kobject *fscache_root; | ||
43 | |||
44 | /* | ||
45 | * initialise the fs caching module | ||
46 | */ | ||
47 | static int __init fscache_init(void) | ||
48 | { | ||
49 | int ret; | ||
50 | |||
51 | ret = slow_work_register_user(); | ||
52 | if (ret < 0) | ||
53 | goto error_slow_work; | ||
54 | |||
55 | ret = fscache_proc_init(); | ||
56 | if (ret < 0) | ||
57 | goto error_proc; | ||
58 | |||
59 | fscache_cookie_jar = kmem_cache_create("fscache_cookie_jar", | ||
60 | sizeof(struct fscache_cookie), | ||
61 | 0, | ||
62 | 0, | ||
63 | fscache_cookie_init_once); | ||
64 | if (!fscache_cookie_jar) { | ||
65 | printk(KERN_NOTICE | ||
66 | "FS-Cache: Failed to allocate a cookie jar\n"); | ||
67 | ret = -ENOMEM; | ||
68 | goto error_cookie_jar; | ||
69 | } | ||
70 | |||
71 | fscache_root = kobject_create_and_add("fscache", kernel_kobj); | ||
72 | if (!fscache_root) | ||
73 | goto error_kobj; | ||
74 | |||
75 | printk(KERN_NOTICE "FS-Cache: Loaded\n"); | ||
76 | return 0; | ||
77 | |||
78 | error_kobj: | ||
79 | kmem_cache_destroy(fscache_cookie_jar); | ||
80 | error_cookie_jar: | ||
81 | fscache_proc_cleanup(); | ||
82 | error_proc: | ||
83 | slow_work_unregister_user(); | ||
84 | error_slow_work: | ||
85 | return ret; | ||
86 | } | ||
87 | |||
88 | fs_initcall(fscache_init); | ||
89 | |||
90 | /* | ||
91 | * clean up on module removal | ||
92 | */ | ||
93 | static void __exit fscache_exit(void) | ||
94 | { | ||
95 | _enter(""); | ||
96 | |||
97 | kobject_put(fscache_root); | ||
98 | kmem_cache_destroy(fscache_cookie_jar); | ||
99 | fscache_proc_cleanup(); | ||
100 | slow_work_unregister_user(); | ||
101 | printk(KERN_NOTICE "FS-Cache: Unloaded\n"); | ||
102 | } | ||
103 | |||
104 | module_exit(fscache_exit); | ||
105 | |||
/*
 * wait_on_bit() sleep function for uninterruptible waiting
 * - yields the CPU once and always reports that waiting should continue
 */
int fscache_wait_bit(void *flags)
{
	const int keep_waiting = 0;

	schedule();
	return keep_waiting;
}
EXPORT_SYMBOL(fscache_wait_bit);
115 | |||
116 | /* | ||
117 | * wait_on_bit() sleep function for interruptible waiting | ||
118 | */ | ||
119 | int fscache_wait_bit_interruptible(void *flags) | ||
120 | { | ||
121 | schedule(); | ||
122 | return signal_pending(current); | ||
123 | } | ||
124 | EXPORT_SYMBOL(fscache_wait_bit_interruptible); | ||
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c new file mode 100644 index 000000000000..e028b8eb1c40 --- /dev/null +++ b/fs/fscache/netfs.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* FS-Cache netfs (client) registration | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/slab.h> | ||
15 | #include "internal.h" | ||
16 | |||
17 | static LIST_HEAD(fscache_netfs_list); | ||
18 | |||
19 | /* | ||
20 | * register a network filesystem for caching | ||
21 | */ | ||
22 | int __fscache_register_netfs(struct fscache_netfs *netfs) | ||
23 | { | ||
24 | struct fscache_netfs *ptr; | ||
25 | int ret; | ||
26 | |||
27 | _enter("{%s}", netfs->name); | ||
28 | |||
29 | INIT_LIST_HEAD(&netfs->link); | ||
30 | |||
31 | /* allocate a cookie for the primary index */ | ||
32 | netfs->primary_index = | ||
33 | kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); | ||
34 | |||
35 | if (!netfs->primary_index) { | ||
36 | _leave(" = -ENOMEM"); | ||
37 | return -ENOMEM; | ||
38 | } | ||
39 | |||
40 | /* initialise the primary index cookie */ | ||
41 | atomic_set(&netfs->primary_index->usage, 1); | ||
42 | atomic_set(&netfs->primary_index->n_children, 0); | ||
43 | |||
44 | netfs->primary_index->def = &fscache_fsdef_netfs_def; | ||
45 | netfs->primary_index->parent = &fscache_fsdef_index; | ||
46 | netfs->primary_index->netfs_data = netfs; | ||
47 | |||
48 | atomic_inc(&netfs->primary_index->parent->usage); | ||
49 | atomic_inc(&netfs->primary_index->parent->n_children); | ||
50 | |||
51 | spin_lock_init(&netfs->primary_index->lock); | ||
52 | INIT_HLIST_HEAD(&netfs->primary_index->backing_objects); | ||
53 | |||
54 | /* check the netfs type is not already present */ | ||
55 | down_write(&fscache_addremove_sem); | ||
56 | |||
57 | ret = -EEXIST; | ||
58 | list_for_each_entry(ptr, &fscache_netfs_list, link) { | ||
59 | if (strcmp(ptr->name, netfs->name) == 0) | ||
60 | goto already_registered; | ||
61 | } | ||
62 | |||
63 | list_add(&netfs->link, &fscache_netfs_list); | ||
64 | ret = 0; | ||
65 | |||
66 | printk(KERN_NOTICE "FS-Cache: Netfs '%s' registered for caching\n", | ||
67 | netfs->name); | ||
68 | |||
69 | already_registered: | ||
70 | up_write(&fscache_addremove_sem); | ||
71 | |||
72 | if (ret < 0) { | ||
73 | netfs->primary_index->parent = NULL; | ||
74 | __fscache_cookie_put(netfs->primary_index); | ||
75 | netfs->primary_index = NULL; | ||
76 | } | ||
77 | |||
78 | _leave(" = %d", ret); | ||
79 | return ret; | ||
80 | } | ||
81 | EXPORT_SYMBOL(__fscache_register_netfs); | ||
82 | |||
83 | /* | ||
84 | * unregister a network filesystem from the cache | ||
85 | * - all cookies must have been released first | ||
86 | */ | ||
87 | void __fscache_unregister_netfs(struct fscache_netfs *netfs) | ||
88 | { | ||
89 | _enter("{%s.%u}", netfs->name, netfs->version); | ||
90 | |||
91 | down_write(&fscache_addremove_sem); | ||
92 | |||
93 | list_del(&netfs->link); | ||
94 | fscache_relinquish_cookie(netfs->primary_index, 0); | ||
95 | |||
96 | up_write(&fscache_addremove_sem); | ||
97 | |||
98 | printk(KERN_NOTICE "FS-Cache: Netfs '%s' unregistered from caching\n", | ||
99 | netfs->name); | ||
100 | |||
101 | _leave(""); | ||
102 | } | ||
103 | EXPORT_SYMBOL(__fscache_unregister_netfs); | ||
diff --git a/fs/fscache/object.c b/fs/fscache/object.c new file mode 100644 index 000000000000..392a41b1b79d --- /dev/null +++ b/fs/fscache/object.c | |||
@@ -0,0 +1,810 @@ | |||
1 | /* FS-Cache object state machine handler | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/object.txt for a description of the | ||
12 | * object state machine and the in-kernel representations. | ||
13 | */ | ||
14 | |||
15 | #define FSCACHE_DEBUG_LEVEL COOKIE | ||
16 | #include <linux/module.h> | ||
17 | #include "internal.h" | ||
18 | |||
19 | const char *fscache_object_states[] = { | ||
20 | [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", | ||
21 | [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", | ||
22 | [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", | ||
23 | [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", | ||
24 | [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", | ||
25 | [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", | ||
26 | [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", | ||
27 | [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", | ||
28 | [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT", | ||
29 | [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING", | ||
30 | [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING", | ||
31 | [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING", | ||
32 | [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD", | ||
33 | }; | ||
34 | EXPORT_SYMBOL(fscache_object_states); | ||
35 | |||
36 | static void fscache_object_slow_work_put_ref(struct slow_work *); | ||
37 | static int fscache_object_slow_work_get_ref(struct slow_work *); | ||
38 | static void fscache_object_slow_work_execute(struct slow_work *); | ||
39 | static void fscache_initialise_object(struct fscache_object *); | ||
40 | static void fscache_lookup_object(struct fscache_object *); | ||
41 | static void fscache_object_available(struct fscache_object *); | ||
42 | static void fscache_release_object(struct fscache_object *); | ||
43 | static void fscache_withdraw_object(struct fscache_object *); | ||
44 | static void fscache_enqueue_dependents(struct fscache_object *); | ||
45 | static void fscache_dequeue_object(struct fscache_object *); | ||
46 | |||
47 | const struct slow_work_ops fscache_object_slow_work_ops = { | ||
48 | .get_ref = fscache_object_slow_work_get_ref, | ||
49 | .put_ref = fscache_object_slow_work_put_ref, | ||
50 | .execute = fscache_object_slow_work_execute, | ||
51 | }; | ||
52 | EXPORT_SYMBOL(fscache_object_slow_work_ops); | ||
53 | |||
54 | /* | ||
55 | * we need to notify the parent when an op completes that we had outstanding | ||
56 | * upon it | ||
57 | */ | ||
58 | static inline void fscache_done_parent_op(struct fscache_object *object) | ||
59 | { | ||
60 | struct fscache_object *parent = object->parent; | ||
61 | |||
62 | _enter("OBJ%x {OBJ%x,%x}", | ||
63 | object->debug_id, parent->debug_id, parent->n_ops); | ||
64 | |||
65 | spin_lock_nested(&parent->lock, 1); | ||
66 | parent->n_ops--; | ||
67 | parent->n_obj_ops--; | ||
68 | if (parent->n_ops == 0) | ||
69 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | ||
70 | spin_unlock(&parent->lock); | ||
71 | } | ||
72 | |||
73 | /* | ||
74 | * process events that have been sent to an object's state machine | ||
75 | * - initiates parent lookup | ||
76 | * - does object lookup | ||
77 | * - does object creation | ||
78 | * - does object recycling and retirement | ||
79 | * - does object withdrawal | ||
80 | */ | ||
81 | static void fscache_object_state_machine(struct fscache_object *object) | ||
82 | { | ||
83 | enum fscache_object_state new_state; | ||
84 | |||
85 | ASSERT(object != NULL); | ||
86 | |||
87 | _enter("{OBJ%x,%s,%lx}", | ||
88 | object->debug_id, fscache_object_states[object->state], | ||
89 | object->events); | ||
90 | |||
91 | switch (object->state) { | ||
92 | /* wait for the parent object to become ready */ | ||
93 | case FSCACHE_OBJECT_INIT: | ||
94 | object->event_mask = | ||
95 | ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
96 | fscache_initialise_object(object); | ||
97 | goto done; | ||
98 | |||
99 | /* look up the object metadata on disk */ | ||
100 | case FSCACHE_OBJECT_LOOKING_UP: | ||
101 | fscache_lookup_object(object); | ||
102 | goto lookup_transit; | ||
103 | |||
104 | /* create the object metadata on disk */ | ||
105 | case FSCACHE_OBJECT_CREATING: | ||
106 | fscache_lookup_object(object); | ||
107 | goto lookup_transit; | ||
108 | |||
109 | /* handle an object becoming available; start pending | ||
110 | * operations and queue dependent operations for processing */ | ||
111 | case FSCACHE_OBJECT_AVAILABLE: | ||
112 | fscache_object_available(object); | ||
113 | goto active_transit; | ||
114 | |||
115 | /* normal running state */ | ||
116 | case FSCACHE_OBJECT_ACTIVE: | ||
117 | goto active_transit; | ||
118 | |||
119 | /* update the object metadata on disk */ | ||
120 | case FSCACHE_OBJECT_UPDATING: | ||
121 | clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); | ||
122 | fscache_stat(&fscache_n_updates_run); | ||
123 | object->cache->ops->update_object(object); | ||
124 | goto active_transit; | ||
125 | |||
126 | /* handle an object dying during lookup or creation */ | ||
127 | case FSCACHE_OBJECT_LC_DYING: | ||
128 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
129 | object->cache->ops->lookup_complete(object); | ||
130 | |||
131 | spin_lock(&object->lock); | ||
132 | object->state = FSCACHE_OBJECT_DYING; | ||
133 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
134 | &object->cookie->flags)) | ||
135 | wake_up_bit(&object->cookie->flags, | ||
136 | FSCACHE_COOKIE_CREATING); | ||
137 | spin_unlock(&object->lock); | ||
138 | |||
139 | fscache_done_parent_op(object); | ||
140 | |||
141 | /* wait for completion of all active operations on this object | ||
142 | * and the death of all child objects of this object */ | ||
143 | case FSCACHE_OBJECT_DYING: | ||
144 | dying: | ||
145 | clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events); | ||
146 | spin_lock(&object->lock); | ||
147 | _debug("dying OBJ%x {%d,%d}", | ||
148 | object->debug_id, object->n_ops, object->n_children); | ||
149 | if (object->n_ops == 0 && object->n_children == 0) { | ||
150 | object->event_mask &= | ||
151 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
152 | object->event_mask |= | ||
153 | (1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
154 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
155 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
156 | (1 << FSCACHE_OBJECT_EV_ERROR); | ||
157 | } else { | ||
158 | object->event_mask &= | ||
159 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
160 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
161 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
162 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
163 | object->event_mask |= | ||
164 | 1 << FSCACHE_OBJECT_EV_CLEARED; | ||
165 | } | ||
166 | spin_unlock(&object->lock); | ||
167 | fscache_enqueue_dependents(object); | ||
168 | goto terminal_transit; | ||
169 | |||
170 | /* handle an abort during initialisation */ | ||
171 | case FSCACHE_OBJECT_ABORT_INIT: | ||
172 | _debug("handle abort init %lx", object->events); | ||
173 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
174 | |||
175 | spin_lock(&object->lock); | ||
176 | fscache_dequeue_object(object); | ||
177 | |||
178 | object->state = FSCACHE_OBJECT_DYING; | ||
179 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
180 | &object->cookie->flags)) | ||
181 | wake_up_bit(&object->cookie->flags, | ||
182 | FSCACHE_COOKIE_CREATING); | ||
183 | spin_unlock(&object->lock); | ||
184 | goto dying; | ||
185 | |||
186 | /* handle the netfs releasing an object and possibly marking it | ||
187 | * obsolete too */ | ||
188 | case FSCACHE_OBJECT_RELEASING: | ||
189 | case FSCACHE_OBJECT_RECYCLING: | ||
190 | object->event_mask &= | ||
191 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
192 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
193 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
194 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
195 | fscache_release_object(object); | ||
196 | spin_lock(&object->lock); | ||
197 | object->state = FSCACHE_OBJECT_DEAD; | ||
198 | spin_unlock(&object->lock); | ||
199 | fscache_stat(&fscache_n_object_dead); | ||
200 | goto terminal_transit; | ||
201 | |||
202 | /* handle the parent cache of this object being withdrawn from | ||
203 | * active service */ | ||
204 | case FSCACHE_OBJECT_WITHDRAWING: | ||
205 | object->event_mask &= | ||
206 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
207 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
208 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
209 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
210 | fscache_withdraw_object(object); | ||
211 | spin_lock(&object->lock); | ||
212 | object->state = FSCACHE_OBJECT_DEAD; | ||
213 | spin_unlock(&object->lock); | ||
214 | fscache_stat(&fscache_n_object_dead); | ||
215 | goto terminal_transit; | ||
216 | |||
217 | /* complain about the object being woken up once it is | ||
218 | * deceased */ | ||
219 | case FSCACHE_OBJECT_DEAD: | ||
220 | printk(KERN_ERR "FS-Cache:" | ||
221 | " Unexpected event in dead state %lx\n", | ||
222 | object->events & object->event_mask); | ||
223 | BUG(); | ||
224 | |||
225 | default: | ||
226 | printk(KERN_ERR "FS-Cache: Unknown object state %u\n", | ||
227 | object->state); | ||
228 | BUG(); | ||
229 | } | ||
230 | |||
231 | /* determine the transition from a lookup state */ | ||
232 | lookup_transit: | ||
233 | switch (fls(object->events & object->event_mask) - 1) { | ||
234 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
235 | case FSCACHE_OBJECT_EV_RETIRE: | ||
236 | case FSCACHE_OBJECT_EV_RELEASE: | ||
237 | case FSCACHE_OBJECT_EV_ERROR: | ||
238 | new_state = FSCACHE_OBJECT_LC_DYING; | ||
239 | goto change_state; | ||
240 | case FSCACHE_OBJECT_EV_REQUEUE: | ||
241 | goto done; | ||
242 | case -1: | ||
243 | goto done; /* sleep until event */ | ||
244 | default: | ||
245 | goto unsupported_event; | ||
246 | } | ||
247 | |||
248 | /* determine the transition from an active state */ | ||
249 | active_transit: | ||
250 | switch (fls(object->events & object->event_mask) - 1) { | ||
251 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
252 | case FSCACHE_OBJECT_EV_RETIRE: | ||
253 | case FSCACHE_OBJECT_EV_RELEASE: | ||
254 | case FSCACHE_OBJECT_EV_ERROR: | ||
255 | new_state = FSCACHE_OBJECT_DYING; | ||
256 | goto change_state; | ||
257 | case FSCACHE_OBJECT_EV_UPDATE: | ||
258 | new_state = FSCACHE_OBJECT_UPDATING; | ||
259 | goto change_state; | ||
260 | case -1: | ||
261 | new_state = FSCACHE_OBJECT_ACTIVE; | ||
262 | goto change_state; /* sleep until event */ | ||
263 | default: | ||
264 | goto unsupported_event; | ||
265 | } | ||
266 | |||
267 | /* determine the transition from a terminal state */ | ||
268 | terminal_transit: | ||
269 | switch (fls(object->events & object->event_mask) - 1) { | ||
270 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
271 | new_state = FSCACHE_OBJECT_WITHDRAWING; | ||
272 | goto change_state; | ||
273 | case FSCACHE_OBJECT_EV_RETIRE: | ||
274 | new_state = FSCACHE_OBJECT_RECYCLING; | ||
275 | goto change_state; | ||
276 | case FSCACHE_OBJECT_EV_RELEASE: | ||
277 | new_state = FSCACHE_OBJECT_RELEASING; | ||
278 | goto change_state; | ||
279 | case FSCACHE_OBJECT_EV_ERROR: | ||
280 | new_state = FSCACHE_OBJECT_WITHDRAWING; | ||
281 | goto change_state; | ||
282 | case FSCACHE_OBJECT_EV_CLEARED: | ||
283 | new_state = FSCACHE_OBJECT_DYING; | ||
284 | goto change_state; | ||
285 | case -1: | ||
286 | goto done; /* sleep until event */ | ||
287 | default: | ||
288 | goto unsupported_event; | ||
289 | } | ||
290 | |||
291 | change_state: | ||
292 | spin_lock(&object->lock); | ||
293 | object->state = new_state; | ||
294 | spin_unlock(&object->lock); | ||
295 | |||
296 | done: | ||
297 | _leave(" [->%s]", fscache_object_states[object->state]); | ||
298 | return; | ||
299 | |||
300 | unsupported_event: | ||
301 | printk(KERN_ERR "FS-Cache:" | ||
302 | " Unsupported event %lx [mask %lx] in state %s\n", | ||
303 | object->events, object->event_mask, | ||
304 | fscache_object_states[object->state]); | ||
305 | BUG(); | ||
306 | } | ||
307 | |||
308 | /* | ||
309 | * execute an object | ||
310 | */ | ||
311 | static void fscache_object_slow_work_execute(struct slow_work *work) | ||
312 | { | ||
313 | struct fscache_object *object = | ||
314 | container_of(work, struct fscache_object, work); | ||
315 | unsigned long start; | ||
316 | |||
317 | _enter("{OBJ%x}", object->debug_id); | ||
318 | |||
319 | clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
320 | |||
321 | start = jiffies; | ||
322 | fscache_object_state_machine(object); | ||
323 | fscache_hist(fscache_objs_histogram, start); | ||
324 | if (object->events & object->event_mask) | ||
325 | fscache_enqueue_object(object); | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * initialise an object | ||
330 | * - check the specified object's parent to see if we can make use of it | ||
331 | * immediately to do a creation | ||
332 | * - we may need to start the process of creating a parent and we need to wait | ||
333 | * for the parent's lookup and creation to complete if it's not there yet | ||
334 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
335 | * leaf-most cookies of the object and all its children | ||
336 | */ | ||
337 | static void fscache_initialise_object(struct fscache_object *object) | ||
338 | { | ||
339 | struct fscache_object *parent; | ||
340 | |||
341 | _enter(""); | ||
342 | ASSERT(object->cookie != NULL); | ||
343 | ASSERT(object->cookie->parent != NULL); | ||
344 | ASSERT(list_empty(&object->work.link)); | ||
345 | |||
346 | if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | | ||
347 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
348 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
349 | (1 << FSCACHE_OBJECT_EV_WITHDRAW))) { | ||
350 | _debug("abort init %lx", object->events); | ||
351 | spin_lock(&object->lock); | ||
352 | object->state = FSCACHE_OBJECT_ABORT_INIT; | ||
353 | spin_unlock(&object->lock); | ||
354 | return; | ||
355 | } | ||
356 | |||
357 | spin_lock(&object->cookie->lock); | ||
358 | spin_lock_nested(&object->cookie->parent->lock, 1); | ||
359 | |||
360 | parent = object->parent; | ||
361 | if (!parent) { | ||
362 | _debug("no parent"); | ||
363 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | ||
364 | } else { | ||
365 | spin_lock(&object->lock); | ||
366 | spin_lock_nested(&parent->lock, 1); | ||
367 | _debug("parent %s", fscache_object_states[parent->state]); | ||
368 | |||
369 | if (parent->state >= FSCACHE_OBJECT_DYING) { | ||
370 | _debug("bad parent"); | ||
371 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | ||
372 | } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) { | ||
373 | _debug("wait"); | ||
374 | |||
375 | /* we may get woken up in this state by child objects | ||
376 | * binding on to us, so we need to make sure we don't | ||
377 | * add ourself to the list multiple times */ | ||
378 | if (list_empty(&object->dep_link)) { | ||
379 | object->cache->ops->grab_object(object); | ||
380 | list_add(&object->dep_link, | ||
381 | &parent->dependents); | ||
382 | |||
383 | /* fscache_acquire_non_index_cookie() uses this | ||
384 | * to wake the chain up */ | ||
385 | if (parent->state == FSCACHE_OBJECT_INIT) | ||
386 | fscache_enqueue_object(parent); | ||
387 | } | ||
388 | } else { | ||
389 | _debug("go"); | ||
390 | parent->n_ops++; | ||
391 | parent->n_obj_ops++; | ||
392 | object->lookup_jif = jiffies; | ||
393 | object->state = FSCACHE_OBJECT_LOOKING_UP; | ||
394 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
395 | } | ||
396 | |||
397 | spin_unlock(&parent->lock); | ||
398 | spin_unlock(&object->lock); | ||
399 | } | ||
400 | |||
401 | spin_unlock(&object->cookie->parent->lock); | ||
402 | spin_unlock(&object->cookie->lock); | ||
403 | _leave(""); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * look an object up in the cache from which it was allocated | ||
408 | * - we hold an "access lock" on the parent object, so the parent object cannot | ||
409 | * be withdrawn by either party till we've finished | ||
410 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
411 | * leaf-most cookies of the object and all its children | ||
412 | */ | ||
413 | static void fscache_lookup_object(struct fscache_object *object) | ||
414 | { | ||
415 | struct fscache_cookie *cookie = object->cookie; | ||
416 | struct fscache_object *parent; | ||
417 | |||
418 | _enter(""); | ||
419 | |||
420 | parent = object->parent; | ||
421 | ASSERT(parent != NULL); | ||
422 | ASSERTCMP(parent->n_ops, >, 0); | ||
423 | ASSERTCMP(parent->n_obj_ops, >, 0); | ||
424 | |||
425 | /* make sure the parent is still available */ | ||
426 | ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE); | ||
427 | |||
428 | if (parent->state >= FSCACHE_OBJECT_DYING || | ||
429 | test_bit(FSCACHE_IOERROR, &object->cache->flags)) { | ||
430 | _debug("unavailable"); | ||
431 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | ||
432 | _leave(""); | ||
433 | return; | ||
434 | } | ||
435 | |||
436 | _debug("LOOKUP \"%s/%s\" in \"%s\"", | ||
437 | parent->cookie->def->name, cookie->def->name, | ||
438 | object->cache->tag->name); | ||
439 | |||
440 | fscache_stat(&fscache_n_object_lookups); | ||
441 | object->cache->ops->lookup_object(object); | ||
442 | |||
443 | if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) | ||
444 | set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); | ||
445 | |||
446 | _leave(""); | ||
447 | } | ||
448 | |||
449 | /** | ||
450 | * fscache_object_lookup_negative - Note negative cookie lookup | ||
451 | * @object: Object pointing to cookie to mark | ||
452 | * | ||
453 | * Note negative lookup, permitting those waiting to read data from an already | ||
454 | * existing backing object to continue as there's no data for them to read. | ||
455 | */ | ||
456 | void fscache_object_lookup_negative(struct fscache_object *object) | ||
457 | { | ||
458 | struct fscache_cookie *cookie = object->cookie; | ||
459 | |||
460 | _enter("{OBJ%x,%s}", | ||
461 | object->debug_id, fscache_object_states[object->state]); | ||
462 | |||
463 | spin_lock(&object->lock); | ||
464 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
465 | fscache_stat(&fscache_n_object_lookups_negative); | ||
466 | |||
467 | /* transit here to allow write requests to begin stacking up | ||
468 | * and read requests to begin returning ENODATA */ | ||
469 | object->state = FSCACHE_OBJECT_CREATING; | ||
470 | spin_unlock(&object->lock); | ||
471 | |||
472 | set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags); | ||
473 | set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
474 | |||
475 | _debug("wake up lookup %p", &cookie->flags); | ||
476 | smp_mb__before_clear_bit(); | ||
477 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
478 | smp_mb__after_clear_bit(); | ||
479 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | ||
480 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
481 | } else { | ||
482 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
483 | spin_unlock(&object->lock); | ||
484 | } | ||
485 | |||
486 | _leave(""); | ||
487 | } | ||
488 | EXPORT_SYMBOL(fscache_object_lookup_negative); | ||
489 | |||
490 | /** | ||
491 | * fscache_obtained_object - Note successful object lookup or creation | ||
492 | * @object: Object pointing to cookie to mark | ||
493 | * | ||
494 | * Note successful lookup and/or creation, permitting those waiting to write | ||
495 | * data to a backing object to continue. | ||
496 | * | ||
497 | * Note that after calling this, an object's cookie may be relinquished by the | ||
498 | * netfs, and so must be accessed with object lock held. | ||
499 | */ | ||
500 | void fscache_obtained_object(struct fscache_object *object) | ||
501 | { | ||
502 | struct fscache_cookie *cookie = object->cookie; | ||
503 | |||
504 | _enter("{OBJ%x,%s}", | ||
505 | object->debug_id, fscache_object_states[object->state]); | ||
506 | |||
507 | /* if we were still looking up, then we must have a positive lookup | ||
508 | * result, in which case there may be data available */ | ||
509 | spin_lock(&object->lock); | ||
510 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
511 | fscache_stat(&fscache_n_object_lookups_positive); | ||
512 | |||
513 | clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
514 | |||
515 | object->state = FSCACHE_OBJECT_AVAILABLE; | ||
516 | spin_unlock(&object->lock); | ||
517 | |||
518 | smp_mb__before_clear_bit(); | ||
519 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
520 | smp_mb__after_clear_bit(); | ||
521 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | ||
522 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
523 | } else { | ||
524 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
525 | fscache_stat(&fscache_n_object_created); | ||
526 | |||
527 | object->state = FSCACHE_OBJECT_AVAILABLE; | ||
528 | spin_unlock(&object->lock); | ||
529 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
530 | smp_wmb(); | ||
531 | } | ||
532 | |||
533 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) | ||
534 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING); | ||
535 | |||
536 | _leave(""); | ||
537 | } | ||
538 | EXPORT_SYMBOL(fscache_obtained_object); | ||
539 | |||
540 | /* | ||
541 | * handle an object that has just become available | ||
542 | */ | ||
543 | static void fscache_object_available(struct fscache_object *object) | ||
544 | { | ||
545 | _enter("{OBJ%x}", object->debug_id); | ||
546 | |||
547 | spin_lock(&object->lock); | ||
548 | |||
549 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) | ||
550 | wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); | ||
551 | |||
552 | fscache_done_parent_op(object); | ||
553 | if (object->n_in_progress == 0) { | ||
554 | if (object->n_ops > 0) { | ||
555 | ASSERTCMP(object->n_ops, >=, object->n_obj_ops); | ||
556 | ASSERTIF(object->n_ops > object->n_obj_ops, | ||
557 | !list_empty(&object->pending_ops)); | ||
558 | fscache_start_operations(object); | ||
559 | } else { | ||
560 | ASSERT(list_empty(&object->pending_ops)); | ||
561 | } | ||
562 | } | ||
563 | spin_unlock(&object->lock); | ||
564 | |||
565 | object->cache->ops->lookup_complete(object); | ||
566 | fscache_enqueue_dependents(object); | ||
567 | |||
568 | fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); | ||
569 | fscache_stat(&fscache_n_object_avail); | ||
570 | |||
571 | _leave(""); | ||
572 | } | ||
573 | |||
574 | /* | ||
575 | * drop an object's attachments | ||
576 | */ | ||
577 | static void fscache_drop_object(struct fscache_object *object) | ||
578 | { | ||
579 | struct fscache_object *parent = object->parent; | ||
580 | struct fscache_cache *cache = object->cache; | ||
581 | |||
582 | _enter("{OBJ%x,%d}", object->debug_id, object->n_children); | ||
583 | |||
584 | spin_lock(&cache->object_list_lock); | ||
585 | list_del_init(&object->cache_link); | ||
586 | spin_unlock(&cache->object_list_lock); | ||
587 | |||
588 | cache->ops->drop_object(object); | ||
589 | |||
590 | if (parent) { | ||
591 | _debug("release parent OBJ%x {%d}", | ||
592 | parent->debug_id, parent->n_children); | ||
593 | |||
594 | spin_lock(&parent->lock); | ||
595 | parent->n_children--; | ||
596 | if (parent->n_children == 0) | ||
597 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | ||
598 | spin_unlock(&parent->lock); | ||
599 | object->parent = NULL; | ||
600 | } | ||
601 | |||
602 | /* this just shifts the object release to the slow work processor */ | ||
603 | object->cache->ops->put_object(object); | ||
604 | |||
605 | _leave(""); | ||
606 | } | ||
607 | |||
/*
 * Release or recycle an object that the netfs has discarded; all that needs
 * doing here is to drop the object's attachments.
 */
static void fscache_release_object(struct fscache_object *object)
{
	_enter("");

	fscache_drop_object(object);
}
617 | |||
618 | /* | ||
619 | * withdraw an object from active service | ||
620 | */ | ||
621 | static void fscache_withdraw_object(struct fscache_object *object) | ||
622 | { | ||
623 | struct fscache_cookie *cookie; | ||
624 | bool detached; | ||
625 | |||
626 | _enter(""); | ||
627 | |||
628 | spin_lock(&object->lock); | ||
629 | cookie = object->cookie; | ||
630 | if (cookie) { | ||
631 | /* need to get the cookie lock before the object lock, starting | ||
632 | * from the object pointer */ | ||
633 | atomic_inc(&cookie->usage); | ||
634 | spin_unlock(&object->lock); | ||
635 | |||
636 | detached = false; | ||
637 | spin_lock(&cookie->lock); | ||
638 | spin_lock(&object->lock); | ||
639 | |||
640 | if (object->cookie == cookie) { | ||
641 | hlist_del_init(&object->cookie_link); | ||
642 | object->cookie = NULL; | ||
643 | detached = true; | ||
644 | } | ||
645 | spin_unlock(&cookie->lock); | ||
646 | fscache_cookie_put(cookie); | ||
647 | if (detached) | ||
648 | fscache_cookie_put(cookie); | ||
649 | } | ||
650 | |||
651 | spin_unlock(&object->lock); | ||
652 | |||
653 | fscache_drop_object(object); | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * withdraw an object from active service at the behest of the cache | ||
658 | * - need break the links to a cached object cookie | ||
659 | * - called under two situations: | ||
660 | * (1) recycler decides to reclaim an in-use object | ||
661 | * (2) a cache is unmounted | ||
662 | * - have to take care as the cookie can be being relinquished by the netfs | ||
663 | * simultaneously | ||
664 | * - the object is pinned by the caller holding a refcount on it | ||
665 | */ | ||
666 | void fscache_withdrawing_object(struct fscache_cache *cache, | ||
667 | struct fscache_object *object) | ||
668 | { | ||
669 | bool enqueue = false; | ||
670 | |||
671 | _enter(",OBJ%x", object->debug_id); | ||
672 | |||
673 | spin_lock(&object->lock); | ||
674 | if (object->state < FSCACHE_OBJECT_WITHDRAWING) { | ||
675 | object->state = FSCACHE_OBJECT_WITHDRAWING; | ||
676 | enqueue = true; | ||
677 | } | ||
678 | spin_unlock(&object->lock); | ||
679 | |||
680 | if (enqueue) | ||
681 | fscache_enqueue_object(object); | ||
682 | |||
683 | _leave(""); | ||
684 | } | ||
685 | |||
686 | /* | ||
687 | * allow the slow work item processor to get a ref on an object | ||
688 | */ | ||
689 | static int fscache_object_slow_work_get_ref(struct slow_work *work) | ||
690 | { | ||
691 | struct fscache_object *object = | ||
692 | container_of(work, struct fscache_object, work); | ||
693 | |||
694 | return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * allow the slow work item processor to discard a ref on a work item | ||
699 | */ | ||
700 | static void fscache_object_slow_work_put_ref(struct slow_work *work) | ||
701 | { | ||
702 | struct fscache_object *object = | ||
703 | container_of(work, struct fscache_object, work); | ||
704 | |||
705 | return object->cache->ops->put_object(object); | ||
706 | } | ||
707 | |||
708 | /* | ||
709 | * enqueue an object for metadata-type processing | ||
710 | */ | ||
711 | void fscache_enqueue_object(struct fscache_object *object) | ||
712 | { | ||
713 | _enter("{OBJ%x}", object->debug_id); | ||
714 | |||
715 | slow_work_enqueue(&object->work); | ||
716 | } | ||
717 | |||
718 | /* | ||
719 | * enqueue the dependents of an object for metadata-type processing | ||
720 | * - the caller must hold the object's lock | ||
721 | * - this may cause an already locked object to wind up being processed again | ||
722 | */ | ||
723 | static void fscache_enqueue_dependents(struct fscache_object *object) | ||
724 | { | ||
725 | struct fscache_object *dep; | ||
726 | |||
727 | _enter("{OBJ%x}", object->debug_id); | ||
728 | |||
729 | if (list_empty(&object->dependents)) | ||
730 | return; | ||
731 | |||
732 | spin_lock(&object->lock); | ||
733 | |||
734 | while (!list_empty(&object->dependents)) { | ||
735 | dep = list_entry(object->dependents.next, | ||
736 | struct fscache_object, dep_link); | ||
737 | list_del_init(&dep->dep_link); | ||
738 | |||
739 | |||
740 | /* sort onto appropriate lists */ | ||
741 | fscache_enqueue_object(dep); | ||
742 | dep->cache->ops->put_object(dep); | ||
743 | |||
744 | if (!list_empty(&object->dependents)) | ||
745 | cond_resched_lock(&object->lock); | ||
746 | } | ||
747 | |||
748 | spin_unlock(&object->lock); | ||
749 | } | ||
750 | |||
751 | /* | ||
752 | * remove an object from whatever queue it's waiting on | ||
753 | * - the caller must hold object->lock | ||
754 | */ | ||
755 | void fscache_dequeue_object(struct fscache_object *object) | ||
756 | { | ||
757 | _enter("{OBJ%x}", object->debug_id); | ||
758 | |||
759 | if (!list_empty(&object->dep_link)) { | ||
760 | spin_lock(&object->parent->lock); | ||
761 | list_del_init(&object->dep_link); | ||
762 | spin_unlock(&object->parent->lock); | ||
763 | } | ||
764 | |||
765 | _leave(""); | ||
766 | } | ||
767 | |||
768 | /** | ||
769 | * fscache_check_aux - Ask the netfs whether an object on disk is still valid | ||
770 | * @object: The object to ask about | ||
771 | * @data: The auxiliary data for the object | ||
772 | * @datalen: The size of the auxiliary data | ||
773 | * | ||
774 | * This function consults the netfs about the coherency state of an object | ||
775 | */ | ||
776 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | ||
777 | const void *data, uint16_t datalen) | ||
778 | { | ||
779 | enum fscache_checkaux result; | ||
780 | |||
781 | if (!object->cookie->def->check_aux) { | ||
782 | fscache_stat(&fscache_n_checkaux_none); | ||
783 | return FSCACHE_CHECKAUX_OKAY; | ||
784 | } | ||
785 | |||
786 | result = object->cookie->def->check_aux(object->cookie->netfs_data, | ||
787 | data, datalen); | ||
788 | switch (result) { | ||
789 | /* entry okay as is */ | ||
790 | case FSCACHE_CHECKAUX_OKAY: | ||
791 | fscache_stat(&fscache_n_checkaux_okay); | ||
792 | break; | ||
793 | |||
794 | /* entry requires update */ | ||
795 | case FSCACHE_CHECKAUX_NEEDS_UPDATE: | ||
796 | fscache_stat(&fscache_n_checkaux_update); | ||
797 | break; | ||
798 | |||
799 | /* entry requires deletion */ | ||
800 | case FSCACHE_CHECKAUX_OBSOLETE: | ||
801 | fscache_stat(&fscache_n_checkaux_obsolete); | ||
802 | break; | ||
803 | |||
804 | default: | ||
805 | BUG(); | ||
806 | } | ||
807 | |||
808 | return result; | ||
809 | } | ||
810 | EXPORT_SYMBOL(fscache_check_aux); | ||
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c new file mode 100644 index 000000000000..e7f8d53b8b6b --- /dev/null +++ b/fs/fscache/operation.c | |||
@@ -0,0 +1,459 @@ | |||
1 | /* FS-Cache worker operation management routines | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/filesystems/caching/operations.txt | ||
12 | */ | ||
13 | |||
14 | #define FSCACHE_DEBUG_LEVEL OPERATION | ||
15 | #include <linux/module.h> | ||
16 | #include "internal.h" | ||
17 | |||
18 | atomic_t fscache_op_debug_id; | ||
19 | EXPORT_SYMBOL(fscache_op_debug_id); | ||
20 | |||
21 | /** | ||
22 | * fscache_enqueue_operation - Enqueue an operation for processing | ||
23 | * @op: The operation to enqueue | ||
24 | * | ||
25 | * Enqueue an operation for processing by the FS-Cache thread pool. | ||
26 | * | ||
27 | * This will get its own ref on the object. | ||
28 | */ | ||
29 | void fscache_enqueue_operation(struct fscache_operation *op) | ||
30 | { | ||
31 | _enter("{OBJ%x OP%x,%u}", | ||
32 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | ||
33 | |||
34 | ASSERT(op->processor != NULL); | ||
35 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | ||
36 | ASSERTCMP(atomic_read(&op->usage), >, 0); | ||
37 | |||
38 | if (list_empty(&op->pend_link)) { | ||
39 | switch (op->flags & FSCACHE_OP_TYPE) { | ||
40 | case FSCACHE_OP_FAST: | ||
41 | _debug("queue fast"); | ||
42 | atomic_inc(&op->usage); | ||
43 | if (!schedule_work(&op->fast_work)) | ||
44 | fscache_put_operation(op); | ||
45 | break; | ||
46 | case FSCACHE_OP_SLOW: | ||
47 | _debug("queue slow"); | ||
48 | slow_work_enqueue(&op->slow_work); | ||
49 | break; | ||
50 | case FSCACHE_OP_MYTHREAD: | ||
51 | _debug("queue for caller's attention"); | ||
52 | break; | ||
53 | default: | ||
54 | printk(KERN_ERR "FS-Cache: Unexpected op type %lx", | ||
55 | op->flags); | ||
56 | BUG(); | ||
57 | break; | ||
58 | } | ||
59 | fscache_stat(&fscache_n_op_enqueue); | ||
60 | } | ||
61 | } | ||
62 | EXPORT_SYMBOL(fscache_enqueue_operation); | ||
63 | |||
64 | /* | ||
65 | * start an op running | ||
66 | */ | ||
67 | static void fscache_run_op(struct fscache_object *object, | ||
68 | struct fscache_operation *op) | ||
69 | { | ||
70 | object->n_in_progress++; | ||
71 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) | ||
72 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); | ||
73 | if (op->processor) | ||
74 | fscache_enqueue_operation(op); | ||
75 | fscache_stat(&fscache_n_op_run); | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * submit an exclusive operation for an object | ||
80 | * - other ops are excluded from running simultaneously with this one | ||
81 | * - this gets any extra refs it needs on an op | ||
82 | */ | ||
83 | int fscache_submit_exclusive_op(struct fscache_object *object, | ||
84 | struct fscache_operation *op) | ||
85 | { | ||
86 | int ret; | ||
87 | |||
88 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); | ||
89 | |||
90 | spin_lock(&object->lock); | ||
91 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | ||
92 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | ||
93 | |||
94 | ret = -ENOBUFS; | ||
95 | if (fscache_object_is_active(object)) { | ||
96 | op->object = object; | ||
97 | object->n_ops++; | ||
98 | object->n_exclusive++; /* reads and writes must wait */ | ||
99 | |||
100 | if (object->n_ops > 0) { | ||
101 | atomic_inc(&op->usage); | ||
102 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
103 | fscache_stat(&fscache_n_op_pend); | ||
104 | } else if (!list_empty(&object->pending_ops)) { | ||
105 | atomic_inc(&op->usage); | ||
106 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
107 | fscache_stat(&fscache_n_op_pend); | ||
108 | fscache_start_operations(object); | ||
109 | } else { | ||
110 | ASSERTCMP(object->n_in_progress, ==, 0); | ||
111 | fscache_run_op(object, op); | ||
112 | } | ||
113 | |||
114 | /* need to issue a new write op after this */ | ||
115 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | ||
116 | ret = 0; | ||
117 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | ||
118 | op->object = object; | ||
119 | object->n_ops++; | ||
120 | object->n_exclusive++; /* reads and writes must wait */ | ||
121 | atomic_inc(&op->usage); | ||
122 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
123 | fscache_stat(&fscache_n_op_pend); | ||
124 | ret = 0; | ||
125 | } else { | ||
126 | /* not allowed to submit ops in any other state */ | ||
127 | BUG(); | ||
128 | } | ||
129 | |||
130 | spin_unlock(&object->lock); | ||
131 | return ret; | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * report an unexpected submission | ||
136 | */ | ||
137 | static void fscache_report_unexpected_submission(struct fscache_object *object, | ||
138 | struct fscache_operation *op, | ||
139 | unsigned long ostate) | ||
140 | { | ||
141 | static bool once_only; | ||
142 | struct fscache_operation *p; | ||
143 | unsigned n; | ||
144 | |||
145 | if (once_only) | ||
146 | return; | ||
147 | once_only = true; | ||
148 | |||
149 | kdebug("unexpected submission OP%x [OBJ%x %s]", | ||
150 | op->debug_id, object->debug_id, | ||
151 | fscache_object_states[object->state]); | ||
152 | kdebug("objstate=%s [%s]", | ||
153 | fscache_object_states[object->state], | ||
154 | fscache_object_states[ostate]); | ||
155 | kdebug("objflags=%lx", object->flags); | ||
156 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); | ||
157 | kdebug("ops=%u inp=%u exc=%u", | ||
158 | object->n_ops, object->n_in_progress, object->n_exclusive); | ||
159 | |||
160 | if (!list_empty(&object->pending_ops)) { | ||
161 | n = 0; | ||
162 | list_for_each_entry(p, &object->pending_ops, pend_link) { | ||
163 | ASSERTCMP(p->object, ==, object); | ||
164 | kdebug("%p %p", op->processor, op->release); | ||
165 | n++; | ||
166 | } | ||
167 | |||
168 | kdebug("n=%u", n); | ||
169 | } | ||
170 | |||
171 | dump_stack(); | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * submit an operation for an object | ||
176 | * - objects may be submitted only in the following states: | ||
177 | * - during object creation (write ops may be submitted) | ||
178 | * - whilst the object is active | ||
179 | * - after an I/O error incurred in one of the two above states (op rejected) | ||
180 | * - this gets any extra refs it needs on an op | ||
181 | */ | ||
182 | int fscache_submit_op(struct fscache_object *object, | ||
183 | struct fscache_operation *op) | ||
184 | { | ||
185 | unsigned long ostate; | ||
186 | int ret; | ||
187 | |||
188 | _enter("{OBJ%x OP%x},{%u}", | ||
189 | object->debug_id, op->debug_id, atomic_read(&op->usage)); | ||
190 | |||
191 | ASSERTCMP(atomic_read(&op->usage), >, 0); | ||
192 | |||
193 | spin_lock(&object->lock); | ||
194 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | ||
195 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | ||
196 | |||
197 | ostate = object->state; | ||
198 | smp_rmb(); | ||
199 | |||
200 | if (fscache_object_is_active(object)) { | ||
201 | op->object = object; | ||
202 | object->n_ops++; | ||
203 | |||
204 | if (object->n_exclusive > 0) { | ||
205 | atomic_inc(&op->usage); | ||
206 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
207 | fscache_stat(&fscache_n_op_pend); | ||
208 | } else if (!list_empty(&object->pending_ops)) { | ||
209 | atomic_inc(&op->usage); | ||
210 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
211 | fscache_stat(&fscache_n_op_pend); | ||
212 | fscache_start_operations(object); | ||
213 | } else { | ||
214 | ASSERTCMP(object->n_exclusive, ==, 0); | ||
215 | fscache_run_op(object, op); | ||
216 | } | ||
217 | ret = 0; | ||
218 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | ||
219 | op->object = object; | ||
220 | object->n_ops++; | ||
221 | atomic_inc(&op->usage); | ||
222 | list_add_tail(&op->pend_link, &object->pending_ops); | ||
223 | fscache_stat(&fscache_n_op_pend); | ||
224 | ret = 0; | ||
225 | } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { | ||
226 | fscache_report_unexpected_submission(object, op, ostate); | ||
227 | ASSERT(!fscache_object_is_active(object)); | ||
228 | ret = -ENOBUFS; | ||
229 | } else { | ||
230 | ret = -ENOBUFS; | ||
231 | } | ||
232 | |||
233 | spin_unlock(&object->lock); | ||
234 | return ret; | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * queue an object for withdrawal on error, aborting all following asynchronous | ||
239 | * operations | ||
240 | */ | ||
241 | void fscache_abort_object(struct fscache_object *object) | ||
242 | { | ||
243 | _enter("{OBJ%x}", object->debug_id); | ||
244 | |||
245 | fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); | ||
246 | } | ||
247 | |||
248 | /* | ||
249 | * jump start the operation processing on an object | ||
250 | * - caller must hold object->lock | ||
251 | */ | ||
252 | void fscache_start_operations(struct fscache_object *object) | ||
253 | { | ||
254 | struct fscache_operation *op; | ||
255 | bool stop = false; | ||
256 | |||
257 | while (!list_empty(&object->pending_ops) && !stop) { | ||
258 | op = list_entry(object->pending_ops.next, | ||
259 | struct fscache_operation, pend_link); | ||
260 | |||
261 | if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { | ||
262 | if (object->n_in_progress > 0) | ||
263 | break; | ||
264 | stop = true; | ||
265 | } | ||
266 | list_del_init(&op->pend_link); | ||
267 | object->n_in_progress++; | ||
268 | |||
269 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) | ||
270 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); | ||
271 | if (op->processor) | ||
272 | fscache_enqueue_operation(op); | ||
273 | |||
274 | /* the pending queue was holding a ref on the object */ | ||
275 | fscache_put_operation(op); | ||
276 | } | ||
277 | |||
278 | ASSERTCMP(object->n_in_progress, <=, object->n_ops); | ||
279 | |||
280 | _debug("woke %d ops on OBJ%x", | ||
281 | object->n_in_progress, object->debug_id); | ||
282 | } | ||
283 | |||
284 | /* | ||
285 | * release an operation | ||
286 | * - queues pending ops if this is the last in-progress op | ||
287 | */ | ||
288 | void fscache_put_operation(struct fscache_operation *op) | ||
289 | { | ||
290 | struct fscache_object *object; | ||
291 | struct fscache_cache *cache; | ||
292 | |||
293 | _enter("{OBJ%x OP%x,%d}", | ||
294 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | ||
295 | |||
296 | ASSERTCMP(atomic_read(&op->usage), >, 0); | ||
297 | |||
298 | if (!atomic_dec_and_test(&op->usage)) | ||
299 | return; | ||
300 | |||
301 | _debug("PUT OP"); | ||
302 | if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) | ||
303 | BUG(); | ||
304 | |||
305 | fscache_stat(&fscache_n_op_release); | ||
306 | |||
307 | if (op->release) { | ||
308 | op->release(op); | ||
309 | op->release = NULL; | ||
310 | } | ||
311 | |||
312 | object = op->object; | ||
313 | |||
314 | /* now... we may get called with the object spinlock held, so we | ||
315 | * complete the cleanup here only if we can immediately acquire the | ||
316 | * lock, and defer it otherwise */ | ||
317 | if (!spin_trylock(&object->lock)) { | ||
318 | _debug("defer put"); | ||
319 | fscache_stat(&fscache_n_op_deferred_release); | ||
320 | |||
321 | cache = object->cache; | ||
322 | spin_lock(&cache->op_gc_list_lock); | ||
323 | list_add_tail(&op->pend_link, &cache->op_gc_list); | ||
324 | spin_unlock(&cache->op_gc_list_lock); | ||
325 | schedule_work(&cache->op_gc); | ||
326 | _leave(" [defer]"); | ||
327 | return; | ||
328 | } | ||
329 | |||
330 | if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { | ||
331 | ASSERTCMP(object->n_exclusive, >, 0); | ||
332 | object->n_exclusive--; | ||
333 | } | ||
334 | |||
335 | ASSERTCMP(object->n_in_progress, >, 0); | ||
336 | object->n_in_progress--; | ||
337 | if (object->n_in_progress == 0) | ||
338 | fscache_start_operations(object); | ||
339 | |||
340 | ASSERTCMP(object->n_ops, >, 0); | ||
341 | object->n_ops--; | ||
342 | if (object->n_ops == 0) | ||
343 | fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); | ||
344 | |||
345 | spin_unlock(&object->lock); | ||
346 | |||
347 | kfree(op); | ||
348 | _leave(" [done]"); | ||
349 | } | ||
350 | EXPORT_SYMBOL(fscache_put_operation); | ||
351 | |||
352 | /* | ||
353 | * garbage collect operations that have had their release deferred | ||
354 | */ | ||
355 | void fscache_operation_gc(struct work_struct *work) | ||
356 | { | ||
357 | struct fscache_operation *op; | ||
358 | struct fscache_object *object; | ||
359 | struct fscache_cache *cache = | ||
360 | container_of(work, struct fscache_cache, op_gc); | ||
361 | int count = 0; | ||
362 | |||
363 | _enter(""); | ||
364 | |||
365 | do { | ||
366 | spin_lock(&cache->op_gc_list_lock); | ||
367 | if (list_empty(&cache->op_gc_list)) { | ||
368 | spin_unlock(&cache->op_gc_list_lock); | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | op = list_entry(cache->op_gc_list.next, | ||
373 | struct fscache_operation, pend_link); | ||
374 | list_del(&op->pend_link); | ||
375 | spin_unlock(&cache->op_gc_list_lock); | ||
376 | |||
377 | object = op->object; | ||
378 | |||
379 | _debug("GC DEFERRED REL OBJ%x OP%x", | ||
380 | object->debug_id, op->debug_id); | ||
381 | fscache_stat(&fscache_n_op_gc); | ||
382 | |||
383 | ASSERTCMP(atomic_read(&op->usage), ==, 0); | ||
384 | |||
385 | spin_lock(&object->lock); | ||
386 | if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags)) { | ||
387 | ASSERTCMP(object->n_exclusive, >, 0); | ||
388 | object->n_exclusive--; | ||
389 | } | ||
390 | |||
391 | ASSERTCMP(object->n_in_progress, >, 0); | ||
392 | object->n_in_progress--; | ||
393 | if (object->n_in_progress == 0) | ||
394 | fscache_start_operations(object); | ||
395 | |||
396 | ASSERTCMP(object->n_ops, >, 0); | ||
397 | object->n_ops--; | ||
398 | if (object->n_ops == 0) | ||
399 | fscache_raise_event(object, FSCACHE_OBJECT_EV_CLEARED); | ||
400 | |||
401 | spin_unlock(&object->lock); | ||
402 | |||
403 | } while (count++ < 20); | ||
404 | |||
405 | if (!list_empty(&cache->op_gc_list)) | ||
406 | schedule_work(&cache->op_gc); | ||
407 | |||
408 | _leave(""); | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * allow the slow work item processor to get a ref on an operation | ||
413 | */ | ||
414 | static int fscache_op_get_ref(struct slow_work *work) | ||
415 | { | ||
416 | struct fscache_operation *op = | ||
417 | container_of(work, struct fscache_operation, slow_work); | ||
418 | |||
419 | atomic_inc(&op->usage); | ||
420 | return 0; | ||
421 | } | ||
422 | |||
423 | /* | ||
424 | * allow the slow work item processor to discard a ref on an operation | ||
425 | */ | ||
426 | static void fscache_op_put_ref(struct slow_work *work) | ||
427 | { | ||
428 | struct fscache_operation *op = | ||
429 | container_of(work, struct fscache_operation, slow_work); | ||
430 | |||
431 | fscache_put_operation(op); | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | * execute an operation using the slow thread pool to provide processing context | ||
436 | * - the caller holds a ref to this object, so we don't need to hold one | ||
437 | */ | ||
438 | static void fscache_op_execute(struct slow_work *work) | ||
439 | { | ||
440 | struct fscache_operation *op = | ||
441 | container_of(work, struct fscache_operation, slow_work); | ||
442 | unsigned long start; | ||
443 | |||
444 | _enter("{OBJ%x OP%x,%d}", | ||
445 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | ||
446 | |||
447 | ASSERT(op->processor != NULL); | ||
448 | start = jiffies; | ||
449 | op->processor(op); | ||
450 | fscache_hist(fscache_ops_histogram, start); | ||
451 | |||
452 | _leave(""); | ||
453 | } | ||
454 | |||
455 | const struct slow_work_ops fscache_op_slow_work_ops = { | ||
456 | .get_ref = fscache_op_get_ref, | ||
457 | .put_ref = fscache_op_put_ref, | ||
458 | .execute = fscache_op_execute, | ||
459 | }; | ||
diff --git a/fs/fscache/page.c b/fs/fscache/page.c new file mode 100644 index 000000000000..2568e0eb644f --- /dev/null +++ b/fs/fscache/page.c | |||
@@ -0,0 +1,816 @@ | |||
1 | /* Cache page management and data I/O routines | ||
2 | * | ||
3 | * Copyright (C) 2004-2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL PAGE | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/fscache-cache.h> | ||
15 | #include <linux/buffer_head.h> | ||
16 | #include <linux/pagevec.h> | ||
17 | #include "internal.h" | ||
18 | |||
19 | /* | ||
20 | * check to see if a page is being written to the cache | ||
21 | */ | ||
22 | bool __fscache_check_page_write(struct fscache_cookie *cookie, struct page *page) | ||
23 | { | ||
24 | void *val; | ||
25 | |||
26 | rcu_read_lock(); | ||
27 | val = radix_tree_lookup(&cookie->stores, page->index); | ||
28 | rcu_read_unlock(); | ||
29 | |||
30 | return val != NULL; | ||
31 | } | ||
32 | EXPORT_SYMBOL(__fscache_check_page_write); | ||
33 | |||
34 | /* | ||
35 | * wait for a page to finish being written to the cache | ||
36 | */ | ||
37 | void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *page) | ||
38 | { | ||
39 | wait_queue_head_t *wq = bit_waitqueue(&cookie->flags, 0); | ||
40 | |||
41 | wait_event(*wq, !__fscache_check_page_write(cookie, page)); | ||
42 | } | ||
43 | EXPORT_SYMBOL(__fscache_wait_on_page_write); | ||
44 | |||
45 | /* | ||
46 | * note that a page has finished being written to the cache | ||
47 | */ | ||
48 | static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) | ||
49 | { | ||
50 | struct page *xpage; | ||
51 | |||
52 | spin_lock(&cookie->lock); | ||
53 | xpage = radix_tree_delete(&cookie->stores, page->index); | ||
54 | spin_unlock(&cookie->lock); | ||
55 | ASSERT(xpage != NULL); | ||
56 | |||
57 | wake_up_bit(&cookie->flags, 0); | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * actually apply the changed attributes to a cache object | ||
62 | */ | ||
63 | static void fscache_attr_changed_op(struct fscache_operation *op) | ||
64 | { | ||
65 | struct fscache_object *object = op->object; | ||
66 | |||
67 | _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); | ||
68 | |||
69 | fscache_stat(&fscache_n_attr_changed_calls); | ||
70 | |||
71 | if (fscache_object_is_active(object) && | ||
72 | object->cache->ops->attr_changed(object) < 0) | ||
73 | fscache_abort_object(object); | ||
74 | |||
75 | _leave(""); | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * notification that the attributes on an object have changed | ||
80 | */ | ||
81 | int __fscache_attr_changed(struct fscache_cookie *cookie) | ||
82 | { | ||
83 | struct fscache_operation *op; | ||
84 | struct fscache_object *object; | ||
85 | |||
86 | _enter("%p", cookie); | ||
87 | |||
88 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
89 | |||
90 | fscache_stat(&fscache_n_attr_changed); | ||
91 | |||
92 | op = kzalloc(sizeof(*op), GFP_KERNEL); | ||
93 | if (!op) { | ||
94 | fscache_stat(&fscache_n_attr_changed_nomem); | ||
95 | _leave(" = -ENOMEM"); | ||
96 | return -ENOMEM; | ||
97 | } | ||
98 | |||
99 | fscache_operation_init(op, NULL); | ||
100 | fscache_operation_init_slow(op, fscache_attr_changed_op); | ||
101 | op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); | ||
102 | |||
103 | spin_lock(&cookie->lock); | ||
104 | |||
105 | if (hlist_empty(&cookie->backing_objects)) | ||
106 | goto nobufs; | ||
107 | object = hlist_entry(cookie->backing_objects.first, | ||
108 | struct fscache_object, cookie_link); | ||
109 | |||
110 | if (fscache_submit_exclusive_op(object, op) < 0) | ||
111 | goto nobufs; | ||
112 | spin_unlock(&cookie->lock); | ||
113 | fscache_stat(&fscache_n_attr_changed_ok); | ||
114 | fscache_put_operation(op); | ||
115 | _leave(" = 0"); | ||
116 | return 0; | ||
117 | |||
118 | nobufs: | ||
119 | spin_unlock(&cookie->lock); | ||
120 | kfree(op); | ||
121 | fscache_stat(&fscache_n_attr_changed_nobufs); | ||
122 | _leave(" = %d", -ENOBUFS); | ||
123 | return -ENOBUFS; | ||
124 | } | ||
125 | EXPORT_SYMBOL(__fscache_attr_changed); | ||
126 | |||
127 | /* | ||
128 | * handle secondary execution given to a retrieval op on behalf of the | ||
129 | * cache | ||
130 | */ | ||
131 | static void fscache_retrieval_work(struct work_struct *work) | ||
132 | { | ||
133 | struct fscache_retrieval *op = | ||
134 | container_of(work, struct fscache_retrieval, op.fast_work); | ||
135 | unsigned long start; | ||
136 | |||
137 | _enter("{OP%x}", op->op.debug_id); | ||
138 | |||
139 | start = jiffies; | ||
140 | op->op.processor(&op->op); | ||
141 | fscache_hist(fscache_ops_histogram, start); | ||
142 | fscache_put_operation(&op->op); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * release a retrieval op reference | ||
147 | */ | ||
148 | static void fscache_release_retrieval_op(struct fscache_operation *_op) | ||
149 | { | ||
150 | struct fscache_retrieval *op = | ||
151 | container_of(_op, struct fscache_retrieval, op); | ||
152 | |||
153 | _enter("{OP%x}", op->op.debug_id); | ||
154 | |||
155 | fscache_hist(fscache_retrieval_histogram, op->start_time); | ||
156 | if (op->context) | ||
157 | fscache_put_context(op->op.object->cookie, op->context); | ||
158 | |||
159 | _leave(""); | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * allocate a retrieval op | ||
164 | */ | ||
165 | static struct fscache_retrieval *fscache_alloc_retrieval( | ||
166 | struct address_space *mapping, | ||
167 | fscache_rw_complete_t end_io_func, | ||
168 | void *context) | ||
169 | { | ||
170 | struct fscache_retrieval *op; | ||
171 | |||
172 | /* allocate a retrieval operation and attempt to submit it */ | ||
173 | op = kzalloc(sizeof(*op), GFP_NOIO); | ||
174 | if (!op) { | ||
175 | fscache_stat(&fscache_n_retrievals_nomem); | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | fscache_operation_init(&op->op, fscache_release_retrieval_op); | ||
180 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); | ||
181 | op->mapping = mapping; | ||
182 | op->end_io_func = end_io_func; | ||
183 | op->context = context; | ||
184 | op->start_time = jiffies; | ||
185 | INIT_WORK(&op->op.fast_work, fscache_retrieval_work); | ||
186 | INIT_LIST_HEAD(&op->to_do); | ||
187 | return op; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * wait for a deferred lookup to complete | ||
192 | */ | ||
193 | static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie) | ||
194 | { | ||
195 | unsigned long jif; | ||
196 | |||
197 | _enter(""); | ||
198 | |||
199 | if (!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) { | ||
200 | _leave(" = 0 [imm]"); | ||
201 | return 0; | ||
202 | } | ||
203 | |||
204 | fscache_stat(&fscache_n_retrievals_wait); | ||
205 | |||
206 | jif = jiffies; | ||
207 | if (wait_on_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP, | ||
208 | fscache_wait_bit_interruptible, | ||
209 | TASK_INTERRUPTIBLE) != 0) { | ||
210 | fscache_stat(&fscache_n_retrievals_intr); | ||
211 | _leave(" = -ERESTARTSYS"); | ||
212 | return -ERESTARTSYS; | ||
213 | } | ||
214 | |||
215 | ASSERT(!test_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)); | ||
216 | |||
217 | smp_rmb(); | ||
218 | fscache_hist(fscache_retrieval_delay_histogram, jif); | ||
219 | _leave(" = 0 [dly]"); | ||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * read a page from the cache or allocate a block in which to store it | ||
225 | * - we return: | ||
226 | * -ENOMEM - out of memory, nothing done | ||
227 | * -ERESTARTSYS - interrupted | ||
228 | * -ENOBUFS - no backing object available in which to cache the block | ||
229 | * -ENODATA - no data available in the backing object for this block | ||
230 | * 0 - dispatched a read - it'll call end_io_func() when finished | ||
231 | */ | ||
232 | int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | ||
233 | struct page *page, | ||
234 | fscache_rw_complete_t end_io_func, | ||
235 | void *context, | ||
236 | gfp_t gfp) | ||
237 | { | ||
238 | struct fscache_retrieval *op; | ||
239 | struct fscache_object *object; | ||
240 | int ret; | ||
241 | |||
242 | _enter("%p,%p,,,", cookie, page); | ||
243 | |||
244 | fscache_stat(&fscache_n_retrievals); | ||
245 | |||
246 | if (hlist_empty(&cookie->backing_objects)) | ||
247 | goto nobufs; | ||
248 | |||
249 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
250 | ASSERTCMP(page, !=, NULL); | ||
251 | |||
252 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
253 | return -ERESTARTSYS; | ||
254 | |||
255 | op = fscache_alloc_retrieval(page->mapping, end_io_func, context); | ||
256 | if (!op) { | ||
257 | _leave(" = -ENOMEM"); | ||
258 | return -ENOMEM; | ||
259 | } | ||
260 | |||
261 | spin_lock(&cookie->lock); | ||
262 | |||
263 | if (hlist_empty(&cookie->backing_objects)) | ||
264 | goto nobufs_unlock; | ||
265 | object = hlist_entry(cookie->backing_objects.first, | ||
266 | struct fscache_object, cookie_link); | ||
267 | |||
268 | ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); | ||
269 | |||
270 | if (fscache_submit_op(object, &op->op) < 0) | ||
271 | goto nobufs_unlock; | ||
272 | spin_unlock(&cookie->lock); | ||
273 | |||
274 | fscache_stat(&fscache_n_retrieval_ops); | ||
275 | |||
276 | /* pin the netfs read context in case we need to do the actual netfs | ||
277 | * read because we've encountered a cache read failure */ | ||
278 | fscache_get_context(object->cookie, op->context); | ||
279 | |||
280 | /* we wait for the operation to become active, and then process it | ||
281 | * *here*, in this thread, and not in the thread pool */ | ||
282 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
283 | _debug(">>> WT"); | ||
284 | fscache_stat(&fscache_n_retrieval_op_waits); | ||
285 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
286 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
287 | _debug("<<< GO"); | ||
288 | } | ||
289 | |||
290 | /* ask the cache to honour the operation */ | ||
291 | if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { | ||
292 | ret = object->cache->ops->allocate_page(op, page, gfp); | ||
293 | if (ret == 0) | ||
294 | ret = -ENODATA; | ||
295 | } else { | ||
296 | ret = object->cache->ops->read_or_alloc_page(op, page, gfp); | ||
297 | } | ||
298 | |||
299 | if (ret == -ENOMEM) | ||
300 | fscache_stat(&fscache_n_retrievals_nomem); | ||
301 | else if (ret == -ERESTARTSYS) | ||
302 | fscache_stat(&fscache_n_retrievals_intr); | ||
303 | else if (ret == -ENODATA) | ||
304 | fscache_stat(&fscache_n_retrievals_nodata); | ||
305 | else if (ret < 0) | ||
306 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
307 | else | ||
308 | fscache_stat(&fscache_n_retrievals_ok); | ||
309 | |||
310 | fscache_put_retrieval(op); | ||
311 | _leave(" = %d", ret); | ||
312 | return ret; | ||
313 | |||
314 | nobufs_unlock: | ||
315 | spin_unlock(&cookie->lock); | ||
316 | kfree(op); | ||
317 | nobufs: | ||
318 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
319 | _leave(" = -ENOBUFS"); | ||
320 | return -ENOBUFS; | ||
321 | } | ||
322 | EXPORT_SYMBOL(__fscache_read_or_alloc_page); | ||
323 | |||
324 | /* | ||
325 | * read a list of page from the cache or allocate a block in which to store | ||
326 | * them | ||
327 | * - we return: | ||
328 | * -ENOMEM - out of memory, some pages may be being read | ||
329 | * -ERESTARTSYS - interrupted, some pages may be being read | ||
330 | * -ENOBUFS - no backing object or space available in which to cache any | ||
331 | * pages not being read | ||
332 | * -ENODATA - no data available in the backing object for some or all of | ||
333 | * the pages | ||
334 | * 0 - dispatched a read on all pages | ||
335 | * | ||
336 | * end_io_func() will be called for each page read from the cache as it is | ||
337 | * finishes being read | ||
338 | * | ||
339 | * any pages for which a read is dispatched will be removed from pages and | ||
340 | * nr_pages | ||
341 | */ | ||
342 | int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | ||
343 | struct address_space *mapping, | ||
344 | struct list_head *pages, | ||
345 | unsigned *nr_pages, | ||
346 | fscache_rw_complete_t end_io_func, | ||
347 | void *context, | ||
348 | gfp_t gfp) | ||
349 | { | ||
350 | fscache_pages_retrieval_func_t func; | ||
351 | struct fscache_retrieval *op; | ||
352 | struct fscache_object *object; | ||
353 | int ret; | ||
354 | |||
355 | _enter("%p,,%d,,,", cookie, *nr_pages); | ||
356 | |||
357 | fscache_stat(&fscache_n_retrievals); | ||
358 | |||
359 | if (hlist_empty(&cookie->backing_objects)) | ||
360 | goto nobufs; | ||
361 | |||
362 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
363 | ASSERTCMP(*nr_pages, >, 0); | ||
364 | ASSERT(!list_empty(pages)); | ||
365 | |||
366 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
367 | return -ERESTARTSYS; | ||
368 | |||
369 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | ||
370 | if (!op) | ||
371 | return -ENOMEM; | ||
372 | |||
373 | spin_lock(&cookie->lock); | ||
374 | |||
375 | if (hlist_empty(&cookie->backing_objects)) | ||
376 | goto nobufs_unlock; | ||
377 | object = hlist_entry(cookie->backing_objects.first, | ||
378 | struct fscache_object, cookie_link); | ||
379 | |||
380 | if (fscache_submit_op(object, &op->op) < 0) | ||
381 | goto nobufs_unlock; | ||
382 | spin_unlock(&cookie->lock); | ||
383 | |||
384 | fscache_stat(&fscache_n_retrieval_ops); | ||
385 | |||
386 | /* pin the netfs read context in case we need to do the actual netfs | ||
387 | * read because we've encountered a cache read failure */ | ||
388 | fscache_get_context(object->cookie, op->context); | ||
389 | |||
390 | /* we wait for the operation to become active, and then process it | ||
391 | * *here*, in this thread, and not in the thread pool */ | ||
392 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
393 | _debug(">>> WT"); | ||
394 | fscache_stat(&fscache_n_retrieval_op_waits); | ||
395 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
396 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
397 | _debug("<<< GO"); | ||
398 | } | ||
399 | |||
400 | /* ask the cache to honour the operation */ | ||
401 | if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) | ||
402 | func = object->cache->ops->allocate_pages; | ||
403 | else | ||
404 | func = object->cache->ops->read_or_alloc_pages; | ||
405 | ret = func(op, pages, nr_pages, gfp); | ||
406 | |||
407 | if (ret == -ENOMEM) | ||
408 | fscache_stat(&fscache_n_retrievals_nomem); | ||
409 | else if (ret == -ERESTARTSYS) | ||
410 | fscache_stat(&fscache_n_retrievals_intr); | ||
411 | else if (ret == -ENODATA) | ||
412 | fscache_stat(&fscache_n_retrievals_nodata); | ||
413 | else if (ret < 0) | ||
414 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
415 | else | ||
416 | fscache_stat(&fscache_n_retrievals_ok); | ||
417 | |||
418 | fscache_put_retrieval(op); | ||
419 | _leave(" = %d", ret); | ||
420 | return ret; | ||
421 | |||
422 | nobufs_unlock: | ||
423 | spin_unlock(&cookie->lock); | ||
424 | kfree(op); | ||
425 | nobufs: | ||
426 | fscache_stat(&fscache_n_retrievals_nobufs); | ||
427 | _leave(" = -ENOBUFS"); | ||
428 | return -ENOBUFS; | ||
429 | } | ||
430 | EXPORT_SYMBOL(__fscache_read_or_alloc_pages); | ||
431 | |||
432 | /* | ||
433 | * allocate a block in the cache on which to store a page | ||
434 | * - we return: | ||
435 | * -ENOMEM - out of memory, nothing done | ||
436 | * -ERESTARTSYS - interrupted | ||
437 | * -ENOBUFS - no backing object available in which to cache the block | ||
438 | * 0 - block allocated | ||
439 | */ | ||
440 | int __fscache_alloc_page(struct fscache_cookie *cookie, | ||
441 | struct page *page, | ||
442 | gfp_t gfp) | ||
443 | { | ||
444 | struct fscache_retrieval *op; | ||
445 | struct fscache_object *object; | ||
446 | int ret; | ||
447 | |||
448 | _enter("%p,%p,,,", cookie, page); | ||
449 | |||
450 | fscache_stat(&fscache_n_allocs); | ||
451 | |||
452 | if (hlist_empty(&cookie->backing_objects)) | ||
453 | goto nobufs; | ||
454 | |||
455 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
456 | ASSERTCMP(page, !=, NULL); | ||
457 | |||
458 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | ||
459 | return -ERESTARTSYS; | ||
460 | |||
461 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | ||
462 | if (!op) | ||
463 | return -ENOMEM; | ||
464 | |||
465 | spin_lock(&cookie->lock); | ||
466 | |||
467 | if (hlist_empty(&cookie->backing_objects)) | ||
468 | goto nobufs_unlock; | ||
469 | object = hlist_entry(cookie->backing_objects.first, | ||
470 | struct fscache_object, cookie_link); | ||
471 | |||
472 | if (fscache_submit_op(object, &op->op) < 0) | ||
473 | goto nobufs_unlock; | ||
474 | spin_unlock(&cookie->lock); | ||
475 | |||
476 | fscache_stat(&fscache_n_alloc_ops); | ||
477 | |||
478 | if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { | ||
479 | _debug(">>> WT"); | ||
480 | fscache_stat(&fscache_n_alloc_op_waits); | ||
481 | wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, | ||
482 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
483 | _debug("<<< GO"); | ||
484 | } | ||
485 | |||
486 | /* ask the cache to honour the operation */ | ||
487 | ret = object->cache->ops->allocate_page(op, page, gfp); | ||
488 | |||
489 | if (ret < 0) | ||
490 | fscache_stat(&fscache_n_allocs_nobufs); | ||
491 | else | ||
492 | fscache_stat(&fscache_n_allocs_ok); | ||
493 | |||
494 | fscache_put_retrieval(op); | ||
495 | _leave(" = %d", ret); | ||
496 | return ret; | ||
497 | |||
498 | nobufs_unlock: | ||
499 | spin_unlock(&cookie->lock); | ||
500 | kfree(op); | ||
501 | nobufs: | ||
502 | fscache_stat(&fscache_n_allocs_nobufs); | ||
503 | _leave(" = -ENOBUFS"); | ||
504 | return -ENOBUFS; | ||
505 | } | ||
506 | EXPORT_SYMBOL(__fscache_alloc_page); | ||
507 | |||
/*
 * release a write op reference
 * - installed as the release handler of storage ops by __fscache_write_page()
 *   through fscache_operation_init()
 * - does nothing beyond tracing its entry
 */
static void fscache_release_write_op(struct fscache_operation *_op)
{
	_enter("{OP%x}", _op->debug_id);
}
515 | |||
516 | /* | ||
517 | * perform the background storage of a page into the cache | ||
518 | */ | ||
519 | static void fscache_write_op(struct fscache_operation *_op) | ||
520 | { | ||
521 | struct fscache_storage *op = | ||
522 | container_of(_op, struct fscache_storage, op); | ||
523 | struct fscache_object *object = op->op.object; | ||
524 | struct fscache_cookie *cookie = object->cookie; | ||
525 | struct page *page; | ||
526 | unsigned n; | ||
527 | void *results[1]; | ||
528 | int ret; | ||
529 | |||
530 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); | ||
531 | |||
532 | spin_lock(&cookie->lock); | ||
533 | spin_lock(&object->lock); | ||
534 | |||
535 | if (!fscache_object_is_active(object)) { | ||
536 | spin_unlock(&object->lock); | ||
537 | spin_unlock(&cookie->lock); | ||
538 | _leave(""); | ||
539 | return; | ||
540 | } | ||
541 | |||
542 | fscache_stat(&fscache_n_store_calls); | ||
543 | |||
544 | /* find a page to store */ | ||
545 | page = NULL; | ||
546 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, 1, | ||
547 | FSCACHE_COOKIE_PENDING_TAG); | ||
548 | if (n != 1) | ||
549 | goto superseded; | ||
550 | page = results[0]; | ||
551 | _debug("gang %d [%lx]", n, page->index); | ||
552 | if (page->index > op->store_limit) | ||
553 | goto superseded; | ||
554 | |||
555 | radix_tree_tag_clear(&cookie->stores, page->index, | ||
556 | FSCACHE_COOKIE_PENDING_TAG); | ||
557 | |||
558 | spin_unlock(&object->lock); | ||
559 | spin_unlock(&cookie->lock); | ||
560 | |||
561 | if (page) { | ||
562 | ret = object->cache->ops->write_page(op, page); | ||
563 | fscache_end_page_write(cookie, page); | ||
564 | page_cache_release(page); | ||
565 | if (ret < 0) | ||
566 | fscache_abort_object(object); | ||
567 | else | ||
568 | fscache_enqueue_operation(&op->op); | ||
569 | } | ||
570 | |||
571 | _leave(""); | ||
572 | return; | ||
573 | |||
574 | superseded: | ||
575 | /* this writer is going away and there aren't any more things to | ||
576 | * write */ | ||
577 | _debug("cease"); | ||
578 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | ||
579 | spin_unlock(&object->lock); | ||
580 | spin_unlock(&cookie->lock); | ||
581 | _leave(""); | ||
582 | } | ||
583 | |||
/*
 * request a page be stored in the cache
 *
 * @cookie: data cookie the page belongs to (asserted not to be an index)
 * @page:   the page to store; asserted to be marked PageFsCache
 * @gfp:    allocation constraints, used here for the radix tree preload only
 *          (the operation record itself is always allocated with GFP_NOIO)
 *
 * - returns:
 *   -ENOMEM - out of memory, nothing done
 *   -ENOBUFS - no backing object available in which to cache the page
 *   0 - dispatched a write - it'll call end_io_func() when finished
 *
 * 0 is also returned, without a new operation being submitted, when the page
 * is already in the cookie's store tree or when a writer is already pending
 * on the object.  A cache flagged FSCACHE_IOERROR and any radix tree insertion
 * failure other than -EEXIST are both reported as -ENOBUFS.
 *
 * if the cookie still has a backing object at this point, that object can be
 * in one of a few states with respect to storage processing:
 *
 *  (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is
 *      set)
 *
 *	(a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred
 *	    fill op)
 *
 *	(b) writes deferred till post-creation (mark page for writing and
 *	    return immediately)
 *
 *  (2) negative lookup, object created, initial fill being made from netfs
 *      (FSCACHE_COOKIE_INITIAL_FILL is set)
 *
 *	(a) fill point not yet reached this page (mark page for writing and
 *	    return)
 *
 *	(b) fill point passed this page (queue op to store this page)
 *
 *  (3) object extant (queue op to store this page)
 *
 * any other state is invalid
 *
 * NOTE(review): none of the cookie flags named in the state list above is
 * tested in this function - confirm the list still matches the implementation.
 */
int __fscache_write_page(struct fscache_cookie *cookie,
			 struct page *page,
			 gfp_t gfp)
{
	struct fscache_storage *op;
	struct fscache_object *object;
	int ret;

	_enter("%p,%x,", cookie, (u32) page->flags);

	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
	ASSERT(PageFsCache(page));

	fscache_stat(&fscache_n_stores);

	/* set the operation up before any lock is taken; it is thrown away
	 * again if it turns out not to be needed */
	op = kzalloc(sizeof(*op), GFP_NOIO);
	if (!op)
		goto nomem;

	fscache_operation_init(&op->op, fscache_release_write_op);
	fscache_operation_init_slow(&op->op, fscache_write_op);
	op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);

	/* preload so that the radix_tree_insert() below, which runs with
	 * spinlocks held, has nodes to draw upon */
	ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
	if (ret < 0)
		goto nomem_free;

	ret = -ENOBUFS;
	spin_lock(&cookie->lock);

	if (hlist_empty(&cookie->backing_objects))
		goto nobufs;
	object = hlist_entry(cookie->backing_objects.first,
			     struct fscache_object, cookie_link);
	if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
		goto nobufs;

	/* add the page to the pending-storage radix tree on the backing
	 * object */
	spin_lock(&object->lock);

	_debug("store limit %llx", (unsigned long long) object->store_limit);

	ret = radix_tree_insert(&cookie->stores, page->index, page);
	if (ret < 0) {
		if (ret == -EEXIST)
			goto already_queued;
		_debug("insert failed %d", ret);
		goto nobufs_unlock_obj;
	}

	/* tag the page as pending and pin it for as long as it sits in the
	 * tree */
	radix_tree_tag_set(&cookie->stores, page->index,
			   FSCACHE_COOKIE_PENDING_TAG);
	page_cache_get(page);

	/* we only want one writer at a time, but we do need to queue new
	 * writers after exclusive ops */
	if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags))
		goto already_pending;

	spin_unlock(&object->lock);

	op->op.debug_id = atomic_inc_return(&fscache_op_debug_id);
	op->store_limit = object->store_limit;

	/* still under the cookie lock here */
	if (fscache_submit_op(object, &op->op) < 0)
		goto submit_failed;

	spin_unlock(&cookie->lock);
	radix_tree_preload_end();
	fscache_stat(&fscache_n_store_ops);
	fscache_stat(&fscache_n_stores_ok);

	/* the slow work queue now carries its own ref on the object */
	fscache_put_operation(&op->op);
	_leave(" = 0");
	return 0;

	/* the page was already in the store tree: count the repeat and then
	 * treat it like the already-pending case */
already_queued:
	fscache_stat(&fscache_n_stores_again);
	/* a writer is already flagged on the object, so our op is surplus; the
	 * page (if we inserted it) stays tagged in the tree - presumably for
	 * that writer to pick up */
already_pending:
	spin_unlock(&object->lock);
	spin_unlock(&cookie->lock);
	radix_tree_preload_end();
	kfree(op);
	fscache_stat(&fscache_n_stores_ok);
	_leave(" = 0");
	return 0;

	/* undo the insertion and drop the page ref taken above; the object
	 * lock has already been released, the cookie lock is still held */
submit_failed:
	radix_tree_delete(&cookie->stores, page->index);
	page_cache_release(page);
	ret = -ENOBUFS;
	goto nobufs;

nobufs_unlock_obj:
	spin_unlock(&object->lock);
nobufs:
	spin_unlock(&cookie->lock);
	radix_tree_preload_end();
	kfree(op);
	fscache_stat(&fscache_n_stores_nobufs);
	_leave(" = -ENOBUFS");
	return -ENOBUFS;

	/* reached before any lock was taken and before a successful preload */
nomem_free:
	kfree(op);
nomem:
	fscache_stat(&fscache_n_stores_oom);
	_leave(" = -ENOMEM");
	return -ENOMEM;
}
EXPORT_SYMBOL(__fscache_write_page);
728 | |||
729 | /* | ||
730 | * remove a page from the cache | ||
731 | */ | ||
732 | void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) | ||
733 | { | ||
734 | struct fscache_object *object; | ||
735 | |||
736 | _enter(",%p", page); | ||
737 | |||
738 | ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX); | ||
739 | ASSERTCMP(page, !=, NULL); | ||
740 | |||
741 | fscache_stat(&fscache_n_uncaches); | ||
742 | |||
743 | /* cache withdrawal may beat us to it */ | ||
744 | if (!PageFsCache(page)) | ||
745 | goto done; | ||
746 | |||
747 | /* get the object */ | ||
748 | spin_lock(&cookie->lock); | ||
749 | |||
750 | if (hlist_empty(&cookie->backing_objects)) { | ||
751 | ClearPageFsCache(page); | ||
752 | goto done_unlock; | ||
753 | } | ||
754 | |||
755 | object = hlist_entry(cookie->backing_objects.first, | ||
756 | struct fscache_object, cookie_link); | ||
757 | |||
758 | /* there might now be stuff on disk we could read */ | ||
759 | clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
760 | |||
761 | /* only invoke the cache backend if we managed to mark the page | ||
762 | * uncached here; this deals with synchronisation vs withdrawal */ | ||
763 | if (TestClearPageFsCache(page) && | ||
764 | object->cache->ops->uncache_page) { | ||
765 | /* the cache backend releases the cookie lock */ | ||
766 | object->cache->ops->uncache_page(object, page); | ||
767 | goto done; | ||
768 | } | ||
769 | |||
770 | done_unlock: | ||
771 | spin_unlock(&cookie->lock); | ||
772 | done: | ||
773 | _leave(""); | ||
774 | } | ||
775 | EXPORT_SYMBOL(__fscache_uncache_page); | ||
776 | |||
777 | /** | ||
778 | * fscache_mark_pages_cached - Mark pages as being cached | ||
779 | * @op: The retrieval op pages are being marked for | ||
780 | * @pagevec: The pages to be marked | ||
781 | * | ||
782 | * Mark a bunch of netfs pages as being cached. After this is called, | ||
783 | * the netfs must call fscache_uncache_page() to remove the mark. | ||
784 | */ | ||
785 | void fscache_mark_pages_cached(struct fscache_retrieval *op, | ||
786 | struct pagevec *pagevec) | ||
787 | { | ||
788 | struct fscache_cookie *cookie = op->op.object->cookie; | ||
789 | unsigned long loop; | ||
790 | |||
791 | #ifdef CONFIG_FSCACHE_STATS | ||
792 | atomic_add(pagevec->nr, &fscache_n_marks); | ||
793 | #endif | ||
794 | |||
795 | for (loop = 0; loop < pagevec->nr; loop++) { | ||
796 | struct page *page = pagevec->pages[loop]; | ||
797 | |||
798 | _debug("- mark %p{%lx}", page, page->index); | ||
799 | if (TestSetPageFsCache(page)) { | ||
800 | static bool once_only; | ||
801 | if (!once_only) { | ||
802 | once_only = true; | ||
803 | printk(KERN_WARNING "FS-Cache:" | ||
804 | " Cookie type %s marked page %lx" | ||
805 | " multiple times\n", | ||
806 | cookie->def->name, page->index); | ||
807 | } | ||
808 | } | ||
809 | } | ||
810 | |||
811 | if (cookie->def->mark_pages_cached) | ||
812 | cookie->def->mark_pages_cached(cookie->netfs_data, | ||
813 | op->mapping, pagevec); | ||
814 | pagevec_reinit(pagevec); | ||
815 | } | ||
816 | EXPORT_SYMBOL(fscache_mark_pages_cached); | ||
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c new file mode 100644 index 000000000000..beeab44bc31a --- /dev/null +++ b/fs/fscache/proc.c | |||
@@ -0,0 +1,68 @@ | |||
1 | /* FS-Cache statistics viewing interface | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL OPERATION | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
/*
 * initialise the /proc/fs/fscache/ directory
 * - creates the directory itself, plus "stats" if CONFIG_FSCACHE_STATS is set
 *   and "histogram" if CONFIG_FSCACHE_HISTOGRAM is set
 * - returns 0 on success; on any failure whatever had already been created is
 *   removed again and -ENOMEM is returned
 */
int __init fscache_proc_init(void)
{
	_enter("");

	if (!proc_mkdir("fs/fscache", NULL))
		goto error_dir;

#ifdef CONFIG_FSCACHE_STATS
	if (!proc_create("fs/fscache/stats", S_IFREG | 0444, NULL,
			 &fscache_stats_fops))
		goto error_stats;
#endif

#ifdef CONFIG_FSCACHE_HISTOGRAM
	if (!proc_create("fs/fscache/histogram", S_IFREG | 0444, NULL,
			 &fscache_histogram_fops))
		goto error_histogram;
#endif

	_leave(" = 0");
	return 0;

	/* unwind in reverse order of creation; each label and each removal is
	 * compiled in only if the corresponding entry is configured */
#ifdef CONFIG_FSCACHE_HISTOGRAM
error_histogram:
#endif
#ifdef CONFIG_FSCACHE_STATS
	remove_proc_entry("fs/fscache/stats", NULL);
error_stats:
#endif
	remove_proc_entry("fs/fscache", NULL);
error_dir:
	_leave(" = -ENOMEM");
	return -ENOMEM;
}
55 | |||
/*
 * clean up the /proc/fs/fscache/ directory
 * - removes the entries made by fscache_proc_init(): the configured files
 *   first, then the directory that holds them
 */
void fscache_proc_cleanup(void)
{
#ifdef CONFIG_FSCACHE_HISTOGRAM
	remove_proc_entry("fs/fscache/histogram", NULL);
#endif
#ifdef CONFIG_FSCACHE_STATS
	remove_proc_entry("fs/fscache/stats", NULL);
#endif
	remove_proc_entry("fs/fscache", NULL);
}
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c new file mode 100644 index 000000000000..65deb99e756b --- /dev/null +++ b/fs/fscache/stats.c | |||
@@ -0,0 +1,212 @@ | |||
1 | /* FS-Cache statistics | ||
2 | * | ||
3 | * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #define FSCACHE_DEBUG_LEVEL THREAD | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/proc_fs.h> | ||
15 | #include <linux/seq_file.h> | ||
16 | #include "internal.h" | ||
17 | |||
/*
 * operation counters
 * - each group below is annotated with the line of the statistics report
 *   (see fscache_stats_show()) in which its members are printed
 */

/* "Ops" lines; note that fscache_n_op_requeue is not part of the report */
atomic_t fscache_n_op_pend;
atomic_t fscache_n_op_run;
atomic_t fscache_n_op_enqueue;
atomic_t fscache_n_op_requeue;
atomic_t fscache_n_op_deferred_release;
atomic_t fscache_n_op_release;
atomic_t fscache_n_op_gc;

/* "AttrChg" line */
atomic_t fscache_n_attr_changed;
atomic_t fscache_n_attr_changed_ok;
atomic_t fscache_n_attr_changed_nobufs;
atomic_t fscache_n_attr_changed_nomem;
atomic_t fscache_n_attr_changed_calls;

/* "Allocs" lines */
atomic_t fscache_n_allocs;
atomic_t fscache_n_allocs_ok;
atomic_t fscache_n_allocs_wait;
atomic_t fscache_n_allocs_nobufs;
atomic_t fscache_n_alloc_ops;
atomic_t fscache_n_alloc_op_waits;

/* "Retrvls" lines */
atomic_t fscache_n_retrievals;
atomic_t fscache_n_retrievals_ok;
atomic_t fscache_n_retrievals_wait;
atomic_t fscache_n_retrievals_nodata;
atomic_t fscache_n_retrievals_nobufs;
atomic_t fscache_n_retrievals_intr;
atomic_t fscache_n_retrievals_nomem;
atomic_t fscache_n_retrieval_ops;
atomic_t fscache_n_retrieval_op_waits;

/* "Stores" lines */
atomic_t fscache_n_stores;
atomic_t fscache_n_stores_ok;
atomic_t fscache_n_stores_again;
atomic_t fscache_n_stores_nobufs;
atomic_t fscache_n_stores_oom;
atomic_t fscache_n_store_ops;
atomic_t fscache_n_store_calls;

/* "Pages" line */
atomic_t fscache_n_marks;
atomic_t fscache_n_uncaches;

/* "Acquire" line */
atomic_t fscache_n_acquires;
atomic_t fscache_n_acquires_null;
atomic_t fscache_n_acquires_no_cache;
atomic_t fscache_n_acquires_ok;
atomic_t fscache_n_acquires_nobufs;
atomic_t fscache_n_acquires_oom;

/* "Updates" line */
atomic_t fscache_n_updates;
atomic_t fscache_n_updates_null;
atomic_t fscache_n_updates_run;

/* "Relinqs" line */
atomic_t fscache_n_relinquishes;
atomic_t fscache_n_relinquishes_null;
atomic_t fscache_n_relinquishes_waitcrt;

/* "Cookies" line */
atomic_t fscache_n_cookie_index;
atomic_t fscache_n_cookie_data;
atomic_t fscache_n_cookie_special;

/* "Objects" line (alloc, no_alloc, avail, dead) and "Lookups" line (lookups*,
 * created) */
atomic_t fscache_n_object_alloc;
atomic_t fscache_n_object_no_alloc;
atomic_t fscache_n_object_lookups;
atomic_t fscache_n_object_lookups_negative;
atomic_t fscache_n_object_lookups_positive;
atomic_t fscache_n_object_created;
atomic_t fscache_n_object_avail;
atomic_t fscache_n_object_dead;

/* "ChkAux" line */
atomic_t fscache_n_checkaux_none;
atomic_t fscache_n_checkaux_okay;
atomic_t fscache_n_checkaux_update;
atomic_t fscache_n_checkaux_obsolete;
95 | |||
/*
 * display the general statistics
 * - seq_file show routine: emits a title line followed by one line per group
 *   of counters, each made up of a label and name=value pairs
 * - every value is a snapshot taken with atomic_read() and printed with %u;
 *   the counters are read one by one, not as a consistent set
 * - @v is unused; always returns 0
 * - fscache_n_op_requeue is the one counter defined in this file that is not
 *   printed here
 */
static int fscache_stats_show(struct seq_file *m, void *v)
{
	seq_puts(m, "FS-Cache statistics\n");

	/* cookies, by type */
	seq_printf(m, "Cookies: idx=%u dat=%u spc=%u\n",
		   atomic_read(&fscache_n_cookie_index),
		   atomic_read(&fscache_n_cookie_data),
		   atomic_read(&fscache_n_cookie_special));

	/* object lifecycle and auxiliary data checks */
	seq_printf(m, "Objects: alc=%u nal=%u avl=%u ded=%u\n",
		   atomic_read(&fscache_n_object_alloc),
		   atomic_read(&fscache_n_object_no_alloc),
		   atomic_read(&fscache_n_object_avail),
		   atomic_read(&fscache_n_object_dead));
	seq_printf(m, "ChkAux : non=%u ok=%u upd=%u obs=%u\n",
		   atomic_read(&fscache_n_checkaux_none),
		   atomic_read(&fscache_n_checkaux_okay),
		   atomic_read(&fscache_n_checkaux_update),
		   atomic_read(&fscache_n_checkaux_obsolete));

	/* page marking and unmarking */
	seq_printf(m, "Pages : mrk=%u unc=%u\n",
		   atomic_read(&fscache_n_marks),
		   atomic_read(&fscache_n_uncaches));

	/* cookie acquisition, lookup, update and relinquishment */
	seq_printf(m, "Acquire: n=%u nul=%u noc=%u ok=%u nbf=%u"
		   " oom=%u\n",
		   atomic_read(&fscache_n_acquires),
		   atomic_read(&fscache_n_acquires_null),
		   atomic_read(&fscache_n_acquires_no_cache),
		   atomic_read(&fscache_n_acquires_ok),
		   atomic_read(&fscache_n_acquires_nobufs),
		   atomic_read(&fscache_n_acquires_oom));

	seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n",
		   atomic_read(&fscache_n_object_lookups),
		   atomic_read(&fscache_n_object_lookups_negative),
		   atomic_read(&fscache_n_object_lookups_positive),
		   atomic_read(&fscache_n_object_created));

	seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
		   atomic_read(&fscache_n_updates),
		   atomic_read(&fscache_n_updates_null),
		   atomic_read(&fscache_n_updates_run));

	seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n",
		   atomic_read(&fscache_n_relinquishes),
		   atomic_read(&fscache_n_relinquishes_null),
		   atomic_read(&fscache_n_relinquishes_waitcrt));

	/* attribute change requests */
	seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n",
		   atomic_read(&fscache_n_attr_changed),
		   atomic_read(&fscache_n_attr_changed_ok),
		   atomic_read(&fscache_n_attr_changed_nobufs),
		   atomic_read(&fscache_n_attr_changed_nomem),
		   atomic_read(&fscache_n_attr_changed_calls));

	/* page allocation requests: outcomes first, then op accounting */
	seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n",
		   atomic_read(&fscache_n_allocs),
		   atomic_read(&fscache_n_allocs_ok),
		   atomic_read(&fscache_n_allocs_wait),
		   atomic_read(&fscache_n_allocs_nobufs));
	seq_printf(m, "Allocs : ops=%u owt=%u\n",
		   atomic_read(&fscache_n_alloc_ops),
		   atomic_read(&fscache_n_alloc_op_waits));

	/* retrieval requests: outcomes first, then op accounting */
	seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u"
		   " int=%u oom=%u\n",
		   atomic_read(&fscache_n_retrievals),
		   atomic_read(&fscache_n_retrievals_ok),
		   atomic_read(&fscache_n_retrievals_wait),
		   atomic_read(&fscache_n_retrievals_nodata),
		   atomic_read(&fscache_n_retrievals_nobufs),
		   atomic_read(&fscache_n_retrievals_intr),
		   atomic_read(&fscache_n_retrievals_nomem));
	seq_printf(m, "Retrvls: ops=%u owt=%u\n",
		   atomic_read(&fscache_n_retrieval_ops),
		   atomic_read(&fscache_n_retrieval_op_waits));

	/* store requests: outcomes first, then op accounting */
	seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n",
		   atomic_read(&fscache_n_stores),
		   atomic_read(&fscache_n_stores_ok),
		   atomic_read(&fscache_n_stores_again),
		   atomic_read(&fscache_n_stores_nobufs),
		   atomic_read(&fscache_n_stores_oom));
	seq_printf(m, "Stores : ops=%u run=%u\n",
		   atomic_read(&fscache_n_store_ops),
		   atomic_read(&fscache_n_store_calls));

	/* generic operation accounting */
	seq_printf(m, "Ops : pend=%u run=%u enq=%u\n",
		   atomic_read(&fscache_n_op_pend),
		   atomic_read(&fscache_n_op_run),
		   atomic_read(&fscache_n_op_enqueue));
	seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n",
		   atomic_read(&fscache_n_op_deferred_release),
		   atomic_read(&fscache_n_op_release),
		   atomic_read(&fscache_n_op_gc));
	return 0;
}
197 | |||
198 | /* | ||
199 | * open "/proc/fs/fscache/stats" allowing provision of a statistical summary | ||
200 | */ | ||
201 | static int fscache_stats_open(struct inode *inode, struct file *file) | ||
202 | { | ||
203 | return single_open(file, fscache_stats_show, NULL); | ||
204 | } | ||
205 | |||
206 | const struct file_operations fscache_stats_fops = { | ||
207 | .owner = THIS_MODULE, | ||
208 | .open = fscache_stats_open, | ||
209 | .read = seq_read, | ||
210 | .llseek = seq_lseek, | ||
211 | .release = seq_release, | ||
212 | }; | ||
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 36fe20d6eba2..e67f3ec07736 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -84,3 +84,11 @@ config ROOT_NFS | |||
84 | <file:Documentation/filesystems/nfsroot.txt>. | 84 | <file:Documentation/filesystems/nfsroot.txt>. |
85 | 85 | ||
86 | Most people say N here. | 86 | Most people say N here. |
87 | |||
88 | config NFS_FSCACHE | ||
89 | bool "Provide NFS client caching support (EXPERIMENTAL)" | ||
90 | depends on EXPERIMENTAL | ||
91 | depends on NFS_FS=m && FSCACHE || NFS_FS=y && FSCACHE=y | ||
92 | help | ||
93 | Say Y here if you want NFS data to be cached locally on disc through | ||
94 | the general filesystem cache manager | ||
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index ac6170c594a3..845159814de2 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -15,3 +15,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
15 | callback.o callback_xdr.o callback_proc.o \ | 15 | callback.o callback_xdr.o callback_proc.o \ |
16 | nfs4namespace.o | 16 | nfs4namespace.o |
17 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 17 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
18 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | ||
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index aba38017bdef..75c9cd2aa119 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "delegation.h" | 45 | #include "delegation.h" |
46 | #include "iostat.h" | 46 | #include "iostat.h" |
47 | #include "internal.h" | 47 | #include "internal.h" |
48 | #include "fscache.h" | ||
48 | 49 | ||
49 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 50 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
50 | 51 | ||
@@ -154,6 +155,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
154 | if (!IS_ERR(cred)) | 155 | if (!IS_ERR(cred)) |
155 | clp->cl_machine_cred = cred; | 156 | clp->cl_machine_cred = cred; |
156 | 157 | ||
158 | nfs_fscache_get_client_cookie(clp); | ||
159 | |||
157 | return clp; | 160 | return clp; |
158 | 161 | ||
159 | error_3: | 162 | error_3: |
@@ -187,6 +190,8 @@ static void nfs_free_client(struct nfs_client *clp) | |||
187 | 190 | ||
188 | nfs4_shutdown_client(clp); | 191 | nfs4_shutdown_client(clp); |
189 | 192 | ||
193 | nfs_fscache_release_client_cookie(clp); | ||
194 | |||
190 | /* -EIO all pending I/O */ | 195 | /* -EIO all pending I/O */ |
191 | if (!IS_ERR(clp->cl_rpcclient)) | 196 | if (!IS_ERR(clp->cl_rpcclient)) |
192 | rpc_shutdown_client(clp->cl_rpcclient); | 197 | rpc_shutdown_client(clp->cl_rpcclient); |
@@ -760,6 +765,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
760 | 765 | ||
761 | /* Initialise the client representation from the mount data */ | 766 | /* Initialise the client representation from the mount data */ |
762 | server->flags = data->flags; | 767 | server->flags = data->flags; |
768 | server->options = data->options; | ||
763 | 769 | ||
764 | if (data->rsize) | 770 | if (data->rsize) |
765 | server->rsize = nfs_block_size(data->rsize, NULL); | 771 | server->rsize = nfs_block_size(data->rsize, NULL); |
@@ -1148,6 +1154,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1148 | /* Initialise the client representation from the mount data */ | 1154 | /* Initialise the client representation from the mount data */ |
1149 | server->flags = data->flags; | 1155 | server->flags = data->flags; |
1150 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1156 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
1157 | server->options = data->options; | ||
1151 | 1158 | ||
1152 | /* Get a client record */ | 1159 | /* Get a client record */ |
1153 | error = nfs4_set_client(server, | 1160 | error = nfs4_set_client(server, |
@@ -1559,7 +1566,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1559 | 1566 | ||
1560 | /* display header on line 1 */ | 1567 | /* display header on line 1 */ |
1561 | if (v == &nfs_volume_list) { | 1568 | if (v == &nfs_volume_list) { |
1562 | seq_puts(m, "NV SERVER PORT DEV FSID\n"); | 1569 | seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); |
1563 | return 0; | 1570 | return 0; |
1564 | } | 1571 | } |
1565 | /* display one transport per line on subsequent lines */ | 1572 | /* display one transport per line on subsequent lines */ |
@@ -1573,12 +1580,13 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) | |||
1573 | (unsigned long long) server->fsid.major, | 1580 | (unsigned long long) server->fsid.major, |
1574 | (unsigned long long) server->fsid.minor); | 1581 | (unsigned long long) server->fsid.minor); |
1575 | 1582 | ||
1576 | seq_printf(m, "v%u %s %s %-7s %-17s\n", | 1583 | seq_printf(m, "v%u %s %s %-7s %-17s %s\n", |
1577 | clp->rpc_ops->version, | 1584 | clp->rpc_ops->version, |
1578 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), | 1585 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), |
1579 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), | 1586 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), |
1580 | dev, | 1587 | dev, |
1581 | fsid); | 1588 | fsid, |
1589 | nfs_server_fscache_state(server)); | ||
1582 | 1590 | ||
1583 | return 0; | 1591 | return 0; |
1584 | } | 1592 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0abf3f331f56..3523b895eb4b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "delegation.h" | 35 | #include "delegation.h" |
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | ||
38 | 39 | ||
39 | #define NFSDBG_FACILITY NFSDBG_FILE | 40 | #define NFSDBG_FACILITY NFSDBG_FILE |
40 | 41 | ||
@@ -409,6 +410,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
409 | return copied; | 410 | return copied; |
410 | } | 411 | } |
411 | 412 | ||
413 | /* | ||
414 | * Partially or wholly invalidate a page | ||
415 | * - Release the private state associated with a page if undergoing complete | ||
416 | * page invalidation | ||
417 | * - Called if either PG_private or PG_fscache is set on the page | ||
418 | * - Caller holds page lock | ||
419 | */ | ||
412 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 420 | static void nfs_invalidate_page(struct page *page, unsigned long offset) |
413 | { | 421 | { |
414 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); | 422 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); |
@@ -417,23 +425,43 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) | |||
417 | return; | 425 | return; |
418 | /* Cancel any unstarted writes on this page */ | 426 | /* Cancel any unstarted writes on this page */ |
419 | nfs_wb_page_cancel(page->mapping->host, page); | 427 | nfs_wb_page_cancel(page->mapping->host, page); |
428 | |||
429 | nfs_fscache_invalidate_page(page, page->mapping->host); | ||
420 | } | 430 | } |
421 | 431 | ||
432 | /* | ||
433 | * Attempt to release the private state associated with a page | ||
434 | * - Called if either PG_private or PG_fscache is set on the page | ||
435 | * - Caller holds page lock | ||
436 | * - Return true (may release page) or false (may not) | ||
437 | */ | ||
422 | static int nfs_release_page(struct page *page, gfp_t gfp) | 438 | static int nfs_release_page(struct page *page, gfp_t gfp) |
423 | { | 439 | { |
424 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); | 440 | dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page); |
425 | 441 | ||
426 | /* If PagePrivate() is set, then the page is not freeable */ | 442 | /* If PagePrivate() is set, then the page is not freeable */ |
427 | return 0; | 443 | if (PagePrivate(page)) |
444 | return 0; | ||
445 | return nfs_fscache_release_page(page, gfp); | ||
428 | } | 446 | } |
429 | 447 | ||
448 | /* | ||
449 | * Attempt to clear the private state associated with a page when an error | ||
450 | * occurs that requires the cached contents of an inode to be written back or | ||
451 | * destroyed | ||
452 | * - Called if either PG_private or PG_fscache is set on the page | ||
453 | * - Caller holds page lock | ||
454 | * - Return 0 if successful, -error otherwise | ||
455 | */ | ||
430 | static int nfs_launder_page(struct page *page) | 456 | static int nfs_launder_page(struct page *page) |
431 | { | 457 | { |
432 | struct inode *inode = page->mapping->host; | 458 | struct inode *inode = page->mapping->host; |
459 | struct nfs_inode *nfsi = NFS_I(inode); | ||
433 | 460 | ||
434 | dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", | 461 | dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n", |
435 | inode->i_ino, (long long)page_offset(page)); | 462 | inode->i_ino, (long long)page_offset(page)); |
436 | 463 | ||
464 | nfs_fscache_wait_on_page_write(nfsi, page); | ||
437 | return nfs_wb_page(inode, page); | 465 | return nfs_wb_page(inode, page); |
438 | } | 466 | } |
439 | 467 | ||
@@ -451,6 +479,11 @@ const struct address_space_operations nfs_file_aops = { | |||
451 | .launder_page = nfs_launder_page, | 479 | .launder_page = nfs_launder_page, |
452 | }; | 480 | }; |
453 | 481 | ||
482 | /* | ||
483 | * Notification that a PTE pointing to an NFS page is about to be made | ||
484 | * writable, implying that someone is about to modify the page through a | ||
485 | * shared-writable mapping | ||
486 | */ | ||
454 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 487 | static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) |
455 | { | 488 | { |
456 | struct page *page = vmf->page; | 489 | struct page *page = vmf->page; |
@@ -465,6 +498,9 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
465 | filp->f_mapping->host->i_ino, | 498 | filp->f_mapping->host->i_ino, |
466 | (long long)page_offset(page)); | 499 | (long long)page_offset(page)); |
467 | 500 | ||
501 | /* make sure the cache has finished storing the page */ | ||
502 | nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page); | ||
503 | |||
468 | lock_page(page); | 504 | lock_page(page); |
469 | mapping = page->mapping; | 505 | mapping = page->mapping; |
470 | if (mapping != dentry->d_inode->i_mapping) | 506 | if (mapping != dentry->d_inode->i_mapping) |
diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c new file mode 100644 index 000000000000..5b1006480bc2 --- /dev/null +++ b/fs/nfs/fscache-index.c | |||
@@ -0,0 +1,337 @@ | |||
1 | /* NFS FS-Cache index structure definition | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/nfs_fs.h> | ||
17 | #include <linux/nfs_fs_sb.h> | ||
18 | #include <linux/in6.h> | ||
19 | |||
20 | #include "internal.h" | ||
21 | #include "fscache.h" | ||
22 | |||
23 | #define NFSDBG_FACILITY NFSDBG_FSCACHE | ||
24 | |||
25 | /* | ||
26 | * Define the NFS filesystem for FS-Cache. Upon registration FS-Cache sticks | ||
27 | * the cookie for the top-level index object for NFS into here. The top-level | ||
28 | * index can then have other cache objects inserted into it. | ||
29 | */ | ||
30 | struct fscache_netfs nfs_fscache_netfs = { | ||
31 | .name = "nfs", | ||
32 | .version = 0, | ||
33 | }; | ||
34 | |||
35 | /* | ||
36 | * Register NFS for caching | ||
37 | */ | ||
38 | int nfs_fscache_register(void) | ||
39 | { | ||
40 | return fscache_register_netfs(&nfs_fscache_netfs); | ||
41 | } | ||
42 | |||
43 | /* | ||
44 | * Unregister NFS for caching | ||
45 | */ | ||
46 | void nfs_fscache_unregister(void) | ||
47 | { | ||
48 | fscache_unregister_netfs(&nfs_fscache_netfs); | ||
49 | } | ||
50 | |||
51 | /* | ||
52 | * Layout of the key for an NFS server cache object. | ||
53 | */ | ||
54 | struct nfs_server_key { | ||
55 | uint16_t nfsversion; /* NFS protocol version */ | ||
56 | uint16_t family; /* address family */ | ||
57 | uint16_t port; /* IP port */ | ||
58 | union { | ||
59 | struct in_addr ipv4_addr; /* IPv4 address */ | ||
60 | struct in6_addr ipv6_addr; /* IPv6 address */ | ||
61 | } addr[0]; | ||
62 | }; | ||
63 | |||
64 | /* | ||
65 | * Generate a key to describe a server in the main NFS index | ||
66 | * - We return the length of the key, or 0 if we can't generate one | ||
67 | */ | ||
68 | static uint16_t nfs_server_get_key(const void *cookie_netfs_data, | ||
69 | void *buffer, uint16_t bufmax) | ||
70 | { | ||
71 | const struct nfs_client *clp = cookie_netfs_data; | ||
72 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) &clp->cl_addr; | ||
73 | const struct sockaddr_in *sin = (struct sockaddr_in *) &clp->cl_addr; | ||
74 | struct nfs_server_key *key = buffer; | ||
75 | uint16_t len = sizeof(struct nfs_server_key); | ||
76 | |||
77 | /* zero the fixed part first so the fields set below are not wiped */ | ||
78 | memset(key, 0, len); | ||
79 | key->nfsversion = clp->rpc_ops->version; | ||
80 | key->family = clp->cl_addr.ss_family; | ||
81 | |||
82 | switch (clp->cl_addr.ss_family) { | ||
83 | case AF_INET: | ||
84 | key->port = sin->sin_port; | ||
85 | key->addr[0].ipv4_addr = sin->sin_addr; | ||
86 | len += sizeof(key->addr[0].ipv4_addr); | ||
87 | break; | ||
88 | |||
89 | case AF_INET6: | ||
90 | key->port = sin6->sin6_port; | ||
91 | key->addr[0].ipv6_addr = sin6->sin6_addr; | ||
92 | len += sizeof(key->addr[0].ipv6_addr); | ||
93 | break; | ||
94 | |||
95 | default: | ||
96 | printk(KERN_WARNING "NFS: Unknown network family '%d'\n", | ||
97 | clp->cl_addr.ss_family); | ||
98 | len = 0; | ||
99 | break; | ||
100 | } | ||
101 | |||
102 | return len; | ||
103 | } | ||
104 | |||
105 | /* | ||
106 | * Define the server object for FS-Cache. This is used to describe a server | ||
107 | * object to fscache_acquire_cookie(). It is keyed by the NFS protocol and | ||
108 | * server address parameters. | ||
109 | */ | ||
110 | const struct fscache_cookie_def nfs_fscache_server_index_def = { | ||
111 | .name = "NFS.server", | ||
112 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
113 | .get_key = nfs_server_get_key, | ||
114 | }; | ||
115 | |||
116 | /* | ||
117 | * Generate a key to describe a superblock key in the main NFS index | ||
118 | */ | ||
119 | static uint16_t nfs_super_get_key(const void *cookie_netfs_data, | ||
120 | void *buffer, uint16_t bufmax) | ||
121 | { | ||
122 | const struct nfs_fscache_key *key; | ||
123 | const struct nfs_server *nfss = cookie_netfs_data; | ||
124 | uint16_t len; | ||
125 | |||
126 | key = nfss->fscache_key; | ||
127 | len = sizeof(key->key) + key->key.uniq_len; | ||
128 | if (len > bufmax) { | ||
129 | len = 0; | ||
130 | } else { | ||
131 | memcpy(buffer, &key->key, sizeof(key->key)); | ||
132 | memcpy(buffer + sizeof(key->key), | ||
133 | key->key.uniquifier, key->key.uniq_len); | ||
134 | } | ||
135 | |||
136 | return len; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Define the superblock object for FS-Cache. This is used to describe a | ||
141 | * superblock object to fscache_acquire_cookie(). It is keyed by all the NFS | ||
142 | * parameters that might cause a separate superblock. | ||
143 | */ | ||
144 | const struct fscache_cookie_def nfs_fscache_super_index_def = { | ||
145 | .name = "NFS.super", | ||
146 | .type = FSCACHE_COOKIE_TYPE_INDEX, | ||
147 | .get_key = nfs_super_get_key, | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * Definition of the auxiliary data attached to NFS inode storage objects | ||
152 | * within the cache. | ||
153 | * | ||
154 | * The contents of this struct are recorded in the on-disk local cache in the | ||
155 | * auxiliary data attached to the data storage object backing an inode. This | ||
156 | * permits coherency to be managed when a new inode binds to an already extant | ||
157 | * cache object. | ||
158 | */ | ||
159 | struct nfs_fscache_inode_auxdata { | ||
160 | struct timespec mtime; | ||
161 | struct timespec ctime; | ||
162 | loff_t size; | ||
163 | u64 change_attr; | ||
164 | }; | ||
165 | |||
166 | /* | ||
167 | * Generate a key to describe an NFS inode in an NFS server's index | ||
168 | */ | ||
169 | static uint16_t nfs_fscache_inode_get_key(const void *cookie_netfs_data, | ||
170 | void *buffer, uint16_t bufmax) | ||
171 | { | ||
172 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
173 | uint16_t nsize; | ||
174 | |||
175 | /* use the inode's NFS filehandle as the key */ | ||
176 | nsize = nfsi->fh.size; | ||
177 | memcpy(buffer, nfsi->fh.data, nsize); | ||
178 | return nsize; | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Get certain file attributes from the netfs data | ||
183 | * - This function can be absent for an index | ||
184 | * - Not permitted to return an error | ||
185 | * - The netfs data from the cookie being used as the source is presented | ||
186 | */ | ||
187 | static void nfs_fscache_inode_get_attr(const void *cookie_netfs_data, | ||
188 | uint64_t *size) | ||
189 | { | ||
190 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
191 | |||
192 | *size = nfsi->vfs_inode.i_size; | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Get the auxiliary data from netfs data | ||
197 | * - This function can be absent if the index carries no state data | ||
198 | * - Should store the auxiliary data in the buffer | ||
199 | * - Should return the amount of data stored | ||
200 | * - Not permitted to return an error | ||
201 | * - The netfs data from the cookie being used as the source is presented | ||
202 | */ | ||
203 | static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, | ||
204 | void *buffer, uint16_t bufmax) | ||
205 | { | ||
206 | struct nfs_fscache_inode_auxdata auxdata; | ||
207 | const struct nfs_inode *nfsi = cookie_netfs_data; | ||
208 | |||
209 | memset(&auxdata, 0, sizeof(auxdata)); | ||
210 | auxdata.size = nfsi->vfs_inode.i_size; | ||
211 | auxdata.mtime = nfsi->vfs_inode.i_mtime; | ||
212 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | ||
213 | |||
214 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | ||
215 | auxdata.change_attr = nfsi->change_attr; | ||
216 | |||
217 | if (bufmax > sizeof(auxdata)) | ||
218 | bufmax = sizeof(auxdata); | ||
219 | |||
220 | memcpy(buffer, &auxdata, bufmax); | ||
221 | return bufmax; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * Consult the netfs about the state of an object | ||
226 | * - This function can be absent if the index carries no state data | ||
227 | * - The netfs data from the cookie being used as the target is | ||
228 | * presented, as is the auxiliary data | ||
229 | */ | ||
230 | static | ||
231 | enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, | ||
232 | const void *data, | ||
233 | uint16_t datalen) | ||
234 | { | ||
235 | struct nfs_fscache_inode_auxdata auxdata; | ||
236 | struct nfs_inode *nfsi = cookie_netfs_data; | ||
237 | |||
238 | if (datalen != sizeof(auxdata)) | ||
239 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
240 | |||
241 | memset(&auxdata, 0, sizeof(auxdata)); | ||
242 | auxdata.size = nfsi->vfs_inode.i_size; | ||
243 | auxdata.mtime = nfsi->vfs_inode.i_mtime; | ||
244 | auxdata.ctime = nfsi->vfs_inode.i_ctime; | ||
245 | |||
246 | if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) | ||
247 | auxdata.change_attr = nfsi->change_attr; | ||
248 | |||
249 | if (memcmp(data, &auxdata, datalen) != 0) | ||
250 | return FSCACHE_CHECKAUX_OBSOLETE; | ||
251 | |||
252 | return FSCACHE_CHECKAUX_OKAY; | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Indication from FS-Cache that the cookie is no longer cached | ||
257 | * - This function is called when the backing store currently caching a cookie | ||
258 | * is removed | ||
259 | * - The netfs should use this to clean up any markers indicating cached pages | ||
260 | * - This is mandatory for any object that may have data | ||
261 | */ | ||
262 | static void nfs_fscache_inode_now_uncached(void *cookie_netfs_data) | ||
263 | { | ||
264 | struct nfs_inode *nfsi = cookie_netfs_data; | ||
265 | struct pagevec pvec; | ||
266 | pgoff_t first; | ||
267 | int loop, nr_pages; | ||
268 | |||
269 | pagevec_init(&pvec, 0); | ||
270 | first = 0; | ||
271 | |||
272 | dprintk("NFS: nfs_inode_now_uncached: nfs_inode 0x%p\n", nfsi); | ||
273 | |||
274 | for (;;) { | ||
275 | /* grab a bunch of pages to unmark */ | ||
276 | nr_pages = pagevec_lookup(&pvec, | ||
277 | nfsi->vfs_inode.i_mapping, | ||
278 | first, | ||
279 | PAGEVEC_SIZE - pagevec_count(&pvec)); | ||
280 | if (!nr_pages) | ||
281 | break; | ||
282 | |||
283 | for (loop = 0; loop < nr_pages; loop++) | ||
284 | ClearPageFsCache(pvec.pages[loop]); | ||
285 | |||
286 | first = pvec.pages[nr_pages - 1]->index + 1; | ||
287 | |||
288 | pvec.nr = nr_pages; | ||
289 | pagevec_release(&pvec); | ||
290 | cond_resched(); | ||
291 | } | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * Get an extra reference on a read context. | ||
296 | * - This function can be absent if the completion function doesn't require a | ||
297 | * context. | ||
298 | * - The read context is passed back to NFS in the event that a data read on the | ||
299 | * cache fails with EIO - in which case the server must be contacted to | ||
300 | * retrieve the data, which requires the read context for security. | ||
301 | */ | ||
302 | static void nfs_fh_get_context(void *cookie_netfs_data, void *context) | ||
303 | { | ||
304 | get_nfs_open_context(context); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * Release an extra reference on a read context. | ||
309 | * - This function can be absent if the completion function doesn't require a | ||
310 | * context. | ||
311 | */ | ||
312 | static void nfs_fh_put_context(void *cookie_netfs_data, void *context) | ||
313 | { | ||
314 | if (context) | ||
315 | put_nfs_open_context(context); | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Define the inode object for FS-Cache. This is used to describe an inode | ||
320 | * object to fscache_acquire_cookie(). It is keyed by the NFS file handle for | ||
321 | * an inode. | ||
322 | * | ||
323 | * Coherency is managed by comparing the copies of i_size, i_mtime and i_ctime | ||
324 | * held in the cache auxiliary data for the data storage object with those in | ||
325 | * the inode struct in memory. | ||
326 | */ | ||
327 | const struct fscache_cookie_def nfs_fscache_inode_object_def = { | ||
328 | .name = "NFS.fh", | ||
329 | .type = FSCACHE_COOKIE_TYPE_DATAFILE, | ||
330 | .get_key = nfs_fscache_inode_get_key, | ||
331 | .get_attr = nfs_fscache_inode_get_attr, | ||
332 | .get_aux = nfs_fscache_inode_get_aux, | ||
333 | .check_aux = nfs_fscache_inode_check_aux, | ||
334 | .now_uncached = nfs_fscache_inode_now_uncached, | ||
335 | .get_context = nfs_fh_get_context, | ||
336 | .put_context = nfs_fh_put_context, | ||
337 | }; | ||
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c new file mode 100644 index 000000000000..379be678cb7e --- /dev/null +++ b/fs/nfs/fscache.c | |||
@@ -0,0 +1,523 @@ | |||
1 | /* NFS filesystem cache interface | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/mm.h> | ||
16 | #include <linux/nfs_fs.h> | ||
17 | #include <linux/nfs_fs_sb.h> | ||
18 | #include <linux/in6.h> | ||
19 | #include <linux/seq_file.h> | ||
20 | |||
21 | #include "internal.h" | ||
22 | #include "iostat.h" | ||
23 | #include "fscache.h" | ||
24 | |||
25 | #define NFSDBG_FACILITY NFSDBG_FSCACHE | ||
26 | |||
27 | static struct rb_root nfs_fscache_keys = RB_ROOT; | ||
28 | static DEFINE_SPINLOCK(nfs_fscache_keys_lock); | ||
29 | |||
30 | /* | ||
31 | * Get the per-client index cookie for an NFS client if the appropriate mount | ||
32 | * flag was set | ||
33 | * - We always try and get an index cookie for the client, but get filehandle | ||
34 | * cookies on a per-superblock basis, depending on the mount flags | ||
35 | */ | ||
36 | void nfs_fscache_get_client_cookie(struct nfs_client *clp) | ||
37 | { | ||
38 | /* create a cache index for looking up filehandles */ | ||
39 | clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, | ||
40 | &nfs_fscache_server_index_def, | ||
41 | clp); | ||
42 | dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", | ||
43 | clp, clp->fscache); | ||
44 | } | ||
45 | |||
46 | /* | ||
47 | * Dispose of a per-client cookie | ||
48 | */ | ||
49 | void nfs_fscache_release_client_cookie(struct nfs_client *clp) | ||
50 | { | ||
51 | dfprintk(FSCACHE, "NFS: releasing client cookie (0x%p/0x%p)\n", | ||
52 | clp, clp->fscache); | ||
53 | |||
54 | fscache_relinquish_cookie(clp->fscache, 0); | ||
55 | clp->fscache = NULL; | ||
56 | } | ||
57 | |||
58 | /* | ||
59 | * Get the cache cookie for an NFS superblock. We have to handle | ||
60 | * uniquification here because the cache doesn't do it for us. | ||
61 | */ | ||
62 | void nfs_fscache_get_super_cookie(struct super_block *sb, | ||
63 | struct nfs_parsed_mount_data *data) | ||
64 | { | ||
65 | struct nfs_fscache_key *key, *xkey; | ||
66 | struct nfs_server *nfss = NFS_SB(sb); | ||
67 | struct rb_node **p, *parent; | ||
68 | const char *uniq = data->fscache_uniq ?: ""; | ||
69 | int diff, ulen; | ||
70 | |||
71 | ulen = strlen(uniq); | ||
72 | key = kzalloc(sizeof(*key) + ulen, GFP_KERNEL); | ||
73 | if (!key) | ||
74 | return; | ||
75 | |||
76 | key->nfs_client = nfss->nfs_client; | ||
77 | key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; | ||
78 | key->key.nfs_server.flags = nfss->flags; | ||
79 | key->key.nfs_server.rsize = nfss->rsize; | ||
80 | key->key.nfs_server.wsize = nfss->wsize; | ||
81 | key->key.nfs_server.acregmin = nfss->acregmin; | ||
82 | key->key.nfs_server.acregmax = nfss->acregmax; | ||
83 | key->key.nfs_server.acdirmin = nfss->acdirmin; | ||
84 | key->key.nfs_server.acdirmax = nfss->acdirmax; | ||
85 | key->key.nfs_server.fsid = nfss->fsid; | ||
86 | key->key.rpc_auth.au_flavor = nfss->client->cl_auth->au_flavor; | ||
87 | |||
88 | key->key.uniq_len = ulen; | ||
89 | memcpy(key->key.uniquifier, uniq, ulen); | ||
90 | |||
91 | spin_lock(&nfs_fscache_keys_lock); | ||
92 | p = &nfs_fscache_keys.rb_node; | ||
93 | parent = NULL; | ||
94 | while (*p) { | ||
95 | parent = *p; | ||
96 | xkey = rb_entry(parent, struct nfs_fscache_key, node); | ||
97 | |||
98 | if (key->nfs_client < xkey->nfs_client) | ||
99 | goto go_left; | ||
100 | if (key->nfs_client > xkey->nfs_client) | ||
101 | goto go_right; | ||
102 | |||
103 | diff = memcmp(&key->key, &xkey->key, sizeof(key->key)); | ||
104 | if (diff < 0) | ||
105 | goto go_left; | ||
106 | if (diff > 0) | ||
107 | goto go_right; | ||
108 | |||
109 | if (key->key.uniq_len == 0) | ||
110 | goto non_unique; | ||
111 | diff = memcmp(key->key.uniquifier, | ||
112 | xkey->key.uniquifier, | ||
113 | key->key.uniq_len); | ||
114 | if (diff < 0) | ||
115 | goto go_left; | ||
116 | if (diff > 0) | ||
117 | goto go_right; | ||
118 | goto non_unique; | ||
119 | |||
120 | go_left: | ||
121 | p = &(*p)->rb_left; | ||
122 | continue; | ||
123 | go_right: | ||
124 | p = &(*p)->rb_right; | ||
125 | } | ||
126 | |||
127 | rb_link_node(&key->node, parent, p); | ||
128 | rb_insert_color(&key->node, &nfs_fscache_keys); | ||
129 | spin_unlock(&nfs_fscache_keys_lock); | ||
130 | nfss->fscache_key = key; | ||
131 | |||
132 | /* create a cache index for looking up filehandles */ | ||
133 | nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, | ||
134 | &nfs_fscache_super_index_def, | ||
135 | nfss); | ||
136 | dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", | ||
137 | nfss, nfss->fscache); | ||
138 | return; | ||
139 | |||
140 | non_unique: | ||
141 | spin_unlock(&nfs_fscache_keys_lock); | ||
142 | kfree(key); | ||
143 | nfss->fscache_key = NULL; | ||
144 | nfss->fscache = NULL; | ||
145 | printk(KERN_WARNING "NFS:" | ||
146 | " Cache request denied due to non-unique superblock keys\n"); | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * release a per-superblock cookie | ||
151 | */ | ||
152 | void nfs_fscache_release_super_cookie(struct super_block *sb) | ||
153 | { | ||
154 | struct nfs_server *nfss = NFS_SB(sb); | ||
155 | |||
156 | dfprintk(FSCACHE, "NFS: releasing superblock cookie (0x%p/0x%p)\n", | ||
157 | nfss, nfss->fscache); | ||
158 | |||
159 | fscache_relinquish_cookie(nfss->fscache, 0); | ||
160 | nfss->fscache = NULL; | ||
161 | |||
162 | if (nfss->fscache_key) { | ||
163 | spin_lock(&nfs_fscache_keys_lock); | ||
164 | rb_erase(&nfss->fscache_key->node, &nfs_fscache_keys); | ||
165 | spin_unlock(&nfs_fscache_keys_lock); | ||
166 | kfree(nfss->fscache_key); | ||
167 | nfss->fscache_key = NULL; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Initialise the per-inode cache cookie pointer for an NFS inode. | ||
173 | */ | ||
174 | void nfs_fscache_init_inode_cookie(struct inode *inode) | ||
175 | { | ||
176 | NFS_I(inode)->fscache = NULL; | ||
177 | if (S_ISREG(inode->i_mode)) | ||
178 | set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); | ||
179 | } | ||
180 | |||
181 | /* | ||
182 | * Get the per-inode cache cookie for an NFS inode. | ||
183 | */ | ||
184 | static void nfs_fscache_enable_inode_cookie(struct inode *inode) | ||
185 | { | ||
186 | struct super_block *sb = inode->i_sb; | ||
187 | struct nfs_inode *nfsi = NFS_I(inode); | ||
188 | |||
189 | if (nfsi->fscache || !NFS_FSCACHE(inode)) | ||
190 | return; | ||
191 | |||
192 | if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { | ||
193 | nfsi->fscache = fscache_acquire_cookie( | ||
194 | NFS_SB(sb)->fscache, | ||
195 | &nfs_fscache_inode_object_def, | ||
196 | nfsi); | ||
197 | |||
198 | dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", | ||
199 | sb, nfsi, nfsi->fscache); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Release a per-inode cookie. | ||
205 | */ | ||
206 | void nfs_fscache_release_inode_cookie(struct inode *inode) | ||
207 | { | ||
208 | struct nfs_inode *nfsi = NFS_I(inode); | ||
209 | |||
210 | dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", | ||
211 | nfsi, nfsi->fscache); | ||
212 | |||
213 | fscache_relinquish_cookie(nfsi->fscache, 0); | ||
214 | nfsi->fscache = NULL; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Retire a per-inode cookie, destroying the data attached to it. | ||
219 | */ | ||
220 | void nfs_fscache_zap_inode_cookie(struct inode *inode) | ||
221 | { | ||
222 | struct nfs_inode *nfsi = NFS_I(inode); | ||
223 | |||
224 | dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", | ||
225 | nfsi, nfsi->fscache); | ||
226 | |||
227 | fscache_relinquish_cookie(nfsi->fscache, 1); | ||
228 | nfsi->fscache = NULL; | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * Turn off the cache with regard to a per-inode cookie if opened for writing, | ||
233 | * invalidating all the pages in the page cache relating to the associated | ||
234 | * inode to clear the per-page caching. | ||
235 | */ | ||
236 | static void nfs_fscache_disable_inode_cookie(struct inode *inode) | ||
237 | { | ||
238 | clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); | ||
239 | |||
240 | if (NFS_I(inode)->fscache) { | ||
241 | dfprintk(FSCACHE, | ||
242 | "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); | ||
243 | |||
244 | /* Need to invalidate any mapped pages that were read in before | ||
245 | * turning off the cache. | ||
246 | */ | ||
247 | if (inode->i_mapping && inode->i_mapping->nrpages) | ||
248 | invalidate_inode_pages2(inode->i_mapping); | ||
249 | |||
250 | nfs_fscache_zap_inode_cookie(inode); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * wait_on_bit() sleep function for uninterruptible waiting | ||
256 | */ | ||
257 | static int nfs_fscache_wait_bit(void *flags) | ||
258 | { | ||
259 | schedule(); | ||
260 | return 0; | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * Lock against someone else trying to also acquire or relinquish a cookie | ||
265 | */ | ||
266 | static inline void nfs_fscache_inode_lock(struct inode *inode) | ||
267 | { | ||
268 | struct nfs_inode *nfsi = NFS_I(inode); | ||
269 | |||
270 | while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) | ||
271 | wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, | ||
272 | nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Unlock cookie management lock | ||
277 | */ | ||
278 | static inline void nfs_fscache_inode_unlock(struct inode *inode) | ||
279 | { | ||
280 | struct nfs_inode *nfsi = NFS_I(inode); | ||
281 | |||
282 | smp_mb__before_clear_bit(); | ||
283 | clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); | ||
284 | smp_mb__after_clear_bit(); | ||
285 | wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Decide if we should enable or disable local caching for this inode. | ||
290 | * - For now, with NFS, only regular files that are open read-only will be able | ||
291 | * to use the cache. | ||
292 | * - May be invoked multiple times in parallel by parallel nfs_open() functions. | ||
293 | */ | ||
294 | void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) | ||
295 | { | ||
296 | if (NFS_FSCACHE(inode)) { | ||
297 | nfs_fscache_inode_lock(inode); | ||
298 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) | ||
299 | nfs_fscache_disable_inode_cookie(inode); | ||
300 | else | ||
301 | nfs_fscache_enable_inode_cookie(inode); | ||
302 | nfs_fscache_inode_unlock(inode); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Replace a per-inode cookie due to revalidation detecting a file having | ||
308 | * changed on the server. | ||
309 | */ | ||
310 | void nfs_fscache_reset_inode_cookie(struct inode *inode) | ||
311 | { | ||
312 | struct nfs_inode *nfsi = NFS_I(inode); | ||
313 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
314 | struct fscache_cookie *old = nfsi->fscache; | ||
315 | |||
316 | nfs_fscache_inode_lock(inode); | ||
317 | if (nfsi->fscache) { | ||
318 | /* retire the current fscache cache and get a new one */ | ||
319 | fscache_relinquish_cookie(nfsi->fscache, 1); | ||
320 | /* parent is the superblock index, as when first acquired */ | ||
321 | nfsi->fscache = fscache_acquire_cookie( | ||
322 | nfss->fscache, | ||
323 | &nfs_fscache_inode_object_def, | ||
324 | nfsi); | ||
325 | |||
326 | dfprintk(FSCACHE, | ||
327 | "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", | ||
328 | nfss, nfsi, old, nfsi->fscache); | ||
329 | } | ||
330 | nfs_fscache_inode_unlock(inode); | ||
331 | } | ||
332 | |||
333 | /* | ||
334 | * Release the caching state associated with a page, if the page isn't busy | ||
335 | * interacting with the cache. | ||
336 | * - Returns true (can release page) or false (page busy). | ||
337 | */ | ||
338 | int nfs_fscache_release_page(struct page *page, gfp_t gfp) | ||
339 | { | ||
340 | struct nfs_inode *nfsi = NFS_I(page->mapping->host); | ||
341 | struct fscache_cookie *cookie = nfsi->fscache; | ||
342 | |||
343 | BUG_ON(!cookie); | ||
344 | |||
345 | if (fscache_check_page_write(cookie, page)) { | ||
346 | if (!(gfp & __GFP_WAIT)) | ||
347 | return 0; | ||
348 | fscache_wait_on_page_write(cookie, page); | ||
349 | } | ||
350 | |||
351 | if (PageFsCache(page)) { | ||
352 | dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", | ||
353 | cookie, page, nfsi); | ||
354 | |||
355 | fscache_uncache_page(cookie, page); | ||
356 | nfs_add_fscache_stats(page->mapping->host, | ||
357 | NFSIOS_FSCACHE_PAGES_UNCACHED, 1); | ||
358 | } | ||
359 | |||
360 | return 1; | ||
361 | } | ||
362 | |||
363 | /* | ||
364 | * Release the caching state associated with a page if undergoing complete page | ||
365 | * invalidation. | ||
366 | */ | ||
367 | void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) | ||
368 | { | ||
369 | struct nfs_inode *nfsi = NFS_I(inode); | ||
370 | struct fscache_cookie *cookie = nfsi->fscache; | ||
371 | |||
372 | BUG_ON(!cookie); | ||
373 | |||
374 | dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", | ||
375 | cookie, page, nfsi); | ||
376 | |||
377 | fscache_wait_on_page_write(cookie, page); | ||
378 | |||
379 | BUG_ON(!PageLocked(page)); | ||
380 | fscache_uncache_page(cookie, page); | ||
381 | nfs_add_fscache_stats(page->mapping->host, | ||
382 | NFSIOS_FSCACHE_PAGES_UNCACHED, 1); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * Handle completion of a page being read from the cache. | ||
387 | * - Called in process (keventd) context. | ||
388 | */ | ||
389 | static void nfs_readpage_from_fscache_complete(struct page *page, | ||
390 | void *context, | ||
391 | int error) | ||
392 | { | ||
393 | dfprintk(FSCACHE, | ||
394 | "NFS: readpage_from_fscache_complete (0x%p/0x%p/%d)\n", | ||
395 | page, context, error); | ||
396 | |||
397 | /* if the read from the cache fails, fall back to reading the page from | ||
398 | * the server, unlocking the page ourselves only if that can't be started */ | ||
399 | if (!error) { | ||
400 | SetPageUptodate(page); | ||
401 | unlock_page(page); | ||
402 | } else { | ||
403 | error = nfs_readpage_async(context, page->mapping->host, page); | ||
404 | if (error) | ||
405 | unlock_page(page); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * Retrieve a page from fscache | ||
411 | */ | ||
412 | int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, | ||
413 | struct inode *inode, struct page *page) | ||
414 | { | ||
415 | int ret; | ||
416 | |||
417 | dfprintk(FSCACHE, | ||
418 | "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", | ||
419 | NFS_I(inode)->fscache, page, page->index, page->flags, inode); | ||
420 | |||
421 | ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, | ||
422 | page, | ||
423 | nfs_readpage_from_fscache_complete, | ||
424 | ctx, | ||
425 | GFP_KERNEL); | ||
426 | |||
427 | switch (ret) { | ||
428 | case 0: /* read BIO submitted (page in fscache) */ | ||
429 | dfprintk(FSCACHE, | ||
430 | "NFS: readpage_from_fscache: BIO submitted\n"); | ||
431 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1); | ||
432 | return ret; | ||
433 | |||
434 | case -ENOBUFS: /* inode not in cache */ | ||
435 | case -ENODATA: /* page not in cache */ | ||
436 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); | ||
437 | dfprintk(FSCACHE, | ||
438 | "NFS: readpage_from_fscache %d\n", ret); | ||
439 | return 1; | ||
440 | |||
441 | default: | ||
442 | dfprintk(FSCACHE, "NFS: readpage_from_fscache %d\n", ret); | ||
443 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1); | ||
444 | } | ||
445 | return ret; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * Retrieve a set of pages from fscache | ||
450 | */ | ||
451 | int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, | ||
452 | struct inode *inode, | ||
453 | struct address_space *mapping, | ||
454 | struct list_head *pages, | ||
455 | unsigned *nr_pages) | ||
456 | { | ||
457 | int ret, npages = *nr_pages; | ||
458 | |||
459 | dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", | ||
460 | NFS_I(inode)->fscache, npages, inode); | ||
461 | |||
462 | ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, | ||
463 | mapping, pages, nr_pages, | ||
464 | nfs_readpage_from_fscache_complete, | ||
465 | ctx, | ||
466 | mapping_gfp_mask(mapping)); | ||
467 | if (*nr_pages < npages) | ||
468 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, | ||
469 | npages - *nr_pages); | ||
470 | if (*nr_pages > 0) | ||
471 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, | ||
472 | *nr_pages); | ||
473 | |||
474 | switch (ret) { | ||
475 | case 0: /* read submitted to the cache for all pages */ | ||
476 | BUG_ON(!list_empty(pages)); | ||
477 | BUG_ON(*nr_pages != 0); | ||
478 | dfprintk(FSCACHE, | ||
479 | "NFS: nfs_getpages_from_fscache: submitted\n"); | ||
480 | |||
481 | return ret; | ||
482 | |||
483 | case -ENOBUFS: /* some pages aren't cached and can't be */ | ||
484 | case -ENODATA: /* some pages aren't cached */ | ||
485 | dfprintk(FSCACHE, | ||
486 | "NFS: nfs_getpages_from_fscache: no page: %d\n", ret); | ||
487 | return 1; | ||
488 | |||
489 | default: | ||
490 | dfprintk(FSCACHE, | ||
491 | "NFS: nfs_getpages_from_fscache: ret %d\n", ret); | ||
492 | } | ||
493 | |||
494 | return ret; | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Store a newly fetched page in fscache | ||
499 | * - PG_fscache must be set on the page | ||
500 | */ | ||
501 | void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) | ||
502 | { | ||
503 | int ret; | ||
504 | |||
505 | dfprintk(FSCACHE, | ||
506 | "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", | ||
507 | NFS_I(inode)->fscache, page, page->index, page->flags, sync); | ||
508 | |||
509 | ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); | ||
510 | dfprintk(FSCACHE, | ||
511 | "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", | ||
512 | page, page->index, page->flags, ret); | ||
513 | |||
514 | if (ret != 0) { | ||
515 | fscache_uncache_page(NFS_I(inode)->fscache, page); | ||
516 | nfs_add_fscache_stats(inode, | ||
517 | NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); | ||
518 | nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); | ||
519 | } else { | ||
520 | nfs_add_fscache_stats(inode, | ||
521 | NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1); | ||
522 | } | ||
523 | } | ||
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h new file mode 100644 index 000000000000..6e809bb0ff08 --- /dev/null +++ b/fs/nfs/fscache.h | |||
@@ -0,0 +1,220 @@ | |||
1 | /* NFS filesystem cache interface definitions | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #ifndef _NFS_FSCACHE_H | ||
13 | #define _NFS_FSCACHE_H | ||
14 | |||
15 | #include <linux/nfs_fs.h> | ||
16 | #include <linux/nfs_mount.h> | ||
17 | #include <linux/nfs4_mount.h> | ||
18 | #include <linux/fscache.h> | ||
19 | |||
20 | #ifdef CONFIG_NFS_FSCACHE | ||
21 | |||
22 | /* | ||
23 | * set of NFS FS-Cache objects that form a superblock key | ||
24 | */ | ||
25 | struct nfs_fscache_key { | ||
26 | struct rb_node node; | ||
27 | struct nfs_client *nfs_client; /* the server */ | ||
28 | |||
29 | /* the elements of the unique key - as used by nfs_compare_super() and | ||
30 | * nfs_compare_mount_options() to distinguish superblocks */ | ||
31 | struct { | ||
32 | struct { | ||
33 | unsigned long s_flags; /* various flags | ||
34 | * (& NFS_MS_MASK) */ | ||
35 | } super; | ||
36 | |||
37 | struct { | ||
38 | struct nfs_fsid fsid; | ||
39 | int flags; | ||
40 | unsigned int rsize; /* read size */ | ||
41 | unsigned int wsize; /* write size */ | ||
42 | unsigned int acregmin; /* attr cache timeouts */ | ||
43 | unsigned int acregmax; | ||
44 | unsigned int acdirmin; | ||
45 | unsigned int acdirmax; | ||
46 | } nfs_server; | ||
47 | |||
48 | struct { | ||
49 | rpc_authflavor_t au_flavor; | ||
50 | } rpc_auth; | ||
51 | |||
52 | /* uniquifier - can be used if nfs_server.flags includes | ||
53 | * NFS_MOUNT_UNSHARED */ | ||
54 | u8 uniq_len; | ||
55 | char uniquifier[0]; | ||
56 | } key; | ||
57 | }; | ||
58 | |||
59 | /* | ||
60 | * fscache-index.c | ||
61 | */ | ||
62 | extern struct fscache_netfs nfs_fscache_netfs; | ||
63 | extern const struct fscache_cookie_def nfs_fscache_server_index_def; | ||
64 | extern const struct fscache_cookie_def nfs_fscache_super_index_def; | ||
65 | extern const struct fscache_cookie_def nfs_fscache_inode_object_def; | ||
66 | |||
67 | extern int nfs_fscache_register(void); | ||
68 | extern void nfs_fscache_unregister(void); | ||
69 | |||
70 | /* | ||
71 | * fscache.c | ||
72 | */ | ||
73 | extern void nfs_fscache_get_client_cookie(struct nfs_client *); | ||
74 | extern void nfs_fscache_release_client_cookie(struct nfs_client *); | ||
75 | |||
76 | extern void nfs_fscache_get_super_cookie(struct super_block *, | ||
77 | struct nfs_parsed_mount_data *); | ||
78 | extern void nfs_fscache_release_super_cookie(struct super_block *); | ||
79 | |||
80 | extern void nfs_fscache_init_inode_cookie(struct inode *); | ||
81 | extern void nfs_fscache_release_inode_cookie(struct inode *); | ||
82 | extern void nfs_fscache_zap_inode_cookie(struct inode *); | ||
83 | extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); | ||
84 | extern void nfs_fscache_reset_inode_cookie(struct inode *); | ||
85 | |||
86 | extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); | ||
87 | extern int nfs_fscache_release_page(struct page *, gfp_t); | ||
88 | |||
89 | extern int __nfs_readpage_from_fscache(struct nfs_open_context *, | ||
90 | struct inode *, struct page *); | ||
91 | extern int __nfs_readpages_from_fscache(struct nfs_open_context *, | ||
92 | struct inode *, struct address_space *, | ||
93 | struct list_head *, unsigned *); | ||
94 | extern void __nfs_readpage_to_fscache(struct inode *, struct page *, int); | ||
95 | |||
96 | /* | ||
97 | * wait for a page to complete writing to the cache | ||
98 | */ | ||
99 | static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi, | ||
100 | struct page *page) | ||
101 | { | ||
102 | if (PageFsCache(page)) | ||
103 | fscache_wait_on_page_write(nfsi->fscache, page); | ||
104 | } | ||
105 | |||
/*
 * Release the caching state attached to a page that is undergoing complete
 * page invalidation.  Nothing is done unless PageFsCache() is true.
 */
static inline void nfs_fscache_invalidate_page(struct page *page,
					       struct inode *inode)
{
	if (!PageFsCache(page))
		return;

	__nfs_fscache_invalidate_page(page, inode);
}
116 | |||
117 | /* | ||
118 | * Retrieve a page from an inode data storage object. | ||
119 | */ | ||
120 | static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx, | ||
121 | struct inode *inode, | ||
122 | struct page *page) | ||
123 | { | ||
124 | if (NFS_I(inode)->fscache) | ||
125 | return __nfs_readpage_from_fscache(ctx, inode, page); | ||
126 | return -ENOBUFS; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Retrieve a set of pages from an inode data storage object. | ||
131 | */ | ||
132 | static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx, | ||
133 | struct inode *inode, | ||
134 | struct address_space *mapping, | ||
135 | struct list_head *pages, | ||
136 | unsigned *nr_pages) | ||
137 | { | ||
138 | if (NFS_I(inode)->fscache) | ||
139 | return __nfs_readpages_from_fscache(ctx, inode, mapping, pages, | ||
140 | nr_pages); | ||
141 | return -ENOBUFS; | ||
142 | } | ||
143 | |||
/*
 * Store a page that was newly fetched from the server in the inode's data
 * storage object in the cache.  Pages for which PageFsCache() is false are
 * not passed on.
 */
static inline void nfs_readpage_to_fscache(struct inode *inode,
					   struct page *page,
					   int sync)
{
	if (!PageFsCache(page))
		return;

	__nfs_readpage_to_fscache(inode, page, sync);
}
155 | |||
156 | /* | ||
157 | * indicate the client caching state as readable text | ||
158 | */ | ||
159 | static inline const char *nfs_server_fscache_state(struct nfs_server *server) | ||
160 | { | ||
161 | if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) | ||
162 | return "yes"; | ||
163 | return "no "; | ||
164 | } | ||
165 | |||
166 | |||
167 | #else /* CONFIG_NFS_FSCACHE */ | ||
/* No FS-Cache support configured: registration always reports success */
static inline int nfs_fscache_register(void)
{
	return 0;
}

/* No FS-Cache support configured: nothing to unregister */
static inline void nfs_fscache_unregister(void)
{
}
170 | |||
/*
 * Cookie management stubs used when FS-Cache support is not configured: each
 * one deliberately does nothing.
 */
static inline void nfs_fscache_get_client_cookie(struct nfs_client *clp)
{
}

static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp)
{
}

static inline void nfs_fscache_get_super_cookie(struct super_block *sb,
					struct nfs_parsed_mount_data *data)
{
}

static inline void nfs_fscache_release_super_cookie(struct super_block *sb)
{
}

static inline void nfs_fscache_init_inode_cookie(struct inode *inode)
{
}

static inline void nfs_fscache_release_inode_cookie(struct inode *inode)
{
}

static inline void nfs_fscache_zap_inode_cookie(struct inode *inode)
{
}

static inline void nfs_fscache_set_inode_cookie(struct inode *inode,
						struct file *filp)
{
}

static inline void nfs_fscache_reset_inode_cookie(struct inode *inode)
{
}
187 | |||
188 | static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) | ||
189 | { | ||
190 | return 1; /* True: may release page */ | ||
191 | } | ||
/* No FS-Cache support configured: no caching state to release */
static inline void nfs_fscache_invalidate_page(struct page *page,
					       struct inode *inode)
{
}

/* No FS-Cache support configured: no cache write to wait for */
static inline void nfs_fscache_wait_on_page_write(struct nfs_inode *nfsi,
						  struct page *page)
{
}
196 | |||
/*
 * No FS-Cache support configured: always answer -ENOBUFS, the same value the
 * configured variant returns when the inode has no cookie.
 */
static inline int nfs_readpage_from_fscache(struct nfs_open_context *ctx,
					    struct inode *inode,
					    struct page *page)
{
	return -ENOBUFS;
}
/*
 * No FS-Cache support configured: always answer -ENOBUFS and leave the page
 * list and *nr_pages untouched.
 */
static inline int nfs_readpages_from_fscache(struct nfs_open_context *ctx,
					     struct inode *inode,
					     struct address_space *mapping,
					     struct list_head *pages,
					     unsigned *nr_pages)
{
	return -ENOBUFS;
}
/* No FS-Cache support configured: nowhere to store the page */
static inline void nfs_readpage_to_fscache(struct inode *inode,
					   struct page *page, int sync)
{
}
213 | |||
/*
 * No FS-Cache support configured: the caching state is always reported as
 * "no " (with the trailing space, the same text the configured variant uses).
 */
static inline const char *nfs_server_fscache_state(struct nfs_server *server)
{
	return "no ";
}
218 | |||
219 | #endif /* CONFIG_NFS_FSCACHE */ | ||
220 | #endif /* _NFS_FSCACHE_H */ | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a834d1d850b7..64f87194d390 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include "delegation.h" | 46 | #include "delegation.h" |
47 | #include "iostat.h" | 47 | #include "iostat.h" |
48 | #include "internal.h" | 48 | #include "internal.h" |
49 | #include "fscache.h" | ||
49 | 50 | ||
50 | #define NFSDBG_FACILITY NFSDBG_VFS | 51 | #define NFSDBG_FACILITY NFSDBG_VFS |
51 | 52 | ||
@@ -121,6 +122,7 @@ void nfs_clear_inode(struct inode *inode) | |||
121 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); | 122 | BUG_ON(!list_empty(&NFS_I(inode)->open_files)); |
122 | nfs_zap_acl_cache(inode); | 123 | nfs_zap_acl_cache(inode); |
123 | nfs_access_zap_cache(inode); | 124 | nfs_access_zap_cache(inode); |
125 | nfs_fscache_release_inode_cookie(inode); | ||
124 | } | 126 | } |
125 | 127 | ||
126 | /** | 128 | /** |
@@ -355,6 +357,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
355 | nfsi->attrtimeo_timestamp = now; | 357 | nfsi->attrtimeo_timestamp = now; |
356 | nfsi->access_cache = RB_ROOT; | 358 | nfsi->access_cache = RB_ROOT; |
357 | 359 | ||
360 | nfs_fscache_init_inode_cookie(inode); | ||
361 | |||
358 | unlock_new_inode(inode); | 362 | unlock_new_inode(inode); |
359 | } else | 363 | } else |
360 | nfs_refresh_inode(inode, fattr); | 364 | nfs_refresh_inode(inode, fattr); |
@@ -686,6 +690,7 @@ int nfs_open(struct inode *inode, struct file *filp) | |||
686 | ctx->mode = filp->f_mode; | 690 | ctx->mode = filp->f_mode; |
687 | nfs_file_set_open_context(filp, ctx); | 691 | nfs_file_set_open_context(filp, ctx); |
688 | put_nfs_open_context(ctx); | 692 | put_nfs_open_context(ctx); |
693 | nfs_fscache_set_inode_cookie(inode, filp); | ||
689 | return 0; | 694 | return 0; |
690 | } | 695 | } |
691 | 696 | ||
@@ -786,6 +791,7 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa | |||
786 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); | 791 | memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); |
787 | spin_unlock(&inode->i_lock); | 792 | spin_unlock(&inode->i_lock); |
788 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); | 793 | nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); |
794 | nfs_fscache_reset_inode_cookie(inode); | ||
789 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", | 795 | dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", |
790 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 796 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
791 | return 0; | 797 | return 0; |
@@ -1030,6 +1036,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
1030 | spin_lock(&inode->i_lock); | 1036 | spin_lock(&inode->i_lock); |
1031 | status = nfs_refresh_inode_locked(inode, fattr); | 1037 | status = nfs_refresh_inode_locked(inode, fattr); |
1032 | spin_unlock(&inode->i_lock); | 1038 | spin_unlock(&inode->i_lock); |
1039 | |||
1033 | return status; | 1040 | return status; |
1034 | } | 1041 | } |
1035 | 1042 | ||
@@ -1436,6 +1443,10 @@ static int __init init_nfs_fs(void) | |||
1436 | { | 1443 | { |
1437 | int err; | 1444 | int err; |
1438 | 1445 | ||
1446 | err = nfs_fscache_register(); | ||
1447 | if (err < 0) | ||
1448 | goto out7; | ||
1449 | |||
1439 | err = nfsiod_start(); | 1450 | err = nfsiod_start(); |
1440 | if (err) | 1451 | if (err) |
1441 | goto out6; | 1452 | goto out6; |
@@ -1488,6 +1499,8 @@ out4: | |||
1488 | out5: | 1499 | out5: |
1489 | nfsiod_stop(); | 1500 | nfsiod_stop(); |
1490 | out6: | 1501 | out6: |
1502 | nfs_fscache_unregister(); | ||
1503 | out7: | ||
1491 | return err; | 1504 | return err; |
1492 | } | 1505 | } |
1493 | 1506 | ||
@@ -1498,6 +1511,7 @@ static void __exit exit_nfs_fs(void) | |||
1498 | nfs_destroy_readpagecache(); | 1511 | nfs_destroy_readpagecache(); |
1499 | nfs_destroy_inodecache(); | 1512 | nfs_destroy_inodecache(); |
1500 | nfs_destroy_nfspagecache(); | 1513 | nfs_destroy_nfspagecache(); |
1514 | nfs_fscache_unregister(); | ||
1501 | #ifdef CONFIG_PROC_FS | 1515 | #ifdef CONFIG_PROC_FS |
1502 | rpc_proc_unregister("nfs"); | 1516 | rpc_proc_unregister("nfs"); |
1503 | #endif | 1517 | #endif |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2041f68ff1cc..e4d6a8348adf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -5,6 +5,8 @@ | |||
5 | #include <linux/mount.h> | 5 | #include <linux/mount.h> |
6 | #include <linux/security.h> | 6 | #include <linux/security.h> |
7 | 7 | ||
8 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | ||
9 | |||
8 | struct nfs_string; | 10 | struct nfs_string; |
9 | 11 | ||
10 | /* Maximum number of readahead requests | 12 | /* Maximum number of readahead requests |
@@ -37,10 +39,12 @@ struct nfs_parsed_mount_data { | |||
37 | int acregmin, acregmax, | 39 | int acregmin, acregmax, |
38 | acdirmin, acdirmax; | 40 | acdirmin, acdirmax; |
39 | int namlen; | 41 | int namlen; |
42 | unsigned int options; | ||
40 | unsigned int bsize; | 43 | unsigned int bsize; |
41 | unsigned int auth_flavor_len; | 44 | unsigned int auth_flavor_len; |
42 | rpc_authflavor_t auth_flavors[1]; | 45 | rpc_authflavor_t auth_flavors[1]; |
43 | char *client_address; | 46 | char *client_address; |
47 | char *fscache_uniq; | ||
44 | 48 | ||
45 | struct { | 49 | struct { |
46 | struct sockaddr_storage address; | 50 | struct sockaddr_storage address; |
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h index a36952810032..a2ab2529b5ca 100644 --- a/fs/nfs/iostat.h +++ b/fs/nfs/iostat.h | |||
@@ -16,6 +16,9 @@ | |||
16 | 16 | ||
17 | struct nfs_iostats { | 17 | struct nfs_iostats { |
18 | unsigned long long bytes[__NFSIOS_BYTESMAX]; | 18 | unsigned long long bytes[__NFSIOS_BYTESMAX]; |
19 | #ifdef CONFIG_NFS_FSCACHE | ||
20 | unsigned long long fscache[__NFSIOS_FSCACHEMAX]; | ||
21 | #endif | ||
19 | unsigned long events[__NFSIOS_COUNTSMAX]; | 22 | unsigned long events[__NFSIOS_COUNTSMAX]; |
20 | } ____cacheline_aligned; | 23 | } ____cacheline_aligned; |
21 | 24 | ||
@@ -57,6 +60,21 @@ static inline void nfs_add_stats(const struct inode *inode, | |||
57 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); | 60 | nfs_add_server_stats(NFS_SERVER(inode), stat, addend); |
58 | } | 61 | } |
59 | 62 | ||
#ifdef CONFIG_NFS_FSCACHE
/*
 * Add @addend to the FS-Cache counter @stat in the I/O statistics of the
 * server that @inode belongs to.
 */
static inline void nfs_add_fscache_stats(struct inode *inode,
					 enum nfs_stat_fscachecounters stat,
					 unsigned long addend)
{
	struct nfs_iostats *stats;

	/* update the current CPU's copy between get_cpu() and
	 * put_cpu_no_resched() */
	stats = per_cpu_ptr(NFS_SERVER(inode)->io_stats, get_cpu());
	stats->fscache[stat] += addend;
	put_cpu_no_resched();
}
#endif
77 | |||
60 | static inline struct nfs_iostats *nfs_alloc_iostats(void) | 78 | static inline struct nfs_iostats *nfs_alloc_iostats(void) |
61 | { | 79 | { |
62 | return alloc_percpu(struct nfs_iostats); | 80 | return alloc_percpu(struct nfs_iostats); |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f856004bb7fa..4ace3c50a8eb 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | #include "iostat.h" | 26 | #include "iostat.h" |
27 | #include "fscache.h" | ||
27 | 28 | ||
28 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
29 | 30 | ||
@@ -111,8 +112,8 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) | |||
111 | } | 112 | } |
112 | } | 113 | } |
113 | 114 | ||
114 | static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | 115 | int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, |
115 | struct page *page) | 116 | struct page *page) |
116 | { | 117 | { |
117 | LIST_HEAD(one_request); | 118 | LIST_HEAD(one_request); |
118 | struct nfs_page *new; | 119 | struct nfs_page *new; |
@@ -139,6 +140,11 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
139 | 140 | ||
140 | static void nfs_readpage_release(struct nfs_page *req) | 141 | static void nfs_readpage_release(struct nfs_page *req) |
141 | { | 142 | { |
143 | struct inode *d_inode = req->wb_context->path.dentry->d_inode; | ||
144 | |||
145 | if (PageUptodate(req->wb_page)) | ||
146 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | ||
147 | |||
142 | unlock_page(req->wb_page); | 148 | unlock_page(req->wb_page); |
143 | 149 | ||
144 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", | 150 | dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", |
@@ -510,8 +516,15 @@ int nfs_readpage(struct file *file, struct page *page) | |||
510 | } else | 516 | } else |
511 | ctx = get_nfs_open_context(nfs_file_open_context(file)); | 517 | ctx = get_nfs_open_context(nfs_file_open_context(file)); |
512 | 518 | ||
519 | if (!IS_SYNC(inode)) { | ||
520 | error = nfs_readpage_from_fscache(ctx, inode, page); | ||
521 | if (error == 0) | ||
522 | goto out; | ||
523 | } | ||
524 | |||
513 | error = nfs_readpage_async(ctx, inode, page); | 525 | error = nfs_readpage_async(ctx, inode, page); |
514 | 526 | ||
527 | out: | ||
515 | put_nfs_open_context(ctx); | 528 | put_nfs_open_context(ctx); |
516 | return error; | 529 | return error; |
517 | out_unlock: | 530 | out_unlock: |
@@ -584,6 +597,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
584 | return -EBADF; | 597 | return -EBADF; |
585 | } else | 598 | } else |
586 | desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); | 599 | desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); |
600 | |||
601 | /* attempt to read as many of the pages as possible from the cache | ||
602 | * - this returns -ENOBUFS immediately if the cookie is negative | ||
603 | */ | ||
604 | ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, | ||
605 | pages, &nr_pages); | ||
606 | if (ret == 0) | ||
607 | goto read_complete; /* all pages were read */ | ||
608 | |||
587 | if (rsize < PAGE_CACHE_SIZE) | 609 | if (rsize < PAGE_CACHE_SIZE) |
588 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 610 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
589 | else | 611 | else |
@@ -594,6 +616,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
594 | nfs_pageio_complete(&pgio); | 616 | nfs_pageio_complete(&pgio); |
595 | npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 617 | npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
596 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); | 618 | nfs_add_stats(inode, NFSIOS_READPAGES, npages); |
619 | read_complete: | ||
597 | put_nfs_open_context(desc.ctx); | 620 | put_nfs_open_context(desc.ctx); |
598 | out: | 621 | out: |
599 | return ret; | 622 | return ret; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0942fcbbad3c..82eaadbff408 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include "delegation.h" | 60 | #include "delegation.h" |
61 | #include "iostat.h" | 61 | #include "iostat.h" |
62 | #include "internal.h" | 62 | #include "internal.h" |
63 | #include "fscache.h" | ||
63 | 64 | ||
64 | #define NFSDBG_FACILITY NFSDBG_VFS | 65 | #define NFSDBG_FACILITY NFSDBG_VFS |
65 | 66 | ||
@@ -76,6 +77,7 @@ enum { | |||
76 | Opt_rdirplus, Opt_nordirplus, | 77 | Opt_rdirplus, Opt_nordirplus, |
77 | Opt_sharecache, Opt_nosharecache, | 78 | Opt_sharecache, Opt_nosharecache, |
78 | Opt_resvport, Opt_noresvport, | 79 | Opt_resvport, Opt_noresvport, |
80 | Opt_fscache, Opt_nofscache, | ||
79 | 81 | ||
80 | /* Mount options that take integer arguments */ | 82 | /* Mount options that take integer arguments */ |
81 | Opt_port, | 83 | Opt_port, |
@@ -93,6 +95,7 @@ enum { | |||
93 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 95 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
94 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 96 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
95 | Opt_lookupcache, | 97 | Opt_lookupcache, |
98 | Opt_fscache_uniq, | ||
96 | 99 | ||
97 | /* Special mount options */ | 100 | /* Special mount options */ |
98 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 101 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
@@ -132,6 +135,9 @@ static const match_table_t nfs_mount_option_tokens = { | |||
132 | { Opt_nosharecache, "nosharecache" }, | 135 | { Opt_nosharecache, "nosharecache" }, |
133 | { Opt_resvport, "resvport" }, | 136 | { Opt_resvport, "resvport" }, |
134 | { Opt_noresvport, "noresvport" }, | 137 | { Opt_noresvport, "noresvport" }, |
138 | { Opt_fscache, "fsc" }, | ||
139 | { Opt_fscache_uniq, "fsc=%s" }, | ||
140 | { Opt_nofscache, "nofsc" }, | ||
135 | 141 | ||
136 | { Opt_port, "port=%u" }, | 142 | { Opt_port, "port=%u" }, |
137 | { Opt_rsize, "rsize=%u" }, | 143 | { Opt_rsize, "rsize=%u" }, |
@@ -563,6 +569,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
563 | if (clp->rpc_ops->version == 4) | 569 | if (clp->rpc_ops->version == 4) |
564 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); | 570 | seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); |
565 | #endif | 571 | #endif |
572 | if (nfss->options & NFS_OPTION_FSCACHE) | ||
573 | seq_printf(m, ",fsc"); | ||
566 | } | 574 | } |
567 | 575 | ||
568 | /* | 576 | /* |
@@ -641,6 +649,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
641 | totals.events[i] += stats->events[i]; | 649 | totals.events[i] += stats->events[i]; |
642 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | 650 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) |
643 | totals.bytes[i] += stats->bytes[i]; | 651 | totals.bytes[i] += stats->bytes[i]; |
652 | #ifdef CONFIG_NFS_FSCACHE | ||
653 | for (i = 0; i < __NFSIOS_FSCACHEMAX; i++) | ||
654 | totals.fscache[i] += stats->fscache[i]; | ||
655 | #endif | ||
644 | 656 | ||
645 | preempt_enable(); | 657 | preempt_enable(); |
646 | } | 658 | } |
@@ -651,6 +663,13 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) | |||
651 | seq_printf(m, "\n\tbytes:\t"); | 663 | seq_printf(m, "\n\tbytes:\t"); |
652 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) | 664 | for (i = 0; i < __NFSIOS_BYTESMAX; i++) |
653 | seq_printf(m, "%Lu ", totals.bytes[i]); | 665 | seq_printf(m, "%Lu ", totals.bytes[i]); |
#ifdef CONFIG_NFS_FSCACHE
	/* the "fsc:" line is only emitted for mounts with NFS_OPTION_FSCACHE */
	if (nfss->options & NFS_OPTION_FSCACHE) {
		seq_printf(m, "\n\tfsc:\t");
		/*
		 * Print the FS-Cache totals summed into totals.fscache[]
		 * above.  Indexing totals.bytes[] here would repeat the byte
		 * counters under the "fsc:" label.
		 */
		for (i = 0; i < __NFSIOS_FSCACHEMAX; i++)
			seq_printf(m, "%Lu ", totals.fscache[i]);
	}
#endif
654 | seq_printf(m, "\n"); | 673 | seq_printf(m, "\n"); |
655 | 674 | ||
656 | rpc_print_iostats(m, nfss->client); | 675 | rpc_print_iostats(m, nfss->client); |
@@ -1044,6 +1063,24 @@ static int nfs_parse_mount_options(char *raw, | |||
1044 | case Opt_noresvport: | 1063 | case Opt_noresvport: |
1045 | mnt->flags |= NFS_MOUNT_NORESVPORT; | 1064 | mnt->flags |= NFS_MOUNT_NORESVPORT; |
1046 | break; | 1065 | break; |
1066 | case Opt_fscache: | ||
1067 | mnt->options |= NFS_OPTION_FSCACHE; | ||
1068 | kfree(mnt->fscache_uniq); | ||
1069 | mnt->fscache_uniq = NULL; | ||
1070 | break; | ||
1071 | case Opt_nofscache: | ||
1072 | mnt->options &= ~NFS_OPTION_FSCACHE; | ||
1073 | kfree(mnt->fscache_uniq); | ||
1074 | mnt->fscache_uniq = NULL; | ||
1075 | break; | ||
1076 | case Opt_fscache_uniq: | ||
1077 | string = match_strdup(args); | ||
1078 | if (!string) | ||
1079 | goto out_nomem; | ||
1080 | kfree(mnt->fscache_uniq); | ||
1081 | mnt->fscache_uniq = string; | ||
1082 | mnt->options |= NFS_OPTION_FSCACHE; | ||
1083 | break; | ||
1047 | 1084 | ||
1048 | /* | 1085 | /* |
1049 | * options that take numeric values | 1086 | * options that take numeric values |
@@ -1870,8 +1907,6 @@ static void nfs_clone_super(struct super_block *sb, | |||
1870 | nfs_initialise_sb(sb); | 1907 | nfs_initialise_sb(sb); |
1871 | } | 1908 | } |
1872 | 1909 | ||
1873 | #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) | ||
1874 | |||
1875 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) | 1910 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) |
1876 | { | 1911 | { |
1877 | const struct nfs_server *a = s->s_fs_info; | 1912 | const struct nfs_server *a = s->s_fs_info; |
@@ -2036,6 +2071,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2036 | if (!s->s_root) { | 2071 | if (!s->s_root) { |
2037 | /* initial superblock/root creation */ | 2072 | /* initial superblock/root creation */ |
2038 | nfs_fill_super(s, data); | 2073 | nfs_fill_super(s, data); |
2074 | nfs_fscache_get_super_cookie(s, data); | ||
2039 | } | 2075 | } |
2040 | 2076 | ||
2041 | mntroot = nfs_get_root(s, mntfh); | 2077 | mntroot = nfs_get_root(s, mntfh); |
@@ -2056,6 +2092,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, | |||
2056 | out: | 2092 | out: |
2057 | kfree(data->nfs_server.hostname); | 2093 | kfree(data->nfs_server.hostname); |
2058 | kfree(data->mount_server.hostname); | 2094 | kfree(data->mount_server.hostname); |
2095 | kfree(data->fscache_uniq); | ||
2059 | security_free_mnt_opts(&data->lsm_opts); | 2096 | security_free_mnt_opts(&data->lsm_opts); |
2060 | out_free_fh: | 2097 | out_free_fh: |
2061 | kfree(mntfh); | 2098 | kfree(mntfh); |
@@ -2083,6 +2120,7 @@ static void nfs_kill_super(struct super_block *s) | |||
2083 | 2120 | ||
2084 | bdi_unregister(&server->backing_dev_info); | 2121 | bdi_unregister(&server->backing_dev_info); |
2085 | kill_anon_super(s); | 2122 | kill_anon_super(s); |
2123 | nfs_fscache_release_super_cookie(s); | ||
2086 | nfs_free_server(server); | 2124 | nfs_free_server(server); |
2087 | } | 2125 | } |
2088 | 2126 | ||
@@ -2390,6 +2428,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, | |||
2390 | if (!s->s_root) { | 2428 | if (!s->s_root) { |
2391 | /* initial superblock/root creation */ | 2429 | /* initial superblock/root creation */ |
2392 | nfs4_fill_super(s); | 2430 | nfs4_fill_super(s); |
2431 | nfs_fscache_get_super_cookie(s, data); | ||
2393 | } | 2432 | } |
2394 | 2433 | ||
2395 | mntroot = nfs4_get_root(s, mntfh); | 2434 | mntroot = nfs4_get_root(s, mntfh); |
@@ -2411,6 +2450,7 @@ out: | |||
2411 | kfree(data->client_address); | 2450 | kfree(data->client_address); |
2412 | kfree(data->nfs_server.export_path); | 2451 | kfree(data->nfs_server.export_path); |
2413 | kfree(data->nfs_server.hostname); | 2452 | kfree(data->nfs_server.hostname); |
2453 | kfree(data->fscache_uniq); | ||
2414 | security_free_mnt_opts(&data->lsm_opts); | 2454 | security_free_mnt_opts(&data->lsm_opts); |
2415 | out_free_fh: | 2455 | out_free_fh: |
2416 | kfree(mntfh); | 2456 | kfree(mntfh); |
@@ -2437,6 +2477,7 @@ static void nfs4_kill_super(struct super_block *sb) | |||
2437 | kill_anon_super(sb); | 2477 | kill_anon_super(sb); |
2438 | 2478 | ||
2439 | nfs4_renewd_prepare_shutdown(server); | 2479 | nfs4_renewd_prepare_shutdown(server); |
2480 | nfs_fscache_release_super_cookie(sb); | ||
2440 | nfs_free_server(server); | 2481 | nfs_free_server(server); |
2441 | } | 2482 | } |
2442 | 2483 | ||
diff --git a/fs/splice.c b/fs/splice.c index 4ed0ba44a966..dd727d43e5b7 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -59,7 +59,8 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, | |||
59 | */ | 59 | */ |
60 | wait_on_page_writeback(page); | 60 | wait_on_page_writeback(page); |
61 | 61 | ||
62 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) | 62 | if (page_has_private(page) && |
63 | !try_to_release_page(page, GFP_KERNEL)) | ||
63 | goto out_unlock; | 64 | goto out_unlock; |
64 | 65 | ||
65 | /* | 66 | /* |
diff --git a/fs/super.c b/fs/super.c index 2ba481518ba7..77cb4ec919b9 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -287,6 +287,7 @@ int fsync_super(struct super_block *sb) | |||
287 | __fsync_super(sb); | 287 | __fsync_super(sb); |
288 | return sync_blockdev(sb->s_bdev); | 288 | return sync_blockdev(sb->s_bdev); |
289 | } | 289 | } |
290 | EXPORT_SYMBOL_GPL(fsync_super); | ||
290 | 291 | ||
291 | /** | 292 | /** |
292 | * generic_shutdown_super - common helper for ->kill_sb() | 293 | * generic_shutdown_super - common helper for ->kill_sb() |
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h new file mode 100644 index 000000000000..84d3532dd3ea --- /dev/null +++ b/include/linux/fscache-cache.h | |||
@@ -0,0 +1,505 @@ | |||
1 | /* General filesystem caching backing cache interface | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * NOTE!!! See: | ||
12 | * | ||
13 | * Documentation/filesystems/caching/backend-api.txt | ||
14 | * | ||
15 | * for a description of the cache backend interface declared here. | ||
16 | */ | ||
17 | |||
18 | #ifndef _LINUX_FSCACHE_CACHE_H | ||
19 | #define _LINUX_FSCACHE_CACHE_H | ||
20 | |||
21 | #include <linux/fscache.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/slow-work.h> | ||
24 | |||
25 | #define NR_MAXCACHES BITS_PER_LONG | ||
26 | |||
27 | struct fscache_cache; | ||
28 | struct fscache_cache_ops; | ||
29 | struct fscache_object; | ||
30 | struct fscache_operation; | ||
31 | |||
32 | /* | ||
33 | * cache tag definition | ||
34 | */ | ||
35 | struct fscache_cache_tag { | ||
36 | struct list_head link; | ||
37 | struct fscache_cache *cache; /* cache referred to by this tag */ | ||
38 | unsigned long flags; | ||
39 | #define FSCACHE_TAG_RESERVED 0 /* T if tag is reserved for a cache */ | ||
40 | atomic_t usage; | ||
41 | char name[0]; /* tag name */ | ||
42 | }; | ||
43 | |||
44 | /* | ||
45 | * cache definition | ||
46 | */ | ||
47 | struct fscache_cache { | ||
48 | const struct fscache_cache_ops *ops; | ||
49 | struct fscache_cache_tag *tag; /* tag representing this cache */ | ||
50 | struct kobject *kobj; /* system representation of this cache */ | ||
51 | struct list_head link; /* link in list of caches */ | ||
52 | size_t max_index_size; /* maximum size of index data */ | ||
53 | char identifier[36]; /* cache label */ | ||
54 | |||
55 | /* node management */ | ||
56 | struct work_struct op_gc; /* operation garbage collector */ | ||
57 | struct list_head object_list; /* list of data/index objects */ | ||
58 | struct list_head op_gc_list; /* list of ops to be deleted */ | ||
59 | spinlock_t object_list_lock; | ||
60 | spinlock_t op_gc_list_lock; | ||
61 | atomic_t object_count; /* no. of live objects in this cache */ | ||
62 | struct fscache_object *fsdef; /* object for the fsdef index */ | ||
63 | unsigned long flags; | ||
64 | #define FSCACHE_IOERROR 0 /* cache stopped on I/O error */ | ||
65 | #define FSCACHE_CACHE_WITHDRAWN 1 /* cache has been withdrawn */ | ||
66 | }; | ||
67 | |||
68 | extern wait_queue_head_t fscache_cache_cleared_wq; | ||
69 | |||
70 | /* | ||
71 | * operation to be applied to a cache object | ||
72 | * - retrieval initiation operations are done in the context of the process | ||
73 | * that issued them, and not in an async thread pool | ||
74 | */ | ||
75 | typedef void (*fscache_operation_release_t)(struct fscache_operation *op); | ||
76 | typedef void (*fscache_operation_processor_t)(struct fscache_operation *op); | ||
77 | |||
78 | struct fscache_operation { | ||
79 | union { | ||
80 | struct work_struct fast_work; /* record for fast ops */ | ||
81 | struct slow_work slow_work; /* record for (very) slow ops */ | ||
82 | }; | ||
83 | struct list_head pend_link; /* link in object->pending_ops */ | ||
84 | struct fscache_object *object; /* object to be operated upon */ | ||
85 | |||
86 | unsigned long flags; | ||
87 | #define FSCACHE_OP_TYPE 0x000f /* operation type */ | ||
88 | #define FSCACHE_OP_FAST 0x0001 /* - fast op, processor may not sleep for disk */ | ||
89 | #define FSCACHE_OP_SLOW 0x0002 /* - (very) slow op, processor may sleep for disk */ | ||
90 | #define FSCACHE_OP_MYTHREAD 0x0003 /* - processing is done by issuing thread, not pool */ | ||
91 | #define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ | ||
92 | #define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ | ||
93 | #define FSCACHE_OP_DEAD 6 /* op is now dead */ | ||
94 | |||
95 | atomic_t usage; | ||
96 | unsigned debug_id; /* debugging ID */ | ||
97 | |||
98 | /* operation processor callback | ||
99 | * - can be NULL if FSCACHE_OP_WAITING is going to be used to perform | ||
100 | * the op in a non-pool thread */ | ||
101 | fscache_operation_processor_t processor; | ||
102 | |||
103 | /* operation releaser */ | ||
104 | fscache_operation_release_t release; | ||
105 | }; | ||
106 | |||
107 | extern atomic_t fscache_op_debug_id; | ||
108 | extern const struct slow_work_ops fscache_op_slow_work_ops; | ||
109 | |||
110 | extern void fscache_enqueue_operation(struct fscache_operation *); | ||
111 | extern void fscache_put_operation(struct fscache_operation *); | ||
112 | |||
113 | /** | ||
114 | * fscache_operation_init - Do basic initialisation of an operation | ||
115 | * @op: The operation to initialise | ||
116 | * @release: The release function to assign | ||
117 | * | ||
118 | * Do basic initialisation of an operation. The caller must still set flags, | ||
119 | * object, either fast_work or slow_work if necessary, and processor if needed. | ||
120 | */ | ||
121 | static inline void fscache_operation_init(struct fscache_operation *op, | ||
122 | fscache_operation_release_t release) | ||
123 | { | ||
124 | atomic_set(&op->usage, 1); | ||
125 | op->debug_id = atomic_inc_return(&fscache_op_debug_id); | ||
126 | op->release = release; | ||
127 | INIT_LIST_HEAD(&op->pend_link); | ||
128 | } | ||
129 | |||
130 | /** | ||
131 | * fscache_operation_init_slow - Do additional initialisation of a slow op | ||
132 | * @op: The operation to initialise | ||
133 | * @processor: The processor function to assign | ||
134 | * | ||
135 | * Do additional initialisation of an operation as required for slow work. | ||
136 | */ | ||
137 | static inline | ||
138 | void fscache_operation_init_slow(struct fscache_operation *op, | ||
139 | fscache_operation_processor_t processor) | ||
140 | { | ||
141 | op->processor = processor; | ||
142 | slow_work_init(&op->slow_work, &fscache_op_slow_work_ops); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * data read operation | ||
147 | */ | ||
148 | struct fscache_retrieval { | ||
149 | struct fscache_operation op; | ||
150 | struct address_space *mapping; /* netfs pages */ | ||
151 | fscache_rw_complete_t end_io_func; /* function to call on I/O completion */ | ||
152 | void *context; /* netfs read context (pinned) */ | ||
153 | struct list_head to_do; /* list of things to be done by the backend */ | ||
154 | unsigned long start_time; /* time at which retrieval started */ | ||
155 | }; | ||
156 | |||
157 | typedef int (*fscache_page_retrieval_func_t)(struct fscache_retrieval *op, | ||
158 | struct page *page, | ||
159 | gfp_t gfp); | ||
160 | |||
161 | typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op, | ||
162 | struct list_head *pages, | ||
163 | unsigned *nr_pages, | ||
164 | gfp_t gfp); | ||
165 | |||
166 | /** | ||
167 | * fscache_get_retrieval - Get an extra reference on a retrieval operation | ||
168 | * @op: The retrieval operation to get a reference on | ||
169 | * | ||
170 | * Get an extra reference on a retrieval operation. | ||
171 | */ | ||
172 | static inline | ||
173 | struct fscache_retrieval *fscache_get_retrieval(struct fscache_retrieval *op) | ||
174 | { | ||
175 | atomic_inc(&op->op.usage); | ||
176 | return op; | ||
177 | } | ||
178 | |||
179 | /** | ||
180 | * fscache_enqueue_retrieval - Enqueue a retrieval operation for processing | ||
181 | * @op: The retrieval operation affected | ||
182 | * | ||
183 | * Enqueue a retrieval operation for processing by the FS-Cache thread pool. | ||
184 | */ | ||
185 | static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op) | ||
186 | { | ||
187 | fscache_enqueue_operation(&op->op); | ||
188 | } | ||
189 | |||
190 | /** | ||
191 | * fscache_put_retrieval - Drop a reference to a retrieval operation | ||
192 | * @op: The retrieval operation affected | ||
193 | * | ||
194 | * Drop a reference to a retrieval operation. | ||
195 | */ | ||
196 | static inline void fscache_put_retrieval(struct fscache_retrieval *op) | ||
197 | { | ||
198 | fscache_put_operation(&op->op); | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * cached page storage work item | ||
203 | * - used to do three things: | ||
204 | * - batch writes to the cache | ||
205 | * - do cache writes asynchronously | ||
206 | * - defer writes until cache object lookup completion | ||
207 | */ | ||
208 | struct fscache_storage { | ||
209 | struct fscache_operation op; | ||
210 | pgoff_t store_limit; /* don't write more than this */ | ||
211 | }; | ||
212 | |||
213 | /* | ||
214 | * cache operations | ||
215 | */ | ||
216 | struct fscache_cache_ops { | ||
217 | /* name of cache provider */ | ||
218 | const char *name; | ||
219 | |||
220 | /* allocate an object record for a cookie */ | ||
221 | struct fscache_object *(*alloc_object)(struct fscache_cache *cache, | ||
222 | struct fscache_cookie *cookie); | ||
223 | |||
224 | /* look up the object for a cookie */ | ||
225 | void (*lookup_object)(struct fscache_object *object); | ||
226 | |||
227 | /* finished looking up */ | ||
228 | void (*lookup_complete)(struct fscache_object *object); | ||
229 | |||
230 | /* increment the usage count on this object (may fail if unmounting) */ | ||
231 | struct fscache_object *(*grab_object)(struct fscache_object *object); | ||
232 | |||
233 | /* pin an object in the cache */ | ||
234 | int (*pin_object)(struct fscache_object *object); | ||
235 | |||
236 | /* unpin an object in the cache */ | ||
237 | void (*unpin_object)(struct fscache_object *object); | ||
238 | |||
239 | /* store the updated auxiliary data on an object */ | ||
240 | void (*update_object)(struct fscache_object *object); | ||
241 | |||
242 | /* discard the resources pinned by an object and effect retirement if | ||
243 | * necessary */ | ||
244 | void (*drop_object)(struct fscache_object *object); | ||
245 | |||
246 | /* dispose of a reference to an object */ | ||
247 | void (*put_object)(struct fscache_object *object); | ||
248 | |||
249 | /* sync a cache */ | ||
250 | void (*sync_cache)(struct fscache_cache *cache); | ||
251 | |||
252 | /* notification that the attributes of a non-index object (such as | ||
253 | * i_size) have changed */ | ||
254 | int (*attr_changed)(struct fscache_object *object); | ||
255 | |||
256 | /* reserve space for an object's data and associated metadata */ | ||
257 | int (*reserve_space)(struct fscache_object *object, loff_t i_size); | ||
258 | |||
259 | /* request a backing block for a page be read or allocated in the | ||
260 | * cache */ | ||
261 | fscache_page_retrieval_func_t read_or_alloc_page; | ||
262 | |||
263 | /* request backing blocks for a list of pages be read or allocated in | ||
264 | * the cache */ | ||
265 | fscache_pages_retrieval_func_t read_or_alloc_pages; | ||
266 | |||
267 | /* request a backing block for a page be allocated in the cache so that | ||
268 | * it can be written directly */ | ||
269 | fscache_page_retrieval_func_t allocate_page; | ||
270 | |||
271 | /* request backing blocks for pages be allocated in the cache so that | ||
272 | * they can be written directly */ | ||
273 | fscache_pages_retrieval_func_t allocate_pages; | ||
274 | |||
275 | /* write a page to its backing block in the cache */ | ||
276 | int (*write_page)(struct fscache_storage *op, struct page *page); | ||
277 | |||
278 | /* detach backing block from a page (optional) | ||
279 | * - must release the cookie lock before returning | ||
280 | * - may sleep | ||
281 | */ | ||
282 | void (*uncache_page)(struct fscache_object *object, | ||
283 | struct page *page); | ||
284 | |||
285 | /* dissociate a cache from all the pages it was backing */ | ||
286 | void (*dissociate_pages)(struct fscache_cache *cache); | ||
287 | }; | ||
288 | |||
289 | /* | ||
290 | * data file or index object cookie | ||
291 | * - a file will only appear in one cache | ||
292 | * - a request to cache a file may or may not be honoured, subject to | ||
293 | * constraints such as disk space | ||
294 | * - indices are created on disk just-in-time | ||
295 | */ | ||
296 | struct fscache_cookie { | ||
297 | atomic_t usage; /* number of users of this cookie */ | ||
298 | atomic_t n_children; /* number of children of this cookie */ | ||
299 | spinlock_t lock; | ||
300 | struct hlist_head backing_objects; /* object(s) backing this file/index */ | ||
301 | const struct fscache_cookie_def *def; /* definition */ | ||
302 | struct fscache_cookie *parent; /* parent of this entry */ | ||
303 | void *netfs_data; /* back pointer to netfs */ | ||
304 | struct radix_tree_root stores; /* pages to be stored on this cookie */ | ||
305 | #define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ | ||
306 | |||
307 | unsigned long flags; | ||
308 | #define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ | ||
309 | #define FSCACHE_COOKIE_CREATING 1 /* T if non-index object being created still */ | ||
310 | #define FSCACHE_COOKIE_NO_DATA_YET 2 /* T if new object with no cached data yet */ | ||
311 | #define FSCACHE_COOKIE_PENDING_FILL 3 /* T if pending initial fill on object */ | ||
312 | #define FSCACHE_COOKIE_FILLING 4 /* T if filling object incrementally */ | ||
313 | #define FSCACHE_COOKIE_UNAVAILABLE 5 /* T if cookie is unavailable (error, etc) */ | ||
314 | }; | ||
315 | |||
316 | extern struct fscache_cookie fscache_fsdef_index; | ||
317 | |||
318 | /* | ||
319 | * on-disk cache file or index handle | ||
320 | */ | ||
321 | struct fscache_object { | ||
322 | enum fscache_object_state { | ||
323 | FSCACHE_OBJECT_INIT, /* object in initial unbound state */ | ||
324 | FSCACHE_OBJECT_LOOKING_UP, /* looking up object */ | ||
325 | FSCACHE_OBJECT_CREATING, /* creating object */ | ||
326 | |||
327 | /* active states */ | ||
328 | FSCACHE_OBJECT_AVAILABLE, /* cleaning up object after creation */ | ||
329 | FSCACHE_OBJECT_ACTIVE, /* object is usable */ | ||
330 | FSCACHE_OBJECT_UPDATING, /* object is updating */ | ||
331 | |||
332 | /* terminal states */ | ||
333 | FSCACHE_OBJECT_DYING, /* object waiting for accessors to finish */ | ||
334 | FSCACHE_OBJECT_LC_DYING, /* object cleaning up after lookup/create */ | ||
335 | FSCACHE_OBJECT_ABORT_INIT, /* abort the init state */ | ||
336 | FSCACHE_OBJECT_RELEASING, /* releasing object */ | ||
337 | FSCACHE_OBJECT_RECYCLING, /* retiring object */ | ||
338 | FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ | ||
339 | FSCACHE_OBJECT_DEAD, /* object is now dead */ | ||
340 | } state; | ||
341 | |||
342 | int debug_id; /* debugging ID */ | ||
343 | int n_children; /* number of child objects */ | ||
344 | int n_ops; /* number of ops outstanding on object */ | ||
345 | int n_obj_ops; /* number of object ops outstanding on object */ | ||
346 | int n_in_progress; /* number of ops in progress */ | ||
347 | int n_exclusive; /* number of exclusive ops queued */ | ||
348 | spinlock_t lock; /* state and operations lock */ | ||
349 | |||
350 | unsigned long lookup_jif; /* time at which lookup started */ | ||
351 | unsigned long event_mask; /* events this object is interested in */ | ||
352 | unsigned long events; /* events to be processed by this object | ||
353 | * (order is important - using fls) */ | ||
354 | #define FSCACHE_OBJECT_EV_REQUEUE 0 /* T if object should be requeued */ | ||
355 | #define FSCACHE_OBJECT_EV_UPDATE 1 /* T if object should be updated */ | ||
356 | #define FSCACHE_OBJECT_EV_CLEARED 2 /* T if accessors all gone */ | ||
357 | #define FSCACHE_OBJECT_EV_ERROR 3 /* T if fatal error occurred during processing */ | ||
358 | #define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ | ||
359 | #define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ | ||
360 | #define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ | ||
361 | |||
362 | unsigned long flags; | ||
363 | #define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ | ||
364 | #define FSCACHE_OBJECT_PENDING_WRITE 1 /* T if object has pending write */ | ||
365 | #define FSCACHE_OBJECT_WAITING 2 /* T if object is waiting on its parent */ | ||
366 | |||
367 | struct list_head cache_link; /* link in cache->object_list */ | ||
368 | struct hlist_node cookie_link; /* link in cookie->backing_objects */ | ||
369 | struct fscache_cache *cache; /* cache that supplied this object */ | ||
370 | struct fscache_cookie *cookie; /* netfs's file/index object */ | ||
371 | struct fscache_object *parent; /* parent object */ | ||
372 | struct slow_work work; /* attention scheduling record */ | ||
373 | struct list_head dependents; /* FIFO of dependent objects */ | ||
374 | struct list_head dep_link; /* link in parent's dependents list */ | ||
375 | struct list_head pending_ops; /* unstarted operations on this object */ | ||
376 | pgoff_t store_limit; /* current storage limit */ | ||
377 | }; | ||
378 | |||
379 | extern const char *fscache_object_states[]; | ||
380 | |||
381 | #define fscache_object_is_active(obj) \ | ||
382 | (!test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \ | ||
383 | (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ | ||
384 | (obj)->state < FSCACHE_OBJECT_DYING) | ||
385 | |||
386 | extern const struct slow_work_ops fscache_object_slow_work_ops; | ||
387 | |||
388 | /** | ||
389 | * fscache_object_init - Initialise a cache object description | ||
390 | * @object: Object description | ||
391 | * | ||
392 | * Initialise a cache object description to its basic values. | ||
393 | * | ||
394 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
395 | * description. | ||
396 | */ | ||
397 | static inline | ||
398 | void fscache_object_init(struct fscache_object *object, | ||
399 | struct fscache_cookie *cookie, | ||
400 | struct fscache_cache *cache) | ||
401 | { | ||
402 | atomic_inc(&cache->object_count); | ||
403 | |||
404 | object->state = FSCACHE_OBJECT_INIT; | ||
405 | spin_lock_init(&object->lock); | ||
406 | INIT_LIST_HEAD(&object->cache_link); | ||
407 | INIT_HLIST_NODE(&object->cookie_link); | ||
408 | vslow_work_init(&object->work, &fscache_object_slow_work_ops); | ||
409 | INIT_LIST_HEAD(&object->dependents); | ||
410 | INIT_LIST_HEAD(&object->dep_link); | ||
411 | INIT_LIST_HEAD(&object->pending_ops); | ||
412 | object->n_children = 0; | ||
413 | object->n_ops = object->n_in_progress = object->n_exclusive = 0; | ||
414 | object->events = object->event_mask = 0; | ||
415 | object->flags = 0; | ||
416 | object->store_limit = 0; | ||
417 | object->cache = cache; | ||
418 | object->cookie = cookie; | ||
419 | object->parent = NULL; | ||
420 | } | ||
421 | |||
422 | extern void fscache_object_lookup_negative(struct fscache_object *object); | ||
423 | extern void fscache_obtained_object(struct fscache_object *object); | ||
424 | |||
425 | /** | ||
426 | * fscache_object_destroyed - Note destruction of an object in a cache | ||
427 | * @cache: The cache from which the object came | ||
428 | * | ||
429 | * Note the destruction and deallocation of an object record in a cache. | ||
430 | */ | ||
431 | static inline void fscache_object_destroyed(struct fscache_cache *cache) | ||
432 | { | ||
433 | if (atomic_dec_and_test(&cache->object_count)) | ||
434 | wake_up_all(&fscache_cache_cleared_wq); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * fscache_object_lookup_error - Note an object encountered an error | ||
439 | * @object: The object on which the error was encountered | ||
440 | * | ||
441 | * Note that an object encountered a fatal error (usually an I/O error) and | ||
442 | * that it should be withdrawn as soon as possible. | ||
443 | */ | ||
444 | static inline void fscache_object_lookup_error(struct fscache_object *object) | ||
445 | { | ||
446 | set_bit(FSCACHE_OBJECT_EV_ERROR, &object->events); | ||
447 | } | ||
448 | |||
449 | /** | ||
450 | * fscache_set_store_limit - Set the maximum size to be stored in an object | ||
451 | * @object: The object to set the maximum on | ||
452 | * @i_size: The limit to set in bytes | ||
453 | * | ||
454 | * Set the maximum size an object is permitted to reach, implying the highest | ||
455 | * byte that may be written. Intended to be called by the attr_changed() op. | ||
456 | * | ||
457 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
458 | * description. | ||
459 | */ | ||
460 | static inline | ||
461 | void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) | ||
462 | { | ||
463 | object->store_limit = i_size >> PAGE_SHIFT; | ||
464 | if (i_size & ~PAGE_MASK) | ||
465 | object->store_limit++; | ||
466 | } | ||
467 | |||
468 | /** | ||
469 | * fscache_end_io - End a retrieval operation on a page | ||
470 | * @op: The FS-Cache operation covering the retrieval | ||
471 | * @page: The page that was to be fetched | ||
472 | * @error: The error code (0 if successful) | ||
473 | * | ||
474 | * Note the end of an operation to retrieve a page, as covered by a particular | ||
475 | * operation record. | ||
476 | */ | ||
477 | static inline void fscache_end_io(struct fscache_retrieval *op, | ||
478 | struct page *page, int error) | ||
479 | { | ||
480 | op->end_io_func(page, op->context, error); | ||
481 | } | ||
482 | |||
483 | /* | ||
484 | * out-of-line cache backend functions | ||
485 | */ | ||
486 | extern void fscache_init_cache(struct fscache_cache *cache, | ||
487 | const struct fscache_cache_ops *ops, | ||
488 | const char *idfmt, | ||
489 | ...) __attribute__ ((format (printf, 3, 4))); | ||
490 | |||
491 | extern int fscache_add_cache(struct fscache_cache *cache, | ||
492 | struct fscache_object *fsdef, | ||
493 | const char *tagname); | ||
494 | extern void fscache_withdraw_cache(struct fscache_cache *cache); | ||
495 | |||
496 | extern void fscache_io_error(struct fscache_cache *cache); | ||
497 | |||
498 | extern void fscache_mark_pages_cached(struct fscache_retrieval *op, | ||
499 | struct pagevec *pagevec); | ||
500 | |||
501 | extern enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | ||
502 | const void *data, | ||
503 | uint16_t datalen); | ||
504 | |||
505 | #endif /* _LINUX_FSCACHE_CACHE_H */ | ||
diff --git a/include/linux/fscache.h b/include/linux/fscache.h new file mode 100644 index 000000000000..6d8ee466e0a0 --- /dev/null +++ b/include/linux/fscache.h | |||
@@ -0,0 +1,618 @@ | |||
1 | /* General filesystem caching interface | ||
2 | * | ||
3 | * Copyright (C) 2004-2007 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * NOTE!!! See: | ||
12 | * | ||
13 | * Documentation/filesystems/caching/netfs-api.txt | ||
14 | * | ||
15 | * for a description of the network filesystem interface declared here. | ||
16 | */ | ||
17 | |||
18 | #ifndef _LINUX_FSCACHE_H | ||
19 | #define _LINUX_FSCACHE_H | ||
20 | |||
21 | #include <linux/fs.h> | ||
22 | #include <linux/list.h> | ||
23 | #include <linux/pagemap.h> | ||
24 | #include <linux/pagevec.h> | ||
25 | |||
26 | #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) | ||
27 | #define fscache_available() (1) | ||
28 | #define fscache_cookie_valid(cookie) (cookie) | ||
29 | #else | ||
30 | #define fscache_available() (0) | ||
31 | #define fscache_cookie_valid(cookie) (0) | ||
32 | #endif | ||
33 | |||
34 | |||
35 | /* | ||
36 | * overload PG_private_2 to give us PG_fscache - this is used to indicate that | ||
37 | * a page is currently backed by a local disk cache | ||
38 | */ | ||
39 | #define PageFsCache(page) PagePrivate2((page)) | ||
40 | #define SetPageFsCache(page) SetPagePrivate2((page)) | ||
41 | #define ClearPageFsCache(page) ClearPagePrivate2((page)) | ||
42 | #define TestSetPageFsCache(page) TestSetPagePrivate2((page)) | ||
43 | #define TestClearPageFsCache(page) TestClearPagePrivate2((page)) | ||
44 | |||
45 | /* pattern used to fill dead space in an index entry */ | ||
46 | #define FSCACHE_INDEX_DEADFILL_PATTERN 0x79 | ||
47 | |||
48 | struct pagevec; | ||
49 | struct fscache_cache_tag; | ||
50 | struct fscache_cookie; | ||
51 | struct fscache_netfs; | ||
52 | |||
53 | typedef void (*fscache_rw_complete_t)(struct page *page, | ||
54 | void *context, | ||
55 | int error); | ||
56 | |||
57 | /* result of index entry consultation */ | ||
58 | enum fscache_checkaux { | ||
59 | FSCACHE_CHECKAUX_OKAY, /* entry okay as is */ | ||
60 | FSCACHE_CHECKAUX_NEEDS_UPDATE, /* entry requires update */ | ||
61 | FSCACHE_CHECKAUX_OBSOLETE, /* entry requires deletion */ | ||
62 | }; | ||
63 | |||
64 | /* | ||
65 | * fscache cookie definition | ||
66 | */ | ||
67 | struct fscache_cookie_def { | ||
68 | /* name of cookie type */ | ||
69 | char name[16]; | ||
70 | |||
71 | /* cookie type */ | ||
72 | uint8_t type; | ||
73 | #define FSCACHE_COOKIE_TYPE_INDEX 0 | ||
74 | #define FSCACHE_COOKIE_TYPE_DATAFILE 1 | ||
75 | |||
76 | /* select the cache into which to insert an entry in this index | ||
77 | * - optional | ||
78 | * - should return a cache identifier or NULL to cause the cache to be | ||
79 | * inherited from the parent if possible or the first cache picked | ||
80 | * for a non-index file if not | ||
81 | */ | ||
82 | struct fscache_cache_tag *(*select_cache)( | ||
83 | const void *parent_netfs_data, | ||
84 | const void *cookie_netfs_data); | ||
85 | |||
86 | /* get an index key | ||
87 | * - should store the key data in the buffer | ||
88 | * - should return the amount of data stored | ||
89 | * - not permitted to return an error | ||
90 | * - the netfs data from the cookie being used as the source is | ||
91 | * presented | ||
92 | */ | ||
93 | uint16_t (*get_key)(const void *cookie_netfs_data, | ||
94 | void *buffer, | ||
95 | uint16_t bufmax); | ||
96 | |||
97 | /* get certain file attributes from the netfs data | ||
98 | * - this function can be absent for an index | ||
99 | * - not permitted to return an error | ||
100 | * - the netfs data from the cookie being used as the source is | ||
101 | * presented | ||
102 | */ | ||
103 | void (*get_attr)(const void *cookie_netfs_data, uint64_t *size); | ||
104 | |||
105 | /* get the auxiliary data from netfs data | ||
106 | * - this function can be absent if the index carries no state data | ||
107 | * - should store the auxiliary data in the buffer | ||
108 | * - should return the amount of data stored | ||
109 | * - not permitted to return an error | ||
110 | * - the netfs data from the cookie being used as the source is | ||
111 | * presented | ||
112 | */ | ||
113 | uint16_t (*get_aux)(const void *cookie_netfs_data, | ||
114 | void *buffer, | ||
115 | uint16_t bufmax); | ||
116 | |||
117 | /* consult the netfs about the state of an object | ||
118 | * - this function can be absent if the index carries no state data | ||
119 | * - the netfs data from the cookie being used as the target is | ||
120 | * presented, as is the auxiliary data | ||
121 | */ | ||
122 | enum fscache_checkaux (*check_aux)(void *cookie_netfs_data, | ||
123 | const void *data, | ||
124 | uint16_t datalen); | ||
125 | |||
126 | /* get an extra reference on a read context | ||
127 | * - this function can be absent if the completion function doesn't | ||
128 | * require a context | ||
129 | */ | ||
130 | void (*get_context)(void *cookie_netfs_data, void *context); | ||
131 | |||
132 | /* release an extra reference on a read context | ||
133 | * - this function can be absent if the completion function doesn't | ||
134 | * require a context | ||
135 | */ | ||
136 | void (*put_context)(void *cookie_netfs_data, void *context); | ||
137 | |||
138 | /* indicate pages that now have cache metadata retained | ||
139 | * - this function should mark the specified pages as now being cached | ||
140 | * - the pages will have been marked with PG_fscache before this is | ||
141 | * called, so this is optional | ||
142 | */ | ||
143 | void (*mark_pages_cached)(void *cookie_netfs_data, | ||
144 | struct address_space *mapping, | ||
145 | struct pagevec *cached_pvec); | ||
146 | |||
147 | /* indicate the cookie is no longer cached | ||
148 | * - this function is called when the backing store currently caching | ||
149 | * a cookie is removed | ||
150 | * - the netfs should use this to clean up any markers indicating | ||
151 | * cached pages | ||
152 | * - this is mandatory for any object that may have data | ||
153 | */ | ||
154 | void (*now_uncached)(void *cookie_netfs_data); | ||
155 | }; | ||
156 | |||
157 | /* | ||
158 | * fscache cached network filesystem type | ||
159 | * - name, version and ops must be filled in before registration | ||
160 | * - all other fields will be set during registration | ||
161 | */ | ||
162 | struct fscache_netfs { | ||
163 | uint32_t version; /* indexing version */ | ||
164 | const char *name; /* filesystem name */ | ||
165 | struct fscache_cookie *primary_index; | ||
166 | struct list_head link; /* internal link */ | ||
167 | }; | ||
168 | |||
169 | /* | ||
170 | * slow-path functions for when there is actually caching available, and the | ||
171 | * netfs does actually have a valid token | ||
172 | * - these are not to be called directly | ||
173 | * - these are undefined symbols when FS-Cache is not configured and the | ||
174 | * optimiser takes care of not using them | ||
175 | */ | ||
176 | extern int __fscache_register_netfs(struct fscache_netfs *); | ||
177 | extern void __fscache_unregister_netfs(struct fscache_netfs *); | ||
178 | extern struct fscache_cache_tag *__fscache_lookup_cache_tag(const char *); | ||
179 | extern void __fscache_release_cache_tag(struct fscache_cache_tag *); | ||
180 | |||
181 | extern struct fscache_cookie *__fscache_acquire_cookie( | ||
182 | struct fscache_cookie *, | ||
183 | const struct fscache_cookie_def *, | ||
184 | void *); | ||
185 | extern void __fscache_relinquish_cookie(struct fscache_cookie *, int); | ||
186 | extern void __fscache_update_cookie(struct fscache_cookie *); | ||
187 | extern int __fscache_attr_changed(struct fscache_cookie *); | ||
188 | extern int __fscache_read_or_alloc_page(struct fscache_cookie *, | ||
189 | struct page *, | ||
190 | fscache_rw_complete_t, | ||
191 | void *, | ||
192 | gfp_t); | ||
193 | extern int __fscache_read_or_alloc_pages(struct fscache_cookie *, | ||
194 | struct address_space *, | ||
195 | struct list_head *, | ||
196 | unsigned *, | ||
197 | fscache_rw_complete_t, | ||
198 | void *, | ||
199 | gfp_t); | ||
200 | extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t); | ||
201 | extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t); | ||
202 | extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); | ||
203 | extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); | ||
204 | extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); | ||
205 | |||
206 | /** | ||
207 | * fscache_register_netfs - Register a filesystem as desiring caching services | ||
208 | * @netfs: The description of the filesystem | ||
209 | * | ||
210 | * Register a filesystem as desiring caching services if they're available. | ||
211 | * | ||
212 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
213 | * description. | ||
214 | */ | ||
215 | static inline | ||
216 | int fscache_register_netfs(struct fscache_netfs *netfs) | ||
217 | { | ||
218 | if (fscache_available()) | ||
219 | return __fscache_register_netfs(netfs); | ||
220 | else | ||
221 | return 0; | ||
222 | } | ||
223 | |||
224 | /** | ||
225 | * fscache_unregister_netfs - Indicate that a filesystem no longer desires | ||
226 | * caching services | ||
227 | * @netfs: The description of the filesystem | ||
228 | * | ||
229 | * Indicate that a filesystem no longer desires caching services for the | ||
230 | * moment. | ||
231 | * | ||
232 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
233 | * description. | ||
234 | */ | ||
235 | static inline | ||
236 | void fscache_unregister_netfs(struct fscache_netfs *netfs) | ||
237 | { | ||
238 | if (fscache_available()) | ||
239 | __fscache_unregister_netfs(netfs); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * fscache_lookup_cache_tag - Look up a cache tag | ||
244 | * @name: The name of the tag to search for | ||
245 | * | ||
246 | * Acquire a specific cache referral tag that can be used to select a specific | ||
247 | * cache in which to cache an index. | ||
248 | * | ||
249 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
250 | * description. | ||
251 | */ | ||
252 | static inline | ||
253 | struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name) | ||
254 | { | ||
255 | if (fscache_available()) | ||
256 | return __fscache_lookup_cache_tag(name); | ||
257 | else | ||
258 | return NULL; | ||
259 | } | ||
260 | |||
261 | /** | ||
262 | * fscache_release_cache_tag - Release a cache tag | ||
263 | * @tag: The tag to release | ||
264 | * | ||
265 | * Release a reference to a cache referral tag previously looked up. | ||
266 | * | ||
267 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
268 | * description. | ||
269 | */ | ||
270 | static inline | ||
271 | void fscache_release_cache_tag(struct fscache_cache_tag *tag) | ||
272 | { | ||
273 | if (fscache_available()) | ||
274 | __fscache_release_cache_tag(tag); | ||
275 | } | ||
276 | |||
277 | /** | ||
278 | * fscache_acquire_cookie - Acquire a cookie to represent a cache object | ||
279 | * @parent: The cookie that's to be the parent of this one | ||
280 | * @def: A description of the cache object, including callback operations | ||
281 | * @netfs_data: An arbitrary piece of data to be kept in the cookie to | ||
282 | * represent the cache object to the netfs | ||
283 | * | ||
284 | * This function is used to inform FS-Cache about part of an index hierarchy | ||
285 | * that can be used to locate files. This is done by requesting a cookie for | ||
286 | * each index in the path to the file. | ||
287 | * | ||
288 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
289 | * description. | ||
290 | */ | ||
291 | static inline | ||
292 | struct fscache_cookie *fscache_acquire_cookie( | ||
293 | struct fscache_cookie *parent, | ||
294 | const struct fscache_cookie_def *def, | ||
295 | void *netfs_data) | ||
296 | { | ||
297 | if (fscache_cookie_valid(parent)) | ||
298 | return __fscache_acquire_cookie(parent, def, netfs_data); | ||
299 | else | ||
300 | return NULL; | ||
301 | } | ||
302 | |||
303 | /** | ||
304 | * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding | ||
305 | * it | ||
306 | * @cookie: The cookie being returned | ||
307 | * @retire: True if the cache object the cookie represents is to be discarded | ||
308 | * | ||
309 | * This function returns a cookie to the cache, forcibly discarding the | ||
310 | * associated cache object if retire is set to true. | ||
311 | * | ||
312 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
313 | * description. | ||
314 | */ | ||
315 | static inline | ||
316 | void fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | ||
317 | { | ||
318 | if (fscache_cookie_valid(cookie)) | ||
319 | __fscache_relinquish_cookie(cookie, retire); | ||
320 | } | ||
321 | |||
322 | /** | ||
323 | * fscache_update_cookie - Request that a cache object be updated | ||
324 | * @cookie: The cookie representing the cache object | ||
325 | * | ||
326 | * Request an update of the index data for the cache object associated with the | ||
327 | * cookie. | ||
328 | * | ||
329 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
330 | * description. | ||
331 | */ | ||
332 | static inline | ||
333 | void fscache_update_cookie(struct fscache_cookie *cookie) | ||
334 | { | ||
335 | if (fscache_cookie_valid(cookie)) | ||
336 | __fscache_update_cookie(cookie); | ||
337 | } | ||
338 | |||
339 | /** | ||
340 | * fscache_pin_cookie - Pin a data-storage cache object in its cache | ||
341 | * @cookie: The cookie representing the cache object | ||
342 | * | ||
343 | * Permit data-storage cache objects to be pinned in the cache. | ||
344 | * | ||
345 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
346 | * description. | ||
347 | */ | ||
348 | static inline | ||
349 | int fscache_pin_cookie(struct fscache_cookie *cookie) | ||
350 | { | ||
351 | return -ENOBUFS; | ||
352 | } | ||
353 | |||
354 | /** | ||
355 | * fscache_unpin_cookie - Unpin a data-storage cache object in its cache | ||
356 | * @cookie: The cookie representing the cache object | ||
357 | * | ||
358 | * Permit data-storage cache objects to be unpinned from the cache. | ||
359 | * | ||
360 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
361 | * description. | ||
362 | */ | ||
363 | static inline | ||
364 | void fscache_unpin_cookie(struct fscache_cookie *cookie) | ||
365 | { | ||
366 | } | ||
367 | |||
368 | /** | ||
369 | * fscache_attr_changed - Notify cache that an object's attributes changed | ||
370 | * @cookie: The cookie representing the cache object | ||
371 | * | ||
372 | * Send a notification to the cache indicating that an object's attributes have | ||
373 | * changed. This includes the data size. These attributes will be obtained | ||
374 | * through the get_attr() cookie definition op. | ||
375 | * | ||
376 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
377 | * description. | ||
378 | */ | ||
379 | static inline | ||
380 | int fscache_attr_changed(struct fscache_cookie *cookie) | ||
381 | { | ||
382 | if (fscache_cookie_valid(cookie)) | ||
383 | return __fscache_attr_changed(cookie); | ||
384 | else | ||
385 | return -ENOBUFS; | ||
386 | } | ||
387 | |||
388 | /** | ||
389 | * fscache_reserve_space - Reserve data space for a cached object | ||
390 | * @cookie: The cookie representing the cache object | ||
391 | * @size: The amount of space to be reserved | ||
392 | * | ||
393 | * Reserve an amount of space in the cache for the cache object attached to a | ||
394 | * cookie so that a write to that object within the space can always be | ||
395 | * honoured. | ||
396 | * | ||
397 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
398 | * description. | ||
399 | */ | ||
400 | static inline | ||
401 | int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size) | ||
402 | { | ||
403 | return -ENOBUFS; | ||
404 | } | ||
405 | |||
406 | /** | ||
407 | * fscache_read_or_alloc_page - Read a page from the cache or allocate a block | ||
408 | * in which to store it | ||
409 | * @cookie: The cookie representing the cache object | ||
410 | * @page: The netfs page to fill if possible | ||
411 | * @end_io_func: The callback to invoke when and if the page is filled | ||
412 | * @context: An arbitrary piece of data to pass on to end_io_func() | ||
413 | * @gfp: The conditions under which memory allocation should be made | ||
414 | * | ||
415 | * Read a page from the cache, or if that's not possible make a potential | ||
416 | * one-block reservation in the cache into which the page may be stored once | ||
417 | * fetched from the server. | ||
418 | * | ||
419 | * If the page is not backed by the cache object, or if there's some reason | ||
420 | * it can't be, -ENOBUFS will be returned and nothing more will be done for | ||
421 | * that page. | ||
422 | * | ||
423 | * Else, if that page is backed by the cache, a read will be initiated directly | ||
424 | * to the netfs's page and 0 will be returned by this function. The | ||
425 | * end_io_func() callback will be invoked when the operation terminates on a | ||
426 | * completion or failure. Note that the callback may be invoked before the | ||
427 | * return. | ||
428 | * | ||
429 | * Else, if the page is unbacked, -ENODATA is returned and a block may have | ||
430 | * been allocated in the cache. | ||
431 | * | ||
432 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
433 | * description. | ||
434 | */ | ||
435 | static inline | ||
436 | int fscache_read_or_alloc_page(struct fscache_cookie *cookie, | ||
437 | struct page *page, | ||
438 | fscache_rw_complete_t end_io_func, | ||
439 | void *context, | ||
440 | gfp_t gfp) | ||
441 | { | ||
442 | if (fscache_cookie_valid(cookie)) | ||
443 | return __fscache_read_or_alloc_page(cookie, page, end_io_func, | ||
444 | context, gfp); | ||
445 | else | ||
446 | return -ENOBUFS; | ||
447 | } | ||
448 | |||
449 | /** | ||
450 | * fscache_read_or_alloc_pages - Read pages from the cache and/or allocate | ||
451 | * blocks in which to store them | ||
452 | * @cookie: The cookie representing the cache object | ||
453 | * @mapping: The netfs inode mapping to which the pages will be attached | ||
454 | * @pages: A list of potential netfs pages to be filled | ||
455 | * @end_io_func: The callback to invoke when and if each page is filled | ||
456 | * @context: An arbitrary piece of data to pass on to end_io_func() | ||
457 | * @gfp: The conditions under which memory allocation should be made | ||
458 | * | ||
459 | * Read a set of pages from the cache, or if that's not possible, attempt to | ||
460 | * make a potential one-block reservation for each page in the cache into which | ||
461 | * that page may be stored once fetched from the server. | ||
462 | * | ||
463 | * If some pages are not backed by the cache object, or if there's some | ||
464 | * reason they can't be, -ENOBUFS will be returned and nothing more will be | ||
465 | * done for those pages. | ||
466 | * | ||
467 | * Else, if some of the pages are backed by the cache, a read will be initiated | ||
468 | * directly to the netfs's page and 0 will be returned by this function. The | ||
469 | * end_io_func() callback will be invoked when the operation terminates on a | ||
470 | * completion or failure. Note that the callback may be invoked before the | ||
471 | * return. | ||
472 | * | ||
473 | * Else, if a page is unbacked, -ENODATA is returned and a block may have | ||
474 | * been allocated in the cache. | ||
475 | * | ||
476 | * Because the function may want to return all of -ENOBUFS, -ENODATA and 0 in | ||
477 | * regard to different pages, the return values are prioritised in that order. | ||
478 | * Any pages submitted for reading are removed from the pages list. | ||
479 | * | ||
480 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
481 | * description. | ||
482 | */ | ||
483 | static inline | ||
484 | int fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | ||
485 | struct address_space *mapping, | ||
486 | struct list_head *pages, | ||
487 | unsigned *nr_pages, | ||
488 | fscache_rw_complete_t end_io_func, | ||
489 | void *context, | ||
490 | gfp_t gfp) | ||
491 | { | ||
492 | if (fscache_cookie_valid(cookie)) | ||
493 | return __fscache_read_or_alloc_pages(cookie, mapping, pages, | ||
494 | nr_pages, end_io_func, | ||
495 | context, gfp); | ||
496 | else | ||
497 | return -ENOBUFS; | ||
498 | } | ||
499 | |||
500 | /** | ||
501 | * fscache_alloc_page - Allocate a block in which to store a page | ||
502 | * @cookie: The cookie representing the cache object | ||
503 | * @page: The netfs page to allocate a block for | ||
504 | * @gfp: The conditions under which memory allocation should be made | ||
505 | * | ||
506 | * Request allocation of a block in the cache in which to store a netfs page | ||
507 | * without retrieving any contents from the cache. | ||
508 | * | ||
509 | * If the page is not backed by a file then -ENOBUFS will be returned and | ||
510 | * nothing more will be done, and no reservation will be made. | ||
511 | * | ||
512 | * Else, a block will be allocated if one wasn't already, and 0 will be | ||
513 | * returned. | ||
514 | * | ||
515 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
516 | * description. | ||
517 | */ | ||
518 | static inline | ||
519 | int fscache_alloc_page(struct fscache_cookie *cookie, | ||
520 | struct page *page, | ||
521 | gfp_t gfp) | ||
522 | { | ||
523 | if (fscache_cookie_valid(cookie)) | ||
524 | return __fscache_alloc_page(cookie, page, gfp); | ||
525 | else | ||
526 | return -ENOBUFS; | ||
527 | } | ||
528 | |||
529 | /** | ||
530 | * fscache_write_page - Request storage of a page in the cache | ||
531 | * @cookie: The cookie representing the cache object | ||
532 | * @page: The netfs page to store | ||
533 | * @gfp: The conditions under which memory allocation should be made | ||
534 | * | ||
535 | * Request the contents of the netfs page be written into the cache. This | ||
536 | * request may be ignored if no cache block is currently allocated, in which | ||
537 | * case it will return -ENOBUFS. | ||
538 | * | ||
539 | * If a cache block was already allocated, a write will be initiated and 0 will | ||
540 | * be returned. The PG_fscache_write page bit is set immediately and will then | ||
541 | * be cleared at the completion of the write to indicate the success or failure | ||
542 | * of the operation. Note that the completion may happen before the return. | ||
543 | * | ||
544 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
545 | * description. | ||
546 | */ | ||
547 | static inline | ||
548 | int fscache_write_page(struct fscache_cookie *cookie, | ||
549 | struct page *page, | ||
550 | gfp_t gfp) | ||
551 | { | ||
552 | if (fscache_cookie_valid(cookie)) | ||
553 | return __fscache_write_page(cookie, page, gfp); | ||
554 | else | ||
555 | return -ENOBUFS; | ||
556 | } | ||
557 | |||
558 | /** | ||
559 | * fscache_uncache_page - Indicate that caching is no longer required on a page | ||
560 | * @cookie: The cookie representing the cache object | ||
561 | * @page: The netfs page that was being cached. | ||
562 | * | ||
563 | * Tell the cache that we no longer want a page to be cached and that it should | ||
564 | * remove any knowledge of the netfs page it may have. | ||
565 | * | ||
566 | * Note that this cannot cancel any outstanding I/O operations between this | ||
567 | * page and the cache. | ||
568 | * | ||
569 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
570 | * description. | ||
571 | */ | ||
572 | static inline | ||
573 | void fscache_uncache_page(struct fscache_cookie *cookie, | ||
574 | struct page *page) | ||
575 | { | ||
576 | if (fscache_cookie_valid(cookie)) | ||
577 | __fscache_uncache_page(cookie, page); | ||
578 | } | ||
579 | |||
580 | /** | ||
581 | * fscache_check_page_write - Ask if a page is being written to the cache | ||
582 | * @cookie: The cookie representing the cache object | ||
583 | * @page: The netfs page that is being cached. | ||
584 | * | ||
585 | * Ask the cache if a page is being written to the cache. | ||
586 | * | ||
587 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
588 | * description. | ||
589 | */ | ||
590 | static inline | ||
591 | bool fscache_check_page_write(struct fscache_cookie *cookie, | ||
592 | struct page *page) | ||
593 | { | ||
594 | if (fscache_cookie_valid(cookie)) | ||
595 | return __fscache_check_page_write(cookie, page); | ||
596 | return false; | ||
597 | } | ||
598 | |||
599 | /** | ||
600 | * fscache_wait_on_page_write - Wait for a page to complete writing to the cache | ||
601 | * @cookie: The cookie representing the cache object | ||
602 | * @page: The netfs page that is being cached. | ||
603 | * | ||
604 | * Ask the cache to wake us up when a page is no longer being written to the | ||
605 | * cache. | ||
606 | * | ||
607 | * See Documentation/filesystems/caching/netfs-api.txt for a complete | ||
608 | * description. | ||
609 | */ | ||
610 | static inline | ||
611 | void fscache_wait_on_page_write(struct fscache_cookie *cookie, | ||
612 | struct page *page) | ||
613 | { | ||
614 | if (fscache_cookie_valid(cookie)) | ||
615 | __fscache_wait_on_page_write(cookie, page); | ||
616 | } | ||
617 | |||
618 | #endif /* _LINUX_FSCACHE_H */ | ||
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index bde2557c2a9c..fdffb413b192 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -185,6 +185,9 @@ struct nfs_inode { | |||
185 | fmode_t delegation_state; | 185 | fmode_t delegation_state; |
186 | struct rw_semaphore rwsem; | 186 | struct rw_semaphore rwsem; |
187 | #endif /* CONFIG_NFS_V4*/ | 187 | #endif /* CONFIG_NFS_V4*/ |
188 | #ifdef CONFIG_NFS_FSCACHE | ||
189 | struct fscache_cookie *fscache; | ||
190 | #endif | ||
188 | struct inode vfs_inode; | 191 | struct inode vfs_inode; |
189 | }; | 192 | }; |
190 | 193 | ||
@@ -207,6 +210,8 @@ struct nfs_inode { | |||
207 | #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ | 210 | #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ |
208 | #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ | 211 | #define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ |
209 | #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ | 212 | #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ |
213 | #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ | ||
214 | #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ | ||
210 | 215 | ||
211 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 216 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
212 | { | 217 | { |
@@ -260,6 +265,11 @@ static inline int NFS_STALE(const struct inode *inode) | |||
260 | return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); | 265 | return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags); |
261 | } | 266 | } |
262 | 267 | ||
268 | static inline int NFS_FSCACHE(const struct inode *inode) | ||
269 | { | ||
270 | return test_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); | ||
271 | } | ||
272 | |||
263 | static inline __u64 NFS_FILEID(const struct inode *inode) | 273 | static inline __u64 NFS_FILEID(const struct inode *inode) |
264 | { | 274 | { |
265 | return NFS_I(inode)->fileid; | 275 | return NFS_I(inode)->fileid; |
@@ -506,6 +516,8 @@ extern int nfs_readpages(struct file *, struct address_space *, | |||
506 | struct list_head *, unsigned); | 516 | struct list_head *, unsigned); |
507 | extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); | 517 | extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); |
508 | extern void nfs_readdata_release(void *data); | 518 | extern void nfs_readdata_release(void *data); |
519 | extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, | ||
520 | struct page *); | ||
509 | 521 | ||
510 | /* | 522 | /* |
511 | * Allocate nfs_read_data structures | 523 | * Allocate nfs_read_data structures |
@@ -583,6 +595,7 @@ extern void * nfs_root_data(void); | |||
583 | #define NFSDBG_CALLBACK 0x0100 | 595 | #define NFSDBG_CALLBACK 0x0100 |
584 | #define NFSDBG_CLIENT 0x0200 | 596 | #define NFSDBG_CLIENT 0x0200 |
585 | #define NFSDBG_MOUNT 0x0400 | 597 | #define NFSDBG_MOUNT 0x0400 |
598 | #define NFSDBG_FSCACHE 0x0800 | ||
586 | #define NFSDBG_ALL 0xFFFF | 599 | #define NFSDBG_ALL 0xFFFF |
587 | 600 | ||
588 | #ifdef __KERNEL__ | 601 | #ifdef __KERNEL__ |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 29b1e40dce99..6ad75948cbf7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -64,6 +64,10 @@ struct nfs_client { | |||
64 | char cl_ipaddr[48]; | 64 | char cl_ipaddr[48]; |
65 | unsigned char cl_id_uniquifier; | 65 | unsigned char cl_id_uniquifier; |
66 | #endif | 66 | #endif |
67 | |||
68 | #ifdef CONFIG_NFS_FSCACHE | ||
69 | struct fscache_cookie *fscache; /* client index cache cookie */ | ||
70 | #endif | ||
67 | }; | 71 | }; |
68 | 72 | ||
69 | /* | 73 | /* |
@@ -96,12 +100,19 @@ struct nfs_server { | |||
96 | unsigned int acdirmin; | 100 | unsigned int acdirmin; |
97 | unsigned int acdirmax; | 101 | unsigned int acdirmax; |
98 | unsigned int namelen; | 102 | unsigned int namelen; |
103 | unsigned int options; /* extra options enabled by mount */ | ||
104 | #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ | ||
99 | 105 | ||
100 | struct nfs_fsid fsid; | 106 | struct nfs_fsid fsid; |
101 | __u64 maxfilesize; /* maximum file size */ | 107 | __u64 maxfilesize; /* maximum file size */ |
102 | unsigned long mount_time; /* when this fs was mounted */ | 108 | unsigned long mount_time; /* when this fs was mounted */ |
103 | dev_t s_dev; /* superblock dev numbers */ | 109 | dev_t s_dev; /* superblock dev numbers */ |
104 | 110 | ||
111 | #ifdef CONFIG_NFS_FSCACHE | ||
112 | struct nfs_fscache_key *fscache_key; /* unique key for superblock */ | ||
113 | struct fscache_cookie *fscache; /* superblock cookie */ | ||
114 | #endif | ||
115 | |||
105 | #ifdef CONFIG_NFS_V4 | 116 | #ifdef CONFIG_NFS_V4 |
106 | u32 attr_bitmask[2];/* V4 bitmask representing the set | 117 | u32 attr_bitmask[2];/* V4 bitmask representing the set |
107 | of attributes supported on this | 118 | of attributes supported on this |
diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 1cb9a3fed2b3..68b10f5f8907 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h | |||
@@ -116,4 +116,16 @@ enum nfs_stat_eventcounters { | |||
116 | __NFSIOS_COUNTSMAX, | 116 | __NFSIOS_COUNTSMAX, |
117 | }; | 117 | }; |
118 | 118 | ||
119 | /* | ||
120 | * NFS local caching servicing counters | ||
121 | */ | ||
122 | enum nfs_stat_fscachecounters { | ||
123 | NFSIOS_FSCACHE_PAGES_READ_OK, | ||
124 | NFSIOS_FSCACHE_PAGES_READ_FAIL, | ||
125 | NFSIOS_FSCACHE_PAGES_WRITTEN_OK, | ||
126 | NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, | ||
127 | NFSIOS_FSCACHE_PAGES_UNCACHED, | ||
128 | __NFSIOS_FSCACHEMAX, | ||
129 | }; | ||
130 | |||
119 | #endif /* _LINUX_NFS_IOSTAT */ | 131 | #endif /* _LINUX_NFS_IOSTAT */ |
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 61df1779b2a5..62214c7d2d93 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -82,6 +82,7 @@ enum pageflags { | |||
82 | PG_arch_1, | 82 | PG_arch_1, |
83 | PG_reserved, | 83 | PG_reserved, |
84 | PG_private, /* If pagecache, has fs-private data */ | 84 | PG_private, /* If pagecache, has fs-private data */ |
85 | PG_private_2, /* If pagecache, has fs aux data */ | ||
85 | PG_writeback, /* Page is under writeback */ | 86 | PG_writeback, /* Page is under writeback */ |
86 | #ifdef CONFIG_PAGEFLAGS_EXTENDED | 87 | #ifdef CONFIG_PAGEFLAGS_EXTENDED |
87 | PG_head, /* A head page */ | 88 | PG_head, /* A head page */ |
@@ -108,6 +109,12 @@ enum pageflags { | |||
108 | /* Filesystems */ | 109 | /* Filesystems */ |
109 | PG_checked = PG_owner_priv_1, | 110 | PG_checked = PG_owner_priv_1, |
110 | 111 | ||
112 | /* Two page bits are conscripted by FS-Cache to maintain local caching | ||
113 | * state. These bits are set on pages belonging to the netfs's inodes | ||
114 | * when those inodes are being locally cached. | ||
115 | */ | ||
116 | PG_fscache = PG_private_2, /* page backed by cache */ | ||
117 | |||
111 | /* XEN */ | 118 | /* XEN */ |
112 | PG_pinned = PG_owner_priv_1, | 119 | PG_pinned = PG_owner_priv_1, |
113 | PG_savepinned = PG_dirty, | 120 | PG_savepinned = PG_dirty, |
@@ -182,7 +189,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } | |||
182 | 189 | ||
183 | struct page; /* forward declaration */ | 190 | struct page; /* forward declaration */ |
184 | 191 | ||
185 | TESTPAGEFLAG(Locked, locked) | 192 | TESTPAGEFLAG(Locked, locked) TESTSETFLAG(Locked, locked) |
186 | PAGEFLAG(Error, error) | 193 | PAGEFLAG(Error, error) |
187 | PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) | 194 | PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) |
188 | PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) | 195 | PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) |
@@ -194,8 +201,6 @@ PAGEFLAG(Checked, checked) /* Used by some filesystems */ | |||
194 | PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ | 201 | PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ |
195 | PAGEFLAG(SavePinned, savepinned); /* Xen */ | 202 | PAGEFLAG(SavePinned, savepinned); /* Xen */ |
196 | PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) | 203 | PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) |
197 | PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) | ||
198 | __SETPAGEFLAG(Private, private) | ||
199 | PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) | 204 | PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) |
200 | 205 | ||
201 | __PAGEFLAG(SlobPage, slob_page) | 206 | __PAGEFLAG(SlobPage, slob_page) |
@@ -205,6 +210,16 @@ __PAGEFLAG(SlubFrozen, slub_frozen) | |||
205 | __PAGEFLAG(SlubDebug, slub_debug) | 210 | __PAGEFLAG(SlubDebug, slub_debug) |
206 | 211 | ||
207 | /* | 212 | /* |
213 | * Private page markings that may be used by the filesystem that owns the page | ||
214 | * for its own purposes. | ||
215 | * - PG_private and PG_private_2 cause releasepage() and co to be invoked | ||
216 | */ | ||
217 | PAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) | ||
218 | __CLEARPAGEFLAG(Private, private) | ||
219 | PAGEFLAG(Private2, private_2) TESTSCFLAG(Private2, private_2) | ||
220 | PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) | ||
221 | |||
222 | /* | ||
208 | * Only test-and-set exist for PG_writeback. The unconditional operators are | 223 | * Only test-and-set exist for PG_writeback. The unconditional operators are |
209 | * risky: they bypass page accounting. | 224 | * risky: they bypass page accounting. |
210 | */ | 225 | */ |
@@ -384,9 +399,10 @@ static inline void __ClearPageTail(struct page *page) | |||
384 | * these flags set. It they are, there is a problem. | 399 | * these flags set. It they are, there is a problem. |
385 | */ | 400 | */ |
386 | #define PAGE_FLAGS_CHECK_AT_FREE \ | 401 | #define PAGE_FLAGS_CHECK_AT_FREE \ |
387 | (1 << PG_lru | 1 << PG_private | 1 << PG_locked | \ | 402 | (1 << PG_lru | 1 << PG_locked | \ |
388 | 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ | 403 | 1 << PG_private | 1 << PG_private_2 | \ |
389 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 404 | 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ |
405 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | ||
390 | __PG_UNEVICTABLE | __PG_MLOCKED) | 406 | __PG_UNEVICTABLE | __PG_MLOCKED) |
391 | 407 | ||
392 | /* | 408 | /* |
@@ -397,4 +413,16 @@ static inline void __ClearPageTail(struct page *page) | |||
397 | #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) | 413 | #define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) |
398 | 414 | ||
399 | #endif /* !__GENERATING_BOUNDS_H */ | 415 | #endif /* !__GENERATING_BOUNDS_H */ |
416 | |||
417 | /** | ||
418 | * page_has_private - Determine if page has private stuff | ||
419 | * @page: The page to be checked | ||
420 | * | ||
421 | * Determine if a page has private stuff, indicating that release routines | ||
422 | * should be invoked upon it. | ||
423 | */ | ||
424 | #define page_has_private(page) \ | ||
425 | ((page)->flags & ((1 << PG_private) | \ | ||
426 | (1 << PG_private_2))) | ||
427 | |||
400 | #endif /* PAGE_FLAGS_H */ | 428 | #endif /* PAGE_FLAGS_H */ |
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 076a7dc67c2b..34da5230faab 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h | |||
@@ -384,6 +384,11 @@ static inline void wait_on_page_writeback(struct page *page) | |||
384 | extern void end_page_writeback(struct page *page); | 384 | extern void end_page_writeback(struct page *page); |
385 | 385 | ||
386 | /* | 386 | /* |
387 | * Add an arbitrary waiter to a page's wait queue | ||
388 | */ | ||
389 | extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); | ||
390 | |||
391 | /* | ||
387 | * Fault a userspace page into pagetables. Return non-zero on a fault. | 392 | * Fault a userspace page into pagetables. Return non-zero on a fault. |
388 | * | 393 | * |
389 | * This assumes that two userspace pages are always sufficient. That's | 394 | * This assumes that two userspace pages are always sufficient. That's |
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h new file mode 100644 index 000000000000..85958277f83d --- /dev/null +++ b/include/linux/slow-work.h | |||
@@ -0,0 +1,95 @@ | |||
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/slow-work.txt | ||
12 | */ | ||
13 | |||
14 | #ifndef _LINUX_SLOW_WORK_H | ||
15 | #define _LINUX_SLOW_WORK_H | ||
16 | |||
17 | #ifdef CONFIG_SLOW_WORK | ||
18 | |||
19 | #include <linux/sysctl.h> | ||
20 | |||
21 | struct slow_work; | ||
22 | |||
23 | /* | ||
24 | * The operations used to support slow work items | ||
25 | */ | ||
26 | struct slow_work_ops { | ||
27 | /* get a ref on a work item | ||
28 | * - return 0 if successful, -ve if not | ||
29 | */ | ||
30 | int (*get_ref)(struct slow_work *work); | ||
31 | |||
32 | /* discard a ref to a work item */ | ||
33 | void (*put_ref)(struct slow_work *work); | ||
34 | |||
35 | /* execute a work item */ | ||
36 | void (*execute)(struct slow_work *work); | ||
37 | }; | ||
38 | |||
39 | /* | ||
40 | * A slow work item | ||
41 | * - A reference is held on the parent object by the thread pool when it is | ||
42 | * queued | ||
43 | */ | ||
44 | struct slow_work { | ||
45 | unsigned long flags; | ||
46 | #define SLOW_WORK_PENDING 0 /* item pending (further) execution */ | ||
47 | #define SLOW_WORK_EXECUTING 1 /* item currently executing */ | ||
48 | #define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ | ||
49 | #define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ | ||
50 | const struct slow_work_ops *ops; /* operations table for this item */ | ||
51 | struct list_head link; /* link in queue */ | ||
52 | }; | ||
53 | |||
54 | /** | ||
55 | * slow_work_init - Initialise a slow work item | ||
56 | * @work: The work item to initialise | ||
57 | * @ops: The operations to use to handle the slow work item | ||
58 | * | ||
59 | * Initialise a slow work item. | ||
60 | */ | ||
61 | static inline void slow_work_init(struct slow_work *work, | ||
62 | const struct slow_work_ops *ops) | ||
63 | { | ||
64 | work->flags = 0; | ||
65 | work->ops = ops; | ||
66 | INIT_LIST_HEAD(&work->link); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * vslow_work_init - Initialise a very slow work item | ||
71 | * @work: The work item to initialise | ||
72 | * @ops: The operations to use to handle the slow work item | ||
73 | * | ||
74 | * Initialise a very slow work item. This item will be restricted such that | ||
75 | * only a certain number of the pool threads will be able to execute items of | ||
76 | * this type. | ||
77 | */ | ||
78 | static inline void vslow_work_init(struct slow_work *work, | ||
79 | const struct slow_work_ops *ops) | ||
80 | { | ||
81 | work->flags = 1 << SLOW_WORK_VERY_SLOW; | ||
82 | work->ops = ops; | ||
83 | INIT_LIST_HEAD(&work->link); | ||
84 | } | ||
85 | |||
86 | extern int slow_work_enqueue(struct slow_work *work); | ||
87 | extern int slow_work_register_user(void); | ||
88 | extern void slow_work_unregister_user(void); | ||
89 | |||
90 | #ifdef CONFIG_SYSCTL | ||
91 | extern ctl_table slow_work_sysctls[]; | ||
92 | #endif | ||
93 | |||
94 | #endif /* CONFIG_SLOW_WORK */ | ||
95 | #endif /* _LINUX_SLOW_WORK_H */ | ||
diff --git a/init/Kconfig b/init/Kconfig index 1398a14b0191..236a79377b8e 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1014,6 +1014,18 @@ config MARKERS | |||
1014 | 1014 | ||
1015 | source "arch/Kconfig" | 1015 | source "arch/Kconfig" |
1016 | 1016 | ||
1017 | config SLOW_WORK | ||
1018 | default n | ||
1019 | bool "Enable slow work thread pool" | ||
1020 | help | ||
1021 | The slow work thread pool provides a number of dynamically allocated | ||
1022 | threads that can be used by the kernel to perform operations that | ||
1023 | take a relatively long time. | ||
1024 | |||
1025 | An example of this would be CacheFiles doing a path lookup followed | ||
1026 | by a series of mkdirs and a create call, all of which have to touch | ||
1027 | disk. | ||
1028 | |||
1017 | endmenu # General setup | 1029 | endmenu # General setup |
1018 | 1030 | ||
1019 | config HAVE_GENERIC_DMA_COHERENT | 1031 | config HAVE_GENERIC_DMA_COHERENT |
diff --git a/kernel/Makefile b/kernel/Makefile index e4791b3ba55d..bab1dffe37e9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -93,6 +93,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | |||
93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
94 | obj-$(CONFIG_TRACING) += trace/ | 94 | obj-$(CONFIG_TRACING) += trace/ |
95 | obj-$(CONFIG_SMP) += sched_cpupri.o | 95 | obj-$(CONFIG_SMP) += sched_cpupri.o |
96 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | ||
96 | 97 | ||
97 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 98 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
98 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 99 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/slow-work.c b/kernel/slow-work.c new file mode 100644 index 000000000000..cf2bc01186ef --- /dev/null +++ b/kernel/slow-work.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/slow-work.txt | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/slow-work.h> | ||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/freezer.h> | ||
18 | #include <linux/wait.h> | ||
19 | |||
/* cull threads 5s after running out of things to do */
#define SLOW_WORK_CULL_TIMEOUT (5 * HZ)
/* can't start new threads for 5s after OOM */
#define SLOW_WORK_OOM_TIMEOUT (5 * HZ)

/* timer callbacks, defined further down */
static void slow_work_cull_timeout(unsigned long data);
static void slow_work_oom_timeout(unsigned long data);

#ifdef CONFIG_SYSCTL
/* sysctl handlers for the min-threads and max-threads entries */
static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
					struct file *filp,
					void __user *buffer,
					size_t *lenp, loff_t *ppos);

static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
					struct file *filp,
					void __user *buffer,
					size_t *lenp, loff_t *ppos);
#endif
35 | |||
/*
 * Pool sizing.  Whilst the facility has users the pool holds at least
 * slow_work_min_threads threads and may grow to slow_work_max_threads.
 *
 * Part of the pool may be busy with very slow operations;
 * vslow_work_proportion is the percentage of threads allowed to do so.
 */
static unsigned int slow_work_min_threads = 2;
static unsigned int slow_work_max_threads = 4;
static unsigned int vslow_work_proportion = 50;
46 | |||
#ifdef CONFIG_SYSCTL
/*
 * Bounds handed to the sysctl handlers through extra1/extra2.  The upper
 * bound on max-threads is not const because init_slow_work() may raise it.
 */
static const int slow_work_min_min_threads = 2;
static int slow_work_max_max_threads = 255;
static const int slow_work_min_vslow = 1;
static const int slow_work_max_vslow = 99;

/*
 * Sysctl entries for the thread pool: min-threads is bounded above by the
 * current max-threads, max-threads is bounded below by the current
 * min-threads, and vslow-percentage is kept within 1..99.
 */
ctl_table slow_work_sysctls[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "min-threads",
		.data		= &slow_work_min_threads,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= slow_work_min_threads_sysctl,
		.extra1		= (void *) &slow_work_min_min_threads,
		.extra2		= &slow_work_max_threads,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "max-threads",
		.data		= &slow_work_max_threads,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= slow_work_max_threads_sysctl,
		.extra1		= &slow_work_min_threads,
		.extra2		= &slow_work_max_max_threads,
	},
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "vslow-percentage",
		.data		= &vslow_work_proportion,
		.maxlen		= sizeof(unsigned),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= (void *) &slow_work_min_vslow,
		.extra2		= (void *) &slow_work_max_vslow,
	},
	{ .ctl_name = 0 }
};
#endif
87 | |||
88 | /* | ||
89 | * The active state of the thread pool | ||
90 | */ | ||
91 | static atomic_t slow_work_thread_count; | ||
92 | static atomic_t vslow_work_executing_count; | ||
93 | |||
94 | static bool slow_work_may_not_start_new_thread; | ||
95 | static bool slow_work_cull; /* cull a thread due to lack of activity */ | ||
96 | static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0); | ||
97 | static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0); | ||
98 | static struct slow_work slow_work_new_thread; /* new thread starter */ | ||
99 | |||
100 | /* | ||
101 | * The queues of work items and the lock governing access to them. These are | ||
102 | * shared between all the CPUs. It doesn't make sense to have per-CPU queues | ||
103 | * as the number of threads bears no relation to the number of CPUs. | ||
104 | * | ||
105 | * There are two queues of work items: one for slow work items, and one for | ||
106 | * very slow work items. | ||
107 | */ | ||
108 | static LIST_HEAD(slow_work_queue); | ||
109 | static LIST_HEAD(vslow_work_queue); | ||
110 | static DEFINE_SPINLOCK(slow_work_queue_lock); | ||
111 | |||
112 | /* | ||
113 | * The thread controls. A variable used to signal to the threads that they | ||
114 | * should exit when the queue is empty, a waitqueue used by the threads to wait | ||
115 | * for signals, and a completion set by the last thread to exit. | ||
116 | */ | ||
117 | static bool slow_work_threads_should_exit; | ||
118 | static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq); | ||
119 | static DECLARE_COMPLETION(slow_work_last_thread_exited); | ||
120 | |||
121 | /* | ||
122 | * The number of users of the thread pool and its lock. Whilst this is zero we | ||
123 | * have no threads hanging around, and when this reaches zero, we wait for all | ||
124 | * active or queued work items to complete and kill all the threads we do have. | ||
125 | */ | ||
126 | static int slow_work_user_count; | ||
127 | static DEFINE_MUTEX(slow_work_user_lock); | ||
128 | |||
129 | /* | ||
130 | * Calculate the maximum number of active threads in the pool that are | ||
131 | * permitted to process very slow work items. | ||
132 | * | ||
133 | * The answer is rounded up to at least 1, but may not equal or exceed the | ||
134 | * maximum number of the threads in the pool. This means we always have at | ||
135 | * least one thread that can process slow work items, and we always have at | ||
136 | * least one thread that won't get tied up doing so. | ||
137 | */ | ||
138 | static unsigned slow_work_calc_vsmax(void) | ||
139 | { | ||
140 | unsigned vsmax; | ||
141 | |||
142 | vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion; | ||
143 | vsmax /= 100; | ||
144 | vsmax = max(vsmax, 1U); | ||
145 | return min(vsmax, slow_work_max_threads - 1); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Attempt to execute stuff queued on a slow thread. Return true if we managed | ||
150 | * it, false if there was nothing to do. | ||
151 | */ | ||
152 | static bool slow_work_execute(void) | ||
153 | { | ||
154 | struct slow_work *work = NULL; | ||
155 | unsigned vsmax; | ||
156 | bool very_slow; | ||
157 | |||
158 | vsmax = slow_work_calc_vsmax(); | ||
159 | |||
160 | /* see if we can schedule a new thread to be started if we're not | ||
161 | * keeping up with the work */ | ||
162 | if (!waitqueue_active(&slow_work_thread_wq) && | ||
163 | (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) && | ||
164 | atomic_read(&slow_work_thread_count) < slow_work_max_threads && | ||
165 | !slow_work_may_not_start_new_thread) | ||
166 | slow_work_enqueue(&slow_work_new_thread); | ||
167 | |||
168 | /* find something to execute */ | ||
169 | spin_lock_irq(&slow_work_queue_lock); | ||
170 | if (!list_empty(&vslow_work_queue) && | ||
171 | atomic_read(&vslow_work_executing_count) < vsmax) { | ||
172 | work = list_entry(vslow_work_queue.next, | ||
173 | struct slow_work, link); | ||
174 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | ||
175 | BUG(); | ||
176 | list_del_init(&work->link); | ||
177 | atomic_inc(&vslow_work_executing_count); | ||
178 | very_slow = true; | ||
179 | } else if (!list_empty(&slow_work_queue)) { | ||
180 | work = list_entry(slow_work_queue.next, | ||
181 | struct slow_work, link); | ||
182 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | ||
183 | BUG(); | ||
184 | list_del_init(&work->link); | ||
185 | very_slow = false; | ||
186 | } else { | ||
187 | very_slow = false; /* avoid the compiler warning */ | ||
188 | } | ||
189 | spin_unlock_irq(&slow_work_queue_lock); | ||
190 | |||
191 | if (!work) | ||
192 | return false; | ||
193 | |||
194 | if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) | ||
195 | BUG(); | ||
196 | |||
197 | work->ops->execute(work); | ||
198 | |||
199 | if (very_slow) | ||
200 | atomic_dec(&vslow_work_executing_count); | ||
201 | clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); | ||
202 | |||
203 | /* if someone tried to enqueue the item whilst we were executing it, | ||
204 | * then it'll be left unenqueued to avoid multiple threads trying to | ||
205 | * execute it simultaneously | ||
206 | * | ||
207 | * there is, however, a race between us testing the pending flag and | ||
208 | * getting the spinlock, and between the enqueuer setting the pending | ||
209 | * flag and getting the spinlock, so we use a deferral bit to tell us | ||
210 | * if the enqueuer got there first | ||
211 | */ | ||
212 | if (test_bit(SLOW_WORK_PENDING, &work->flags)) { | ||
213 | spin_lock_irq(&slow_work_queue_lock); | ||
214 | |||
215 | if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) && | ||
216 | test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) | ||
217 | goto auto_requeue; | ||
218 | |||
219 | spin_unlock_irq(&slow_work_queue_lock); | ||
220 | } | ||
221 | |||
222 | work->ops->put_ref(work); | ||
223 | return true; | ||
224 | |||
225 | auto_requeue: | ||
226 | /* we must complete the enqueue operation | ||
227 | * - we transfer our ref on the item back to the appropriate queue | ||
228 | * - don't wake another thread up as we're awake already | ||
229 | */ | ||
230 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) | ||
231 | list_add_tail(&work->link, &vslow_work_queue); | ||
232 | else | ||
233 | list_add_tail(&work->link, &slow_work_queue); | ||
234 | spin_unlock_irq(&slow_work_queue_lock); | ||
235 | return true; | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * slow_work_enqueue - Schedule a slow work item for processing | ||
240 | * @work: The work item to queue | ||
241 | * | ||
242 | * Schedule a slow work item for processing. If the item is already undergoing | ||
243 | * execution, this guarantees not to re-enter the execution routine until the | ||
244 | * first execution finishes. | ||
245 | * | ||
246 | * The item is pinned by this function as it retains a reference to it, managed | ||
247 | * through the item operations. The item is unpinned once it has been | ||
248 | * executed. | ||
249 | * | ||
250 | * An item may hog the thread that is running it for a relatively large amount | ||
251 | * of time, sufficient, for example, to perform several lookup, mkdir, create | ||
252 | * and setxattr operations. It may sleep on I/O and may sleep to obtain locks. | ||
253 | * | ||
254 | * Conversely, if a number of items are awaiting processing, it may take some | ||
255 | * time before any given item is given attention. The number of threads in the | ||
256 | * pool may be increased to deal with demand, but only up to a limit. | ||
257 | * | ||
258 | * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in | ||
259 | * the very slow queue, from which only a portion of the threads will be | ||
260 | * allowed to pick items to execute. This ensures that very slow items won't | ||
261 | * overly block ones that are just ordinarily slow. | ||
262 | * | ||
263 | * Returns 0 if successful, -EAGAIN if not. | ||
264 | */ | ||
265 | int slow_work_enqueue(struct slow_work *work) | ||
266 | { | ||
267 | unsigned long flags; | ||
268 | |||
269 | BUG_ON(slow_work_user_count <= 0); | ||
270 | BUG_ON(!work); | ||
271 | BUG_ON(!work->ops); | ||
272 | BUG_ON(!work->ops->get_ref); | ||
273 | |||
274 | /* when honouring an enqueue request, we only promise that we will run | ||
275 | * the work function in the future; we do not promise to run it once | ||
276 | * per enqueue request | ||
277 | * | ||
278 | * we use the PENDING bit to merge together repeat requests without | ||
279 | * having to disable IRQs and take the spinlock, whilst still | ||
280 | * maintaining our promise | ||
281 | */ | ||
282 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | ||
283 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
284 | |||
285 | /* we promise that we will not attempt to execute the work | ||
286 | * function in more than one thread simultaneously | ||
287 | * | ||
288 | * this, however, leaves us with a problem if we're asked to | ||
289 | * enqueue the work whilst someone is executing the work | ||
290 | * function as simply queueing the work immediately means that | ||
291 | * another thread may try executing it whilst it is already | ||
292 | * under execution | ||
293 | * | ||
294 | * to deal with this, we set the ENQ_DEFERRED bit instead of | ||
295 | * enqueueing, and the thread currently executing the work | ||
296 | * function will enqueue the work item when the work function | ||
297 | * returns and it has cleared the EXECUTING bit | ||
298 | */ | ||
299 | if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { | ||
300 | set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); | ||
301 | } else { | ||
302 | if (work->ops->get_ref(work) < 0) | ||
303 | goto cant_get_ref; | ||
304 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) | ||
305 | list_add_tail(&work->link, &vslow_work_queue); | ||
306 | else | ||
307 | list_add_tail(&work->link, &slow_work_queue); | ||
308 | wake_up(&slow_work_thread_wq); | ||
309 | } | ||
310 | |||
311 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
312 | } | ||
313 | return 0; | ||
314 | |||
315 | cant_get_ref: | ||
316 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
317 | return -EAGAIN; | ||
318 | } | ||
319 | EXPORT_SYMBOL(slow_work_enqueue); | ||
320 | |||
321 | /* | ||
322 | * Worker thread culling algorithm | ||
323 | */ | ||
324 | static bool slow_work_cull_thread(void) | ||
325 | { | ||
326 | unsigned long flags; | ||
327 | bool do_cull = false; | ||
328 | |||
329 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
330 | |||
331 | if (slow_work_cull) { | ||
332 | slow_work_cull = false; | ||
333 | |||
334 | if (list_empty(&slow_work_queue) && | ||
335 | list_empty(&vslow_work_queue) && | ||
336 | atomic_read(&slow_work_thread_count) > | ||
337 | slow_work_min_threads) { | ||
338 | mod_timer(&slow_work_cull_timer, | ||
339 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
340 | do_cull = true; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
345 | return do_cull; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * Determine if there is slow work available for dispatch | ||
350 | */ | ||
351 | static inline bool slow_work_available(int vsmax) | ||
352 | { | ||
353 | return !list_empty(&slow_work_queue) || | ||
354 | (!list_empty(&vslow_work_queue) && | ||
355 | atomic_read(&vslow_work_executing_count) < vsmax); | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * Worker thread dispatcher | ||
360 | */ | ||
361 | static int slow_work_thread(void *_data) | ||
362 | { | ||
363 | int vsmax; | ||
364 | |||
365 | DEFINE_WAIT(wait); | ||
366 | |||
367 | set_freezable(); | ||
368 | set_user_nice(current, -5); | ||
369 | |||
370 | for (;;) { | ||
371 | vsmax = vslow_work_proportion; | ||
372 | vsmax *= atomic_read(&slow_work_thread_count); | ||
373 | vsmax /= 100; | ||
374 | |||
375 | prepare_to_wait(&slow_work_thread_wq, &wait, | ||
376 | TASK_INTERRUPTIBLE); | ||
377 | if (!freezing(current) && | ||
378 | !slow_work_threads_should_exit && | ||
379 | !slow_work_available(vsmax) && | ||
380 | !slow_work_cull) | ||
381 | schedule(); | ||
382 | finish_wait(&slow_work_thread_wq, &wait); | ||
383 | |||
384 | try_to_freeze(); | ||
385 | |||
386 | vsmax = vslow_work_proportion; | ||
387 | vsmax *= atomic_read(&slow_work_thread_count); | ||
388 | vsmax /= 100; | ||
389 | |||
390 | if (slow_work_available(vsmax) && slow_work_execute()) { | ||
391 | cond_resched(); | ||
392 | if (list_empty(&slow_work_queue) && | ||
393 | list_empty(&vslow_work_queue) && | ||
394 | atomic_read(&slow_work_thread_count) > | ||
395 | slow_work_min_threads) | ||
396 | mod_timer(&slow_work_cull_timer, | ||
397 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
398 | continue; | ||
399 | } | ||
400 | |||
401 | if (slow_work_threads_should_exit) | ||
402 | break; | ||
403 | |||
404 | if (slow_work_cull && slow_work_cull_thread()) | ||
405 | break; | ||
406 | } | ||
407 | |||
408 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
409 | complete_and_exit(&slow_work_last_thread_exited, 0); | ||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Handle thread cull timer expiration | ||
415 | */ | ||
416 | static void slow_work_cull_timeout(unsigned long data) | ||
417 | { | ||
418 | slow_work_cull = true; | ||
419 | wake_up(&slow_work_thread_wq); | ||
420 | } | ||
421 | |||
/*
 * Take a reference on the thread starter item.  Nothing is counted here, so
 * this always reports success.
 */
static int slow_work_new_thread_get_ref(struct slow_work *work)
{
	return 0;
}
429 | |||
/*
 * Release a reference on the thread starter item.  Nothing is counted here,
 * so there is nothing to do.
 */
static void slow_work_new_thread_put_ref(struct slow_work *work)
{
}
436 | |||
437 | /* | ||
438 | * Start a new slow work thread | ||
439 | */ | ||
440 | static void slow_work_new_thread_execute(struct slow_work *work) | ||
441 | { | ||
442 | struct task_struct *p; | ||
443 | |||
444 | if (slow_work_threads_should_exit) | ||
445 | return; | ||
446 | |||
447 | if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads) | ||
448 | return; | ||
449 | |||
450 | if (!mutex_trylock(&slow_work_user_lock)) | ||
451 | return; | ||
452 | |||
453 | slow_work_may_not_start_new_thread = true; | ||
454 | atomic_inc(&slow_work_thread_count); | ||
455 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
456 | if (IS_ERR(p)) { | ||
457 | printk(KERN_DEBUG "Slow work thread pool: OOM\n"); | ||
458 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
459 | BUG(); /* we're running on a slow work thread... */ | ||
460 | mod_timer(&slow_work_oom_timer, | ||
461 | jiffies + SLOW_WORK_OOM_TIMEOUT); | ||
462 | } else { | ||
463 | /* ratelimit the starting of new threads */ | ||
464 | mod_timer(&slow_work_oom_timer, jiffies + 1); | ||
465 | } | ||
466 | |||
467 | mutex_unlock(&slow_work_user_lock); | ||
468 | } | ||
469 | |||
470 | static const struct slow_work_ops slow_work_new_thread_ops = { | ||
471 | .get_ref = slow_work_new_thread_get_ref, | ||
472 | .put_ref = slow_work_new_thread_put_ref, | ||
473 | .execute = slow_work_new_thread_execute, | ||
474 | }; | ||
475 | |||
476 | /* | ||
477 | * post-OOM new thread start suppression expiration | ||
478 | */ | ||
479 | static void slow_work_oom_timeout(unsigned long data) | ||
480 | { | ||
481 | slow_work_may_not_start_new_thread = false; | ||
482 | } | ||
483 | |||
484 | #ifdef CONFIG_SYSCTL | ||
485 | /* | ||
486 | * Handle adjustment of the minimum number of threads | ||
487 | */ | ||
488 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | ||
489 | struct file *filp, void __user *buffer, | ||
490 | size_t *lenp, loff_t *ppos) | ||
491 | { | ||
492 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
493 | int n; | ||
494 | |||
495 | if (ret == 0) { | ||
496 | mutex_lock(&slow_work_user_lock); | ||
497 | if (slow_work_user_count > 0) { | ||
498 | /* see if we need to start or stop threads */ | ||
499 | n = atomic_read(&slow_work_thread_count) - | ||
500 | slow_work_min_threads; | ||
501 | |||
502 | if (n < 0 && !slow_work_may_not_start_new_thread) | ||
503 | slow_work_enqueue(&slow_work_new_thread); | ||
504 | else if (n > 0) | ||
505 | mod_timer(&slow_work_cull_timer, | ||
506 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
507 | } | ||
508 | mutex_unlock(&slow_work_user_lock); | ||
509 | } | ||
510 | |||
511 | return ret; | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * Handle adjustment of the maximum number of threads | ||
516 | */ | ||
517 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, | ||
518 | struct file *filp, void __user *buffer, | ||
519 | size_t *lenp, loff_t *ppos) | ||
520 | { | ||
521 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
522 | int n; | ||
523 | |||
524 | if (ret == 0) { | ||
525 | mutex_lock(&slow_work_user_lock); | ||
526 | if (slow_work_user_count > 0) { | ||
527 | /* see if we need to stop threads */ | ||
528 | n = slow_work_max_threads - | ||
529 | atomic_read(&slow_work_thread_count); | ||
530 | |||
531 | if (n < 0) | ||
532 | mod_timer(&slow_work_cull_timer, | ||
533 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
534 | } | ||
535 | mutex_unlock(&slow_work_user_lock); | ||
536 | } | ||
537 | |||
538 | return ret; | ||
539 | } | ||
540 | #endif /* CONFIG_SYSCTL */ | ||
541 | |||
542 | /** | ||
543 | * slow_work_register_user - Register a user of the facility | ||
544 | * | ||
545 | * Register a user of the facility, starting up the initial threads if there | ||
546 | * aren't any other users at this point. This will return 0 if successful, or | ||
547 | * an error if not. | ||
548 | */ | ||
549 | int slow_work_register_user(void) | ||
550 | { | ||
551 | struct task_struct *p; | ||
552 | int loop; | ||
553 | |||
554 | mutex_lock(&slow_work_user_lock); | ||
555 | |||
556 | if (slow_work_user_count == 0) { | ||
557 | printk(KERN_NOTICE "Slow work thread pool: Starting up\n"); | ||
558 | init_completion(&slow_work_last_thread_exited); | ||
559 | |||
560 | slow_work_threads_should_exit = false; | ||
561 | slow_work_init(&slow_work_new_thread, | ||
562 | &slow_work_new_thread_ops); | ||
563 | slow_work_may_not_start_new_thread = false; | ||
564 | slow_work_cull = false; | ||
565 | |||
566 | /* start the minimum number of threads */ | ||
567 | for (loop = 0; loop < slow_work_min_threads; loop++) { | ||
568 | atomic_inc(&slow_work_thread_count); | ||
569 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
570 | if (IS_ERR(p)) | ||
571 | goto error; | ||
572 | } | ||
573 | printk(KERN_NOTICE "Slow work thread pool: Ready\n"); | ||
574 | } | ||
575 | |||
576 | slow_work_user_count++; | ||
577 | mutex_unlock(&slow_work_user_lock); | ||
578 | return 0; | ||
579 | |||
580 | error: | ||
581 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
582 | complete(&slow_work_last_thread_exited); | ||
583 | if (loop > 0) { | ||
584 | printk(KERN_ERR "Slow work thread pool:" | ||
585 | " Aborting startup on ENOMEM\n"); | ||
586 | slow_work_threads_should_exit = true; | ||
587 | wake_up_all(&slow_work_thread_wq); | ||
588 | wait_for_completion(&slow_work_last_thread_exited); | ||
589 | printk(KERN_ERR "Slow work thread pool: Aborted\n"); | ||
590 | } | ||
591 | mutex_unlock(&slow_work_user_lock); | ||
592 | return PTR_ERR(p); | ||
593 | } | ||
594 | EXPORT_SYMBOL(slow_work_register_user); | ||
595 | |||
596 | /** | ||
597 | * slow_work_unregister_user - Unregister a user of the facility | ||
598 | * | ||
599 | * Unregister a user of the facility, killing all the threads if this was the | ||
600 | * last one. | ||
601 | */ | ||
602 | void slow_work_unregister_user(void) | ||
603 | { | ||
604 | mutex_lock(&slow_work_user_lock); | ||
605 | |||
606 | BUG_ON(slow_work_user_count <= 0); | ||
607 | |||
608 | slow_work_user_count--; | ||
609 | if (slow_work_user_count == 0) { | ||
610 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); | ||
611 | slow_work_threads_should_exit = true; | ||
612 | wake_up_all(&slow_work_thread_wq); | ||
613 | wait_for_completion(&slow_work_last_thread_exited); | ||
614 | printk(KERN_NOTICE "Slow work thread pool:" | ||
615 | " Shut down complete\n"); | ||
616 | } | ||
617 | |||
618 | del_timer_sync(&slow_work_cull_timer); | ||
619 | |||
620 | mutex_unlock(&slow_work_user_lock); | ||
621 | } | ||
622 | EXPORT_SYMBOL(slow_work_unregister_user); | ||
623 | |||
624 | /* | ||
625 | * Initialise the slow work facility | ||
626 | */ | ||
627 | static int __init init_slow_work(void) | ||
628 | { | ||
629 | unsigned nr_cpus = num_possible_cpus(); | ||
630 | |||
631 | if (slow_work_max_threads < nr_cpus) | ||
632 | slow_work_max_threads = nr_cpus; | ||
633 | #ifdef CONFIG_SYSCTL | ||
634 | if (slow_work_max_max_threads < nr_cpus * 2) | ||
635 | slow_work_max_max_threads = nr_cpus * 2; | ||
636 | #endif | ||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | subsys_initcall(init_slow_work); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5ec4543dfc06..82350f8f04f6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/acpi.h> | 48 | #include <linux/acpi.h> |
49 | #include <linux/reboot.h> | 49 | #include <linux/reboot.h> |
50 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
51 | #include <linux/slow-work.h> | ||
51 | 52 | ||
52 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
53 | #include <asm/processor.h> | 54 | #include <asm/processor.h> |
@@ -897,6 +898,14 @@ static struct ctl_table kern_table[] = { | |||
897 | .proc_handler = &scan_unevictable_handler, | 898 | .proc_handler = &scan_unevictable_handler, |
898 | }, | 899 | }, |
899 | #endif | 900 | #endif |
901 | #ifdef CONFIG_SLOW_WORK | ||
902 | { | ||
903 | .ctl_name = CTL_UNNUMBERED, | ||
904 | .procname = "slow-work", | ||
905 | .mode = 0555, | ||
906 | .child = slow_work_sysctls, | ||
907 | }, | ||
908 | #endif | ||
900 | /* | 909 | /* |
901 | * NOTE: do not add new entries to this table unless you have read | 910 | * NOTE: do not add new entries to this table unless you have read |
902 | * Documentation/sysctl/ctl_unnumbered.txt | 911 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/mm/filemap.c b/mm/filemap.c index 126d3973b3d1..fc11974f2bee 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -565,6 +565,24 @@ void wait_on_page_bit(struct page *page, int bit_nr) | |||
565 | EXPORT_SYMBOL(wait_on_page_bit); | 565 | EXPORT_SYMBOL(wait_on_page_bit); |
566 | 566 | ||
567 | /** | 567 | /** |
568 | * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue | ||
569 | * @page - Page defining the wait queue of interest | ||
570 | * @waiter - Waiter to add to the queue | ||
571 | * | ||
572 | * Add an arbitrary @waiter to the wait queue for the nominated @page. | ||
573 | */ | ||
574 | void add_page_wait_queue(struct page *page, wait_queue_t *waiter) | ||
575 | { | ||
576 | wait_queue_head_t *q = page_waitqueue(page); | ||
577 | unsigned long flags; | ||
578 | |||
579 | spin_lock_irqsave(&q->lock, flags); | ||
580 | __add_wait_queue(q, waiter); | ||
581 | spin_unlock_irqrestore(&q->lock, flags); | ||
582 | } | ||
583 | EXPORT_SYMBOL_GPL(add_page_wait_queue); | ||
584 | |||
585 | /** | ||
568 | * unlock_page - unlock a locked page | 586 | * unlock_page - unlock a locked page |
569 | * @page: the page | 587 | * @page: the page |
570 | * | 588 | * |
@@ -2463,6 +2481,9 @@ EXPORT_SYMBOL(generic_file_aio_write); | |||
2463 | * (presumably at page->private). If the release was successful, return `1'. | 2481 | * (presumably at page->private). If the release was successful, return `1'. |
2464 | * Otherwise return zero. | 2482 | * Otherwise return zero. |
2465 | * | 2483 | * |
2484 | * This may also be called if PG_fscache is set on a page, indicating that the | ||
2485 | * page is known to the local caching routines. | ||
2486 | * | ||
2466 | * The @gfp_mask argument specifies whether I/O may be performed to release | 2487 | * The @gfp_mask argument specifies whether I/O may be performed to release |
2467 | * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). | 2488 | * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). |
2468 | * | 2489 | * |
diff --git a/mm/migrate.c b/mm/migrate.c index a9eff3f092f6..068655d8f883 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -250,7 +250,7 @@ out: | |||
250 | * The number of remaining references must be: | 250 | * The number of remaining references must be: |
251 | * 1 for anonymous pages without a mapping | 251 | * 1 for anonymous pages without a mapping |
252 | * 2 for pages with a mapping | 252 | * 2 for pages with a mapping |
253 | * 3 for pages with a mapping and PagePrivate set. | 253 | * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. |
254 | */ | 254 | */ |
255 | static int migrate_page_move_mapping(struct address_space *mapping, | 255 | static int migrate_page_move_mapping(struct address_space *mapping, |
256 | struct page *newpage, struct page *page) | 256 | struct page *newpage, struct page *page) |
@@ -270,7 +270,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
270 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | 270 | pslot = radix_tree_lookup_slot(&mapping->page_tree, |
271 | page_index(page)); | 271 | page_index(page)); |
272 | 272 | ||
273 | expected_count = 2 + !!PagePrivate(page); | 273 | expected_count = 2 + !!page_has_private(page); |
274 | if (page_count(page) != expected_count || | 274 | if (page_count(page) != expected_count || |
275 | (struct page *)radix_tree_deref_slot(pslot) != page) { | 275 | (struct page *)radix_tree_deref_slot(pslot) != page) { |
276 | spin_unlock_irq(&mapping->tree_lock); | 276 | spin_unlock_irq(&mapping->tree_lock); |
@@ -386,7 +386,7 @@ EXPORT_SYMBOL(fail_migrate_page); | |||
386 | 386 | ||
387 | /* | 387 | /* |
388 | * Common logic to directly migrate a single page suitable for | 388 | * Common logic to directly migrate a single page suitable for |
389 | * pages that do not use PagePrivate. | 389 | * pages that do not use PagePrivate/PagePrivate2. |
390 | * | 390 | * |
391 | * Pages are locked upon entry and exit. | 391 | * Pages are locked upon entry and exit. |
392 | */ | 392 | */ |
@@ -522,7 +522,7 @@ static int fallback_migrate_page(struct address_space *mapping, | |||
522 | * Buffers may be managed in a filesystem specific way. | 522 | * Buffers may be managed in a filesystem specific way. |
523 | * We must have no buffers or drop them. | 523 | * We must have no buffers or drop them. |
524 | */ | 524 | */ |
525 | if (PagePrivate(page) && | 525 | if (page_has_private(page) && |
526 | !try_to_release_page(page, GFP_KERNEL)) | 526 | !try_to_release_page(page, GFP_KERNEL)) |
527 | return -EAGAIN; | 527 | return -EAGAIN; |
528 | 528 | ||
@@ -655,7 +655,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, | |||
655 | * free the metadata, so the page can be freed. | 655 | * free the metadata, so the page can be freed. |
656 | */ | 656 | */ |
657 | if (!page->mapping) { | 657 | if (!page->mapping) { |
658 | if (!PageAnon(page) && PagePrivate(page)) { | 658 | if (!PageAnon(page) && page_has_private(page)) { |
659 | /* | 659 | /* |
660 | * Go direct to try_to_free_buffers() here because | 660 | * Go direct to try_to_free_buffers() here because |
661 | * a) that's what try_to_release_page() would do anyway | 661 | * a) that's what try_to_release_page() would do anyway |
diff --git a/mm/readahead.c b/mm/readahead.c index 9ce303d4b810..133b6d525513 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -31,6 +31,42 @@ EXPORT_SYMBOL_GPL(file_ra_state_init); | |||
31 | 31 | ||
32 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) | 32 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) |
33 | 33 | ||
34 | /* | ||
35 | * see if a page needs releasing upon read_cache_pages() failure | ||
36 | * - the caller of read_cache_pages() may have set PG_private or PG_fscache | ||
37 | * before calling, such as the NFS fs marking pages that are cached locally | ||
38 | * on disk, thus we need to give the fs a chance to clean up in the event of | ||
39 | * an error | ||
40 | */ | ||
41 | static void read_cache_pages_invalidate_page(struct address_space *mapping, | ||
42 | struct page *page) | ||
43 | { | ||
44 | if (page_has_private(page)) { | ||
45 | if (!trylock_page(page)) | ||
46 | BUG(); | ||
47 | page->mapping = mapping; | ||
48 | do_invalidatepage(page, 0); | ||
49 | page->mapping = NULL; | ||
50 | unlock_page(page); | ||
51 | } | ||
52 | page_cache_release(page); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * release a list of pages, invalidating them first if need be | ||
57 | */ | ||
58 | static void read_cache_pages_invalidate_pages(struct address_space *mapping, | ||
59 | struct list_head *pages) | ||
60 | { | ||
61 | struct page *victim; | ||
62 | |||
63 | while (!list_empty(pages)) { | ||
64 | victim = list_to_page(pages); | ||
65 | list_del(&victim->lru); | ||
66 | read_cache_pages_invalidate_page(mapping, victim); | ||
67 | } | ||
68 | } | ||
69 | |||
34 | /** | 70 | /** |
35 | * read_cache_pages - populate an address space with some pages & start reads against them | 71 | * read_cache_pages - populate an address space with some pages & start reads against them |
36 | * @mapping: the address_space | 72 | * @mapping: the address_space |
@@ -52,14 +88,14 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, | |||
52 | list_del(&page->lru); | 88 | list_del(&page->lru); |
53 | if (add_to_page_cache_lru(page, mapping, | 89 | if (add_to_page_cache_lru(page, mapping, |
54 | page->index, GFP_KERNEL)) { | 90 | page->index, GFP_KERNEL)) { |
55 | page_cache_release(page); | 91 | read_cache_pages_invalidate_page(mapping, page); |
56 | continue; | 92 | continue; |
57 | } | 93 | } |
58 | page_cache_release(page); | 94 | page_cache_release(page); |
59 | 95 | ||
60 | ret = filler(data, page); | 96 | ret = filler(data, page); |
61 | if (unlikely(ret)) { | 97 | if (unlikely(ret)) { |
62 | put_pages_list(pages); | 98 | read_cache_pages_invalidate_pages(mapping, pages); |
63 | break; | 99 | break; |
64 | } | 100 | } |
65 | task_io_account_read(PAGE_CACHE_SIZE); | 101 | task_io_account_read(PAGE_CACHE_SIZE); |
@@ -448,8 +448,8 @@ void pagevec_strip(struct pagevec *pvec) | |||
448 | for (i = 0; i < pagevec_count(pvec); i++) { | 448 | for (i = 0; i < pagevec_count(pvec); i++) { |
449 | struct page *page = pvec->pages[i]; | 449 | struct page *page = pvec->pages[i]; |
450 | 450 | ||
451 | if (PagePrivate(page) && trylock_page(page)) { | 451 | if (page_has_private(page) && trylock_page(page)) { |
452 | if (PagePrivate(page)) | 452 | if (page_has_private(page)) |
453 | try_to_release_page(page, 0); | 453 | try_to_release_page(page, 0); |
454 | unlock_page(page); | 454 | unlock_page(page); |
455 | } | 455 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index 1229211104f8..55206fab7b99 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -50,7 +50,7 @@ void do_invalidatepage(struct page *page, unsigned long offset) | |||
50 | static inline void truncate_partial_page(struct page *page, unsigned partial) | 50 | static inline void truncate_partial_page(struct page *page, unsigned partial) |
51 | { | 51 | { |
52 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | 52 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); |
53 | if (PagePrivate(page)) | 53 | if (page_has_private(page)) |
54 | do_invalidatepage(page, partial); | 54 | do_invalidatepage(page, partial); |
55 | } | 55 | } |
56 | 56 | ||
@@ -99,7 +99,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
99 | if (page->mapping != mapping) | 99 | if (page->mapping != mapping) |
100 | return; | 100 | return; |
101 | 101 | ||
102 | if (PagePrivate(page)) | 102 | if (page_has_private(page)) |
103 | do_invalidatepage(page, 0); | 103 | do_invalidatepage(page, 0); |
104 | 104 | ||
105 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 105 | cancel_dirty_page(page, PAGE_CACHE_SIZE); |
@@ -126,7 +126,7 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
126 | if (page->mapping != mapping) | 126 | if (page->mapping != mapping) |
127 | return 0; | 127 | return 0; |
128 | 128 | ||
129 | if (PagePrivate(page) && !try_to_release_page(page, 0)) | 129 | if (page_has_private(page) && !try_to_release_page(page, 0)) |
130 | return 0; | 130 | return 0; |
131 | 131 | ||
132 | clear_page_mlock(page); | 132 | clear_page_mlock(page); |
@@ -348,7 +348,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
348 | if (page->mapping != mapping) | 348 | if (page->mapping != mapping) |
349 | return 0; | 349 | return 0; |
350 | 350 | ||
351 | if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) | 351 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) |
352 | return 0; | 352 | return 0; |
353 | 353 | ||
354 | spin_lock_irq(&mapping->tree_lock); | 354 | spin_lock_irq(&mapping->tree_lock); |
@@ -356,7 +356,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
356 | goto failed; | 356 | goto failed; |
357 | 357 | ||
358 | clear_page_mlock(page); | 358 | clear_page_mlock(page); |
359 | BUG_ON(PagePrivate(page)); | 359 | BUG_ON(page_has_private(page)); |
360 | __remove_from_page_cache(page); | 360 | __remove_from_page_cache(page); |
361 | spin_unlock_irq(&mapping->tree_lock); | 361 | spin_unlock_irq(&mapping->tree_lock); |
362 | page_cache_release(page); /* pagecache ref */ | 362 | page_cache_release(page); /* pagecache ref */ |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 06e72693b458..425244988bb2 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -283,7 +283,7 @@ static inline int page_mapping_inuse(struct page *page) | |||
283 | 283 | ||
284 | static inline int is_page_cache_freeable(struct page *page) | 284 | static inline int is_page_cache_freeable(struct page *page) |
285 | { | 285 | { |
286 | return page_count(page) - !!PagePrivate(page) == 2; | 286 | return page_count(page) - !!page_has_private(page) == 2; |
287 | } | 287 | } |
288 | 288 | ||
289 | static int may_write_to_queue(struct backing_dev_info *bdi) | 289 | static int may_write_to_queue(struct backing_dev_info *bdi) |
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
367 | * Some data journaling orphaned pages can have | 367 | * Some data journaling orphaned pages can have |
368 | * page->mapping == NULL while being dirty with clean buffers. | 368 | * page->mapping == NULL while being dirty with clean buffers. |
369 | */ | 369 | */ |
370 | if (PagePrivate(page)) { | 370 | if (page_has_private(page)) { |
371 | if (try_to_free_buffers(page)) { | 371 | if (try_to_free_buffers(page)) { |
372 | ClearPageDirty(page); | 372 | ClearPageDirty(page); |
373 | printk("%s: orphaned page\n", __func__); | 373 | printk("%s: orphaned page\n", __func__); |
@@ -727,7 +727,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
727 | * process address space (page_count == 1) it can be freed. | 727 | * process address space (page_count == 1) it can be freed. |
728 | * Otherwise, leave the page on the LRU so it is swappable. | 728 | * Otherwise, leave the page on the LRU so it is swappable. |
729 | */ | 729 | */ |
730 | if (PagePrivate(page)) { | 730 | if (page_has_private(page)) { |
731 | if (!try_to_release_page(page, sc->gfp_mask)) | 731 | if (!try_to_release_page(page, sc->gfp_mask)) |
732 | goto activate_locked; | 732 | goto activate_locked; |
733 | if (!mapping && page_count(page) == 1) { | 733 | if (!mapping && page_count(page) == 1) { |
diff --git a/security/security.c b/security/security.c index 206e53844d2f..5284255c5cdf 100644 --- a/security/security.c +++ b/security/security.c | |||
@@ -445,6 +445,7 @@ int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) | |||
445 | return 0; | 445 | return 0; |
446 | return security_ops->inode_create(dir, dentry, mode); | 446 | return security_ops->inode_create(dir, dentry, mode); |
447 | } | 447 | } |
448 | EXPORT_SYMBOL_GPL(security_inode_create); | ||
448 | 449 | ||
449 | int security_inode_link(struct dentry *old_dentry, struct inode *dir, | 450 | int security_inode_link(struct dentry *old_dentry, struct inode *dir, |
450 | struct dentry *new_dentry) | 451 | struct dentry *new_dentry) |
@@ -475,6 +476,7 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
475 | return 0; | 476 | return 0; |
476 | return security_ops->inode_mkdir(dir, dentry, mode); | 477 | return security_ops->inode_mkdir(dir, dentry, mode); |
477 | } | 478 | } |
479 | EXPORT_SYMBOL_GPL(security_inode_mkdir); | ||
478 | 480 | ||
479 | int security_inode_rmdir(struct inode *dir, struct dentry *dentry) | 481 | int security_inode_rmdir(struct inode *dir, struct dentry *dentry) |
480 | { | 482 | { |