author     Linus Torvalds <torvalds@linux-foundation.org>  2009-11-30 16:33:48 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2009-11-30 16:33:48 -0500
commit     6e80133f7f247f313da1638af4ce30f2bac303cc (patch)
tree       318afcc1c1c434135849cef50e3d89be505ad011
parent     e3a41d7b99e7f97d9a50bec2a8f4eb237ce1d504 (diff)
parent     4fa9f4ede88b4e2ff135b6e5717499d734508c62 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-fscache
* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-fscache: (31 commits)
    FS-Cache: Provide nop fscache_stat_d() if CONFIG_FSCACHE_STATS=n
    SLOW_WORK: Fix GFS2 to #include <linux/module.h> before using THIS_MODULE
    SLOW_WORK: Fix CIFS to pass THIS_MODULE to slow_work_register_user()
    CacheFiles: Don't log lookup/create failing with ENOBUFS
    CacheFiles: Catch an overly long wait for an old active object
    CacheFiles: Better showing of debugging information in active object problems
    CacheFiles: Mark parent directory locks as I_MUTEX_PARENT to keep lockdep happy
    CacheFiles: Handle truncate unlocking the page we're reading
    CacheFiles: Don't write a full page if there's only a partial page to cache
    FS-Cache: Actually requeue an object when requested
    FS-Cache: Start processing an object's operations on that object's death
    FS-Cache: Make sure FSCACHE_COOKIE_LOOKING_UP cleared on lookup failure
    FS-Cache: Add a retirement stat counter
    FS-Cache: Handle pages pending storage that get evicted under OOM conditions
    FS-Cache: Handle read request vs lookup, creation or other cache failure
    FS-Cache: Don't delete pending pages from the page-store tracking tree
    FS-Cache: Fix lock misorder in fscache_write_op()
    FS-Cache: The object-available state can't rely on the cookie to be available
    FS-Cache: Permit cache retrieval ops to be interrupted in the initial wait phase
    FS-Cache: Use radix tree preload correctly in tracking of pages to be stored
    ...
-rw-r--r--  Documentation/filesystems/caching/fscache.txt   | 110
-rw-r--r--  Documentation/filesystems/caching/netfs-api.txt |  21
-rw-r--r--  Documentation/slow-work.txt                     | 160
-rw-r--r--  fs/9p/cache.c                                   |  14
-rw-r--r--  fs/afs/file.c                                   |  15
-rw-r--r--  fs/cachefiles/interface.c                       |  32
-rw-r--r--  fs/cachefiles/namei.c                           | 187
-rw-r--r--  fs/cachefiles/rdwr.c                            | 128
-rw-r--r--  fs/cifs/cifsfs.c                                |   2
-rw-r--r--  fs/fscache/Kconfig                              |   7
-rw-r--r--  fs/fscache/Makefile                             |   1
-rw-r--r--  fs/fscache/cache.c                              |   5
-rw-r--r--  fs/fscache/cookie.c                             |  26
-rw-r--r--  fs/fscache/internal.h                           |  56
-rw-r--r--  fs/fscache/main.c                               |   6
-rw-r--r--  fs/fscache/object-list.c                        | 432
-rw-r--r--  fs/fscache/object.c                             | 104
-rw-r--r--  fs/fscache/operation.c                          | 120
-rw-r--r--  fs/fscache/page.c                               | 273
-rw-r--r--  fs/fscache/proc.c                               |  13
-rw-r--r--  fs/fscache/stats.c                              |  94
-rw-r--r--  fs/gfs2/main.c                                  |   4
-rw-r--r--  fs/gfs2/recovery.c                              |   2
-rw-r--r--  fs/nfs/fscache.c                                |  10
-rw-r--r--  include/linux/fscache-cache.h                   |  40
-rw-r--r--  include/linux/fscache.h                         |  27
-rw-r--r--  include/linux/slow-work.h                       |  72
-rw-r--r--  init/Kconfig                                    |  10
-rw-r--r--  kernel/Makefile                                 |   1
-rw-r--r--  kernel/slow-work-proc.c                         | 227
-rw-r--r--  kernel/slow-work.c                              | 494
-rw-r--r--  kernel/slow-work.h                              |  72
-rw-r--r--  lib/radix-tree.c                                |   5
33 files changed, 2505 insertions(+), 265 deletions(-)
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 9e94b9491d89..a91e2e2095b0 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -235,6 +235,7 @@ proc files.
235 neg=N Number of negative lookups made 235 neg=N Number of negative lookups made
236 pos=N Number of positive lookups made 236 pos=N Number of positive lookups made
237 crt=N Number of objects created by lookup 237 crt=N Number of objects created by lookup
238 tmo=N Number of lookups timed out and requeued
238 Updates n=N Number of update cookie requests seen 239 Updates n=N Number of update cookie requests seen
239 nul=N Number of upd reqs given a NULL parent 240 nul=N Number of upd reqs given a NULL parent
240 run=N Number of upd reqs granted CPU time 241 run=N Number of upd reqs granted CPU time
@@ -250,8 +251,10 @@ proc files.
250 ok=N Number of successful alloc reqs 251 ok=N Number of successful alloc reqs
251 wt=N Number of alloc reqs that waited on lookup completion 252 wt=N Number of alloc reqs that waited on lookup completion
252 nbf=N Number of alloc reqs rejected -ENOBUFS 253 nbf=N Number of alloc reqs rejected -ENOBUFS
254 int=N Number of alloc reqs aborted -ERESTARTSYS
253 ops=N Number of alloc reqs submitted 255 ops=N Number of alloc reqs submitted
254 owt=N Number of alloc reqs waited for CPU time 256 owt=N Number of alloc reqs waited for CPU time
257 abt=N Number of alloc reqs aborted due to object death
255 Retrvls n=N Number of retrieval (read) requests seen 258 Retrvls n=N Number of retrieval (read) requests seen
256 ok=N Number of successful retr reqs 259 ok=N Number of successful retr reqs
257 wt=N Number of retr reqs that waited on lookup completion 260 wt=N Number of retr reqs that waited on lookup completion
@@ -261,6 +264,7 @@ proc files.
261 oom=N Number of retr reqs failed -ENOMEM 264 oom=N Number of retr reqs failed -ENOMEM
262 ops=N Number of retr reqs submitted 265 ops=N Number of retr reqs submitted
263 owt=N Number of retr reqs waited for CPU time 266 owt=N Number of retr reqs waited for CPU time
267 abt=N Number of retr reqs aborted due to object death
264 Stores n=N Number of storage (write) requests seen 268 Stores n=N Number of storage (write) requests seen
265 ok=N Number of successful store reqs 269 ok=N Number of successful store reqs
266 agn=N Number of store reqs on a page already pending storage 270 agn=N Number of store reqs on a page already pending storage
@@ -268,12 +272,37 @@ proc files.
268 oom=N Number of store reqs failed -ENOMEM 272 oom=N Number of store reqs failed -ENOMEM
269 ops=N Number of store reqs submitted 273 ops=N Number of store reqs submitted
270 run=N Number of store reqs granted CPU time 274 run=N Number of store reqs granted CPU time
275 pgs=N Number of pages given store req processing time
276 rxd=N Number of store reqs deleted from tracking tree
277 olm=N Number of store reqs over store limit
278 VmScan nos=N Number of release reqs against pages with no pending store
279 gon=N Number of release reqs against pages stored by time lock granted
280 bsy=N Number of release reqs ignored due to in-progress store
281 can=N Number of page stores cancelled due to release req
271 Ops pend=N Number of times async ops added to pending queues 282 Ops pend=N Number of times async ops added to pending queues
272 run=N Number of times async ops given CPU time 283 run=N Number of times async ops given CPU time
273 enq=N Number of times async ops queued for processing 284 enq=N Number of times async ops queued for processing
285 can=N Number of async ops cancelled
286 rej=N Number of async ops rejected due to object lookup/create failure
274 dfr=N Number of async ops queued for deferred release 287 dfr=N Number of async ops queued for deferred release
275 rel=N Number of async ops released 288 rel=N Number of async ops released
276 gc=N Number of deferred-release async ops garbage collected 289 gc=N Number of deferred-release async ops garbage collected
290 CacheOp alo=N Number of in-progress alloc_object() cache ops
291 luo=N Number of in-progress lookup_object() cache ops
292 luc=N Number of in-progress lookup_complete() cache ops
293 gro=N Number of in-progress grab_object() cache ops
294 upo=N Number of in-progress update_object() cache ops
295 dro=N Number of in-progress drop_object() cache ops
296 pto=N Number of in-progress put_object() cache ops
297 syn=N Number of in-progress sync_cache() cache ops
298 atc=N Number of in-progress attr_changed() cache ops
299 rap=N Number of in-progress read_or_alloc_page() cache ops
300 ras=N Number of in-progress read_or_alloc_pages() cache ops
301 alp=N Number of in-progress allocate_page() cache ops
302 als=N Number of in-progress allocate_pages() cache ops
303 wrp=N Number of in-progress write_page() cache ops
304 ucp=N Number of in-progress uncache_page() cache ops
305 dsp=N Number of in-progress dissociate_pages() cache ops
277 306
278 307
279 (*) /proc/fs/fscache/histogram 308 (*) /proc/fs/fscache/histogram
@@ -299,6 +328,87 @@ proc files.
299 jiffy range covered, and the SECS field the equivalent number of seconds. 328 jiffy range covered, and the SECS field the equivalent number of seconds.
300 329
301 330
331===========
332OBJECT LIST
333===========
334
335If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
336list of all the objects currently allocated and allow them to be viewed
337through:
338
339 /proc/fs/fscache/objects
340
341This will look something like:
342
343 [root@andromeda ~]# head /proc/fs/fscache/objects
344 OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA
345 ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
346 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
347 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
348
349where the first set of columns before the '|' describe the object:
350
351 COLUMN DESCRIPTION
352 ======= ===============================================================
353 OBJECT Object debugging ID (appears as OBJ%x in some debug messages)
354 PARENT Debugging ID of parent object
355 STAT Object state
356 CHLDN Number of child objects of this object
357 OPS Number of outstanding operations on this object
358 OOP Number of outstanding child object management operations
359 IPR
360 EX Number of outstanding exclusive operations
361 READS Number of outstanding read operations
362 EM Object's event mask
363 EV Events raised on this object
364 F Object flags
365 S Object slow-work work item flags
366
367and the second set of columns describe the object's cookie, if present:
368
369 COLUMN DESCRIPTION
370 =============== =======================================================
371 NETFS_COOKIE_DEF Name of netfs cookie definition
372 TY Cookie type (IX - index, DT - data, hex - special)
373 FL Cookie flags
374 NETFS_DATA Netfs private data stored in the cookie
375 OBJECT_KEY Object key } 1 column, with separating comma
376 AUX_DATA Object aux data } presence may be configured
377
 378The data shown may be filtered by attaching a key to an appropriate keyring
379before viewing the file. Something like:
380
381 keyctl add user fscache:objlist <restrictions> @s
382
383where <restrictions> are a selection of the following letters:
384
385 K Show hexdump of object key (don't show if not given)
386 A Show hexdump of object aux data (don't show if not given)
387
388and the following paired letters:
389
390 C Show objects that have a cookie
391 c Show objects that don't have a cookie
392 B Show objects that are busy
393 b Show objects that aren't busy
394 W Show objects that have pending writes
395 w Show objects that don't have pending writes
396 R Show objects that have outstanding reads
397 r Show objects that don't have outstanding reads
398 S Show objects that have slow work queued
399 s Show objects that don't have slow work queued
400
401If neither side of a letter pair is given, then both are implied. For example:
402
403 keyctl add user fscache:objlist KB @s
404
405shows objects that are busy, and lists their object keys, but does not dump
406their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is
407not implied.
408
409By default all objects and all fields will be shown.
410
411
302========= 412=========
303DEBUGGING 413DEBUGGING
304========= 414=========
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 2666b1ed5e9e..1902c57b72ef 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -641,7 +641,7 @@ data file must be retired (see the relinquish cookie function below).
641 641
642Furthermore, note that this does not cancel the asynchronous read or write 642Furthermore, note that this does not cancel the asynchronous read or write
643operation started by the read/alloc and write functions, so the page 643operation started by the read/alloc and write functions, so the page
644invalidation and release functions must use: 644invalidation functions must use:
645 645
646 bool fscache_check_page_write(struct fscache_cookie *cookie, 646 bool fscache_check_page_write(struct fscache_cookie *cookie,
647 struct page *page); 647 struct page *page);
@@ -654,6 +654,25 @@ to see if a page is being written to the cache, and:
654to wait for it to finish if it is. 654to wait for it to finish if it is.
655 655
656 656
657When releasepage() is being implemented, a special FS-Cache function exists to
658manage the heuristics of coping with vmscan trying to eject pages, which may
659conflict with the cache trying to write pages to the cache (which may itself
660need to allocate memory):
661
662 bool fscache_maybe_release_page(struct fscache_cookie *cookie,
663 struct page *page,
664 gfp_t gfp);
665
666This takes the netfs cookie, and the page and gfp arguments as supplied to
 667releasepage(). It will return false if the page cannot be released yet for
 668some reason; if it returns true, the page has been uncached and can now be
669released.
670
671To make a page available for release, this function may wait for an outstanding
672storage request to complete, or it may attempt to cancel the storage request -
673in which case the page will not be stored in the cache this time.
674
675
657========================== 676==========================
658INDEX AND DATA FILE UPDATE 677INDEX AND DATA FILE UPDATE
659========================== 678==========================
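
For orientation, here is a minimal sketch of how a netfs ->releasepage() might
call the fscache_maybe_release_page() helper documented above. It mirrors the
fs/afs/file.c change later in this patch; my_releasepage() and
my_inode_cookie() are hypothetical names, not part of the patch:

	#include <linux/fs.h>
	#include <linux/pagemap.h>
	#include <linux/fscache.h>

	/* hypothetical helper: return the FS-Cache cookie attached to this inode */
	extern struct fscache_cookie *my_inode_cookie(struct inode *inode);

	static int my_releasepage(struct page *page, gfp_t gfp_flags)
	{
		struct fscache_cookie *cookie = my_inode_cookie(page->mapping->host);

		/* this may wait for a pending store to finish, or cancel it,
		 * depending on the gfp flags; false means the page cannot be
		 * released yet */
		if (!fscache_maybe_release_page(cookie, page, gfp_flags))
			return 0;

		/* the page has been uncached and may now be released */
		return 1;
	}
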
diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt
index ebc50f808ea4..52bc31433723 100644
--- a/Documentation/slow-work.txt
+++ b/Documentation/slow-work.txt
@@ -41,6 +41,13 @@ expand files, provided the time taken to do so isn't too long.
41Operations of both types may sleep during execution, thus tying up the thread 41Operations of both types may sleep during execution, thus tying up the thread
42loaned to it. 42loaned to it.
43 43
44A further class of work item is available, based on the slow work item class:
45
46 (*) Delayed slow work items.
47
48These are slow work items that have a timer to defer queueing of the item for
49a while.
50
44 51
45THREAD-TO-CLASS ALLOCATION 52THREAD-TO-CLASS ALLOCATION
46-------------------------- 53--------------------------
@@ -64,9 +71,11 @@ USING SLOW WORK ITEMS
64Firstly, a module or subsystem wanting to make use of slow work items must 71Firstly, a module or subsystem wanting to make use of slow work items must
65register its interest: 72register its interest:
66 73
67 int ret = slow_work_register_user(); 74 int ret = slow_work_register_user(struct module *module);
68 75
69This will return 0 if successful, or a -ve error upon failure. 76This will return 0 if successful, or a -ve error upon failure. The module
77pointer should be the module interested in using this facility (almost
78certainly THIS_MODULE).
70 79
71 80
72Slow work items may then be set up by: 81Slow work items may then be set up by:
@@ -93,6 +102,10 @@ Slow work items may then be set up by:
93 102
94 or: 103 or:
95 104
105 delayed_slow_work_init(&myitem, &myitem_ops);
106
107 or:
108
96 vslow_work_init(&myitem, &myitem_ops); 109 vslow_work_init(&myitem, &myitem_ops);
97 110
98 depending on its class. 111 depending on its class.
@@ -102,15 +115,92 @@ A suitably set up work item can then be enqueued for processing:
102 int ret = slow_work_enqueue(&myitem); 115 int ret = slow_work_enqueue(&myitem);
103 116
104This will return a -ve error if the thread pool is unable to gain a reference 117This will return a -ve error if the thread pool is unable to gain a reference
105on the item, 0 otherwise. 118on the item, 0 otherwise, or (for delayed work):
119
120 int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay);
106 121
107 122
108The items are reference counted, so there ought to be no need for a flush 123The items are reference counted, so there ought to be no need for a flush
109operation. When all a module's slow work items have been processed, and the 124operation. But as the reference counting is optional, means to cancel
125existing work items are also included:
126
127 cancel_slow_work(&myitem);
128 cancel_delayed_slow_work(&myitem);
129
 130can be used to cancel pending work. These cancel functions wait for
 131existing work to have been executed (or prevent it from executing, depending
 132on timing).
133
134
135When all a module's slow work items have been processed, and the
110module has no further interest in the facility, it should unregister its 136module has no further interest in the facility, it should unregister its
111interest: 137interest:
112 138
113 slow_work_unregister_user(); 139 slow_work_unregister_user(struct module *module);
140
141The module pointer is used to wait for all outstanding work items for that
142module before completing the unregistration. This prevents the put_ref() code
143from being taken away before it completes. module should almost certainly be
144THIS_MODULE.
145
146
147================
148HELPER FUNCTIONS
149================
150
151The slow-work facility provides a function by which it can be determined
152whether or not an item is queued for later execution:
153
154 bool queued = slow_work_is_queued(struct slow_work *work);
155
156If it returns false, then the item is not on the queue (it may be executing
157with a requeue pending). This can be used to work out whether an item on which
158another depends is on the queue, thus allowing a dependent item to be queued
159after it.
160
161If the above shows an item on which another depends not to be queued, then the
162owner of the dependent item might need to wait. However, to avoid locking up
 163the threads unnecessarily by sleeping in them, it can make sense under some
164circumstances to return the work item to the queue, thus deferring it until
165some other items have had a chance to make use of the yielded thread.
166
167To yield a thread and defer an item, the work function should simply enqueue
168the work item again and return. However, this doesn't work if there's nothing
169actually on the queue, as the thread just vacated will jump straight back into
170the item's work function, thus busy waiting on a CPU.
171
172Instead, the item should use the thread to wait for the dependency to go away,
173but rather than using schedule() or schedule_timeout() to sleep, it should use
174the following function:
175
176 bool requeue = slow_work_sleep_till_thread_needed(
177 struct slow_work *work,
178 signed long *_timeout);
179
180This will add a second wait and then sleep, such that it will be woken up if
181either something appears on the queue that could usefully make use of the
182thread - and behind which this item can be queued, or if the event the caller
183set up to wait for happens. True will be returned if something else appeared
 184on the queue and this work function should perhaps return, or false if
185something else woke it up. The timeout is as for schedule_timeout().
186
187For example:
188
189 wq = bit_waitqueue(&my_flags, MY_BIT);
190 init_wait(&wait);
191 requeue = false;
192 do {
193 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
194 if (!test_bit(MY_BIT, &my_flags))
195 break;
196 requeue = slow_work_sleep_till_thread_needed(&my_work,
197 &timeout);
198 } while (timeout > 0 && !requeue);
199 finish_wait(wq, &wait);
 200 if (!test_bit(MY_BIT, &my_flags))
201 goto do_my_thing;
202 if (requeue)
203 return; // to slow_work
114 204
115 205
116=============== 206===============
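
Pulling together the registration, initialisation, enqueueing and cancellation
calls described above, a module using delayed slow work might be structured
roughly as follows. This is a minimal sketch rather than part of the patch:
the struct delayed_slow_work type and the .owner field of slow_work_ops are
assumed from include/linux/slow-work.h as modified by this series, and
myitem/myitem_ops are the placeholder names used in the text:

	#include <linux/module.h>
	#include <linux/jiffies.h>
	#include <linux/slow-work.h>

	static void myitem_execute(struct slow_work *work)
	{
		/* the actual (possibly slow, possibly sleeping) work goes here */
	}

	static const struct slow_work_ops myitem_ops = {
		.owner	 = THIS_MODULE,
		.execute = myitem_execute,
	};

	static struct delayed_slow_work myitem;

	static int __init my_init(void)
	{
		int ret;

		/* tell the facility which module's items these will be */
		ret = slow_work_register_user(THIS_MODULE);
		if (ret < 0)
			return ret;

		delayed_slow_work_init(&myitem, &myitem_ops);

		/* queue the item to be executed roughly five seconds from now */
		ret = delayed_slow_work_enqueue(&myitem, 5 * HZ);
		if (ret < 0)
			slow_work_unregister_user(THIS_MODULE);
		return ret;
	}

	static void __exit my_exit(void)
	{
		/* wait for, or prevent, execution of the pending item */
		cancel_delayed_slow_work(&myitem);
		slow_work_unregister_user(THIS_MODULE);
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");
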
@@ -118,7 +208,8 @@ ITEM OPERATIONS
118=============== 208===============
119 209
120Each work item requires a table of operations of type struct slow_work_ops. 210Each work item requires a table of operations of type struct slow_work_ops.
121All members are required: 211Only ->execute() is required; the getting and putting of a reference and the
212describing of an item are all optional.
122 213
123 (*) Get a reference on an item: 214 (*) Get a reference on an item:
124 215
@@ -148,6 +239,16 @@ All members are required:
148 This should perform the work required of the item. It may sleep, it may 239 This should perform the work required of the item. It may sleep, it may
149 perform disk I/O and it may wait for locks. 240 perform disk I/O and it may wait for locks.
150 241
242 (*) View an item through /proc:
243
244 void (*desc)(struct slow_work *work, struct seq_file *m);
245
246 If supplied, this should print to 'm' a small string describing the work
247 the item is to do. This should be no more than about 40 characters, and
248 shouldn't include a newline character.
249
250 See the 'Viewing executing and queued items' section below.
251
151 252
152================== 253==================
153POOL CONFIGURATION 254POOL CONFIGURATION
@@ -172,3 +273,50 @@ The slow-work thread pool has a number of configurables:
172 is bounded to between 1 and one fewer than the number of active threads. 273 is bounded to between 1 and one fewer than the number of active threads.
173 This ensures there is always at least one thread that can process very 274 This ensures there is always at least one thread that can process very
174 slow work items, and always at least one thread that won't. 275 slow work items, and always at least one thread that won't.
276
277
278==================================
279VIEWING EXECUTING AND QUEUED ITEMS
280==================================
281
282If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available:
283
284 /proc/slow_work_rq
285
286through which the list of work items being executed and the queues of items to
287be executed may be viewed. The owner of a work item is given the chance to
288add some information of its own.
289
290The contents look something like the following:
291
292 THR PID ITEM ADDR FL MARK DESC
293 === ===== ================ == ===== ==========
294 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK
295 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2
296 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK
297 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN
298 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2
299 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2
300 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2
301 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN
302 vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2
303 vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2
304 vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2
305 vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2
306 vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2
307 vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2
308 vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK
309 vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK
310 vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK
311 vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK
312 vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK
313 vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK
314 vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK
315 vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK
316
317In the 'THR' column, executing items show the thread they're occupying and
 318queued items indicate which queue they're on. 'PID' shows the process ID of
319a slow-work thread that's executing something. 'FL' shows the work item flags.
320'MARK' indicates how long since an item was queued or began executing. Lastly,
321the 'DESC' column permits the owner of an item to give some information.
322
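
The DESC strings in the listing above come from each item's ->desc()
operation. A minimal, hypothetical sketch of such a callback follows; the
my_work container type and the strings printed are invented for illustration
and are not taken from this patch:

	#include <linux/kernel.h>
	#include <linux/seq_file.h>
	#include <linux/slow-work.h>

	/* hypothetical container embedding a slow work item */
	struct my_work {
		struct slow_work	work;
		unsigned		debug_id;
		const char		*what;
	};

	/* optional ->desc() op: emit a short (~40 chars max) description,
	 * without a trailing newline */
	static void my_work_desc(struct slow_work *work, struct seq_file *m)
	{
		struct my_work *mw = container_of(work, struct my_work, work);

		seq_printf(m, "MYMOD: item %x: %s", mw->debug_id, mw->what);
	}

A pointer to such a function would go in the ->desc member of the item's
slow_work_ops table, alongside ->execute().
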
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 51c94e26a346..bcc5357a9069 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -343,18 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
343 343
344 BUG_ON(!vcookie->fscache); 344 BUG_ON(!vcookie->fscache);
345 345
346 if (PageFsCache(page)) { 346 return fscache_maybe_release_page(vnode->cache, page, gfp);
347 if (fscache_check_page_write(vcookie->fscache, page)) {
348 if (!(gfp & __GFP_WAIT))
349 return 0;
350 fscache_wait_on_page_write(vcookie->fscache, page);
351 }
352
353 fscache_uncache_page(vcookie->fscache, page);
354 ClearPageFsCache(page);
355 }
356
357 return 1;
358} 347}
359 348
360void __v9fs_fscache_invalidate_page(struct page *page) 349void __v9fs_fscache_invalidate_page(struct page *page)
@@ -368,7 +357,6 @@ void __v9fs_fscache_invalidate_page(struct page *page)
368 fscache_wait_on_page_write(vcookie->fscache, page); 357 fscache_wait_on_page_write(vcookie->fscache, page);
369 BUG_ON(!PageLocked(page)); 358 BUG_ON(!PageLocked(page));
370 fscache_uncache_page(vcookie->fscache, page); 359 fscache_uncache_page(vcookie->fscache, page);
371 ClearPageFsCache(page);
372 } 360 }
373} 361}
374 362
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 681c2a7b013f..39b301662f22 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -315,7 +315,6 @@ static void afs_invalidatepage(struct page *page, unsigned long offset)
315 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); 315 struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
316 fscache_wait_on_page_write(vnode->cache, page); 316 fscache_wait_on_page_write(vnode->cache, page);
317 fscache_uncache_page(vnode->cache, page); 317 fscache_uncache_page(vnode->cache, page);
318 ClearPageFsCache(page);
319 } 318 }
320#endif 319#endif
321 320
@@ -349,17 +348,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
349 /* deny if page is being written to the cache and the caller hasn't 348 /* deny if page is being written to the cache and the caller hasn't
350 * elected to wait */ 349 * elected to wait */
351#ifdef CONFIG_AFS_FSCACHE 350#ifdef CONFIG_AFS_FSCACHE
352 if (PageFsCache(page)) { 351 if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
353 if (fscache_check_page_write(vnode->cache, page)) { 352 _leave(" = F [cache busy]");
354 if (!(gfp_flags & __GFP_WAIT)) { 353 return 0;
355 _leave(" = F [cache busy]");
356 return 0;
357 }
358 fscache_wait_on_page_write(vnode->cache, page);
359 }
360
361 fscache_uncache_page(vnode->cache, page);
362 ClearPageFsCache(page);
363 } 354 }
364#endif 355#endif
365 356
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 431accd475a7..27089311fbea 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -114,8 +114,9 @@ nomem_lookup_data:
114 114
115/* 115/*
116 * attempt to look up the nominated node in this cache 116 * attempt to look up the nominated node in this cache
117 * - return -ETIMEDOUT to be scheduled again
117 */ 118 */
118static void cachefiles_lookup_object(struct fscache_object *_object) 119static int cachefiles_lookup_object(struct fscache_object *_object)
119{ 120{
120 struct cachefiles_lookup_data *lookup_data; 121 struct cachefiles_lookup_data *lookup_data;
121 struct cachefiles_object *parent, *object; 122 struct cachefiles_object *parent, *object;
@@ -145,13 +146,15 @@ static void cachefiles_lookup_object(struct fscache_object *_object)
145 object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) 146 object->fscache.cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
146 cachefiles_attr_changed(&object->fscache); 147 cachefiles_attr_changed(&object->fscache);
147 148
148 if (ret < 0) { 149 if (ret < 0 && ret != -ETIMEDOUT) {
149 printk(KERN_WARNING "CacheFiles: Lookup failed error %d\n", 150 if (ret != -ENOBUFS)
150 ret); 151 printk(KERN_WARNING
152 "CacheFiles: Lookup failed error %d\n", ret);
151 fscache_object_lookup_error(&object->fscache); 153 fscache_object_lookup_error(&object->fscache);
152 } 154 }
153 155
154 _leave(" [%d]", ret); 156 _leave(" [%d]", ret);
157 return ret;
155} 158}
156 159
157/* 160/*
@@ -331,6 +334,7 @@ static void cachefiles_put_object(struct fscache_object *_object)
331 } 334 }
332 335
333 cache = object->fscache.cache; 336 cache = object->fscache.cache;
337 fscache_object_destroy(&object->fscache);
334 kmem_cache_free(cachefiles_object_jar, object); 338 kmem_cache_free(cachefiles_object_jar, object);
335 fscache_object_destroyed(cache); 339 fscache_object_destroyed(cache);
336 } 340 }
@@ -403,12 +407,26 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
403 if (oi_size == ni_size) 407 if (oi_size == ni_size)
404 return 0; 408 return 0;
405 409
406 newattrs.ia_size = ni_size;
407 newattrs.ia_valid = ATTR_SIZE;
408
409 cachefiles_begin_secure(cache, &saved_cred); 410 cachefiles_begin_secure(cache, &saved_cred);
410 mutex_lock(&object->backer->d_inode->i_mutex); 411 mutex_lock(&object->backer->d_inode->i_mutex);
412
413 /* if there's an extension to a partial page at the end of the backing
414 * file, we need to discard the partial page so that we pick up new
415 * data after it */
416 if (oi_size & ~PAGE_MASK && ni_size > oi_size) {
417 _debug("discard tail %llx", oi_size);
418 newattrs.ia_valid = ATTR_SIZE;
419 newattrs.ia_size = oi_size & PAGE_MASK;
420 ret = notify_change(object->backer, &newattrs);
421 if (ret < 0)
422 goto truncate_failed;
423 }
424
425 newattrs.ia_valid = ATTR_SIZE;
426 newattrs.ia_size = ni_size;
411 ret = notify_change(object->backer, &newattrs); 427 ret = notify_change(object->backer, &newattrs);
428
429truncate_failed:
412 mutex_unlock(&object->backer->d_inode->i_mutex); 430 mutex_unlock(&object->backer->d_inode->i_mutex);
413 cachefiles_end_secure(cache, saved_cred); 431 cachefiles_end_secure(cache, saved_cred);
414 432
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 4ce818ae39ea..14ac4806e291 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -21,17 +21,81 @@
21#include <linux/security.h> 21#include <linux/security.h>
22#include "internal.h" 22#include "internal.h"
23 23
24static int cachefiles_wait_bit(void *flags) 24#define CACHEFILES_KEYBUF_SIZE 512
25
26/*
27 * dump debugging info about an object
28 */
29static noinline
30void __cachefiles_printk_object(struct cachefiles_object *object,
31 const char *prefix,
32 u8 *keybuf)
25{ 33{
26 schedule(); 34 struct fscache_cookie *cookie;
27 return 0; 35 unsigned keylen, loop;
36
37 printk(KERN_ERR "%sobject: OBJ%x\n",
38 prefix, object->fscache.debug_id);
39 printk(KERN_ERR "%sobjstate=%s fl=%lx swfl=%lx ev=%lx[%lx]\n",
40 prefix, fscache_object_states[object->fscache.state],
41 object->fscache.flags, object->fscache.work.flags,
42 object->fscache.events,
43 object->fscache.event_mask & FSCACHE_OBJECT_EVENTS_MASK);
44 printk(KERN_ERR "%sops=%u inp=%u exc=%u\n",
45 prefix, object->fscache.n_ops, object->fscache.n_in_progress,
46 object->fscache.n_exclusive);
47 printk(KERN_ERR "%sparent=%p\n",
48 prefix, object->fscache.parent);
49
50 spin_lock(&object->fscache.lock);
51 cookie = object->fscache.cookie;
52 if (cookie) {
53 printk(KERN_ERR "%scookie=%p [pr=%p nd=%p fl=%lx]\n",
54 prefix,
55 object->fscache.cookie,
56 object->fscache.cookie->parent,
57 object->fscache.cookie->netfs_data,
58 object->fscache.cookie->flags);
59 if (keybuf)
60 keylen = cookie->def->get_key(cookie->netfs_data, keybuf,
61 CACHEFILES_KEYBUF_SIZE);
62 else
63 keylen = 0;
64 } else {
65 printk(KERN_ERR "%scookie=NULL\n", prefix);
66 keylen = 0;
67 }
68 spin_unlock(&object->fscache.lock);
69
70 if (keylen) {
71 printk(KERN_ERR "%skey=[%u] '", prefix, keylen);
72 for (loop = 0; loop < keylen; loop++)
73 printk("%02x", keybuf[loop]);
74 printk("'\n");
75 }
76}
77
78/*
79 * dump debugging info about a pair of objects
80 */
81static noinline void cachefiles_printk_object(struct cachefiles_object *object,
82 struct cachefiles_object *xobject)
83{
84 u8 *keybuf;
85
86 keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO);
87 if (object)
88 __cachefiles_printk_object(object, "", keybuf);
89 if (xobject)
90 __cachefiles_printk_object(xobject, "x", keybuf);
91 kfree(keybuf);
28} 92}
29 93
30/* 94/*
31 * record the fact that an object is now active 95 * record the fact that an object is now active
32 */ 96 */
33static void cachefiles_mark_object_active(struct cachefiles_cache *cache, 97static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
34 struct cachefiles_object *object) 98 struct cachefiles_object *object)
35{ 99{
36 struct cachefiles_object *xobject; 100 struct cachefiles_object *xobject;
37 struct rb_node **_p, *_parent = NULL; 101 struct rb_node **_p, *_parent = NULL;
@@ -42,8 +106,11 @@ static void cachefiles_mark_object_active(struct cachefiles_cache *cache,
42try_again: 106try_again:
43 write_lock(&cache->active_lock); 107 write_lock(&cache->active_lock);
44 108
45 if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) 109 if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
110 printk(KERN_ERR "CacheFiles: Error: Object already active\n");
111 cachefiles_printk_object(object, NULL);
46 BUG(); 112 BUG();
113 }
47 114
48 dentry = object->dentry; 115 dentry = object->dentry;
49 _p = &cache->active_nodes.rb_node; 116 _p = &cache->active_nodes.rb_node;
@@ -66,8 +133,8 @@ try_again:
66 rb_insert_color(&object->active_node, &cache->active_nodes); 133 rb_insert_color(&object->active_node, &cache->active_nodes);
67 134
68 write_unlock(&cache->active_lock); 135 write_unlock(&cache->active_lock);
69 _leave(""); 136 _leave(" = 0");
70 return; 137 return 0;
71 138
72 /* an old object from a previous incarnation is hogging the slot - we 139 /* an old object from a previous incarnation is hogging the slot - we
73 * need to wait for it to be destroyed */ 140 * need to wait for it to be destroyed */
@@ -76,44 +143,70 @@ wait_for_old_object:
76 printk(KERN_ERR "\n"); 143 printk(KERN_ERR "\n");
77 printk(KERN_ERR "CacheFiles: Error:" 144 printk(KERN_ERR "CacheFiles: Error:"
78 " Unexpected object collision\n"); 145 " Unexpected object collision\n");
79 printk(KERN_ERR "xobject: OBJ%x\n", 146 cachefiles_printk_object(object, xobject);
80 xobject->fscache.debug_id);
81 printk(KERN_ERR "xobjstate=%s\n",
82 fscache_object_states[xobject->fscache.state]);
83 printk(KERN_ERR "xobjflags=%lx\n", xobject->fscache.flags);
84 printk(KERN_ERR "xobjevent=%lx [%lx]\n",
85 xobject->fscache.events, xobject->fscache.event_mask);
86 printk(KERN_ERR "xops=%u inp=%u exc=%u\n",
87 xobject->fscache.n_ops, xobject->fscache.n_in_progress,
88 xobject->fscache.n_exclusive);
89 printk(KERN_ERR "xcookie=%p [pr=%p nd=%p fl=%lx]\n",
90 xobject->fscache.cookie,
91 xobject->fscache.cookie->parent,
92 xobject->fscache.cookie->netfs_data,
93 xobject->fscache.cookie->flags);
94 printk(KERN_ERR "xparent=%p\n",
95 xobject->fscache.parent);
96 printk(KERN_ERR "object: OBJ%x\n",
97 object->fscache.debug_id);
98 printk(KERN_ERR "cookie=%p [pr=%p nd=%p fl=%lx]\n",
99 object->fscache.cookie,
100 object->fscache.cookie->parent,
101 object->fscache.cookie->netfs_data,
102 object->fscache.cookie->flags);
103 printk(KERN_ERR "parent=%p\n",
104 object->fscache.parent);
105 BUG(); 147 BUG();
106 } 148 }
107 atomic_inc(&xobject->usage); 149 atomic_inc(&xobject->usage);
108 write_unlock(&cache->active_lock); 150 write_unlock(&cache->active_lock);
109 151
110 _debug(">>> wait"); 152 if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
111 wait_on_bit(&xobject->flags, CACHEFILES_OBJECT_ACTIVE, 153 wait_queue_head_t *wq;
112 cachefiles_wait_bit, TASK_UNINTERRUPTIBLE); 154
113 _debug("<<< waited"); 155 signed long timeout = 60 * HZ;
156 wait_queue_t wait;
157 bool requeue;
158
159 /* if the object we're waiting for is queued for processing,
160 * then just put ourselves on the queue behind it */
161 if (slow_work_is_queued(&xobject->fscache.work)) {
162 _debug("queue OBJ%x behind OBJ%x immediately",
163 object->fscache.debug_id,
164 xobject->fscache.debug_id);
165 goto requeue;
166 }
167
168 /* otherwise we sleep until either the object we're waiting for
169 * is done, or the slow-work facility wants the thread back to
170 * do other work */
171 wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
172 init_wait(&wait);
173 requeue = false;
174 do {
175 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
176 if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
177 break;
178 requeue = slow_work_sleep_till_thread_needed(
179 &object->fscache.work, &timeout);
180 } while (timeout > 0 && !requeue);
181 finish_wait(wq, &wait);
182
183 if (requeue &&
184 test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
185 _debug("queue OBJ%x behind OBJ%x after wait",
186 object->fscache.debug_id,
187 xobject->fscache.debug_id);
188 goto requeue;
189 }
190
191 if (timeout <= 0) {
192 printk(KERN_ERR "\n");
193 printk(KERN_ERR "CacheFiles: Error: Overlong"
194 " wait for old active object to go away\n");
195 cachefiles_printk_object(object, xobject);
196 goto requeue;
197 }
198 }
199
200 ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
114 201
115 cache->cache.ops->put_object(&xobject->fscache); 202 cache->cache.ops->put_object(&xobject->fscache);
116 goto try_again; 203 goto try_again;
204
205requeue:
206 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
207 cache->cache.ops->put_object(&xobject->fscache);
208 _leave(" = -ETIMEDOUT");
209 return -ETIMEDOUT;
117} 210}
118 211
119/* 212/*
@@ -254,7 +347,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
254 347
255 dir = dget_parent(object->dentry); 348 dir = dget_parent(object->dentry);
256 349
257 mutex_lock(&dir->d_inode->i_mutex); 350 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
258 ret = cachefiles_bury_object(cache, dir, object->dentry); 351 ret = cachefiles_bury_object(cache, dir, object->dentry);
259 352
260 dput(dir); 353 dput(dir);
@@ -307,7 +400,7 @@ lookup_again:
307 /* search the current directory for the element name */ 400 /* search the current directory for the element name */
308 _debug("lookup '%s'", name); 401 _debug("lookup '%s'", name);
309 402
310 mutex_lock(&dir->d_inode->i_mutex); 403 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
311 404
312 start = jiffies; 405 start = jiffies;
313 next = lookup_one_len(name, dir, nlen); 406 next = lookup_one_len(name, dir, nlen);
@@ -418,12 +511,15 @@ lookup_again:
418 } 511 }
419 512
420 /* note that we're now using this object */ 513 /* note that we're now using this object */
421 cachefiles_mark_object_active(cache, object); 514 ret = cachefiles_mark_object_active(cache, object);
422 515
423 mutex_unlock(&dir->d_inode->i_mutex); 516 mutex_unlock(&dir->d_inode->i_mutex);
424 dput(dir); 517 dput(dir);
425 dir = NULL; 518 dir = NULL;
426 519
520 if (ret == -ETIMEDOUT)
521 goto mark_active_timed_out;
522
427 _debug("=== OBTAINED_OBJECT ==="); 523 _debug("=== OBTAINED_OBJECT ===");
428 524
429 if (object->new) { 525 if (object->new) {
@@ -467,6 +563,10 @@ create_error:
467 cachefiles_io_error(cache, "Create/mkdir failed"); 563 cachefiles_io_error(cache, "Create/mkdir failed");
468 goto error; 564 goto error;
469 565
566mark_active_timed_out:
567 _debug("mark active timed out");
568 goto release_dentry;
569
470check_error: 570check_error:
471 _debug("check error %d", ret); 571 _debug("check error %d", ret);
472 write_lock(&cache->active_lock); 572 write_lock(&cache->active_lock);
@@ -474,7 +574,7 @@ check_error:
474 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags); 574 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
475 wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE); 575 wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
476 write_unlock(&cache->active_lock); 576 write_unlock(&cache->active_lock);
477 577release_dentry:
478 dput(object->dentry); 578 dput(object->dentry);
479 object->dentry = NULL; 579 object->dentry = NULL;
480 goto error_out; 580 goto error_out;
@@ -495,9 +595,6 @@ error:
495error_out2: 595error_out2:
496 dput(dir); 596 dput(dir);
497error_out: 597error_out:
498 if (ret == -ENOSPC)
499 ret = -ENOBUFS;
500
501 _leave(" = error %d", -ret); 598 _leave(" = error %d", -ret);
502 return ret; 599 return ret;
503} 600}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a69787e7dd96..1d8332563863 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -40,8 +40,10 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
40 40
41 _debug("--- monitor %p %lx ---", page, page->flags); 41 _debug("--- monitor %p %lx ---", page, page->flags);
42 42
43 if (!PageUptodate(page) && !PageError(page)) 43 if (!PageUptodate(page) && !PageError(page)) {
44 dump_stack(); 44 /* unlocked, not uptodate and not erronous? */
45 _debug("page probably truncated");
46 }
45 47
46 /* remove from the waitqueue */ 48 /* remove from the waitqueue */
47 list_del(&wait->task_list); 49 list_del(&wait->task_list);
@@ -61,6 +63,84 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
61} 63}
62 64
63/* 65/*
66 * handle a probably truncated page
67 * - check to see if the page is still relevant and reissue the read if
68 * possible
69 * - return -EIO on error, -ENODATA if the page is gone, -EINPROGRESS if we
70 * must wait again and 0 if successful
71 */
72static int cachefiles_read_reissue(struct cachefiles_object *object,
73 struct cachefiles_one_read *monitor)
74{
75 struct address_space *bmapping = object->backer->d_inode->i_mapping;
76 struct page *backpage = monitor->back_page, *backpage2;
77 int ret;
78
79 kenter("{ino=%lx},{%lx,%lx}",
80 object->backer->d_inode->i_ino,
81 backpage->index, backpage->flags);
82
83 /* skip if the page was truncated away completely */
84 if (backpage->mapping != bmapping) {
85 kleave(" = -ENODATA [mapping]");
86 return -ENODATA;
87 }
88
89 backpage2 = find_get_page(bmapping, backpage->index);
90 if (!backpage2) {
91 kleave(" = -ENODATA [gone]");
92 return -ENODATA;
93 }
94
95 if (backpage != backpage2) {
96 put_page(backpage2);
97 kleave(" = -ENODATA [different]");
98 return -ENODATA;
99 }
100
101 /* the page is still there and we already have a ref on it, so we don't
102 * need a second */
103 put_page(backpage2);
104
105 INIT_LIST_HEAD(&monitor->op_link);
106 add_page_wait_queue(backpage, &monitor->monitor);
107
108 if (trylock_page(backpage)) {
109 ret = -EIO;
110 if (PageError(backpage))
111 goto unlock_discard;
112 ret = 0;
113 if (PageUptodate(backpage))
114 goto unlock_discard;
115
116 kdebug("reissue read");
117 ret = bmapping->a_ops->readpage(NULL, backpage);
118 if (ret < 0)
119 goto unlock_discard;
120 }
121
122 /* but the page may have been read before the monitor was installed, so
123 * the monitor may miss the event - so we have to ensure that we do get
124 * one in such a case */
125 if (trylock_page(backpage)) {
126 _debug("jumpstart %p {%lx}", backpage, backpage->flags);
127 unlock_page(backpage);
128 }
129
130 /* it'll reappear on the todo list */
131 kleave(" = -EINPROGRESS");
132 return -EINPROGRESS;
133
134unlock_discard:
135 unlock_page(backpage);
136 spin_lock_irq(&object->work_lock);
137 list_del(&monitor->op_link);
138 spin_unlock_irq(&object->work_lock);
139 kleave(" = %d", ret);
140 return ret;
141}
142
143/*
64 * copy data from backing pages to netfs pages to complete a read operation 144 * copy data from backing pages to netfs pages to complete a read operation
65 * - driven by FS-Cache's thread pool 145 * - driven by FS-Cache's thread pool
66 */ 146 */
@@ -92,20 +172,26 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
92 172
93 _debug("- copy {%lu}", monitor->back_page->index); 173 _debug("- copy {%lu}", monitor->back_page->index);
94 174
95 error = -EIO; 175 recheck:
96 if (PageUptodate(monitor->back_page)) { 176 if (PageUptodate(monitor->back_page)) {
97 copy_highpage(monitor->netfs_page, monitor->back_page); 177 copy_highpage(monitor->netfs_page, monitor->back_page);
98 178
99 pagevec_add(&pagevec, monitor->netfs_page); 179 pagevec_add(&pagevec, monitor->netfs_page);
100 fscache_mark_pages_cached(monitor->op, &pagevec); 180 fscache_mark_pages_cached(monitor->op, &pagevec);
101 error = 0; 181 error = 0;
102 } 182 } else if (!PageError(monitor->back_page)) {
103 183 /* the page has probably been truncated */
104 if (error) 184 error = cachefiles_read_reissue(object, monitor);
185 if (error == -EINPROGRESS)
186 goto next;
187 goto recheck;
188 } else {
105 cachefiles_io_error_obj( 189 cachefiles_io_error_obj(
106 object, 190 object,
107 "Readpage failed on backing file %lx", 191 "Readpage failed on backing file %lx",
108 (unsigned long) monitor->back_page->flags); 192 (unsigned long) monitor->back_page->flags);
193 error = -EIO;
194 }
109 195
110 page_cache_release(monitor->back_page); 196 page_cache_release(monitor->back_page);
111 197
@@ -114,6 +200,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
114 fscache_put_retrieval(op); 200 fscache_put_retrieval(op);
115 kfree(monitor); 201 kfree(monitor);
116 202
203 next:
117 /* let the thread pool have some air occasionally */ 204 /* let the thread pool have some air occasionally */
118 max--; 205 max--;
119 if (max < 0 || need_resched()) { 206 if (max < 0 || need_resched()) {
@@ -333,7 +420,8 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
333 420
334 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits; 421 shift = PAGE_SHIFT - inode->i_sb->s_blocksize_bits;
335 422
336 op->op.flags = FSCACHE_OP_FAST; 423 op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
424 op->op.flags |= FSCACHE_OP_FAST;
337 op->op.processor = cachefiles_read_copier; 425 op->op.processor = cachefiles_read_copier;
338 426
339 pagevec_init(&pagevec, 0); 427 pagevec_init(&pagevec, 0);
@@ -639,7 +727,8 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
639 727
640 pagevec_init(&pagevec, 0); 728 pagevec_init(&pagevec, 0);
641 729
642 op->op.flags = FSCACHE_OP_FAST; 730 op->op.flags &= FSCACHE_OP_KEEP_FLAGS;
731 op->op.flags |= FSCACHE_OP_FAST;
643 op->op.processor = cachefiles_read_copier; 732 op->op.processor = cachefiles_read_copier;
644 733
645 INIT_LIST_HEAD(&backpages); 734 INIT_LIST_HEAD(&backpages);
@@ -801,7 +890,8 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
801 struct cachefiles_cache *cache; 890 struct cachefiles_cache *cache;
802 mm_segment_t old_fs; 891 mm_segment_t old_fs;
803 struct file *file; 892 struct file *file;
804 loff_t pos; 893 loff_t pos, eof;
894 size_t len;
805 void *data; 895 void *data;
806 int ret; 896 int ret;
807 897
@@ -835,15 +925,29 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
835 ret = -EIO; 925 ret = -EIO;
836 if (file->f_op->write) { 926 if (file->f_op->write) {
837 pos = (loff_t) page->index << PAGE_SHIFT; 927 pos = (loff_t) page->index << PAGE_SHIFT;
928
929 /* we mustn't write more data than we have, so we have
930 * to beware of a partial page at EOF */
931 eof = object->fscache.store_limit_l;
932 len = PAGE_SIZE;
933 if (eof & ~PAGE_MASK) {
934 ASSERTCMP(pos, <, eof);
935 if (eof - pos < PAGE_SIZE) {
936 _debug("cut short %llx to %llx",
937 pos, eof);
938 len = eof - pos;
939 ASSERTCMP(pos + len, ==, eof);
940 }
941 }
942
838 data = kmap(page); 943 data = kmap(page);
839 old_fs = get_fs(); 944 old_fs = get_fs();
840 set_fs(KERNEL_DS); 945 set_fs(KERNEL_DS);
841 ret = file->f_op->write( 946 ret = file->f_op->write(
842 file, (const void __user *) data, PAGE_SIZE, 947 file, (const void __user *) data, len, &pos);
843 &pos);
844 set_fs(old_fs); 948 set_fs(old_fs);
845 kunmap(page); 949 kunmap(page);
846 if (ret != PAGE_SIZE) 950 if (ret != len)
847 ret = -EIO; 951 ret = -EIO;
848 } 952 }
849 fput(file); 953 fput(file);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9a5e4f5f3122..29f1da761bbf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1037,7 +1037,7 @@ init_cifs(void)
1037 if (rc) 1037 if (rc)
1038 goto out_unregister_key_type; 1038 goto out_unregister_key_type;
1039#endif 1039#endif
1040 rc = slow_work_register_user(); 1040 rc = slow_work_register_user(THIS_MODULE);
1041 if (rc) 1041 if (rc)
1042 goto out_unregister_resolver_key; 1042 goto out_unregister_resolver_key;
1043 1043
diff --git a/fs/fscache/Kconfig b/fs/fscache/Kconfig
index 9bbb8ce7bea0..864dac20a242 100644
--- a/fs/fscache/Kconfig
+++ b/fs/fscache/Kconfig
@@ -54,3 +54,10 @@ config FSCACHE_DEBUG
54 enabled by setting bits in /sys/modules/fscache/parameter/debug. 54 enabled by setting bits in /sys/modules/fscache/parameter/debug.
55 55
56 See Documentation/filesystems/caching/fscache.txt for more information. 56 See Documentation/filesystems/caching/fscache.txt for more information.
57
58config FSCACHE_OBJECT_LIST
59 bool "Maintain global object list for debugging purposes"
60 depends on FSCACHE && PROC_FS
61 help
62 Maintain a global list of active fscache objects that can be
63 retrieved through /proc/fs/fscache/objects for debugging purposes
diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile
index 91571b95aacc..6d561531cb36 100644
--- a/fs/fscache/Makefile
+++ b/fs/fscache/Makefile
@@ -15,5 +15,6 @@ fscache-y := \
15fscache-$(CONFIG_PROC_FS) += proc.o 15fscache-$(CONFIG_PROC_FS) += proc.o
16fscache-$(CONFIG_FSCACHE_STATS) += stats.o 16fscache-$(CONFIG_FSCACHE_STATS) += stats.o
17fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o 17fscache-$(CONFIG_FSCACHE_HISTOGRAM) += histogram.o
18fscache-$(CONFIG_FSCACHE_OBJECT_LIST) += object-list.o
18 19
19obj-$(CONFIG_FSCACHE) := fscache.o 20obj-$(CONFIG_FSCACHE) := fscache.o
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
index e21985bbb1fb..6a3c48abd677 100644
--- a/fs/fscache/cache.c
+++ b/fs/fscache/cache.c
@@ -263,6 +263,7 @@ int fscache_add_cache(struct fscache_cache *cache,
263 spin_lock(&cache->object_list_lock); 263 spin_lock(&cache->object_list_lock);
264 list_add_tail(&ifsdef->cache_link, &cache->object_list); 264 list_add_tail(&ifsdef->cache_link, &cache->object_list);
265 spin_unlock(&cache->object_list_lock); 265 spin_unlock(&cache->object_list_lock);
266 fscache_objlist_add(ifsdef);
266 267
267 /* add the cache's netfs definition index object to the top level index 268 /* add the cache's netfs definition index object to the top level index
268 * cookie as a known backing object */ 269 * cookie as a known backing object */
@@ -380,11 +381,15 @@ void fscache_withdraw_cache(struct fscache_cache *cache)
380 381
381 /* make sure all pages pinned by operations on behalf of the netfs are 382 /* make sure all pages pinned by operations on behalf of the netfs are
382 * written to disk */ 383 * written to disk */
384 fscache_stat(&fscache_n_cop_sync_cache);
383 cache->ops->sync_cache(cache); 385 cache->ops->sync_cache(cache);
386 fscache_stat_d(&fscache_n_cop_sync_cache);
384 387
385 /* dissociate all the netfs pages backed by this cache from the block 388 /* dissociate all the netfs pages backed by this cache from the block
386 * mappings in the cache */ 389 * mappings in the cache */
390 fscache_stat(&fscache_n_cop_dissociate_pages);
387 cache->ops->dissociate_pages(cache); 391 cache->ops->dissociate_pages(cache);
392 fscache_stat_d(&fscache_n_cop_dissociate_pages);
388 393
389 /* we now have to destroy all the active objects pertaining to this 394 /* we now have to destroy all the active objects pertaining to this
390 * cache - which we do by passing them off to thread pool to be 395 * cache - which we do by passing them off to thread pool to be
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 72fd18f6c71f..990535071a8a 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -36,6 +36,7 @@ void fscache_cookie_init_once(void *_cookie)
36 36
37 memset(cookie, 0, sizeof(*cookie)); 37 memset(cookie, 0, sizeof(*cookie));
38 spin_lock_init(&cookie->lock); 38 spin_lock_init(&cookie->lock);
39 spin_lock_init(&cookie->stores_lock);
39 INIT_HLIST_HEAD(&cookie->backing_objects); 40 INIT_HLIST_HEAD(&cookie->backing_objects);
40} 41}
41 42
@@ -102,7 +103,9 @@ struct fscache_cookie *__fscache_acquire_cookie(
102 cookie->netfs_data = netfs_data; 103 cookie->netfs_data = netfs_data;
103 cookie->flags = 0; 104 cookie->flags = 0;
104 105
105 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS); 106 /* radix tree insertion won't use the preallocation pool unless it's
107 * told it may not wait */
108 INIT_RADIX_TREE(&cookie->stores, GFP_NOFS & ~__GFP_WAIT);
106 109
107 switch (cookie->def->type) { 110 switch (cookie->def->type) {
108 case FSCACHE_COOKIE_TYPE_INDEX: 111 case FSCACHE_COOKIE_TYPE_INDEX:
@@ -249,7 +252,9 @@ static int fscache_alloc_object(struct fscache_cache *cache,
249 252
250 /* ask the cache to allocate an object (we may end up with duplicate 253 /* ask the cache to allocate an object (we may end up with duplicate
251 * objects at this stage, but we sort that out later) */ 254 * objects at this stage, but we sort that out later) */
255 fscache_stat(&fscache_n_cop_alloc_object);
252 object = cache->ops->alloc_object(cache, cookie); 256 object = cache->ops->alloc_object(cache, cookie);
257 fscache_stat_d(&fscache_n_cop_alloc_object);
253 if (IS_ERR(object)) { 258 if (IS_ERR(object)) {
254 fscache_stat(&fscache_n_object_no_alloc); 259 fscache_stat(&fscache_n_object_no_alloc);
255 ret = PTR_ERR(object); 260 ret = PTR_ERR(object);
@@ -270,8 +275,11 @@ static int fscache_alloc_object(struct fscache_cache *cache,
270 /* only attach if we managed to allocate all we needed, otherwise 275 /* only attach if we managed to allocate all we needed, otherwise
271 * discard the object we just allocated and instead use the one 276 * discard the object we just allocated and instead use the one
272 * attached to the cookie */ 277 * attached to the cookie */
273 if (fscache_attach_object(cookie, object) < 0) 278 if (fscache_attach_object(cookie, object) < 0) {
279 fscache_stat(&fscache_n_cop_put_object);
274 cache->ops->put_object(object); 280 cache->ops->put_object(object);
281 fscache_stat_d(&fscache_n_cop_put_object);
282 }
275 283
276 _leave(" = 0"); 284 _leave(" = 0");
277 return 0; 285 return 0;
@@ -287,7 +295,9 @@ object_already_extant:
287 return 0; 295 return 0;
288 296
289error_put: 297error_put:
298 fscache_stat(&fscache_n_cop_put_object);
290 cache->ops->put_object(object); 299 cache->ops->put_object(object);
300 fscache_stat_d(&fscache_n_cop_put_object);
291error: 301error:
292 _leave(" = %d", ret); 302 _leave(" = %d", ret);
293 return ret; 303 return ret;
@@ -349,6 +359,8 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
349 object->cookie = cookie; 359 object->cookie = cookie;
350 atomic_inc(&cookie->usage); 360 atomic_inc(&cookie->usage);
351 hlist_add_head(&object->cookie_link, &cookie->backing_objects); 361 hlist_add_head(&object->cookie_link, &cookie->backing_objects);
362
363 fscache_objlist_add(object);
352 ret = 0; 364 ret = 0;
353 365
354cant_attach_object: 366cant_attach_object:
@@ -403,6 +415,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
403 unsigned long event; 415 unsigned long event;
404 416
405 fscache_stat(&fscache_n_relinquishes); 417 fscache_stat(&fscache_n_relinquishes);
418 if (retire)
419 fscache_stat(&fscache_n_relinquishes_retire);
406 420
407 if (!cookie) { 421 if (!cookie) {
408 fscache_stat(&fscache_n_relinquishes_null); 422 fscache_stat(&fscache_n_relinquishes_null);
@@ -428,12 +442,8 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
428 442
429 event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; 443 event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE;
430 444
431 /* detach pointers back to the netfs */
432 spin_lock(&cookie->lock); 445 spin_lock(&cookie->lock);
433 446
434 cookie->netfs_data = NULL;
435 cookie->def = NULL;
436
437 /* break links with all the active objects */ 447 /* break links with all the active objects */
438 while (!hlist_empty(&cookie->backing_objects)) { 448 while (!hlist_empty(&cookie->backing_objects)) {
439 object = hlist_entry(cookie->backing_objects.first, 449 object = hlist_entry(cookie->backing_objects.first,
@@ -456,6 +466,10 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire)
456 BUG(); 466 BUG();
457 } 467 }
458 468
469 /* detach pointers back to the netfs */
470 cookie->netfs_data = NULL;
471 cookie->def = NULL;
472
459 spin_unlock(&cookie->lock); 473 spin_unlock(&cookie->lock);
460 474
461 if (cookie->parent) { 475 if (cookie->parent) {
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 1c341304621f..edd7434ab6e5 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -17,6 +17,7 @@
17 * - cache->object_list_lock 17 * - cache->object_list_lock
18 * - object->lock 18 * - object->lock
19 * - object->parent->lock 19 * - object->parent->lock
20 * - cookie->stores_lock
20 * - fscache_thread_lock 21 * - fscache_thread_lock
21 * 22 *
22 */ 23 */
@@ -88,17 +89,31 @@ extern int fscache_wait_bit_interruptible(void *);
88/* 89/*
89 * object.c 90 * object.c
90 */ 91 */
92extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5];
93
91extern void fscache_withdrawing_object(struct fscache_cache *, 94extern void fscache_withdrawing_object(struct fscache_cache *,
92 struct fscache_object *); 95 struct fscache_object *);
93extern void fscache_enqueue_object(struct fscache_object *); 96extern void fscache_enqueue_object(struct fscache_object *);
94 97
95/* 98/*
99 * object-list.c
100 */
101#ifdef CONFIG_FSCACHE_OBJECT_LIST
102extern const struct file_operations fscache_objlist_fops;
103
104extern void fscache_objlist_add(struct fscache_object *);
105#else
106#define fscache_objlist_add(object) do {} while(0)
107#endif
108
109/*
96 * operation.c 110 * operation.c
97 */ 111 */
98extern int fscache_submit_exclusive_op(struct fscache_object *, 112extern int fscache_submit_exclusive_op(struct fscache_object *,
99 struct fscache_operation *); 113 struct fscache_operation *);
100extern int fscache_submit_op(struct fscache_object *, 114extern int fscache_submit_op(struct fscache_object *,
101 struct fscache_operation *); 115 struct fscache_operation *);
116extern int fscache_cancel_op(struct fscache_operation *);
102extern void fscache_abort_object(struct fscache_object *); 117extern void fscache_abort_object(struct fscache_object *);
103extern void fscache_start_operations(struct fscache_object *); 118extern void fscache_start_operations(struct fscache_object *);
104extern void fscache_operation_gc(struct work_struct *); 119extern void fscache_operation_gc(struct work_struct *);
@@ -127,6 +142,8 @@ extern atomic_t fscache_n_op_enqueue;
127extern atomic_t fscache_n_op_deferred_release; 142extern atomic_t fscache_n_op_deferred_release;
128extern atomic_t fscache_n_op_release; 143extern atomic_t fscache_n_op_release;
129extern atomic_t fscache_n_op_gc; 144extern atomic_t fscache_n_op_gc;
145extern atomic_t fscache_n_op_cancelled;
146extern atomic_t fscache_n_op_rejected;
130 147
131extern atomic_t fscache_n_attr_changed; 148extern atomic_t fscache_n_attr_changed;
132extern atomic_t fscache_n_attr_changed_ok; 149extern atomic_t fscache_n_attr_changed_ok;
@@ -138,6 +155,8 @@ extern atomic_t fscache_n_allocs;
138extern atomic_t fscache_n_allocs_ok; 155extern atomic_t fscache_n_allocs_ok;
139extern atomic_t fscache_n_allocs_wait; 156extern atomic_t fscache_n_allocs_wait;
140extern atomic_t fscache_n_allocs_nobufs; 157extern atomic_t fscache_n_allocs_nobufs;
158extern atomic_t fscache_n_allocs_intr;
159extern atomic_t fscache_n_allocs_object_dead;
141extern atomic_t fscache_n_alloc_ops; 160extern atomic_t fscache_n_alloc_ops;
142extern atomic_t fscache_n_alloc_op_waits; 161extern atomic_t fscache_n_alloc_op_waits;
143 162
@@ -148,6 +167,7 @@ extern atomic_t fscache_n_retrievals_nodata;
148extern atomic_t fscache_n_retrievals_nobufs; 167extern atomic_t fscache_n_retrievals_nobufs;
149extern atomic_t fscache_n_retrievals_intr; 168extern atomic_t fscache_n_retrievals_intr;
150extern atomic_t fscache_n_retrievals_nomem; 169extern atomic_t fscache_n_retrievals_nomem;
170extern atomic_t fscache_n_retrievals_object_dead;
151extern atomic_t fscache_n_retrieval_ops; 171extern atomic_t fscache_n_retrieval_ops;
152extern atomic_t fscache_n_retrieval_op_waits; 172extern atomic_t fscache_n_retrieval_op_waits;
153 173
@@ -158,6 +178,14 @@ extern atomic_t fscache_n_stores_nobufs;
158extern atomic_t fscache_n_stores_oom; 178extern atomic_t fscache_n_stores_oom;
159extern atomic_t fscache_n_store_ops; 179extern atomic_t fscache_n_store_ops;
160extern atomic_t fscache_n_store_calls; 180extern atomic_t fscache_n_store_calls;
181extern atomic_t fscache_n_store_pages;
182extern atomic_t fscache_n_store_radix_deletes;
183extern atomic_t fscache_n_store_pages_over_limit;
184
185extern atomic_t fscache_n_store_vmscan_not_storing;
186extern atomic_t fscache_n_store_vmscan_gone;
187extern atomic_t fscache_n_store_vmscan_busy;
188extern atomic_t fscache_n_store_vmscan_cancelled;
161 189
162extern atomic_t fscache_n_marks; 190extern atomic_t fscache_n_marks;
163extern atomic_t fscache_n_uncaches; 191extern atomic_t fscache_n_uncaches;
@@ -176,6 +204,7 @@ extern atomic_t fscache_n_updates_run;
176extern atomic_t fscache_n_relinquishes; 204extern atomic_t fscache_n_relinquishes;
177extern atomic_t fscache_n_relinquishes_null; 205extern atomic_t fscache_n_relinquishes_null;
178extern atomic_t fscache_n_relinquishes_waitcrt; 206extern atomic_t fscache_n_relinquishes_waitcrt;
207extern atomic_t fscache_n_relinquishes_retire;
179 208
180extern atomic_t fscache_n_cookie_index; 209extern atomic_t fscache_n_cookie_index;
181extern atomic_t fscache_n_cookie_data; 210extern atomic_t fscache_n_cookie_data;
@@ -186,6 +215,7 @@ extern atomic_t fscache_n_object_no_alloc;
186extern atomic_t fscache_n_object_lookups; 215extern atomic_t fscache_n_object_lookups;
187extern atomic_t fscache_n_object_lookups_negative; 216extern atomic_t fscache_n_object_lookups_negative;
188extern atomic_t fscache_n_object_lookups_positive; 217extern atomic_t fscache_n_object_lookups_positive;
218extern atomic_t fscache_n_object_lookups_timed_out;
189extern atomic_t fscache_n_object_created; 219extern atomic_t fscache_n_object_created;
190extern atomic_t fscache_n_object_avail; 220extern atomic_t fscache_n_object_avail;
191extern atomic_t fscache_n_object_dead; 221extern atomic_t fscache_n_object_dead;
@@ -195,15 +225,41 @@ extern atomic_t fscache_n_checkaux_okay;
195extern atomic_t fscache_n_checkaux_update; 225extern atomic_t fscache_n_checkaux_update;
196extern atomic_t fscache_n_checkaux_obsolete; 226extern atomic_t fscache_n_checkaux_obsolete;
197 227
228extern atomic_t fscache_n_cop_alloc_object;
229extern atomic_t fscache_n_cop_lookup_object;
230extern atomic_t fscache_n_cop_lookup_complete;
231extern atomic_t fscache_n_cop_grab_object;
232extern atomic_t fscache_n_cop_update_object;
233extern atomic_t fscache_n_cop_drop_object;
234extern atomic_t fscache_n_cop_put_object;
235extern atomic_t fscache_n_cop_sync_cache;
236extern atomic_t fscache_n_cop_attr_changed;
237extern atomic_t fscache_n_cop_read_or_alloc_page;
238extern atomic_t fscache_n_cop_read_or_alloc_pages;
239extern atomic_t fscache_n_cop_allocate_page;
240extern atomic_t fscache_n_cop_allocate_pages;
241extern atomic_t fscache_n_cop_write_page;
242extern atomic_t fscache_n_cop_uncache_page;
243extern atomic_t fscache_n_cop_dissociate_pages;
244
198static inline void fscache_stat(atomic_t *stat) 245static inline void fscache_stat(atomic_t *stat)
199{ 246{
200 atomic_inc(stat); 247 atomic_inc(stat);
201} 248}
202 249
250static inline void fscache_stat_d(atomic_t *stat)
251{
252 atomic_dec(stat);
253}
254
255#define __fscache_stat(stat) (stat)
256
203extern const struct file_operations fscache_stats_fops; 257extern const struct file_operations fscache_stats_fops;
204#else 258#else
205 259
260#define __fscache_stat(stat) (NULL)
206#define fscache_stat(stat) do {} while (0) 261#define fscache_stat(stat) do {} while (0)
262#define fscache_stat_d(stat) do {} while (0)
207#endif 263#endif
208 264
209/* 265/*
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 4de41b597499..add6bdb53f04 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -48,7 +48,7 @@ static int __init fscache_init(void)
48{ 48{
49 int ret; 49 int ret;
50 50
51 ret = slow_work_register_user(); 51 ret = slow_work_register_user(THIS_MODULE);
52 if (ret < 0) 52 if (ret < 0)
53 goto error_slow_work; 53 goto error_slow_work;
54 54
@@ -80,7 +80,7 @@ error_kobj:
80error_cookie_jar: 80error_cookie_jar:
81 fscache_proc_cleanup(); 81 fscache_proc_cleanup();
82error_proc: 82error_proc:
83 slow_work_unregister_user(); 83 slow_work_unregister_user(THIS_MODULE);
84error_slow_work: 84error_slow_work:
85 return ret; 85 return ret;
86} 86}
@@ -97,7 +97,7 @@ static void __exit fscache_exit(void)
97 kobject_put(fscache_root); 97 kobject_put(fscache_root);
98 kmem_cache_destroy(fscache_cookie_jar); 98 kmem_cache_destroy(fscache_cookie_jar);
99 fscache_proc_cleanup(); 99 fscache_proc_cleanup();
100 slow_work_unregister_user(); 100 slow_work_unregister_user(THIS_MODULE);
101 printk(KERN_NOTICE "FS-Cache: Unloaded\n"); 101 printk(KERN_NOTICE "FS-Cache: Unloaded\n");
102} 102}
103 103
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
new file mode 100644
index 000000000000..e590242fa41a
--- /dev/null
+++ b/fs/fscache/object-list.c
@@ -0,0 +1,432 @@
1/* Global fscache object list maintainer and viewer
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#define FSCACHE_DEBUG_LEVEL COOKIE
13#include <linux/module.h>
14#include <linux/seq_file.h>
15#include <linux/key.h>
16#include <keys/user-type.h>
17#include "internal.h"
18
19static struct rb_root fscache_object_list;
20static DEFINE_RWLOCK(fscache_object_list_lock);
21
22struct fscache_objlist_data {
23 unsigned long config; /* display configuration */
24#define FSCACHE_OBJLIST_CONFIG_KEY 0x00000001 /* show object keys */
25#define FSCACHE_OBJLIST_CONFIG_AUX 0x00000002 /* show object auxdata */
26#define FSCACHE_OBJLIST_CONFIG_COOKIE 0x00000004 /* show objects with cookies */
27#define FSCACHE_OBJLIST_CONFIG_NOCOOKIE 0x00000008 /* show objects without cookies */
28#define FSCACHE_OBJLIST_CONFIG_BUSY 0x00000010 /* show busy objects */
29#define FSCACHE_OBJLIST_CONFIG_IDLE 0x00000020 /* show idle objects */
30#define FSCACHE_OBJLIST_CONFIG_PENDWR 0x00000040 /* show objects with pending writes */
31#define FSCACHE_OBJLIST_CONFIG_NOPENDWR 0x00000080 /* show objects without pending writes */
32#define FSCACHE_OBJLIST_CONFIG_READS 0x00000100 /* show objects with active reads */
33#define FSCACHE_OBJLIST_CONFIG_NOREADS 0x00000200 /* show objects without active reads */
34#define FSCACHE_OBJLIST_CONFIG_EVENTS 0x00000400 /* show objects with events */
 35#define FSCACHE_OBJLIST_CONFIG_NOEVENTS	0x00000800 /* show objects without events */
36#define FSCACHE_OBJLIST_CONFIG_WORK 0x00001000 /* show objects with slow work */
37#define FSCACHE_OBJLIST_CONFIG_NOWORK 0x00002000 /* show objects without slow work */
38
39 u8 buf[512]; /* key and aux data buffer */
40};
41
42/*
43 * Add an object to the object list
44 * - we use the address of the fscache_object structure as the key into the
45 * tree
46 */
47void fscache_objlist_add(struct fscache_object *obj)
48{
49 struct fscache_object *xobj;
50 struct rb_node **p = &fscache_object_list.rb_node, *parent = NULL;
51
52 write_lock(&fscache_object_list_lock);
53
54 while (*p) {
55 parent = *p;
56 xobj = rb_entry(parent, struct fscache_object, objlist_link);
57
58 if (obj < xobj)
59 p = &(*p)->rb_left;
60 else if (obj > xobj)
61 p = &(*p)->rb_right;
62 else
63 BUG();
64 }
65
66 rb_link_node(&obj->objlist_link, parent, p);
67 rb_insert_color(&obj->objlist_link, &fscache_object_list);
68
69 write_unlock(&fscache_object_list_lock);
70}
71
72/**
73 * fscache_object_destroy - Note that a cache object is about to be destroyed
74 * @object: The object to be destroyed
75 *
76 * Note the imminent destruction and deallocation of a cache object record.
77 */
78void fscache_object_destroy(struct fscache_object *obj)
79{
80 write_lock(&fscache_object_list_lock);
81
82 BUG_ON(RB_EMPTY_ROOT(&fscache_object_list));
83 rb_erase(&obj->objlist_link, &fscache_object_list);
84
85 write_unlock(&fscache_object_list_lock);
86}
87EXPORT_SYMBOL(fscache_object_destroy);
88
89/*
90 * find the object in the tree on or after the specified index
91 */
92static struct fscache_object *fscache_objlist_lookup(loff_t *_pos)
93{
94 struct fscache_object *pobj, *obj, *minobj = NULL;
95 struct rb_node *p;
96 unsigned long pos;
97
98 if (*_pos >= (unsigned long) ERR_PTR(-ENOENT))
99 return NULL;
100 pos = *_pos;
101
102 /* banners (can't represent line 0 by pos 0 as that would involve
103 * returning a NULL pointer) */
104 if (pos == 0)
105 return (struct fscache_object *) ++(*_pos);
106 if (pos < 3)
107 return (struct fscache_object *)pos;
108
109 pobj = (struct fscache_object *)pos;
110 p = fscache_object_list.rb_node;
111 while (p) {
112 obj = rb_entry(p, struct fscache_object, objlist_link);
113 if (pobj < obj) {
114 if (!minobj || minobj > obj)
115 minobj = obj;
116 p = p->rb_left;
117 } else if (pobj > obj) {
118 p = p->rb_right;
119 } else {
120 minobj = obj;
121 break;
122 }
123 obj = NULL;
124 }
125
126 if (!minobj)
127 *_pos = (unsigned long) ERR_PTR(-ENOENT);
128 else if (minobj != obj)
129 *_pos = (unsigned long) minobj;
130 return minobj;
131}
132
133/*
134 * set up the iterator to start reading from the first line
135 */
136static void *fscache_objlist_start(struct seq_file *m, loff_t *_pos)
137 __acquires(&fscache_object_list_lock)
138{
139 read_lock(&fscache_object_list_lock);
140 return fscache_objlist_lookup(_pos);
141}
142
143/*
144 * move to the next line
145 */
146static void *fscache_objlist_next(struct seq_file *m, void *v, loff_t *_pos)
147{
148 (*_pos)++;
149 return fscache_objlist_lookup(_pos);
150}
151
152/*
153 * clean up after reading
154 */
155static void fscache_objlist_stop(struct seq_file *m, void *v)
156 __releases(&fscache_object_list_lock)
157{
158 read_unlock(&fscache_object_list_lock);
159}
160
161/*
162 * display an object
163 */
164static int fscache_objlist_show(struct seq_file *m, void *v)
165{
166 struct fscache_objlist_data *data = m->private;
167 struct fscache_object *obj = v;
168 unsigned long config = data->config;
169 uint16_t keylen, auxlen;
170 char _type[3], *type;
171 bool no_cookie;
172 u8 *buf = data->buf, *p;
173
174 if ((unsigned long) v == 1) {
175 seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS"
176 " EM EV F S"
177 " | NETFS_COOKIE_DEF TY FL NETFS_DATA");
178 if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
179 FSCACHE_OBJLIST_CONFIG_AUX))
180 seq_puts(m, " ");
181 if (config & FSCACHE_OBJLIST_CONFIG_KEY)
182 seq_puts(m, "OBJECT_KEY");
183 if ((config & (FSCACHE_OBJLIST_CONFIG_KEY |
184 FSCACHE_OBJLIST_CONFIG_AUX)) ==
185 (FSCACHE_OBJLIST_CONFIG_KEY | FSCACHE_OBJLIST_CONFIG_AUX))
186 seq_puts(m, ", ");
187 if (config & FSCACHE_OBJLIST_CONFIG_AUX)
188 seq_puts(m, "AUX_DATA");
189 seq_puts(m, "\n");
190 return 0;
191 }
192
193 if ((unsigned long) v == 2) {
194 seq_puts(m, "======== ======== ==== ===== === === === == ====="
195 " == == = ="
196 " | ================ == == ================");
197 if (config & (FSCACHE_OBJLIST_CONFIG_KEY |
198 FSCACHE_OBJLIST_CONFIG_AUX))
199 seq_puts(m, " ================");
200 seq_puts(m, "\n");
201 return 0;
202 }
203
204 /* filter out any unwanted objects */
205#define FILTER(criterion, _yes, _no) \
206 do { \
207 unsigned long yes = FSCACHE_OBJLIST_CONFIG_##_yes; \
208 unsigned long no = FSCACHE_OBJLIST_CONFIG_##_no; \
209 if (criterion) { \
210 if (!(config & yes)) \
211 return 0; \
212 } else { \
213 if (!(config & no)) \
214 return 0; \
215 } \
216 } while(0)
217
218 if (~config) {
219 FILTER(obj->cookie,
220 COOKIE, NOCOOKIE);
221 FILTER(obj->state != FSCACHE_OBJECT_ACTIVE ||
222 obj->n_ops != 0 ||
223 obj->n_obj_ops != 0 ||
224 obj->flags ||
225 !list_empty(&obj->dependents),
226 BUSY, IDLE);
227 FILTER(test_bit(FSCACHE_OBJECT_PENDING_WRITE, &obj->flags),
228 PENDWR, NOPENDWR);
229 FILTER(atomic_read(&obj->n_reads),
230 READS, NOREADS);
231 FILTER(obj->events & obj->event_mask,
232 EVENTS, NOEVENTS);
233 FILTER(obj->work.flags & ~(1UL << SLOW_WORK_VERY_SLOW),
234 WORK, NOWORK);
235 }
236
237 seq_printf(m,
238 "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1lx | ",
239 obj->debug_id,
240 obj->parent ? obj->parent->debug_id : -1,
241 fscache_object_states_short[obj->state],
242 obj->n_children,
243 obj->n_ops,
244 obj->n_obj_ops,
245 obj->n_in_progress,
246 obj->n_exclusive,
247 atomic_read(&obj->n_reads),
248 obj->event_mask & FSCACHE_OBJECT_EVENTS_MASK,
249 obj->events,
250 obj->flags,
251 obj->work.flags);
252
253 no_cookie = true;
254 keylen = auxlen = 0;
255 if (obj->cookie) {
256 spin_lock(&obj->lock);
257 if (obj->cookie) {
258 switch (obj->cookie->def->type) {
259 case 0:
260 type = "IX";
261 break;
262 case 1:
263 type = "DT";
264 break;
265 default:
266 sprintf(_type, "%02u",
267 obj->cookie->def->type);
268 type = _type;
269 break;
270 }
271
272 seq_printf(m, "%-16s %s %2lx %16p",
273 obj->cookie->def->name,
274 type,
275 obj->cookie->flags,
276 obj->cookie->netfs_data);
277
278 if (obj->cookie->def->get_key &&
279 config & FSCACHE_OBJLIST_CONFIG_KEY)
280 keylen = obj->cookie->def->get_key(
281 obj->cookie->netfs_data,
282 buf, 400);
283
284 if (obj->cookie->def->get_aux &&
285 config & FSCACHE_OBJLIST_CONFIG_AUX)
286 auxlen = obj->cookie->def->get_aux(
287 obj->cookie->netfs_data,
288 buf + keylen, 512 - keylen);
289
290 no_cookie = false;
291 }
292 spin_unlock(&obj->lock);
293
294 if (!no_cookie && (keylen > 0 || auxlen > 0)) {
295 seq_printf(m, " ");
296 for (p = buf; keylen > 0; keylen--)
297 seq_printf(m, "%02x", *p++);
298 if (auxlen > 0) {
299 if (config & FSCACHE_OBJLIST_CONFIG_KEY)
300 seq_printf(m, ", ");
301 for (; auxlen > 0; auxlen--)
302 seq_printf(m, "%02x", *p++);
303 }
304 }
305 }
306
307 if (no_cookie)
308 seq_printf(m, "<no_cookie>\n");
309 else
310 seq_printf(m, "\n");
311 return 0;
312}
313
314static const struct seq_operations fscache_objlist_ops = {
315 .start = fscache_objlist_start,
316 .stop = fscache_objlist_stop,
317 .next = fscache_objlist_next,
318 .show = fscache_objlist_show,
319};
320
321/*
322 * get the configuration for filtering the list
323 */
324static void fscache_objlist_config(struct fscache_objlist_data *data)
325{
326#ifdef CONFIG_KEYS
327 struct user_key_payload *confkey;
328 unsigned long config;
329 struct key *key;
330 const char *buf;
331 int len;
332
333 key = request_key(&key_type_user, "fscache:objlist", NULL);
334 if (IS_ERR(key))
335 goto no_config;
336
337 config = 0;
338 rcu_read_lock();
339
340 confkey = key->payload.data;
341 buf = confkey->data;
342
343 for (len = confkey->datalen - 1; len >= 0; len--) {
344 switch (buf[len]) {
345 case 'K': config |= FSCACHE_OBJLIST_CONFIG_KEY; break;
346 case 'A': config |= FSCACHE_OBJLIST_CONFIG_AUX; break;
347 case 'C': config |= FSCACHE_OBJLIST_CONFIG_COOKIE; break;
348 case 'c': config |= FSCACHE_OBJLIST_CONFIG_NOCOOKIE; break;
349 case 'B': config |= FSCACHE_OBJLIST_CONFIG_BUSY; break;
350 case 'b': config |= FSCACHE_OBJLIST_CONFIG_IDLE; break;
351 case 'W': config |= FSCACHE_OBJLIST_CONFIG_PENDWR; break;
352 case 'w': config |= FSCACHE_OBJLIST_CONFIG_NOPENDWR; break;
353 case 'R': config |= FSCACHE_OBJLIST_CONFIG_READS; break;
354 case 'r': config |= FSCACHE_OBJLIST_CONFIG_NOREADS; break;
355 case 'S': config |= FSCACHE_OBJLIST_CONFIG_WORK; break;
356 case 's': config |= FSCACHE_OBJLIST_CONFIG_NOWORK; break;
357 }
358 }
359
360 rcu_read_unlock();
361 key_put(key);
362
363 if (!(config & (FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE)))
364 config |= FSCACHE_OBJLIST_CONFIG_COOKIE | FSCACHE_OBJLIST_CONFIG_NOCOOKIE;
365 if (!(config & (FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE)))
366 config |= FSCACHE_OBJLIST_CONFIG_BUSY | FSCACHE_OBJLIST_CONFIG_IDLE;
367 if (!(config & (FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR)))
368 config |= FSCACHE_OBJLIST_CONFIG_PENDWR | FSCACHE_OBJLIST_CONFIG_NOPENDWR;
369 if (!(config & (FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS)))
370 config |= FSCACHE_OBJLIST_CONFIG_READS | FSCACHE_OBJLIST_CONFIG_NOREADS;
371 if (!(config & (FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS)))
372 config |= FSCACHE_OBJLIST_CONFIG_EVENTS | FSCACHE_OBJLIST_CONFIG_NOEVENTS;
373 if (!(config & (FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK)))
374 config |= FSCACHE_OBJLIST_CONFIG_WORK | FSCACHE_OBJLIST_CONFIG_NOWORK;
375
376 data->config = config;
377 return;
378
379no_config:
380#endif
381 data->config = ULONG_MAX;
382}
383
384/*
385 * open "/proc/fs/fscache/objects" to provide a list of active objects
386 * - can be configured by a user-defined key added to the caller's keyrings
387 */
388static int fscache_objlist_open(struct inode *inode, struct file *file)
389{
390 struct fscache_objlist_data *data;
391 struct seq_file *m;
392 int ret;
393
394 ret = seq_open(file, &fscache_objlist_ops);
395 if (ret < 0)
396 return ret;
397
398 m = file->private_data;
399
400 /* buffer for key extraction */
401 data = kmalloc(sizeof(struct fscache_objlist_data), GFP_KERNEL);
402 if (!data) {
403 seq_release(inode, file);
404 return -ENOMEM;
405 }
406
407 /* get the configuration key */
408 fscache_objlist_config(data);
409
410 m->private = data;
411 return 0;
412}
413
414/*
415 * clean up on close
416 */
417static int fscache_objlist_release(struct inode *inode, struct file *file)
418{
419 struct seq_file *m = file->private_data;
420
421 kfree(m->private);
422 m->private = NULL;
423 return seq_release(inode, file);
424}
425
426const struct file_operations fscache_objlist_fops = {
427 .owner = THIS_MODULE,
428 .open = fscache_objlist_open,
429 .read = seq_read,
430 .llseek = seq_lseek,
431 .release = fscache_objlist_release,
432};
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 392a41b1b79d..e513ac599c8e 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -14,9 +14,10 @@
14 14
15#define FSCACHE_DEBUG_LEVEL COOKIE 15#define FSCACHE_DEBUG_LEVEL COOKIE
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/seq_file.h>
17#include "internal.h" 18#include "internal.h"
18 19
19const char *fscache_object_states[] = { 20const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = {
20 [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", 21 [FSCACHE_OBJECT_INIT] = "OBJECT_INIT",
21 [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", 22 [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP",
22 [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", 23 [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING",
@@ -33,9 +34,28 @@ const char *fscache_object_states[] = {
33}; 34};
34EXPORT_SYMBOL(fscache_object_states); 35EXPORT_SYMBOL(fscache_object_states);
35 36
37const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = {
38 [FSCACHE_OBJECT_INIT] = "INIT",
39 [FSCACHE_OBJECT_LOOKING_UP] = "LOOK",
40 [FSCACHE_OBJECT_CREATING] = "CRTN",
41 [FSCACHE_OBJECT_AVAILABLE] = "AVBL",
42 [FSCACHE_OBJECT_ACTIVE] = "ACTV",
43 [FSCACHE_OBJECT_UPDATING] = "UPDT",
44 [FSCACHE_OBJECT_DYING] = "DYNG",
45 [FSCACHE_OBJECT_LC_DYING] = "LCDY",
46 [FSCACHE_OBJECT_ABORT_INIT] = "ABTI",
47 [FSCACHE_OBJECT_RELEASING] = "RELS",
48 [FSCACHE_OBJECT_RECYCLING] = "RCYC",
49 [FSCACHE_OBJECT_WITHDRAWING] = "WTHD",
50 [FSCACHE_OBJECT_DEAD] = "DEAD",
51};
52
36static void fscache_object_slow_work_put_ref(struct slow_work *); 53static void fscache_object_slow_work_put_ref(struct slow_work *);
37static int fscache_object_slow_work_get_ref(struct slow_work *); 54static int fscache_object_slow_work_get_ref(struct slow_work *);
38static void fscache_object_slow_work_execute(struct slow_work *); 55static void fscache_object_slow_work_execute(struct slow_work *);
56#ifdef CONFIG_SLOW_WORK_PROC
57static void fscache_object_slow_work_desc(struct slow_work *, struct seq_file *);
58#endif
39static void fscache_initialise_object(struct fscache_object *); 59static void fscache_initialise_object(struct fscache_object *);
40static void fscache_lookup_object(struct fscache_object *); 60static void fscache_lookup_object(struct fscache_object *);
41static void fscache_object_available(struct fscache_object *); 61static void fscache_object_available(struct fscache_object *);
@@ -45,9 +65,13 @@ static void fscache_enqueue_dependents(struct fscache_object *);
45static void fscache_dequeue_object(struct fscache_object *); 65static void fscache_dequeue_object(struct fscache_object *);
46 66
47const struct slow_work_ops fscache_object_slow_work_ops = { 67const struct slow_work_ops fscache_object_slow_work_ops = {
68 .owner = THIS_MODULE,
48 .get_ref = fscache_object_slow_work_get_ref, 69 .get_ref = fscache_object_slow_work_get_ref,
49 .put_ref = fscache_object_slow_work_put_ref, 70 .put_ref = fscache_object_slow_work_put_ref,
50 .execute = fscache_object_slow_work_execute, 71 .execute = fscache_object_slow_work_execute,
72#ifdef CONFIG_SLOW_WORK_PROC
73 .desc = fscache_object_slow_work_desc,
74#endif
51}; 75};
52EXPORT_SYMBOL(fscache_object_slow_work_ops); 76EXPORT_SYMBOL(fscache_object_slow_work_ops);
53 77
@@ -81,6 +105,7 @@ static inline void fscache_done_parent_op(struct fscache_object *object)
81static void fscache_object_state_machine(struct fscache_object *object) 105static void fscache_object_state_machine(struct fscache_object *object)
82{ 106{
83 enum fscache_object_state new_state; 107 enum fscache_object_state new_state;
108 struct fscache_cookie *cookie;
84 109
85 ASSERT(object != NULL); 110 ASSERT(object != NULL);
86 111
@@ -120,20 +145,31 @@ static void fscache_object_state_machine(struct fscache_object *object)
120 case FSCACHE_OBJECT_UPDATING: 145 case FSCACHE_OBJECT_UPDATING:
121 clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); 146 clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events);
122 fscache_stat(&fscache_n_updates_run); 147 fscache_stat(&fscache_n_updates_run);
148 fscache_stat(&fscache_n_cop_update_object);
123 object->cache->ops->update_object(object); 149 object->cache->ops->update_object(object);
150 fscache_stat_d(&fscache_n_cop_update_object);
124 goto active_transit; 151 goto active_transit;
125 152
126 /* handle an object dying during lookup or creation */ 153 /* handle an object dying during lookup or creation */
127 case FSCACHE_OBJECT_LC_DYING: 154 case FSCACHE_OBJECT_LC_DYING:
128 object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); 155 object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE);
156 fscache_stat(&fscache_n_cop_lookup_complete);
129 object->cache->ops->lookup_complete(object); 157 object->cache->ops->lookup_complete(object);
158 fscache_stat_d(&fscache_n_cop_lookup_complete);
130 159
131 spin_lock(&object->lock); 160 spin_lock(&object->lock);
132 object->state = FSCACHE_OBJECT_DYING; 161 object->state = FSCACHE_OBJECT_DYING;
133 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, 162 cookie = object->cookie;
134 &object->cookie->flags)) 163 if (cookie) {
135 wake_up_bit(&object->cookie->flags, 164 if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP,
136 FSCACHE_COOKIE_CREATING); 165 &cookie->flags))
166 wake_up_bit(&cookie->flags,
167 FSCACHE_COOKIE_LOOKING_UP);
168 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING,
169 &cookie->flags))
170 wake_up_bit(&cookie->flags,
171 FSCACHE_COOKIE_CREATING);
172 }
137 spin_unlock(&object->lock); 173 spin_unlock(&object->lock);
138 174
139 fscache_done_parent_op(object); 175 fscache_done_parent_op(object);
@@ -165,6 +201,7 @@ static void fscache_object_state_machine(struct fscache_object *object)
165 } 201 }
166 spin_unlock(&object->lock); 202 spin_unlock(&object->lock);
167 fscache_enqueue_dependents(object); 203 fscache_enqueue_dependents(object);
204 fscache_start_operations(object);
168 goto terminal_transit; 205 goto terminal_transit;
169 206
170 /* handle an abort during initialisation */ 207 /* handle an abort during initialisation */
@@ -316,14 +353,29 @@ static void fscache_object_slow_work_execute(struct slow_work *work)
316 353
317 _enter("{OBJ%x}", object->debug_id); 354 _enter("{OBJ%x}", object->debug_id);
318 355
319 clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
320
321 start = jiffies; 356 start = jiffies;
322 fscache_object_state_machine(object); 357 fscache_object_state_machine(object);
323 fscache_hist(fscache_objs_histogram, start); 358 fscache_hist(fscache_objs_histogram, start);
324 if (object->events & object->event_mask) 359 if (object->events & object->event_mask)
325 fscache_enqueue_object(object); 360 fscache_enqueue_object(object);
361 clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
362}
363
364/*
365 * describe an object for slow-work debugging
366 */
367#ifdef CONFIG_SLOW_WORK_PROC
368static void fscache_object_slow_work_desc(struct slow_work *work,
369 struct seq_file *m)
370{
371 struct fscache_object *object =
372 container_of(work, struct fscache_object, work);
373
374 seq_printf(m, "FSC: OBJ%x: %s",
375 object->debug_id,
376 fscache_object_states_short[object->state]);
326} 377}
378#endif
327 379
328/* 380/*
329 * initialise an object 381 * initialise an object
@@ -376,7 +428,9 @@ static void fscache_initialise_object(struct fscache_object *object)
376 * binding on to us, so we need to make sure we don't 428 * binding on to us, so we need to make sure we don't
377 * add ourself to the list multiple times */ 429 * add ourself to the list multiple times */
378 if (list_empty(&object->dep_link)) { 430 if (list_empty(&object->dep_link)) {
431 fscache_stat(&fscache_n_cop_grab_object);
379 object->cache->ops->grab_object(object); 432 object->cache->ops->grab_object(object);
433 fscache_stat_d(&fscache_n_cop_grab_object);
380 list_add(&object->dep_link, 434 list_add(&object->dep_link,
381 &parent->dependents); 435 &parent->dependents);
382 436
@@ -414,6 +468,7 @@ static void fscache_lookup_object(struct fscache_object *object)
414{ 468{
415 struct fscache_cookie *cookie = object->cookie; 469 struct fscache_cookie *cookie = object->cookie;
416 struct fscache_object *parent; 470 struct fscache_object *parent;
471 int ret;
417 472
418 _enter(""); 473 _enter("");
419 474
@@ -438,11 +493,20 @@ static void fscache_lookup_object(struct fscache_object *object)
438 object->cache->tag->name); 493 object->cache->tag->name);
439 494
440 fscache_stat(&fscache_n_object_lookups); 495 fscache_stat(&fscache_n_object_lookups);
441 object->cache->ops->lookup_object(object); 496 fscache_stat(&fscache_n_cop_lookup_object);
497 ret = object->cache->ops->lookup_object(object);
498 fscache_stat_d(&fscache_n_cop_lookup_object);
442 499
443 if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) 500 if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events))
444 set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); 501 set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags);
445 502
503 if (ret == -ETIMEDOUT) {
504 /* probably stuck behind another object, so move this one to
505 * the back of the queue */
506 fscache_stat(&fscache_n_object_lookups_timed_out);
507 set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events);
508 }
509
446 _leave(""); 510 _leave("");
447} 511}
448 512
@@ -546,7 +610,8 @@ static void fscache_object_available(struct fscache_object *object)
546 610
547 spin_lock(&object->lock); 611 spin_lock(&object->lock);
548 612
549 if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) 613 if (object->cookie &&
614 test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags))
550 wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); 615 wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING);
551 616
552 fscache_done_parent_op(object); 617 fscache_done_parent_op(object);
@@ -562,7 +627,9 @@ static void fscache_object_available(struct fscache_object *object)
562 } 627 }
563 spin_unlock(&object->lock); 628 spin_unlock(&object->lock);
564 629
630 fscache_stat(&fscache_n_cop_lookup_complete);
565 object->cache->ops->lookup_complete(object); 631 object->cache->ops->lookup_complete(object);
632 fscache_stat_d(&fscache_n_cop_lookup_complete);
566 fscache_enqueue_dependents(object); 633 fscache_enqueue_dependents(object);
567 634
568 fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); 635 fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif);
@@ -581,11 +648,16 @@ static void fscache_drop_object(struct fscache_object *object)
581 648
582 _enter("{OBJ%x,%d}", object->debug_id, object->n_children); 649 _enter("{OBJ%x,%d}", object->debug_id, object->n_children);
583 650
651 ASSERTCMP(object->cookie, ==, NULL);
652 ASSERT(hlist_unhashed(&object->cookie_link));
653
584 spin_lock(&cache->object_list_lock); 654 spin_lock(&cache->object_list_lock);
585 list_del_init(&object->cache_link); 655 list_del_init(&object->cache_link);
586 spin_unlock(&cache->object_list_lock); 656 spin_unlock(&cache->object_list_lock);
587 657
658 fscache_stat(&fscache_n_cop_drop_object);
588 cache->ops->drop_object(object); 659 cache->ops->drop_object(object);
660 fscache_stat_d(&fscache_n_cop_drop_object);
589 661
590 if (parent) { 662 if (parent) {
591 _debug("release parent OBJ%x {%d}", 663 _debug("release parent OBJ%x {%d}",
@@ -600,7 +672,9 @@ static void fscache_drop_object(struct fscache_object *object)
600 } 672 }
601 673
602 /* this just shifts the object release to the slow work processor */ 674 /* this just shifts the object release to the slow work processor */
675 fscache_stat(&fscache_n_cop_put_object);
603 object->cache->ops->put_object(object); 676 object->cache->ops->put_object(object);
677 fscache_stat_d(&fscache_n_cop_put_object);
604 678
605 _leave(""); 679 _leave("");
606} 680}
@@ -690,8 +764,12 @@ static int fscache_object_slow_work_get_ref(struct slow_work *work)
690{ 764{
691 struct fscache_object *object = 765 struct fscache_object *object =
692 container_of(work, struct fscache_object, work); 766 container_of(work, struct fscache_object, work);
767 int ret;
693 768
694 return object->cache->ops->grab_object(object) ? 0 : -EAGAIN; 769 fscache_stat(&fscache_n_cop_grab_object);
770 ret = object->cache->ops->grab_object(object) ? 0 : -EAGAIN;
771 fscache_stat_d(&fscache_n_cop_grab_object);
772 return ret;
695} 773}
696 774
697/* 775/*
@@ -702,7 +780,9 @@ static void fscache_object_slow_work_put_ref(struct slow_work *work)
702 struct fscache_object *object = 780 struct fscache_object *object =
703 container_of(work, struct fscache_object, work); 781 container_of(work, struct fscache_object, work);
704 782
705 return object->cache->ops->put_object(object); 783 fscache_stat(&fscache_n_cop_put_object);
784 object->cache->ops->put_object(object);
785 fscache_stat_d(&fscache_n_cop_put_object);
706} 786}
707 787
708/* 788/*
@@ -739,7 +819,9 @@ static void fscache_enqueue_dependents(struct fscache_object *object)
739 819
740 /* sort onto appropriate lists */ 820 /* sort onto appropriate lists */
741 fscache_enqueue_object(dep); 821 fscache_enqueue_object(dep);
822 fscache_stat(&fscache_n_cop_put_object);
742 dep->cache->ops->put_object(dep); 823 dep->cache->ops->put_object(dep);
824 fscache_stat_d(&fscache_n_cop_put_object);
743 825
744 if (!list_empty(&object->dependents)) 826 if (!list_empty(&object->dependents))
745 cond_resched_lock(&object->lock); 827 cond_resched_lock(&object->lock);
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index e7f8d53b8b6b..313e79a14266 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -13,6 +13,7 @@
13 13
14#define FSCACHE_DEBUG_LEVEL OPERATION 14#define FSCACHE_DEBUG_LEVEL OPERATION
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/seq_file.h>
16#include "internal.h" 17#include "internal.h"
17 18
18atomic_t fscache_op_debug_id; 19atomic_t fscache_op_debug_id;
@@ -31,32 +32,33 @@ void fscache_enqueue_operation(struct fscache_operation *op)
31 _enter("{OBJ%x OP%x,%u}", 32 _enter("{OBJ%x OP%x,%u}",
32 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 33 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
33 34
35 fscache_set_op_state(op, "EnQ");
36
37 ASSERT(list_empty(&op->pend_link));
34 ASSERT(op->processor != NULL); 38 ASSERT(op->processor != NULL);
35 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); 39 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
36 ASSERTCMP(atomic_read(&op->usage), >, 0); 40 ASSERTCMP(atomic_read(&op->usage), >, 0);
37 41
38 if (list_empty(&op->pend_link)) { 42 fscache_stat(&fscache_n_op_enqueue);
39 switch (op->flags & FSCACHE_OP_TYPE) { 43 switch (op->flags & FSCACHE_OP_TYPE) {
40 case FSCACHE_OP_FAST: 44 case FSCACHE_OP_FAST:
41 _debug("queue fast"); 45 _debug("queue fast");
42 atomic_inc(&op->usage); 46 atomic_inc(&op->usage);
43 if (!schedule_work(&op->fast_work)) 47 if (!schedule_work(&op->fast_work))
44 fscache_put_operation(op); 48 fscache_put_operation(op);
45 break; 49 break;
46 case FSCACHE_OP_SLOW: 50 case FSCACHE_OP_SLOW:
47 _debug("queue slow"); 51 _debug("queue slow");
48 slow_work_enqueue(&op->slow_work); 52 slow_work_enqueue(&op->slow_work);
49 break; 53 break;
50 case FSCACHE_OP_MYTHREAD: 54 case FSCACHE_OP_MYTHREAD:
51 _debug("queue for caller's attention"); 55 _debug("queue for caller's attention");
52 break; 56 break;
53 default: 57 default:
54 printk(KERN_ERR "FS-Cache: Unexpected op type %lx", 58 printk(KERN_ERR "FS-Cache: Unexpected op type %lx",
55 op->flags); 59 op->flags);
56 BUG(); 60 BUG();
57 break; 61 break;
58 }
59 fscache_stat(&fscache_n_op_enqueue);
60 } 62 }
61} 63}
62EXPORT_SYMBOL(fscache_enqueue_operation); 64EXPORT_SYMBOL(fscache_enqueue_operation);
@@ -67,6 +69,8 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
67static void fscache_run_op(struct fscache_object *object, 69static void fscache_run_op(struct fscache_object *object,
68 struct fscache_operation *op) 70 struct fscache_operation *op)
69{ 71{
72 fscache_set_op_state(op, "Run");
73
70 object->n_in_progress++; 74 object->n_in_progress++;
71 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) 75 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
72 wake_up_bit(&op->flags, FSCACHE_OP_WAITING); 76 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -87,9 +91,12 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
87 91
88 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); 92 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
89 93
94 fscache_set_op_state(op, "SubmitX");
95
90 spin_lock(&object->lock); 96 spin_lock(&object->lock);
91 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 97 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
92 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 98 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
99 ASSERT(list_empty(&op->pend_link));
93 100
94 ret = -ENOBUFS; 101 ret = -ENOBUFS;
95 if (fscache_object_is_active(object)) { 102 if (fscache_object_is_active(object)) {
@@ -190,9 +197,12 @@ int fscache_submit_op(struct fscache_object *object,
190 197
191 ASSERTCMP(atomic_read(&op->usage), >, 0); 198 ASSERTCMP(atomic_read(&op->usage), >, 0);
192 199
200 fscache_set_op_state(op, "Submit");
201
193 spin_lock(&object->lock); 202 spin_lock(&object->lock);
194 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 203 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
195 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 204 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
205 ASSERT(list_empty(&op->pend_link));
196 206
197 ostate = object->state; 207 ostate = object->state;
198 smp_rmb(); 208 smp_rmb();
@@ -222,6 +232,11 @@ int fscache_submit_op(struct fscache_object *object,
222 list_add_tail(&op->pend_link, &object->pending_ops); 232 list_add_tail(&op->pend_link, &object->pending_ops);
223 fscache_stat(&fscache_n_op_pend); 233 fscache_stat(&fscache_n_op_pend);
224 ret = 0; 234 ret = 0;
235 } else if (object->state == FSCACHE_OBJECT_DYING ||
236 object->state == FSCACHE_OBJECT_LC_DYING ||
237 object->state == FSCACHE_OBJECT_WITHDRAWING) {
238 fscache_stat(&fscache_n_op_rejected);
239 ret = -ENOBUFS;
225 } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) { 240 } else if (!test_bit(FSCACHE_IOERROR, &object->cache->flags)) {
226 fscache_report_unexpected_submission(object, op, ostate); 241 fscache_report_unexpected_submission(object, op, ostate);
227 ASSERT(!fscache_object_is_active(object)); 242 ASSERT(!fscache_object_is_active(object));
@@ -264,12 +279,7 @@ void fscache_start_operations(struct fscache_object *object)
264 stop = true; 279 stop = true;
265 } 280 }
266 list_del_init(&op->pend_link); 281 list_del_init(&op->pend_link);
267 object->n_in_progress++; 282 fscache_run_op(object, op);
268
269 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
270 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
271 if (op->processor)
272 fscache_enqueue_operation(op);
273 283
274 /* the pending queue was holding a ref on the object */ 284 /* the pending queue was holding a ref on the object */
275 fscache_put_operation(op); 285 fscache_put_operation(op);
@@ -282,6 +292,36 @@ void fscache_start_operations(struct fscache_object *object)
282} 292}
283 293
284/* 294/*
295 * cancel an operation that's pending on an object
296 */
297int fscache_cancel_op(struct fscache_operation *op)
298{
299 struct fscache_object *object = op->object;
300 int ret;
301
302 _enter("OBJ%x OP%x}", op->object->debug_id, op->debug_id);
303
304 spin_lock(&object->lock);
305
306 ret = -EBUSY;
307 if (!list_empty(&op->pend_link)) {
308 fscache_stat(&fscache_n_op_cancelled);
309 list_del_init(&op->pend_link);
310 object->n_ops--;
311 if (test_bit(FSCACHE_OP_EXCLUSIVE, &op->flags))
312 object->n_exclusive--;
313 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
314 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
315 fscache_put_operation(op);
316 ret = 0;
317 }
318
319 spin_unlock(&object->lock);
320 _leave(" = %d", ret);
321 return ret;
322}
323
324/*
285 * release an operation 325 * release an operation
286 * - queues pending ops if this is the last in-progress op 326 * - queues pending ops if this is the last in-progress op
287 */ 327 */
@@ -298,6 +338,8 @@ void fscache_put_operation(struct fscache_operation *op)
298 if (!atomic_dec_and_test(&op->usage)) 338 if (!atomic_dec_and_test(&op->usage))
299 return; 339 return;
300 340
341 fscache_set_op_state(op, "Put");
342
301 _debug("PUT OP"); 343 _debug("PUT OP");
302 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) 344 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
303 BUG(); 345 BUG();
@@ -311,6 +353,9 @@ void fscache_put_operation(struct fscache_operation *op)
311 353
312 object = op->object; 354 object = op->object;
313 355
356 if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags))
357 atomic_dec(&object->n_reads);
358
314 /* now... we may get called with the object spinlock held, so we 359 /* now... we may get called with the object spinlock held, so we
315 * complete the cleanup here only if we can immediately acquire the 360 * complete the cleanup here only if we can immediately acquire the
316 * lock, and defer it otherwise */ 361 * lock, and defer it otherwise */
@@ -452,8 +497,27 @@ static void fscache_op_execute(struct slow_work *work)
452 _leave(""); 497 _leave("");
453} 498}
454 499
500/*
501 * describe an operation for slow-work debugging
502 */
503#ifdef CONFIG_SLOW_WORK_PROC
504static void fscache_op_desc(struct slow_work *work, struct seq_file *m)
505{
506 struct fscache_operation *op =
507 container_of(work, struct fscache_operation, slow_work);
508
509 seq_printf(m, "FSC: OBJ%x OP%x: %s/%s fl=%lx",
510 op->object->debug_id, op->debug_id,
511 op->name, op->state, op->flags);
512}
513#endif
514
455const struct slow_work_ops fscache_op_slow_work_ops = { 515const struct slow_work_ops fscache_op_slow_work_ops = {
516 .owner = THIS_MODULE,
456 .get_ref = fscache_op_get_ref, 517 .get_ref = fscache_op_get_ref,
457 .put_ref = fscache_op_put_ref, 518 .put_ref = fscache_op_put_ref,
458 .execute = fscache_op_execute, 519 .execute = fscache_op_execute,
520#ifdef CONFIG_SLOW_WORK_PROC
521 .desc = fscache_op_desc,
522#endif
459}; 523};
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 2568e0eb644f..c598ea4c4e7d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -43,18 +43,102 @@ void __fscache_wait_on_page_write(struct fscache_cookie *cookie, struct page *pa
43EXPORT_SYMBOL(__fscache_wait_on_page_write); 43EXPORT_SYMBOL(__fscache_wait_on_page_write);
44 44
45/* 45/*
46 * note that a page has finished being written to the cache 46 * decide whether a page can be released, possibly by cancelling a store to it
47 * - we're allowed to sleep if __GFP_WAIT is flagged
47 */ 48 */
48static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *page) 49bool __fscache_maybe_release_page(struct fscache_cookie *cookie,
50 struct page *page,
51 gfp_t gfp)
49{ 52{
50 struct page *xpage; 53 struct page *xpage;
54 void *val;
55
56 _enter("%p,%p,%x", cookie, page, gfp);
57
58 rcu_read_lock();
59 val = radix_tree_lookup(&cookie->stores, page->index);
60 if (!val) {
61 rcu_read_unlock();
62 fscache_stat(&fscache_n_store_vmscan_not_storing);
63 __fscache_uncache_page(cookie, page);
64 return true;
65 }
66
67 /* see if the page is actually undergoing storage - if so we can't get
68 * rid of it till the cache has finished with it */
69 if (radix_tree_tag_get(&cookie->stores, page->index,
70 FSCACHE_COOKIE_STORING_TAG)) {
71 rcu_read_unlock();
72 goto page_busy;
73 }
74
75 /* the page is pending storage, so we attempt to cancel the store and
76 * discard the store request so that the page can be reclaimed */
77 spin_lock(&cookie->stores_lock);
78 rcu_read_unlock();
79
80 if (radix_tree_tag_get(&cookie->stores, page->index,
81 FSCACHE_COOKIE_STORING_TAG)) {
82 /* the page started to undergo storage whilst we were looking,
83 * so now we can only wait or return */
84 spin_unlock(&cookie->stores_lock);
85 goto page_busy;
86 }
51 87
52 spin_lock(&cookie->lock);
53 xpage = radix_tree_delete(&cookie->stores, page->index); 88 xpage = radix_tree_delete(&cookie->stores, page->index);
54 spin_unlock(&cookie->lock); 89 spin_unlock(&cookie->stores_lock);
55 ASSERT(xpage != NULL); 90
91 if (xpage) {
92 fscache_stat(&fscache_n_store_vmscan_cancelled);
93 fscache_stat(&fscache_n_store_radix_deletes);
94 ASSERTCMP(xpage, ==, page);
95 } else {
96 fscache_stat(&fscache_n_store_vmscan_gone);
97 }
56 98
57 wake_up_bit(&cookie->flags, 0); 99 wake_up_bit(&cookie->flags, 0);
100 if (xpage)
101 page_cache_release(xpage);
102 __fscache_uncache_page(cookie, page);
103 return true;
104
105page_busy:
106 /* we might want to wait here, but that could deadlock the allocator as
107 * the slow-work threads writing to the cache may all end up sleeping
108 * on memory allocation */
109 fscache_stat(&fscache_n_store_vmscan_busy);
110 return false;
111}
112EXPORT_SYMBOL(__fscache_maybe_release_page);
113
114/*
115 * note that a page has finished being written to the cache
116 */
117static void fscache_end_page_write(struct fscache_object *object,
118 struct page *page)
119{
120 struct fscache_cookie *cookie;
121 struct page *xpage = NULL;
122
123 spin_lock(&object->lock);
124 cookie = object->cookie;
125 if (cookie) {
126 /* delete the page from the tree if it is now no longer
127 * pending */
128 spin_lock(&cookie->stores_lock);
129 radix_tree_tag_clear(&cookie->stores, page->index,
130 FSCACHE_COOKIE_STORING_TAG);
131 if (!radix_tree_tag_get(&cookie->stores, page->index,
132 FSCACHE_COOKIE_PENDING_TAG)) {
133 fscache_stat(&fscache_n_store_radix_deletes);
134 xpage = radix_tree_delete(&cookie->stores, page->index);
135 }
136 spin_unlock(&cookie->stores_lock);
137 wake_up_bit(&cookie->flags, 0);
138 }
139 spin_unlock(&object->lock);
140 if (xpage)
141 page_cache_release(xpage);
58} 142}
59 143
60/* 144/*
@@ -63,14 +147,21 @@ static void fscache_end_page_write(struct fscache_cookie *cookie, struct page *p
63static void fscache_attr_changed_op(struct fscache_operation *op) 147static void fscache_attr_changed_op(struct fscache_operation *op)
64{ 148{
65 struct fscache_object *object = op->object; 149 struct fscache_object *object = op->object;
150 int ret;
66 151
67 _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id); 152 _enter("{OBJ%x OP%x}", object->debug_id, op->debug_id);
68 153
69 fscache_stat(&fscache_n_attr_changed_calls); 154 fscache_stat(&fscache_n_attr_changed_calls);
70 155
71 if (fscache_object_is_active(object) && 156 if (fscache_object_is_active(object)) {
72 object->cache->ops->attr_changed(object) < 0) 157 fscache_set_op_state(op, "CallFS");
73 fscache_abort_object(object); 158 fscache_stat(&fscache_n_cop_attr_changed);
159 ret = object->cache->ops->attr_changed(object);
160 fscache_stat_d(&fscache_n_cop_attr_changed);
161 fscache_set_op_state(op, "Done");
162 if (ret < 0)
163 fscache_abort_object(object);
164 }
74 165
75 _leave(""); 166 _leave("");
76} 167}
@@ -99,6 +190,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
99 fscache_operation_init(op, NULL); 190 fscache_operation_init(op, NULL);
100 fscache_operation_init_slow(op, fscache_attr_changed_op); 191 fscache_operation_init_slow(op, fscache_attr_changed_op);
101 op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE); 192 op->flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_EXCLUSIVE);
193 fscache_set_op_name(op, "Attr");
102 194
103 spin_lock(&cookie->lock); 195 spin_lock(&cookie->lock);
104 196
@@ -184,6 +276,7 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
184 op->start_time = jiffies; 276 op->start_time = jiffies;
185 INIT_WORK(&op->op.fast_work, fscache_retrieval_work); 277 INIT_WORK(&op->op.fast_work, fscache_retrieval_work);
186 INIT_LIST_HEAD(&op->to_do); 278 INIT_LIST_HEAD(&op->to_do);
279 fscache_set_op_name(&op->op, "Retr");
187 return op; 280 return op;
188} 281}
189 282
@@ -221,6 +314,43 @@ static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
221} 314}
222 315
223/* 316/*
317 * wait for an object to become active (or dead)
318 */
319static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
320 struct fscache_retrieval *op,
321 atomic_t *stat_op_waits,
322 atomic_t *stat_object_dead)
323{
324 int ret;
325
326 if (!test_bit(FSCACHE_OP_WAITING, &op->op.flags))
327 goto check_if_dead;
328
329 _debug(">>> WT");
330 fscache_stat(stat_op_waits);
331 if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
332 fscache_wait_bit_interruptible,
333 TASK_INTERRUPTIBLE) < 0) {
334 ret = fscache_cancel_op(&op->op);
335 if (ret == 0)
336 return -ERESTARTSYS;
337
338 /* it's been removed from the pending queue by another party,
339 * so we should get to run shortly */
340 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
341 fscache_wait_bit, TASK_UNINTERRUPTIBLE);
342 }
343 _debug("<<< GO");
344
345check_if_dead:
346 if (unlikely(fscache_object_is_dead(object))) {
347 fscache_stat(stat_object_dead);
348 return -ENOBUFS;
349 }
350 return 0;
351}
352
353/*
224 * read a page from the cache or allocate a block in which to store it 354 * read a page from the cache or allocate a block in which to store it
225 * - we return: 355 * - we return:
226 * -ENOMEM - out of memory, nothing done 356 * -ENOMEM - out of memory, nothing done
@@ -257,6 +387,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
257 _leave(" = -ENOMEM"); 387 _leave(" = -ENOMEM");
258 return -ENOMEM; 388 return -ENOMEM;
259 } 389 }
390 fscache_set_op_name(&op->op, "RetrRA1");
260 391
261 spin_lock(&cookie->lock); 392 spin_lock(&cookie->lock);
262 393
@@ -267,6 +398,9 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
267 398
268 ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); 399 ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP);
269 400
401 atomic_inc(&object->n_reads);
402 set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
403
270 if (fscache_submit_op(object, &op->op) < 0) 404 if (fscache_submit_op(object, &op->op) < 0)
271 goto nobufs_unlock; 405 goto nobufs_unlock;
272 spin_unlock(&cookie->lock); 406 spin_unlock(&cookie->lock);
@@ -279,23 +413,27 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
279 413
280 /* we wait for the operation to become active, and then process it 414 /* we wait for the operation to become active, and then process it
281 * *here*, in this thread, and not in the thread pool */ 415 * *here*, in this thread, and not in the thread pool */
282 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { 416 ret = fscache_wait_for_retrieval_activation(
283 _debug(">>> WT"); 417 object, op,
284 fscache_stat(&fscache_n_retrieval_op_waits); 418 __fscache_stat(&fscache_n_retrieval_op_waits),
285 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, 419 __fscache_stat(&fscache_n_retrievals_object_dead));
286 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 420 if (ret < 0)
287 _debug("<<< GO"); 421 goto error;
288 }
289 422
290 /* ask the cache to honour the operation */ 423 /* ask the cache to honour the operation */
291 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) { 424 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
425 fscache_stat(&fscache_n_cop_allocate_page);
292 ret = object->cache->ops->allocate_page(op, page, gfp); 426 ret = object->cache->ops->allocate_page(op, page, gfp);
427 fscache_stat_d(&fscache_n_cop_allocate_page);
293 if (ret == 0) 428 if (ret == 0)
294 ret = -ENODATA; 429 ret = -ENODATA;
295 } else { 430 } else {
431 fscache_stat(&fscache_n_cop_read_or_alloc_page);
296 ret = object->cache->ops->read_or_alloc_page(op, page, gfp); 432 ret = object->cache->ops->read_or_alloc_page(op, page, gfp);
433 fscache_stat_d(&fscache_n_cop_read_or_alloc_page);
297 } 434 }
298 435
436error:
299 if (ret == -ENOMEM) 437 if (ret == -ENOMEM)
300 fscache_stat(&fscache_n_retrievals_nomem); 438 fscache_stat(&fscache_n_retrievals_nomem);
301 else if (ret == -ERESTARTSYS) 439 else if (ret == -ERESTARTSYS)
@@ -347,7 +485,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
347 void *context, 485 void *context,
348 gfp_t gfp) 486 gfp_t gfp)
349{ 487{
350 fscache_pages_retrieval_func_t func;
351 struct fscache_retrieval *op; 488 struct fscache_retrieval *op;
352 struct fscache_object *object; 489 struct fscache_object *object;
353 int ret; 490 int ret;
@@ -369,6 +506,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
369 op = fscache_alloc_retrieval(mapping, end_io_func, context); 506 op = fscache_alloc_retrieval(mapping, end_io_func, context);
370 if (!op) 507 if (!op)
371 return -ENOMEM; 508 return -ENOMEM;
509 fscache_set_op_name(&op->op, "RetrRAN");
372 510
373 spin_lock(&cookie->lock); 511 spin_lock(&cookie->lock);
374 512
@@ -377,6 +515,9 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
377 object = hlist_entry(cookie->backing_objects.first, 515 object = hlist_entry(cookie->backing_objects.first,
378 struct fscache_object, cookie_link); 516 struct fscache_object, cookie_link);
379 517
518 atomic_inc(&object->n_reads);
519 set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);
520
380 if (fscache_submit_op(object, &op->op) < 0) 521 if (fscache_submit_op(object, &op->op) < 0)
381 goto nobufs_unlock; 522 goto nobufs_unlock;
382 spin_unlock(&cookie->lock); 523 spin_unlock(&cookie->lock);
@@ -389,21 +530,27 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
389 530
390 /* we wait for the operation to become active, and then process it 531 /* we wait for the operation to become active, and then process it
391 * *here*, in this thread, and not in the thread pool */ 532 * *here*, in this thread, and not in the thread pool */
392 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { 533 ret = fscache_wait_for_retrieval_activation(
393 _debug(">>> WT"); 534 object, op,
394 fscache_stat(&fscache_n_retrieval_op_waits); 535 __fscache_stat(&fscache_n_retrieval_op_waits),
395 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, 536 __fscache_stat(&fscache_n_retrievals_object_dead));
396 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 537 if (ret < 0)
397 _debug("<<< GO"); 538 goto error;
398 }
399 539
400 /* ask the cache to honour the operation */ 540 /* ask the cache to honour the operation */
401 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) 541 if (test_bit(FSCACHE_COOKIE_NO_DATA_YET, &object->cookie->flags)) {
402 func = object->cache->ops->allocate_pages; 542 fscache_stat(&fscache_n_cop_allocate_pages);
403 else 543 ret = object->cache->ops->allocate_pages(
404 func = object->cache->ops->read_or_alloc_pages; 544 op, pages, nr_pages, gfp);
405 ret = func(op, pages, nr_pages, gfp); 545 fscache_stat_d(&fscache_n_cop_allocate_pages);
546 } else {
547 fscache_stat(&fscache_n_cop_read_or_alloc_pages);
548 ret = object->cache->ops->read_or_alloc_pages(
549 op, pages, nr_pages, gfp);
550 fscache_stat_d(&fscache_n_cop_read_or_alloc_pages);
551 }
406 552
553error:
407 if (ret == -ENOMEM) 554 if (ret == -ENOMEM)
408 fscache_stat(&fscache_n_retrievals_nomem); 555 fscache_stat(&fscache_n_retrievals_nomem);
409 else if (ret == -ERESTARTSYS) 556 else if (ret == -ERESTARTSYS)
@@ -461,6 +608,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
461 op = fscache_alloc_retrieval(page->mapping, NULL, NULL); 608 op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
462 if (!op) 609 if (!op)
463 return -ENOMEM; 610 return -ENOMEM;
611 fscache_set_op_name(&op->op, "RetrAL1");
464 612
465 spin_lock(&cookie->lock); 613 spin_lock(&cookie->lock);
466 614
@@ -475,18 +623,22 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
475 623
476 fscache_stat(&fscache_n_alloc_ops); 624 fscache_stat(&fscache_n_alloc_ops);
477 625
478 if (test_bit(FSCACHE_OP_WAITING, &op->op.flags)) { 626 ret = fscache_wait_for_retrieval_activation(
479 _debug(">>> WT"); 627 object, op,
480 fscache_stat(&fscache_n_alloc_op_waits); 628 __fscache_stat(&fscache_n_alloc_op_waits),
481 wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING, 629 __fscache_stat(&fscache_n_allocs_object_dead));
482 fscache_wait_bit, TASK_UNINTERRUPTIBLE); 630 if (ret < 0)
483 _debug("<<< GO"); 631 goto error;
484 }
485 632
486 /* ask the cache to honour the operation */ 633 /* ask the cache to honour the operation */
634 fscache_stat(&fscache_n_cop_allocate_page);
487 ret = object->cache->ops->allocate_page(op, page, gfp); 635 ret = object->cache->ops->allocate_page(op, page, gfp);
636 fscache_stat_d(&fscache_n_cop_allocate_page);
488 637
489 if (ret < 0) 638error:
639 if (ret == -ERESTARTSYS)
640 fscache_stat(&fscache_n_allocs_intr);
641 else if (ret < 0)
490 fscache_stat(&fscache_n_allocs_nobufs); 642 fscache_stat(&fscache_n_allocs_nobufs);
491 else 643 else
492 fscache_stat(&fscache_n_allocs_ok); 644 fscache_stat(&fscache_n_allocs_ok);
@@ -521,7 +673,7 @@ static void fscache_write_op(struct fscache_operation *_op)
521 struct fscache_storage *op = 673 struct fscache_storage *op =
522 container_of(_op, struct fscache_storage, op); 674 container_of(_op, struct fscache_storage, op);
523 struct fscache_object *object = op->op.object; 675 struct fscache_object *object = op->op.object;
524 struct fscache_cookie *cookie = object->cookie; 676 struct fscache_cookie *cookie;
525 struct page *page; 677 struct page *page;
526 unsigned n; 678 unsigned n;
527 void *results[1]; 679 void *results[1];
@@ -529,16 +681,19 @@ static void fscache_write_op(struct fscache_operation *_op)
529 681
530 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); 682 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
531 683
532 spin_lock(&cookie->lock); 684 fscache_set_op_state(&op->op, "GetPage");
685
533 spin_lock(&object->lock); 686 spin_lock(&object->lock);
687 cookie = object->cookie;
534 688
535 if (!fscache_object_is_active(object)) { 689 if (!fscache_object_is_active(object) || !cookie) {
536 spin_unlock(&object->lock); 690 spin_unlock(&object->lock);
537 spin_unlock(&cookie->lock);
538 _leave(""); 691 _leave("");
539 return; 692 return;
540 } 693 }
541 694
695 spin_lock(&cookie->stores_lock);
696
542 fscache_stat(&fscache_n_store_calls); 697 fscache_stat(&fscache_n_store_calls);
543 698
544 /* find a page to store */ 699 /* find a page to store */
@@ -549,23 +704,35 @@ static void fscache_write_op(struct fscache_operation *_op)
549 goto superseded; 704 goto superseded;
550 page = results[0]; 705 page = results[0];
551 _debug("gang %d [%lx]", n, page->index); 706 _debug("gang %d [%lx]", n, page->index);
552 if (page->index > op->store_limit) 707 if (page->index > op->store_limit) {
708 fscache_stat(&fscache_n_store_pages_over_limit);
553 goto superseded; 709 goto superseded;
710 }
554 711
555 radix_tree_tag_clear(&cookie->stores, page->index, 712 if (page) {
556 FSCACHE_COOKIE_PENDING_TAG); 713 radix_tree_tag_set(&cookie->stores, page->index,
714 FSCACHE_COOKIE_STORING_TAG);
715 radix_tree_tag_clear(&cookie->stores, page->index,
716 FSCACHE_COOKIE_PENDING_TAG);
717 }
557 718
719 spin_unlock(&cookie->stores_lock);
558 spin_unlock(&object->lock); 720 spin_unlock(&object->lock);
559 spin_unlock(&cookie->lock);
560 721
561 if (page) { 722 if (page) {
723 fscache_set_op_state(&op->op, "Store");
724 fscache_stat(&fscache_n_store_pages);
725 fscache_stat(&fscache_n_cop_write_page);
562 ret = object->cache->ops->write_page(op, page); 726 ret = object->cache->ops->write_page(op, page);
563 fscache_end_page_write(cookie, page); 727 fscache_stat_d(&fscache_n_cop_write_page);
564 page_cache_release(page); 728 fscache_set_op_state(&op->op, "EndWrite");
565 if (ret < 0) 729 fscache_end_page_write(object, page);
730 if (ret < 0) {
731 fscache_set_op_state(&op->op, "Abort");
566 fscache_abort_object(object); 732 fscache_abort_object(object);
567 else 733 } else {
568 fscache_enqueue_operation(&op->op); 734 fscache_enqueue_operation(&op->op);
735 }
569 } 736 }
570 737
571 _leave(""); 738 _leave("");
@@ -575,9 +742,9 @@ superseded:
575 /* this writer is going away and there aren't any more things to 742 /* this writer is going away and there aren't any more things to
576 * write */ 743 * write */
577 _debug("cease"); 744 _debug("cease");
745 spin_unlock(&cookie->stores_lock);
578 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); 746 clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
579 spin_unlock(&object->lock); 747 spin_unlock(&object->lock);
580 spin_unlock(&cookie->lock);
581 _leave(""); 748 _leave("");
582} 749}
583 750
@@ -634,6 +801,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
634 fscache_operation_init(&op->op, fscache_release_write_op); 801 fscache_operation_init(&op->op, fscache_release_write_op);
635 fscache_operation_init_slow(&op->op, fscache_write_op); 802 fscache_operation_init_slow(&op->op, fscache_write_op);
636 op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING); 803 op->op.flags = FSCACHE_OP_SLOW | (1 << FSCACHE_OP_WAITING);
804 fscache_set_op_name(&op->op, "Write1");
637 805
638 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 806 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
639 if (ret < 0) 807 if (ret < 0)
@@ -652,6 +820,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
652 /* add the page to the pending-storage radix tree on the backing 820 /* add the page to the pending-storage radix tree on the backing
653 * object */ 821 * object */
654 spin_lock(&object->lock); 822 spin_lock(&object->lock);
823 spin_lock(&cookie->stores_lock);
655 824
656 _debug("store limit %llx", (unsigned long long) object->store_limit); 825 _debug("store limit %llx", (unsigned long long) object->store_limit);
657 826
@@ -672,6 +841,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
672 if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags)) 841 if (test_and_set_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags))
673 goto already_pending; 842 goto already_pending;
674 843
844 spin_unlock(&cookie->stores_lock);
675 spin_unlock(&object->lock); 845 spin_unlock(&object->lock);
676 846
677 op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); 847 op->op.debug_id = atomic_inc_return(&fscache_op_debug_id);
@@ -693,6 +863,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
693already_queued: 863already_queued:
694 fscache_stat(&fscache_n_stores_again); 864 fscache_stat(&fscache_n_stores_again);
695already_pending: 865already_pending:
866 spin_unlock(&cookie->stores_lock);
696 spin_unlock(&object->lock); 867 spin_unlock(&object->lock);
697 spin_unlock(&cookie->lock); 868 spin_unlock(&cookie->lock);
698 radix_tree_preload_end(); 869 radix_tree_preload_end();
@@ -702,7 +873,9 @@ already_pending:
702 return 0; 873 return 0;
703 874
704submit_failed: 875submit_failed:
876 spin_lock(&cookie->stores_lock);
705 radix_tree_delete(&cookie->stores, page->index); 877 radix_tree_delete(&cookie->stores, page->index);
878 spin_unlock(&cookie->stores_lock);
706 page_cache_release(page); 879 page_cache_release(page);
707 ret = -ENOBUFS; 880 ret = -ENOBUFS;
708 goto nobufs; 881 goto nobufs;
@@ -763,7 +936,9 @@ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page)
763 if (TestClearPageFsCache(page) && 936 if (TestClearPageFsCache(page) &&
764 object->cache->ops->uncache_page) { 937 object->cache->ops->uncache_page) {
765 /* the cache backend releases the cookie lock */ 938 /* the cache backend releases the cookie lock */
939 fscache_stat(&fscache_n_cop_uncache_page);
766 object->cache->ops->uncache_page(object, page); 940 object->cache->ops->uncache_page(object, page);
941 fscache_stat_d(&fscache_n_cop_uncache_page);
767 goto done; 942 goto done;
768 } 943 }
769 944
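The page.c hunks above add a second tag to the cookie's page-store radix tree: a page queued for writing to the cache carries FSCACHE_COOKIE_PENDING_TAG, and when fscache_write_op() picks it up the tag is swapped for FSCACHE_COOKIE_STORING_TAG under the new cookie->stores_lock, so that vmscan can tell a store that can still be cancelled from one already in progress. A minimal sketch of that hand-over, pulled out of the diff purely for illustration (the helper name is hypothetical):

#include <linux/fscache-cache.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>

/* Hypothetical helper showing the tag hand-over performed by
 * fscache_write_op(): the page stays in cookie->stores throughout,
 * only its tag changes from "pending" to "storing". */
static void example_mark_page_storing(struct fscache_cookie *cookie,
				      struct page *page)
{
	spin_lock(&cookie->stores_lock);
	radix_tree_tag_set(&cookie->stores, page->index,
			   FSCACHE_COOKIE_STORING_TAG);
	radix_tree_tag_clear(&cookie->stores, page->index,
			     FSCACHE_COOKIE_PENDING_TAG);
	spin_unlock(&cookie->stores_lock);
}
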
diff --git a/fs/fscache/proc.c b/fs/fscache/proc.c
index beeab44bc31a..1d9e4951a597 100644
--- a/fs/fscache/proc.c
+++ b/fs/fscache/proc.c
@@ -37,10 +37,20 @@ int __init fscache_proc_init(void)
37 goto error_histogram; 37 goto error_histogram;
38#endif 38#endif
39 39
40#ifdef CONFIG_FSCACHE_OBJECT_LIST
41 if (!proc_create("fs/fscache/objects", S_IFREG | 0444, NULL,
42 &fscache_objlist_fops))
43 goto error_objects;
44#endif
45
40 _leave(" = 0"); 46 _leave(" = 0");
41 return 0; 47 return 0;
42 48
49#ifdef CONFIG_FSCACHE_OBJECT_LIST
50error_objects:
51#endif
43#ifdef CONFIG_FSCACHE_HISTOGRAM 52#ifdef CONFIG_FSCACHE_HISTOGRAM
53 remove_proc_entry("fs/fscache/histogram", NULL);
44error_histogram: 54error_histogram:
45#endif 55#endif
46#ifdef CONFIG_FSCACHE_STATS 56#ifdef CONFIG_FSCACHE_STATS
@@ -58,6 +68,9 @@ error_dir:
58 */ 68 */
59void fscache_proc_cleanup(void) 69void fscache_proc_cleanup(void)
60{ 70{
71#ifdef CONFIG_FSCACHE_OBJECT_LIST
72 remove_proc_entry("fs/fscache/objects", NULL);
73#endif
61#ifdef CONFIG_FSCACHE_HISTOGRAM 74#ifdef CONFIG_FSCACHE_HISTOGRAM
62 remove_proc_entry("fs/fscache/histogram", NULL); 75 remove_proc_entry("fs/fscache/histogram", NULL);
63#endif 76#endif
diff --git a/fs/fscache/stats.c b/fs/fscache/stats.c
index 65deb99e756b..46435f3aae68 100644
--- a/fs/fscache/stats.c
+++ b/fs/fscache/stats.c
@@ -25,6 +25,8 @@ atomic_t fscache_n_op_requeue;
25atomic_t fscache_n_op_deferred_release; 25atomic_t fscache_n_op_deferred_release;
26atomic_t fscache_n_op_release; 26atomic_t fscache_n_op_release;
27atomic_t fscache_n_op_gc; 27atomic_t fscache_n_op_gc;
28atomic_t fscache_n_op_cancelled;
29atomic_t fscache_n_op_rejected;
28 30
29atomic_t fscache_n_attr_changed; 31atomic_t fscache_n_attr_changed;
30atomic_t fscache_n_attr_changed_ok; 32atomic_t fscache_n_attr_changed_ok;
@@ -36,6 +38,8 @@ atomic_t fscache_n_allocs;
36atomic_t fscache_n_allocs_ok; 38atomic_t fscache_n_allocs_ok;
37atomic_t fscache_n_allocs_wait; 39atomic_t fscache_n_allocs_wait;
38atomic_t fscache_n_allocs_nobufs; 40atomic_t fscache_n_allocs_nobufs;
41atomic_t fscache_n_allocs_intr;
42atomic_t fscache_n_allocs_object_dead;
39atomic_t fscache_n_alloc_ops; 43atomic_t fscache_n_alloc_ops;
40atomic_t fscache_n_alloc_op_waits; 44atomic_t fscache_n_alloc_op_waits;
41 45
@@ -46,6 +50,7 @@ atomic_t fscache_n_retrievals_nodata;
46atomic_t fscache_n_retrievals_nobufs; 50atomic_t fscache_n_retrievals_nobufs;
47atomic_t fscache_n_retrievals_intr; 51atomic_t fscache_n_retrievals_intr;
48atomic_t fscache_n_retrievals_nomem; 52atomic_t fscache_n_retrievals_nomem;
53atomic_t fscache_n_retrievals_object_dead;
49atomic_t fscache_n_retrieval_ops; 54atomic_t fscache_n_retrieval_ops;
50atomic_t fscache_n_retrieval_op_waits; 55atomic_t fscache_n_retrieval_op_waits;
51 56
@@ -56,6 +61,14 @@ atomic_t fscache_n_stores_nobufs;
56atomic_t fscache_n_stores_oom; 61atomic_t fscache_n_stores_oom;
57atomic_t fscache_n_store_ops; 62atomic_t fscache_n_store_ops;
58atomic_t fscache_n_store_calls; 63atomic_t fscache_n_store_calls;
64atomic_t fscache_n_store_pages;
65atomic_t fscache_n_store_radix_deletes;
66atomic_t fscache_n_store_pages_over_limit;
67
68atomic_t fscache_n_store_vmscan_not_storing;
69atomic_t fscache_n_store_vmscan_gone;
70atomic_t fscache_n_store_vmscan_busy;
71atomic_t fscache_n_store_vmscan_cancelled;
59 72
60atomic_t fscache_n_marks; 73atomic_t fscache_n_marks;
61atomic_t fscache_n_uncaches; 74atomic_t fscache_n_uncaches;
@@ -74,6 +87,7 @@ atomic_t fscache_n_updates_run;
74atomic_t fscache_n_relinquishes; 87atomic_t fscache_n_relinquishes;
75atomic_t fscache_n_relinquishes_null; 88atomic_t fscache_n_relinquishes_null;
76atomic_t fscache_n_relinquishes_waitcrt; 89atomic_t fscache_n_relinquishes_waitcrt;
90atomic_t fscache_n_relinquishes_retire;
77 91
78atomic_t fscache_n_cookie_index; 92atomic_t fscache_n_cookie_index;
79atomic_t fscache_n_cookie_data; 93atomic_t fscache_n_cookie_data;
@@ -84,6 +98,7 @@ atomic_t fscache_n_object_no_alloc;
84atomic_t fscache_n_object_lookups; 98atomic_t fscache_n_object_lookups;
85atomic_t fscache_n_object_lookups_negative; 99atomic_t fscache_n_object_lookups_negative;
86atomic_t fscache_n_object_lookups_positive; 100atomic_t fscache_n_object_lookups_positive;
101atomic_t fscache_n_object_lookups_timed_out;
87atomic_t fscache_n_object_created; 102atomic_t fscache_n_object_created;
88atomic_t fscache_n_object_avail; 103atomic_t fscache_n_object_avail;
89atomic_t fscache_n_object_dead; 104atomic_t fscache_n_object_dead;
@@ -93,6 +108,23 @@ atomic_t fscache_n_checkaux_okay;
93atomic_t fscache_n_checkaux_update; 108atomic_t fscache_n_checkaux_update;
94atomic_t fscache_n_checkaux_obsolete; 109atomic_t fscache_n_checkaux_obsolete;
95 110
111atomic_t fscache_n_cop_alloc_object;
112atomic_t fscache_n_cop_lookup_object;
113atomic_t fscache_n_cop_lookup_complete;
114atomic_t fscache_n_cop_grab_object;
115atomic_t fscache_n_cop_update_object;
116atomic_t fscache_n_cop_drop_object;
117atomic_t fscache_n_cop_put_object;
118atomic_t fscache_n_cop_sync_cache;
119atomic_t fscache_n_cop_attr_changed;
120atomic_t fscache_n_cop_read_or_alloc_page;
121atomic_t fscache_n_cop_read_or_alloc_pages;
122atomic_t fscache_n_cop_allocate_page;
123atomic_t fscache_n_cop_allocate_pages;
124atomic_t fscache_n_cop_write_page;
125atomic_t fscache_n_cop_uncache_page;
126atomic_t fscache_n_cop_dissociate_pages;
127
96/* 128/*
97 * display the general statistics 129 * display the general statistics
98 */ 130 */
@@ -129,10 +161,11 @@ static int fscache_stats_show(struct seq_file *m, void *v)
129 atomic_read(&fscache_n_acquires_nobufs), 161 atomic_read(&fscache_n_acquires_nobufs),
130 atomic_read(&fscache_n_acquires_oom)); 162 atomic_read(&fscache_n_acquires_oom));
131 163
132 seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u\n", 164 seq_printf(m, "Lookups: n=%u neg=%u pos=%u crt=%u tmo=%u\n",
133 atomic_read(&fscache_n_object_lookups), 165 atomic_read(&fscache_n_object_lookups),
134 atomic_read(&fscache_n_object_lookups_negative), 166 atomic_read(&fscache_n_object_lookups_negative),
135 atomic_read(&fscache_n_object_lookups_positive), 167 atomic_read(&fscache_n_object_lookups_positive),
168 atomic_read(&fscache_n_object_lookups_timed_out),
136 atomic_read(&fscache_n_object_created)); 169 atomic_read(&fscache_n_object_created));
137 170
138 seq_printf(m, "Updates: n=%u nul=%u run=%u\n", 171 seq_printf(m, "Updates: n=%u nul=%u run=%u\n",
@@ -140,10 +173,11 @@ static int fscache_stats_show(struct seq_file *m, void *v)
140 atomic_read(&fscache_n_updates_null), 173 atomic_read(&fscache_n_updates_null),
141 atomic_read(&fscache_n_updates_run)); 174 atomic_read(&fscache_n_updates_run));
142 175
143 seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u\n", 176 seq_printf(m, "Relinqs: n=%u nul=%u wcr=%u rtr=%u\n",
144 atomic_read(&fscache_n_relinquishes), 177 atomic_read(&fscache_n_relinquishes),
145 atomic_read(&fscache_n_relinquishes_null), 178 atomic_read(&fscache_n_relinquishes_null),
146 atomic_read(&fscache_n_relinquishes_waitcrt)); 179 atomic_read(&fscache_n_relinquishes_waitcrt),
180 atomic_read(&fscache_n_relinquishes_retire));
147 181
148 seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n", 182 seq_printf(m, "AttrChg: n=%u ok=%u nbf=%u oom=%u run=%u\n",
149 atomic_read(&fscache_n_attr_changed), 183 atomic_read(&fscache_n_attr_changed),
@@ -152,14 +186,16 @@ static int fscache_stats_show(struct seq_file *m, void *v)
152 atomic_read(&fscache_n_attr_changed_nomem), 186 atomic_read(&fscache_n_attr_changed_nomem),
153 atomic_read(&fscache_n_attr_changed_calls)); 187 atomic_read(&fscache_n_attr_changed_calls));
154 188
155 seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u\n", 189 seq_printf(m, "Allocs : n=%u ok=%u wt=%u nbf=%u int=%u\n",
156 atomic_read(&fscache_n_allocs), 190 atomic_read(&fscache_n_allocs),
157 atomic_read(&fscache_n_allocs_ok), 191 atomic_read(&fscache_n_allocs_ok),
158 atomic_read(&fscache_n_allocs_wait), 192 atomic_read(&fscache_n_allocs_wait),
159 atomic_read(&fscache_n_allocs_nobufs)); 193 atomic_read(&fscache_n_allocs_nobufs),
160 seq_printf(m, "Allocs : ops=%u owt=%u\n", 194 atomic_read(&fscache_n_allocs_intr));
195 seq_printf(m, "Allocs : ops=%u owt=%u abt=%u\n",
161 atomic_read(&fscache_n_alloc_ops), 196 atomic_read(&fscache_n_alloc_ops),
162 atomic_read(&fscache_n_alloc_op_waits)); 197 atomic_read(&fscache_n_alloc_op_waits),
198 atomic_read(&fscache_n_allocs_object_dead));
163 199
164 seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u" 200 seq_printf(m, "Retrvls: n=%u ok=%u wt=%u nod=%u nbf=%u"
165 " int=%u oom=%u\n", 201 " int=%u oom=%u\n",
@@ -170,9 +206,10 @@ static int fscache_stats_show(struct seq_file *m, void *v)
170 atomic_read(&fscache_n_retrievals_nobufs), 206 atomic_read(&fscache_n_retrievals_nobufs),
171 atomic_read(&fscache_n_retrievals_intr), 207 atomic_read(&fscache_n_retrievals_intr),
172 atomic_read(&fscache_n_retrievals_nomem)); 208 atomic_read(&fscache_n_retrievals_nomem));
173 seq_printf(m, "Retrvls: ops=%u owt=%u\n", 209 seq_printf(m, "Retrvls: ops=%u owt=%u abt=%u\n",
174 atomic_read(&fscache_n_retrieval_ops), 210 atomic_read(&fscache_n_retrieval_ops),
175 atomic_read(&fscache_n_retrieval_op_waits)); 211 atomic_read(&fscache_n_retrieval_op_waits),
212 atomic_read(&fscache_n_retrievals_object_dead));
176 213
177 seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n", 214 seq_printf(m, "Stores : n=%u ok=%u agn=%u nbf=%u oom=%u\n",
178 atomic_read(&fscache_n_stores), 215 atomic_read(&fscache_n_stores),
@@ -180,18 +217,49 @@ static int fscache_stats_show(struct seq_file *m, void *v)
180 atomic_read(&fscache_n_stores_again), 217 atomic_read(&fscache_n_stores_again),
181 atomic_read(&fscache_n_stores_nobufs), 218 atomic_read(&fscache_n_stores_nobufs),
182 atomic_read(&fscache_n_stores_oom)); 219 atomic_read(&fscache_n_stores_oom));
183 seq_printf(m, "Stores : ops=%u run=%u\n", 220 seq_printf(m, "Stores : ops=%u run=%u pgs=%u rxd=%u olm=%u\n",
184 atomic_read(&fscache_n_store_ops), 221 atomic_read(&fscache_n_store_ops),
185 atomic_read(&fscache_n_store_calls)); 222 atomic_read(&fscache_n_store_calls),
223 atomic_read(&fscache_n_store_pages),
224 atomic_read(&fscache_n_store_radix_deletes),
225 atomic_read(&fscache_n_store_pages_over_limit));
186 226
187 seq_printf(m, "Ops : pend=%u run=%u enq=%u\n", 227 seq_printf(m, "VmScan : nos=%u gon=%u bsy=%u can=%u\n",
228 atomic_read(&fscache_n_store_vmscan_not_storing),
229 atomic_read(&fscache_n_store_vmscan_gone),
230 atomic_read(&fscache_n_store_vmscan_busy),
231 atomic_read(&fscache_n_store_vmscan_cancelled));
232
233 seq_printf(m, "Ops : pend=%u run=%u enq=%u can=%u rej=%u\n",
188 atomic_read(&fscache_n_op_pend), 234 atomic_read(&fscache_n_op_pend),
189 atomic_read(&fscache_n_op_run), 235 atomic_read(&fscache_n_op_run),
190 atomic_read(&fscache_n_op_enqueue)); 236 atomic_read(&fscache_n_op_enqueue),
237 atomic_read(&fscache_n_op_cancelled),
238 atomic_read(&fscache_n_op_rejected));
191 seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n", 239 seq_printf(m, "Ops : dfr=%u rel=%u gc=%u\n",
192 atomic_read(&fscache_n_op_deferred_release), 240 atomic_read(&fscache_n_op_deferred_release),
193 atomic_read(&fscache_n_op_release), 241 atomic_read(&fscache_n_op_release),
194 atomic_read(&fscache_n_op_gc)); 242 atomic_read(&fscache_n_op_gc));
243
244 seq_printf(m, "CacheOp: alo=%d luo=%d luc=%d gro=%d\n",
245 atomic_read(&fscache_n_cop_alloc_object),
246 atomic_read(&fscache_n_cop_lookup_object),
247 atomic_read(&fscache_n_cop_lookup_complete),
248 atomic_read(&fscache_n_cop_grab_object));
249 seq_printf(m, "CacheOp: upo=%d dro=%d pto=%d atc=%d syn=%d\n",
250 atomic_read(&fscache_n_cop_update_object),
251 atomic_read(&fscache_n_cop_drop_object),
252 atomic_read(&fscache_n_cop_put_object),
253 atomic_read(&fscache_n_cop_attr_changed),
254 atomic_read(&fscache_n_cop_sync_cache));
255 seq_printf(m, "CacheOp: rap=%d ras=%d alp=%d als=%d wrp=%d ucp=%d dsp=%d\n",
256 atomic_read(&fscache_n_cop_read_or_alloc_page),
257 atomic_read(&fscache_n_cop_read_or_alloc_pages),
258 atomic_read(&fscache_n_cop_allocate_page),
259 atomic_read(&fscache_n_cop_allocate_pages),
260 atomic_read(&fscache_n_cop_write_page),
261 atomic_read(&fscache_n_cop_uncache_page),
262 atomic_read(&fscache_n_cop_dissociate_pages));
195 return 0; 263 return 0;
196} 264}
197 265
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index eacd78a5d082..5b31f7741a8f 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -114,7 +114,7 @@ static int __init init_gfs2_fs(void)
114 if (error) 114 if (error)
115 goto fail_unregister; 115 goto fail_unregister;
116 116
117 error = slow_work_register_user(); 117 error = slow_work_register_user(THIS_MODULE);
118 if (error) 118 if (error)
119 goto fail_slow; 119 goto fail_slow;
120 120
@@ -163,7 +163,7 @@ static void __exit exit_gfs2_fs(void)
163 gfs2_unregister_debugfs(); 163 gfs2_unregister_debugfs();
164 unregister_filesystem(&gfs2_fs_type); 164 unregister_filesystem(&gfs2_fs_type);
165 unregister_filesystem(&gfs2meta_fs_type); 165 unregister_filesystem(&gfs2meta_fs_type);
166 slow_work_unregister_user(); 166 slow_work_unregister_user(THIS_MODULE);
167 167
168 kmem_cache_destroy(gfs2_quotad_cachep); 168 kmem_cache_destroy(gfs2_quotad_cachep);
169 kmem_cache_destroy(gfs2_rgrpd_cachep); 169 kmem_cache_destroy(gfs2_rgrpd_cachep);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 59d2695509d3..09fa31965576 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -7,6 +7,7 @@
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/module.h>
10#include <linux/slab.h> 11#include <linux/slab.h>
11#include <linux/spinlock.h> 12#include <linux/spinlock.h>
12#include <linux/completion.h> 13#include <linux/completion.h>
@@ -593,6 +594,7 @@ fail:
593} 594}
594 595
595struct slow_work_ops gfs2_recover_ops = { 596struct slow_work_ops gfs2_recover_ops = {
597 .owner = THIS_MODULE,
596 .get_ref = gfs2_recover_get_ref, 598 .get_ref = gfs2_recover_get_ref,
597 .put_ref = gfs2_recover_put_ref, 599 .put_ref = gfs2_recover_put_ref,
598 .execute = gfs2_recover_work, 600 .execute = gfs2_recover_work,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 70fad69eb959..fa588006588d 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -359,17 +359,13 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
359 359
360 BUG_ON(!cookie); 360 BUG_ON(!cookie);
361 361
362 if (fscache_check_page_write(cookie, page)) {
363 if (!(gfp & __GFP_WAIT))
364 return 0;
365 fscache_wait_on_page_write(cookie, page);
366 }
367
368 if (PageFsCache(page)) { 362 if (PageFsCache(page)) {
369 dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", 363 dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
370 cookie, page, nfsi); 364 cookie, page, nfsi);
371 365
372 fscache_uncache_page(cookie, page); 366 if (!fscache_maybe_release_page(cookie, page, gfp))
367 return 0;
368
373 nfs_add_fscache_stats(page->mapping->host, 369 nfs_add_fscache_stats(page->mapping->host,
374 NFSIOS_FSCACHE_PAGES_UNCACHED, 1); 370 NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
375 } 371 }
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 84d3532dd3ea..7be0c6fbe880 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -91,6 +91,8 @@ struct fscache_operation {
91#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */ 91#define FSCACHE_OP_WAITING 4 /* cleared when op is woken */
92#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */ 92#define FSCACHE_OP_EXCLUSIVE 5 /* exclusive op, other ops must wait */
93#define FSCACHE_OP_DEAD 6 /* op is now dead */ 93#define FSCACHE_OP_DEAD 6 /* op is now dead */
94#define FSCACHE_OP_DEC_READ_CNT 7 /* decrement object->n_reads on destruction */
95#define FSCACHE_OP_KEEP_FLAGS 0xc0 /* flags to keep when repurposing an op */
94 96
95 atomic_t usage; 97 atomic_t usage;
96 unsigned debug_id; /* debugging ID */ 98 unsigned debug_id; /* debugging ID */
@@ -102,6 +104,16 @@ struct fscache_operation {
102 104
103 /* operation releaser */ 105 /* operation releaser */
104 fscache_operation_release_t release; 106 fscache_operation_release_t release;
107
108#ifdef CONFIG_SLOW_WORK_PROC
109 const char *name; /* operation name */
110 const char *state; /* operation state */
111#define fscache_set_op_name(OP, N) do { (OP)->name = (N); } while(0)
112#define fscache_set_op_state(OP, S) do { (OP)->state = (S); } while(0)
113#else
114#define fscache_set_op_name(OP, N) do { } while(0)
115#define fscache_set_op_state(OP, S) do { } while(0)
116#endif
105}; 117};
106 118
107extern atomic_t fscache_op_debug_id; 119extern atomic_t fscache_op_debug_id;
@@ -125,6 +137,7 @@ static inline void fscache_operation_init(struct fscache_operation *op,
125 op->debug_id = atomic_inc_return(&fscache_op_debug_id); 137 op->debug_id = atomic_inc_return(&fscache_op_debug_id);
126 op->release = release; 138 op->release = release;
127 INIT_LIST_HEAD(&op->pend_link); 139 INIT_LIST_HEAD(&op->pend_link);
140 fscache_set_op_state(op, "Init");
128} 141}
129 142
130/** 143/**
@@ -221,8 +234,10 @@ struct fscache_cache_ops {
221 struct fscache_object *(*alloc_object)(struct fscache_cache *cache, 234 struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
222 struct fscache_cookie *cookie); 235 struct fscache_cookie *cookie);
223 236
224 /* look up the object for a cookie */ 237 /* look up the object for a cookie
225 void (*lookup_object)(struct fscache_object *object); 238 * - return -ETIMEDOUT to be requeued
239 */
240 int (*lookup_object)(struct fscache_object *object);
226 241
227 /* finished looking up */ 242 /* finished looking up */
228 void (*lookup_complete)(struct fscache_object *object); 243 void (*lookup_complete)(struct fscache_object *object);
@@ -297,12 +312,14 @@ struct fscache_cookie {
297 atomic_t usage; /* number of users of this cookie */ 312 atomic_t usage; /* number of users of this cookie */
298 atomic_t n_children; /* number of children of this cookie */ 313 atomic_t n_children; /* number of children of this cookie */
299 spinlock_t lock; 314 spinlock_t lock;
315 spinlock_t stores_lock; /* lock on page store tree */
300 struct hlist_head backing_objects; /* object(s) backing this file/index */ 316 struct hlist_head backing_objects; /* object(s) backing this file/index */
301 const struct fscache_cookie_def *def; /* definition */ 317 const struct fscache_cookie_def *def; /* definition */
302 struct fscache_cookie *parent; /* parent of this entry */ 318 struct fscache_cookie *parent; /* parent of this entry */
303 void *netfs_data; /* back pointer to netfs */ 319 void *netfs_data; /* back pointer to netfs */
304 struct radix_tree_root stores; /* pages to be stored on this cookie */ 320 struct radix_tree_root stores; /* pages to be stored on this cookie */
305#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */ 321#define FSCACHE_COOKIE_PENDING_TAG 0 /* pages tag: pending write to cache */
322#define FSCACHE_COOKIE_STORING_TAG 1 /* pages tag: writing to cache */
306 323
307 unsigned long flags; 324 unsigned long flags;
308#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */ 325#define FSCACHE_COOKIE_LOOKING_UP 0 /* T if non-index cookie being looked up still */
@@ -337,6 +354,7 @@ struct fscache_object {
337 FSCACHE_OBJECT_RECYCLING, /* retiring object */ 354 FSCACHE_OBJECT_RECYCLING, /* retiring object */
338 FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */ 355 FSCACHE_OBJECT_WITHDRAWING, /* withdrawing object */
339 FSCACHE_OBJECT_DEAD, /* object is now dead */ 356 FSCACHE_OBJECT_DEAD, /* object is now dead */
357 FSCACHE_OBJECT__NSTATES
340 } state; 358 } state;
341 359
342 int debug_id; /* debugging ID */ 360 int debug_id; /* debugging ID */
@@ -345,6 +363,7 @@ struct fscache_object {
345 int n_obj_ops; /* number of object ops outstanding on object */ 363 int n_obj_ops; /* number of object ops outstanding on object */
346 int n_in_progress; /* number of ops in progress */ 364 int n_in_progress; /* number of ops in progress */
347 int n_exclusive; /* number of exclusive ops queued */ 365 int n_exclusive; /* number of exclusive ops queued */
366 atomic_t n_reads; /* number of read ops in progress */
348 spinlock_t lock; /* state and operations lock */ 367 spinlock_t lock; /* state and operations lock */
349 368
350 unsigned long lookup_jif; /* time at which lookup started */ 369 unsigned long lookup_jif; /* time at which lookup started */
@@ -358,6 +377,7 @@ struct fscache_object {
358#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */ 377#define FSCACHE_OBJECT_EV_RELEASE 4 /* T if netfs requested object release */
359#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */ 378#define FSCACHE_OBJECT_EV_RETIRE 5 /* T if netfs requested object retirement */
360#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */ 379#define FSCACHE_OBJECT_EV_WITHDRAW 6 /* T if cache requested object withdrawal */
380#define FSCACHE_OBJECT_EVENTS_MASK 0x7f /* mask of all events*/
361 381
362 unsigned long flags; 382 unsigned long flags;
363#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */ 383#define FSCACHE_OBJECT_LOCK 0 /* T if object is busy being processed */
@@ -373,7 +393,11 @@ struct fscache_object {
373 struct list_head dependents; /* FIFO of dependent objects */ 393 struct list_head dependents; /* FIFO of dependent objects */
374 struct list_head dep_link; /* link in parent's dependents list */ 394 struct list_head dep_link; /* link in parent's dependents list */
375 struct list_head pending_ops; /* unstarted operations on this object */ 395 struct list_head pending_ops; /* unstarted operations on this object */
396#ifdef CONFIG_FSCACHE_OBJECT_LIST
397 struct rb_node objlist_link; /* link in global object list */
398#endif
376 pgoff_t store_limit; /* current storage limit */ 399 pgoff_t store_limit; /* current storage limit */
400 loff_t store_limit_l; /* current storage limit */
377}; 401};
378 402
379extern const char *fscache_object_states[]; 403extern const char *fscache_object_states[];
@@ -383,6 +407,10 @@ extern const char *fscache_object_states[];
383 (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \ 407 (obj)->state >= FSCACHE_OBJECT_AVAILABLE && \
384 (obj)->state < FSCACHE_OBJECT_DYING) 408 (obj)->state < FSCACHE_OBJECT_DYING)
385 409
410#define fscache_object_is_dead(obj) \
411 (test_bit(FSCACHE_IOERROR, &(obj)->cache->flags) && \
412 (obj)->state >= FSCACHE_OBJECT_DYING)
413
386extern const struct slow_work_ops fscache_object_slow_work_ops; 414extern const struct slow_work_ops fscache_object_slow_work_ops;
387 415
388/** 416/**
@@ -414,6 +442,7 @@ void fscache_object_init(struct fscache_object *object,
414 object->events = object->event_mask = 0; 442 object->events = object->event_mask = 0;
415 object->flags = 0; 443 object->flags = 0;
416 object->store_limit = 0; 444 object->store_limit = 0;
445 object->store_limit_l = 0;
417 object->cache = cache; 446 object->cache = cache;
418 object->cookie = cookie; 447 object->cookie = cookie;
419 object->parent = NULL; 448 object->parent = NULL;
@@ -422,6 +451,12 @@ void fscache_object_init(struct fscache_object *object,
422extern void fscache_object_lookup_negative(struct fscache_object *object); 451extern void fscache_object_lookup_negative(struct fscache_object *object);
423extern void fscache_obtained_object(struct fscache_object *object); 452extern void fscache_obtained_object(struct fscache_object *object);
424 453
454#ifdef CONFIG_FSCACHE_OBJECT_LIST
455extern void fscache_object_destroy(struct fscache_object *object);
456#else
457#define fscache_object_destroy(object) do {} while(0)
458#endif
459
425/** 460/**
426 * fscache_object_destroyed - Note destruction of an object in a cache 461 * fscache_object_destroyed - Note destruction of an object in a cache
427 * @cache: The cache from which the object came 462 * @cache: The cache from which the object came
@@ -460,6 +495,7 @@ static inline void fscache_object_lookup_error(struct fscache_object *object)
460static inline 495static inline
461void fscache_set_store_limit(struct fscache_object *object, loff_t i_size) 496void fscache_set_store_limit(struct fscache_object *object, loff_t i_size)
462{ 497{
498 object->store_limit_l = i_size;
463 object->store_limit = i_size >> PAGE_SHIFT; 499 object->store_limit = i_size >> PAGE_SHIFT;
464 if (i_size & ~PAGE_MASK) 500 if (i_size & ~PAGE_MASK)
465 object->store_limit++; 501 object->store_limit++;
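Note the type change buried in the fscache-cache.h hunk above: the backend's lookup_object() op now returns int instead of void, with -ETIMEDOUT meaning "requeue me" rather than tying up the slow-work thread. A rough sketch of a backend honouring that convention; all my_cache_* names are hypothetical and not CacheFiles code:

#include <linux/errno.h>
#include <linux/fscache-cache.h>

struct my_cache_object {
	struct fscache_object	fscache;	/* must be first for container_of() */
	/* ... backend-private state ... */
};

/* assumed backend helpers, not part of the FS-Cache API */
extern bool my_cache_lookup_would_block(struct my_cache_object *obj);
extern int my_cache_do_lookup(struct my_cache_object *obj);

static int my_cache_lookup_object(struct fscache_object *_object)
{
	struct my_cache_object *obj =
		container_of(_object, struct my_cache_object, fscache);

	/* if the on-disk lookup cannot complete promptly, give the thread
	 * back and ask to be requeued instead of blocking it */
	if (my_cache_lookup_would_block(obj))
		return -ETIMEDOUT;

	return my_cache_do_lookup(obj);		/* 0 on success, -ve on error */
}
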
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 6d8ee466e0a0..595ce49288b7 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -202,6 +202,8 @@ extern int __fscache_write_page(struct fscache_cookie *, struct page *, gfp_t);
202extern void __fscache_uncache_page(struct fscache_cookie *, struct page *); 202extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
203extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *); 203extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
204extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *); 204extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
205extern bool __fscache_maybe_release_page(struct fscache_cookie *, struct page *,
206 gfp_t);
205 207
206/** 208/**
207 * fscache_register_netfs - Register a filesystem as desiring caching services 209 * fscache_register_netfs - Register a filesystem as desiring caching services
@@ -615,4 +617,29 @@ void fscache_wait_on_page_write(struct fscache_cookie *cookie,
615 __fscache_wait_on_page_write(cookie, page); 617 __fscache_wait_on_page_write(cookie, page);
616} 618}
617 619
620/**
621 * fscache_maybe_release_page - Consider releasing a page, cancelling a store
622 * @cookie: The cookie representing the cache object
623 * @page: The netfs page that is being cached.
624 * @gfp: The gfp flags passed to releasepage()
625 *
626 * Consider releasing a page for the vmscan algorithm, on behalf of the netfs's
627 * releasepage() call. A storage request on the page may be cancelled if it is
628 * not currently being processed.
629 *
630 * The function returns true if the page no longer has a storage request on it,
631 * and false if a storage request is left in place. If true is returned, the
632 * page will have been passed to fscache_uncache_page(). If false is returned
633 * the page cannot be freed yet.
634 */
635static inline
636bool fscache_maybe_release_page(struct fscache_cookie *cookie,
637 struct page *page,
638 gfp_t gfp)
639{
640 if (fscache_cookie_valid(cookie) && PageFsCache(page))
641 return __fscache_maybe_release_page(cookie, page, gfp);
642 return false;
643}
644
618#endif /* _LINUX_FSCACHE_H */ 645#endif /* _LINUX_FSCACHE_H */
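As the nfs/fscache.c hunk earlier in this diff shows, the new fscache_maybe_release_page() is meant to replace the open-coded fscache_check_page_write()/fscache_wait_on_page_write() plus fscache_uncache_page() sequence in a netfs's releasepage(). A minimal sketch of such a handler, assuming a hypothetical netfs with a my_netfs_cookie() accessor (not part of the FS-Cache API):

#include <linux/fscache.h>
#include <linux/pagemap.h>

/* assumed accessor mapping an inode to its FS-Cache cookie */
extern struct fscache_cookie *my_netfs_cookie(struct inode *inode);

/* Sketch of a netfs ->releasepage(): returns 1 if the page may be freed,
 * 0 if FS-Cache still needs it. */
static int my_netfs_release_page(struct page *page, gfp_t gfp)
{
	struct fscache_cookie *cookie = my_netfs_cookie(page->mapping->host);

	if (PageFsCache(page)) {
		/* a merely-pending store is cancelled and the page uncached;
		 * otherwise the outcome depends on gfp */
		if (!fscache_maybe_release_page(cookie, page, gfp))
			return 0;
	}
	return 1;
}
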
diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h
index b65c8881f07a..5035a2691739 100644
--- a/include/linux/slow-work.h
+++ b/include/linux/slow-work.h
@@ -17,13 +17,20 @@
17#ifdef CONFIG_SLOW_WORK 17#ifdef CONFIG_SLOW_WORK
18 18
19#include <linux/sysctl.h> 19#include <linux/sysctl.h>
20#include <linux/timer.h>
20 21
21struct slow_work; 22struct slow_work;
23#ifdef CONFIG_SLOW_WORK_PROC
24struct seq_file;
25#endif
22 26
23/* 27/*
24 * The operations used to support slow work items 28 * The operations used to support slow work items
25 */ 29 */
26struct slow_work_ops { 30struct slow_work_ops {
31 /* owner */
32 struct module *owner;
33
27 /* get a ref on a work item 34 /* get a ref on a work item
28 * - return 0 if successful, -ve if not 35 * - return 0 if successful, -ve if not
29 */ 36 */
@@ -34,6 +41,11 @@ struct slow_work_ops {
34 41
35 /* execute a work item */ 42 /* execute a work item */
36 void (*execute)(struct slow_work *work); 43 void (*execute)(struct slow_work *work);
44
45#ifdef CONFIG_SLOW_WORK_PROC
46 /* describe a work item for /proc */
47 void (*desc)(struct slow_work *work, struct seq_file *m);
48#endif
37}; 49};
38 50
39/* 51/*
@@ -42,13 +54,24 @@ struct slow_work_ops {
42 * queued 54 * queued
43 */ 55 */
44struct slow_work { 56struct slow_work {
57 struct module *owner; /* the owning module */
45 unsigned long flags; 58 unsigned long flags;
46#define SLOW_WORK_PENDING 0 /* item pending (further) execution */ 59#define SLOW_WORK_PENDING 0 /* item pending (further) execution */
47#define SLOW_WORK_EXECUTING 1 /* item currently executing */ 60#define SLOW_WORK_EXECUTING 1 /* item currently executing */
48#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */ 61#define SLOW_WORK_ENQ_DEFERRED 2 /* item enqueue deferred */
49#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */ 62#define SLOW_WORK_VERY_SLOW 3 /* item is very slow */
63#define SLOW_WORK_CANCELLING 4 /* item is being cancelled, don't enqueue */
64#define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */
50 const struct slow_work_ops *ops; /* operations table for this item */ 65 const struct slow_work_ops *ops; /* operations table for this item */
51 struct list_head link; /* link in queue */ 66 struct list_head link; /* link in queue */
67#ifdef CONFIG_SLOW_WORK_PROC
68 struct timespec mark; /* jiffies at which queued or exec begun */
69#endif
70};
71
72struct delayed_slow_work {
73 struct slow_work work;
74 struct timer_list timer;
52}; 75};
53 76
54/** 77/**
@@ -67,6 +90,20 @@ static inline void slow_work_init(struct slow_work *work,
67} 90}
68 91
69/** 92/**
 93 * delayed_slow_work_init - Initialise a delayed slow work item
 94 * @dwork: The delayed work item to initialise
95 * @ops: The operations to use to handle the slow work item
96 *
97 * Initialise a delayed slow work item.
98 */
99static inline void delayed_slow_work_init(struct delayed_slow_work *dwork,
100 const struct slow_work_ops *ops)
101{
102 init_timer(&dwork->timer);
103 slow_work_init(&dwork->work, ops);
104}
105
106/**
70 * vslow_work_init - Initialise a very slow work item 107 * vslow_work_init - Initialise a very slow work item
71 * @work: The work item to initialise 108 * @work: The work item to initialise
72 * @ops: The operations to use to handle the slow work item 109 * @ops: The operations to use to handle the slow work item
@@ -83,9 +120,40 @@ static inline void vslow_work_init(struct slow_work *work,
83 INIT_LIST_HEAD(&work->link); 120 INIT_LIST_HEAD(&work->link);
84} 121}
85 122
123/**
124 * slow_work_is_queued - Determine if a slow work item is on the work queue
125 * @work: The work item to test
126 *
127 * Determine if the specified slow-work item is on the work queue. This
128 * returns true if it is actually on the queue.
129 *
130 * If the item is executing and has been marked for requeue when execution
131 * finishes, then false will be returned.
132 *
133 * Anyone wishing to wait for completion of execution can wait on the
134 * SLOW_WORK_EXECUTING bit.
135 */
136static inline bool slow_work_is_queued(struct slow_work *work)
137{
138 unsigned long flags = work->flags;
139 return (flags & (1 << SLOW_WORK_PENDING)) && !(flags & (1 << SLOW_WORK_EXECUTING));
140}
141
86extern int slow_work_enqueue(struct slow_work *work); 142extern int slow_work_enqueue(struct slow_work *work);
87extern int slow_work_register_user(void); 143extern void slow_work_cancel(struct slow_work *work);
88extern void slow_work_unregister_user(void); 144extern int slow_work_register_user(struct module *owner);
145extern void slow_work_unregister_user(struct module *owner);
146
147extern int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
148 unsigned long delay);
149
150static inline void delayed_slow_work_cancel(struct delayed_slow_work *dwork)
151{
152 slow_work_cancel(&dwork->work);
153}
154
155extern bool slow_work_sleep_till_thread_needed(struct slow_work *work,
156 signed long *_timeout);
89 157
90#ifdef CONFIG_SYSCTL 158#ifdef CONFIG_SYSCTL
91extern ctl_table slow_work_sysctls[]; 159extern ctl_table slow_work_sysctls[];
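The slow-work changes above make module ownership explicit (slow_work_register_user() now takes THIS_MODULE and slow_work_ops grows an .owner field, as the GFS2 hunks earlier in this diff show) and add a delayed flavour of work item. A rough usage sketch under those assumptions; all my_* names are illustrative only:

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/slow-work.h>

static void my_work_execute(struct slow_work *work)
{
	/* long-running processing runs here on a slow-work thread */
}

static const struct slow_work_ops my_slow_work_ops = {
	.owner	 = THIS_MODULE,	/* identifies whose work this is so unregistration can wait for it */
	.execute = my_work_execute,
	/* .get_ref/.put_ref omitted in this sketch */
};

static struct delayed_slow_work my_dwork;

static int __init my_init(void)
{
	int ret;

	ret = slow_work_register_user(THIS_MODULE);
	if (ret < 0)
		return ret;

	delayed_slow_work_init(&my_dwork, &my_slow_work_ops);
	ret = delayed_slow_work_enqueue(&my_dwork, 5 * HZ);
	if (ret < 0)
		slow_work_unregister_user(THIS_MODULE);
	return ret;
}

static void __exit my_exit(void)
{
	delayed_slow_work_cancel(&my_dwork);
	slow_work_unregister_user(THIS_MODULE);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
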
diff --git a/init/Kconfig b/init/Kconfig
index 9e03ef8b311e..ab5c64801fe5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1098,6 +1098,16 @@ config SLOW_WORK
1098 1098
1099 See Documentation/slow-work.txt. 1099 See Documentation/slow-work.txt.
1100 1100
1101config SLOW_WORK_PROC
1102 bool "Slow work debugging through /proc"
1103 default n
1104 depends on SLOW_WORK && PROC_FS
1105 help
1106 Display the contents of the slow work run queue through /proc,
1107 including items currently executing.
1108
1109 See Documentation/slow-work.txt.
1110
1101endmenu # General setup 1111endmenu # General setup
1102 1112
1103config HAVE_GENERIC_DMA_COHERENT 1113config HAVE_GENERIC_DMA_COHERENT
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..776ffed1556d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/
94obj-$(CONFIG_RING_BUFFER) += trace/ 94obj-$(CONFIG_RING_BUFFER) += trace/
95obj-$(CONFIG_SMP) += sched_cpupri.o 95obj-$(CONFIG_SMP) += sched_cpupri.o
96obj-$(CONFIG_SLOW_WORK) += slow-work.o 96obj-$(CONFIG_SLOW_WORK) += slow-work.o
97obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o
97obj-$(CONFIG_PERF_EVENTS) += perf_event.o 98obj-$(CONFIG_PERF_EVENTS) += perf_event.o
98 99
99ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) 100ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c
new file mode 100644
index 000000000000..3988032571f5
--- /dev/null
+++ b/kernel/slow-work-proc.c
@@ -0,0 +1,227 @@
1/* Slow work debugging
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/slow-work.h>
14#include <linux/fs.h>
15#include <linux/time.h>
16#include <linux/seq_file.h>
17#include "slow-work.h"
18
19#define ITERATOR_SHIFT (BITS_PER_LONG - 4)
20#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT)
21#define ITERATOR_COUNTER (~ITERATOR_SELECTOR)
22
23void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m)
24{
25 seq_puts(m, "Slow-work: New thread");
26}
27
28/*
29 * Render the time mark field on a work item into a 5-char time with units plus
30 * a space
31 */
32static void slow_work_print_mark(struct seq_file *m, struct slow_work *work)
33{
34 struct timespec now, diff;
35
36 now = CURRENT_TIME;
37 diff = timespec_sub(now, work->mark);
38
39 if (diff.tv_sec < 0)
40 seq_puts(m, " -ve ");
41 else if (diff.tv_sec == 0 && diff.tv_nsec < 1000)
42 seq_printf(m, "%3luns ", diff.tv_nsec);
43 else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000)
44 seq_printf(m, "%3luus ", diff.tv_nsec / 1000);
45 else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000)
46 seq_printf(m, "%3lums ", diff.tv_nsec / 1000000);
47 else if (diff.tv_sec <= 1)
48 seq_puts(m, " 1s ");
49 else if (diff.tv_sec < 60)
50 seq_printf(m, "%4lus ", diff.tv_sec);
51 else if (diff.tv_sec < 60 * 60)
52 seq_printf(m, "%4lum ", diff.tv_sec / 60);
53 else if (diff.tv_sec < 60 * 60 * 24)
54 seq_printf(m, "%4luh ", diff.tv_sec / 3600);
55 else
56 seq_puts(m, "exces ");
57}
58
59/*
60 * Describe a slow work item for /proc
61 */
62static int slow_work_runqueue_show(struct seq_file *m, void *v)
63{
64 struct slow_work *work;
65 struct list_head *p = v;
66 unsigned long id;
67
68 switch ((unsigned long) v) {
69 case 1:
70 seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n");
71 return 0;
72 case 2:
73 seq_puts(m, "=== ===== ================ == ===== ==========\n");
74 return 0;
75
76 case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1:
77 id = (unsigned long) v - 3;
78
79 read_lock(&slow_work_execs_lock);
80 work = slow_work_execs[id];
81 if (work) {
82 smp_read_barrier_depends();
83
84 seq_printf(m, "%3lu %5d %16p %2lx ",
85 id, slow_work_pids[id], work, work->flags);
86 slow_work_print_mark(m, work);
87
88 if (work->ops->desc)
89 work->ops->desc(work, m);
90 seq_putc(m, '\n');
91 }
92 read_unlock(&slow_work_execs_lock);
93 return 0;
94
95 default:
96 work = list_entry(p, struct slow_work, link);
97 seq_printf(m, "%3s - %16p %2lx ",
98 work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq",
99 work, work->flags);
100 slow_work_print_mark(m, work);
101
102 if (work->ops->desc)
103 work->ops->desc(work, m);
104 seq_putc(m, '\n');
105 return 0;
106 }
107}
108
109/*
110 * map the iterator to a work item
111 */
112static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos)
113{
114 struct list_head *p;
115 unsigned long count, id;
116
117 switch (*_pos >> ITERATOR_SHIFT) {
118 case 0x0:
119 if (*_pos == 0)
120 *_pos = 1;
121 if (*_pos < 3)
122 return (void *)(unsigned long) *_pos;
123 if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT)
124 for (id = *_pos - 3;
125 id < SLOW_WORK_THREAD_LIMIT;
126 id++, (*_pos)++)
127 if (slow_work_execs[id])
128 return (void *)(unsigned long) *_pos;
129 *_pos = 0x1UL << ITERATOR_SHIFT;
130
131 case 0x1:
132 count = *_pos & ITERATOR_COUNTER;
133 list_for_each(p, &slow_work_queue) {
134 if (count == 0)
135 return p;
136 count--;
137 }
138 *_pos = 0x2UL << ITERATOR_SHIFT;
139
140 case 0x2:
141 count = *_pos & ITERATOR_COUNTER;
142 list_for_each(p, &vslow_work_queue) {
143 if (count == 0)
144 return p;
145 count--;
146 }
147 *_pos = 0x3UL << ITERATOR_SHIFT;
148
149 default:
150 return NULL;
151 }
152}
153
154/*
155 * set up the iterator to start reading from the first line
156 */
157static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos)
158{
159 spin_lock_irq(&slow_work_queue_lock);
160 return slow_work_runqueue_index(m, _pos);
161}
162
163/*
164 * move to the next line
165 */
166static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos)
167{
168 struct list_head *p = v;
169 unsigned long selector = *_pos >> ITERATOR_SHIFT;
170
171 (*_pos)++;
172 switch (selector) {
173 case 0x0:
174 return slow_work_runqueue_index(m, _pos);
175
176 case 0x1:
177 if (*_pos >> ITERATOR_SHIFT == 0x1) {
178 p = p->next;
179 if (p != &slow_work_queue)
180 return p;
181 }
182 *_pos = 0x2UL << ITERATOR_SHIFT;
183 p = &vslow_work_queue;
184
185 case 0x2:
186 if (*_pos >> ITERATOR_SHIFT == 0x2) {
187 p = p->next;
188 if (p != &vslow_work_queue)
189 return p;
190 }
191 *_pos = 0x3UL << ITERATOR_SHIFT;
192
193 default:
194 return NULL;
195 }
196}
197
198/*
199 * clean up after reading
200 */
201static void slow_work_runqueue_stop(struct seq_file *m, void *v)
202{
203 spin_unlock_irq(&slow_work_queue_lock);
204}
205
206static const struct seq_operations slow_work_runqueue_ops = {
207 .start = slow_work_runqueue_start,
208 .stop = slow_work_runqueue_stop,
209 .next = slow_work_runqueue_next,
210 .show = slow_work_runqueue_show,
211};
212
213/*
214 * open "/proc/slow_work_rq" to list queue contents
215 */
216static int slow_work_runqueue_open(struct inode *inode, struct file *file)
217{
218 return seq_open(file, &slow_work_runqueue_ops);
219}
220
221const struct file_operations slow_work_runqueue_fops = {
222 .owner = THIS_MODULE,
223 .open = slow_work_runqueue_open,
224 .read = seq_read,
225 .llseek = seq_lseek,
226 .release = seq_release,
227};
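The DESC column of the listing produced above is filled in by the new, optional slow_work_ops.desc() callback (invoked as work->ops->desc(work, m)). A minimal sketch of such a callback for a module's own work items; the my_* names are illustrative, not taken from this patch:

#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/slow-work.h>

struct my_work {
	struct slow_work	slow;
	unsigned		id;
};

static void my_work_execute(struct slow_work *work)
{
	/* ... do the work ... */
}

#ifdef CONFIG_SLOW_WORK_PROC
/* one short line per queued/executing item in /proc/slow_work_rq */
static void my_work_desc(struct slow_work *work, struct seq_file *m)
{
	struct my_work *mw = container_of(work, struct my_work, slow);

	seq_printf(m, "my-module: item %u", mw->id);
}
#endif

static const struct slow_work_ops my_work_ops = {
	.owner	 = THIS_MODULE,
	.execute = my_work_execute,
#ifdef CONFIG_SLOW_WORK_PROC
	.desc	 = my_work_desc,
#endif
};
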
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 0d31135efbf4..da94f3c101af 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -16,11 +16,8 @@
16#include <linux/kthread.h> 16#include <linux/kthread.h>
17#include <linux/freezer.h> 17#include <linux/freezer.h>
18#include <linux/wait.h> 18#include <linux/wait.h>
19 19#include <linux/proc_fs.h>
20#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of 20#include "slow-work.h"
21 * things to do */
22#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after
23 * OOM */
24 21
25static void slow_work_cull_timeout(unsigned long); 22static void slow_work_cull_timeout(unsigned long);
26static void slow_work_oom_timeout(unsigned long); 23static void slow_work_oom_timeout(unsigned long);
@@ -46,7 +43,7 @@ static unsigned vslow_work_proportion = 50; /* % of threads that may process
46 43
47#ifdef CONFIG_SYSCTL 44#ifdef CONFIG_SYSCTL
48static const int slow_work_min_min_threads = 2; 45static const int slow_work_min_min_threads = 2;
49static int slow_work_max_max_threads = 255; 46static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT;
50static const int slow_work_min_vslow = 1; 47static const int slow_work_min_vslow = 1;
51static const int slow_work_max_vslow = 99; 48static const int slow_work_max_vslow = 99;
52 49
@@ -98,6 +95,32 @@ static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
98static struct slow_work slow_work_new_thread; /* new thread starter */ 95static struct slow_work slow_work_new_thread; /* new thread starter */
99 96
100/* 97/*
98 * slow work ID allocation (use slow_work_queue_lock)
99 */
100static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
101
102/*
103 * Unregistration tracking to prevent put_ref() from disappearing during module
104 * unload
105 */
106#ifdef CONFIG_MODULES
107static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT];
108static struct module *slow_work_unreg_module;
109static struct slow_work *slow_work_unreg_work_item;
110static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq);
111static DEFINE_MUTEX(slow_work_unreg_sync_lock);
112#endif
113
114/*
115 * Data for tracking currently executing items for indication through /proc
116 */
117#ifdef CONFIG_SLOW_WORK_PROC
118struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT];
119pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT];
120DEFINE_RWLOCK(slow_work_execs_lock);
121#endif
122
123/*
101 * The queues of work items and the lock governing access to them. These are 124 * The queues of work items and the lock governing access to them. These are
102 * shared between all the CPUs. It doesn't make sense to have per-CPU queues 125 * shared between all the CPUs. It doesn't make sense to have per-CPU queues
103 * as the number of threads bears no relation to the number of CPUs. 126 * as the number of threads bears no relation to the number of CPUs.
@@ -105,9 +128,18 @@ static struct slow_work slow_work_new_thread; /* new thread starter */
105 * There are two queues of work items: one for slow work items, and one for 128 * There are two queues of work items: one for slow work items, and one for
106 * very slow work items. 129 * very slow work items.
107 */ 130 */
108static LIST_HEAD(slow_work_queue); 131LIST_HEAD(slow_work_queue);
109static LIST_HEAD(vslow_work_queue); 132LIST_HEAD(vslow_work_queue);
110static DEFINE_SPINLOCK(slow_work_queue_lock); 133DEFINE_SPINLOCK(slow_work_queue_lock);
134
135/*
136 * The following are two wait queues that get pinged when a work item is placed
137 * on an empty queue. They allow a work item that is hogging a thread by
138 * sleeping in a deferrable way to notice that its thread is needed elsewhere,
139 * yield it and enqueue itself again.
140 */
141static DECLARE_WAIT_QUEUE_HEAD(slow_work_queue_waits_for_occupation);
142static DECLARE_WAIT_QUEUE_HEAD(vslow_work_queue_waits_for_occupation);
111 143
112/* 144/*
113 * The thread controls. A variable used to signal to the threads that they 145 * The thread controls. A variable used to signal to the threads that they
@@ -126,6 +158,20 @@ static DECLARE_COMPLETION(slow_work_last_thread_exited);
126static int slow_work_user_count; 158static int slow_work_user_count;
127static DEFINE_MUTEX(slow_work_user_lock); 159static DEFINE_MUTEX(slow_work_user_lock);
128 160
161static inline int slow_work_get_ref(struct slow_work *work)
162{
163 if (work->ops->get_ref)
164 return work->ops->get_ref(work);
165
166 return 0;
167}
168
169static inline void slow_work_put_ref(struct slow_work *work)
170{
171 if (work->ops->put_ref)
172 work->ops->put_ref(work);
173}
174
129/* 175/*
130 * Calculate the maximum number of active threads in the pool that are 176 * Calculate the maximum number of active threads in the pool that are
131 * permitted to process very slow work items. 177 * permitted to process very slow work items.
@@ -149,8 +195,11 @@ static unsigned slow_work_calc_vsmax(void)
149 * Attempt to execute stuff queued on a slow thread. Return true if we managed 195 * Attempt to execute stuff queued on a slow thread. Return true if we managed
150 * it, false if there was nothing to do. 196 * it, false if there was nothing to do.
151 */ 197 */
152static bool slow_work_execute(void) 198static noinline bool slow_work_execute(int id)
153{ 199{
200#ifdef CONFIG_MODULES
201 struct module *module;
202#endif
154 struct slow_work *work = NULL; 203 struct slow_work *work = NULL;
155 unsigned vsmax; 204 unsigned vsmax;
156 bool very_slow; 205 bool very_slow;
@@ -186,6 +235,16 @@ static bool slow_work_execute(void)
186 } else { 235 } else {
187 very_slow = false; /* avoid the compiler warning */ 236 very_slow = false; /* avoid the compiler warning */
188 } 237 }
238
239#ifdef CONFIG_MODULES
240 if (work)
241 slow_work_thread_processing[id] = work->owner;
242#endif
243 if (work) {
244 slow_work_mark_time(work);
245 slow_work_begin_exec(id, work);
246 }
247
189 spin_unlock_irq(&slow_work_queue_lock); 248 spin_unlock_irq(&slow_work_queue_lock);
190 249
191 if (!work) 250 if (!work)
@@ -194,12 +253,19 @@ static bool slow_work_execute(void)
194 if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) 253 if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
195 BUG(); 254 BUG();
196 255
197 work->ops->execute(work); 256 /* don't execute if the work is in the process of being cancelled */
257 if (!test_bit(SLOW_WORK_CANCELLING, &work->flags))
258 work->ops->execute(work);
198 259
199 if (very_slow) 260 if (very_slow)
200 atomic_dec(&vslow_work_executing_count); 261 atomic_dec(&vslow_work_executing_count);
201 clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); 262 clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
202 263
264 /* wake up anyone waiting for this work to be complete */
265 wake_up_bit(&work->flags, SLOW_WORK_EXECUTING);
266
267 slow_work_end_exec(id, work);
268
203 /* if someone tried to enqueue the item whilst we were executing it, 269 /* if someone tried to enqueue the item whilst we were executing it,
204 * then it'll be left unenqueued to avoid multiple threads trying to 270 * then it'll be left unenqueued to avoid multiple threads trying to
205 * execute it simultaneously 271 * execute it simultaneously
@@ -219,7 +285,18 @@ static bool slow_work_execute(void)
219 spin_unlock_irq(&slow_work_queue_lock); 285 spin_unlock_irq(&slow_work_queue_lock);
220 } 286 }
221 287
222 work->ops->put_ref(work); 288 /* sort out the race between module unloading and put_ref() */
289 slow_work_put_ref(work);
290
291#ifdef CONFIG_MODULES
292 module = slow_work_thread_processing[id];
293 slow_work_thread_processing[id] = NULL;
294 smp_mb();
295 if (slow_work_unreg_work_item == work ||
296 slow_work_unreg_module == module)
297 wake_up_all(&slow_work_unreg_wq);
298#endif
299
223 return true; 300 return true;
224 301
225auto_requeue: 302auto_requeue:
@@ -227,15 +304,61 @@ auto_requeue:
227 * - we transfer our ref on the item back to the appropriate queue 304 * - we transfer our ref on the item back to the appropriate queue
228 * - don't wake another thread up as we're awake already 305 * - don't wake another thread up as we're awake already
229 */ 306 */
307 slow_work_mark_time(work);
230 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) 308 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
231 list_add_tail(&work->link, &vslow_work_queue); 309 list_add_tail(&work->link, &vslow_work_queue);
232 else 310 else
233 list_add_tail(&work->link, &slow_work_queue); 311 list_add_tail(&work->link, &slow_work_queue);
234 spin_unlock_irq(&slow_work_queue_lock); 312 spin_unlock_irq(&slow_work_queue_lock);
313 slow_work_thread_processing[id] = NULL;
235 return true; 314 return true;
236} 315}
237 316
238/** 317/**
318 * slow_work_sleep_till_thread_needed - Sleep till thread needed by other work
319 * @work: The work item under execution that wants to sleep
320 * @_timeout: Scheduler sleep timeout
321 *
322 * Allow a requeueable work item to sleep on a slow-work processor thread until
323 * that thread is needed to do some other work or the sleep is interrupted by
324 * some other event.
325 *
326 * The caller must set up a wake-up event before calling this function, must
327 * have set the appropriate sleep mode (such as TASK_UNINTERRUPTIBLE) and must
328 * have tested its own condition beforehand, as no test is made here.
329 *
330 * False is returned if there is nothing on the queue; true is returned if the
331 * work item should be requeued
332 */
333bool slow_work_sleep_till_thread_needed(struct slow_work *work,
334 signed long *_timeout)
335{
336 wait_queue_head_t *wfo_wq;
337 struct list_head *queue;
338
339 DEFINE_WAIT(wait);
340
341 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
342 wfo_wq = &vslow_work_queue_waits_for_occupation;
343 queue = &vslow_work_queue;
344 } else {
345 wfo_wq = &slow_work_queue_waits_for_occupation;
346 queue = &slow_work_queue;
347 }
348
349 if (!list_empty(queue))
350 return true;
351
352 add_wait_queue_exclusive(wfo_wq, &wait);
353 if (list_empty(queue))
354 *_timeout = schedule_timeout(*_timeout);
355 finish_wait(wfo_wq, &wait);
356
357 return !list_empty(queue);
358}
359EXPORT_SYMBOL(slow_work_sleep_till_thread_needed);
360
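
As a usage sketch only (not part of this patch): an execute() routine for a requeueable item might wrap the helper above like this. The wait queue my_event_wq, the test my_event_ready() and the requeue-and-return policy are illustrative assumptions.

	static void my_execute(struct slow_work *work)
	{
		signed long timeout = 5 * HZ;
		DEFINE_WAIT(wait);

		prepare_to_wait(&my_event_wq, &wait, TASK_UNINTERRUPTIBLE);
		while (!my_event_ready() && timeout > 0) {
			/* give the thread back if other work is now waiting for one */
			if (slow_work_sleep_till_thread_needed(work, &timeout)) {
				slow_work_enqueue(work);	/* defers a requeue until we return */
				break;
			}
			prepare_to_wait(&my_event_wq, &wait, TASK_UNINTERRUPTIBLE);
		}
		finish_wait(&my_event_wq, &wait);

		if (my_event_ready()) {
			/* ... do the actual work ... */
		}
	}
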
361/**
239 * slow_work_enqueue - Schedule a slow work item for processing 362 * slow_work_enqueue - Schedule a slow work item for processing
240 * @work: The work item to queue 363 * @work: The work item to queue
241 * 364 *
@@ -260,16 +383,22 @@ auto_requeue:
260 * allowed to pick items to execute. This ensures that very slow items won't 383 * allowed to pick items to execute. This ensures that very slow items won't
261 * overly block ones that are just ordinarily slow. 384 * overly block ones that are just ordinarily slow.
262 * 385 *
263 * Returns 0 if successful, -EAGAIN if not. 386 * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if cancelled work is
 387 * attempted to be queued)
264 */ 388 */
265int slow_work_enqueue(struct slow_work *work) 389int slow_work_enqueue(struct slow_work *work)
266{ 390{
391 wait_queue_head_t *wfo_wq;
392 struct list_head *queue;
267 unsigned long flags; 393 unsigned long flags;
394 int ret;
395
396 if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
397 return -ECANCELED;
268 398
269 BUG_ON(slow_work_user_count <= 0); 399 BUG_ON(slow_work_user_count <= 0);
270 BUG_ON(!work); 400 BUG_ON(!work);
271 BUG_ON(!work->ops); 401 BUG_ON(!work->ops);
272 BUG_ON(!work->ops->get_ref);
273 402
274 /* when honouring an enqueue request, we only promise that we will run 403 /* when honouring an enqueue request, we only promise that we will run
275 * the work function in the future; we do not promise to run it once 404 * the work function in the future; we do not promise to run it once
@@ -280,8 +409,19 @@ int slow_work_enqueue(struct slow_work *work)
280 * maintaining our promise 409 * maintaining our promise
281 */ 410 */
282 if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { 411 if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
412 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
413 wfo_wq = &vslow_work_queue_waits_for_occupation;
414 queue = &vslow_work_queue;
415 } else {
416 wfo_wq = &slow_work_queue_waits_for_occupation;
417 queue = &slow_work_queue;
418 }
419
283 spin_lock_irqsave(&slow_work_queue_lock, flags); 420 spin_lock_irqsave(&slow_work_queue_lock, flags);
284 421
422 if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags)))
423 goto cancelled;
424
285 /* we promise that we will not attempt to execute the work 425 /* we promise that we will not attempt to execute the work
286 * function in more than one thread simultaneously 426 * function in more than one thread simultaneously
287 * 427 *
@@ -299,25 +439,221 @@ int slow_work_enqueue(struct slow_work *work)
299 if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { 439 if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
300 set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); 440 set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
301 } else { 441 } else {
302 if (work->ops->get_ref(work) < 0) 442 ret = slow_work_get_ref(work);
303 goto cant_get_ref; 443 if (ret < 0)
304 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) 444 goto failed;
305 list_add_tail(&work->link, &vslow_work_queue); 445 slow_work_mark_time(work);
306 else 446 list_add_tail(&work->link, queue);
307 list_add_tail(&work->link, &slow_work_queue);
308 wake_up(&slow_work_thread_wq); 447 wake_up(&slow_work_thread_wq);
448
449 /* if someone who could be requeued is sleeping on a
450 * thread, then ask them to yield their thread */
451 if (work->link.prev == queue)
452 wake_up(wfo_wq);
309 } 453 }
310 454
311 spin_unlock_irqrestore(&slow_work_queue_lock, flags); 455 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
312 } 456 }
313 return 0; 457 return 0;
314 458
315cant_get_ref: 459cancelled:
460 ret = -ECANCELED;
461failed:
316 spin_unlock_irqrestore(&slow_work_queue_lock, flags); 462 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
317 return -EAGAIN; 463 return ret;
318} 464}
319EXPORT_SYMBOL(slow_work_enqueue); 465EXPORT_SYMBOL(slow_work_enqueue);
320 466
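
A minimal calling sketch, with my_item, my_ops and the error policy assumed (noting that .get_ref/.put_ref appear to become optional with this change):

	static struct slow_work my_item;

	slow_work_init(&my_item, &my_ops);	/* my_ops supplies .owner and .execute */

	switch (slow_work_enqueue(&my_item)) {
	case 0:
		break;		/* queued now, or deferred until the current run ends */
	case -ECANCELED:
		break;		/* slow_work_cancel() is racing with this enqueue */
	case -EAGAIN:
		break;		/* couldn't take a reference; the caller may retry */
	}
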
467static int slow_work_wait(void *word)
468{
469 schedule();
470 return 0;
471}
472
473/**
474 * slow_work_cancel - Cancel a slow work item
475 * @work: The work item to cancel
476 *
477 * This function will cancel a previously enqueued work item. If we cannot
 478 * cancel the work item, it is guaranteed to have run when this function
479 * returns.
480 */
481void slow_work_cancel(struct slow_work *work)
482{
483 bool wait = true, put = false;
484
485 set_bit(SLOW_WORK_CANCELLING, &work->flags);
486 smp_mb();
487
488 /* if the work item is a delayed work item with an active timer, we
489 * need to wait for the timer to finish _before_ getting the spinlock,
490 * lest we deadlock against the timer routine
491 *
492 * the timer routine will leave DELAYED set if it notices the
493 * CANCELLING flag in time
494 */
495 if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
496 struct delayed_slow_work *dwork =
497 container_of(work, struct delayed_slow_work, work);
498 del_timer_sync(&dwork->timer);
499 }
500
501 spin_lock_irq(&slow_work_queue_lock);
502
503 if (test_bit(SLOW_WORK_DELAYED, &work->flags)) {
504 /* the timer routine aborted or never happened, so we are left
505 * holding the timer's reference on the item and should just
506 * drop the pending flag and wait for any ongoing execution to
507 * finish */
508 struct delayed_slow_work *dwork =
509 container_of(work, struct delayed_slow_work, work);
510
511 BUG_ON(timer_pending(&dwork->timer));
512 BUG_ON(!list_empty(&work->link));
513
514 clear_bit(SLOW_WORK_DELAYED, &work->flags);
515 put = true;
516 clear_bit(SLOW_WORK_PENDING, &work->flags);
517
518 } else if (test_bit(SLOW_WORK_PENDING, &work->flags) &&
519 !list_empty(&work->link)) {
520 /* the link in the pending queue holds a reference on the item
521 * that we will need to release */
522 list_del_init(&work->link);
523 wait = false;
524 put = true;
525 clear_bit(SLOW_WORK_PENDING, &work->flags);
526
527 } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) {
528 /* the executor is holding our only reference on the item, so
529 * we merely need to wait for it to finish executing */
530 clear_bit(SLOW_WORK_PENDING, &work->flags);
531 }
532
533 spin_unlock_irq(&slow_work_queue_lock);
534
 535 /* the EXECUTING flag is set by the executor whilst the spinlock is held
536 * and before the item is dequeued - so assuming the above doesn't
537 * actually dequeue it, simply waiting for the EXECUTING flag to be
538 * released here should be sufficient */
539 if (wait)
540 wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait,
541 TASK_UNINTERRUPTIBLE);
542
543 clear_bit(SLOW_WORK_CANCELLING, &work->flags);
544 if (put)
545 slow_work_put_ref(work);
546}
547EXPORT_SYMBOL(slow_work_cancel);
548
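
The guarantee above is what makes teardown safe; a sketch, with obj and its embedded work item assumed:

	slow_work_cancel(&obj->work);	/* waits out any execution in progress */
	kfree(obj);			/* safe: the item is neither queued nor running */
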
549/*
550 * Handle expiry of the delay timer, indicating that a delayed slow work item
551 * should now be queued if not cancelled
552 */
553static void delayed_slow_work_timer(unsigned long data)
554{
555 wait_queue_head_t *wfo_wq;
556 struct list_head *queue;
557 struct slow_work *work = (struct slow_work *) data;
558 unsigned long flags;
559 bool queued = false, put = false, first = false;
560
561 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) {
562 wfo_wq = &vslow_work_queue_waits_for_occupation;
563 queue = &vslow_work_queue;
564 } else {
565 wfo_wq = &slow_work_queue_waits_for_occupation;
566 queue = &slow_work_queue;
567 }
568
569 spin_lock_irqsave(&slow_work_queue_lock, flags);
570 if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) {
571 clear_bit(SLOW_WORK_DELAYED, &work->flags);
572
573 if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
574 /* we discard the reference the timer was holding in
575 * favour of the one the executor holds */
576 set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
577 put = true;
578 } else {
579 slow_work_mark_time(work);
580 list_add_tail(&work->link, queue);
581 queued = true;
582 if (work->link.prev == queue)
583 first = true;
584 }
585 }
586
587 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
588 if (put)
589 slow_work_put_ref(work);
590 if (first)
591 wake_up(wfo_wq);
592 if (queued)
593 wake_up(&slow_work_thread_wq);
594}
595
596/**
597 * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing
598 * @dwork: The delayed work item to queue
599 * @delay: When to start executing the work, in jiffies from now
600 *
601 * This is similar to slow_work_enqueue(), but it adds a delay before the work
602 * is actually queued for processing.
603 *
604 * The item can have delayed processing requested on it whilst it is being
605 * executed. The delay will begin immediately, and if it expires before the
606 * item finishes executing, the item will be placed back on the queue when it
 607 * has finished executing.
608 */
609int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
610 unsigned long delay)
611{
612 struct slow_work *work = &dwork->work;
613 unsigned long flags;
614 int ret;
615
616 if (delay == 0)
617 return slow_work_enqueue(&dwork->work);
618
619 BUG_ON(slow_work_user_count <= 0);
620 BUG_ON(!work);
621 BUG_ON(!work->ops);
622
623 if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
624 return -ECANCELED;
625
626 if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
627 spin_lock_irqsave(&slow_work_queue_lock, flags);
628
629 if (test_bit(SLOW_WORK_CANCELLING, &work->flags))
630 goto cancelled;
631
632 /* the timer holds a reference whilst it is pending */
633 ret = work->ops->get_ref(work);
634 if (ret < 0)
635 goto cant_get_ref;
636
637 if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags))
638 BUG();
639 dwork->timer.expires = jiffies + delay;
640 dwork->timer.data = (unsigned long) work;
641 dwork->timer.function = delayed_slow_work_timer;
642 add_timer(&dwork->timer);
643
644 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
645 }
646
647 return 0;
648
649cancelled:
650 ret = -ECANCELED;
651cant_get_ref:
652 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
653 return ret;
654}
655EXPORT_SYMBOL(delayed_slow_work_enqueue);
656
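
A sketch of driving a delayed item (names assumed; delayed_slow_work_init() is taken on trust from the accompanying header change, and the timer fields are filled in by the function above rather than by the caller):

	static struct delayed_slow_work my_dwork;

	delayed_slow_work_init(&my_dwork, &my_ops);

	ret = delayed_slow_work_enqueue(&my_dwork, HZ);	/* run about a second from now */
	if (ret == -ECANCELED) {
		/* a cancel is in progress on this item */
	}

	/* tear-down: slow_work_cancel() copes with a still-pending timer */
	slow_work_cancel(&my_dwork.work);
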
321/* 657/*
322 * Schedule a cull of the thread pool at some time in the near future 658 * Schedule a cull of the thread pool at some time in the near future
323 */ 659 */
@@ -368,13 +704,23 @@ static inline bool slow_work_available(int vsmax)
368 */ 704 */
369static int slow_work_thread(void *_data) 705static int slow_work_thread(void *_data)
370{ 706{
371 int vsmax; 707 int vsmax, id;
372 708
373 DEFINE_WAIT(wait); 709 DEFINE_WAIT(wait);
374 710
375 set_freezable(); 711 set_freezable();
376 set_user_nice(current, -5); 712 set_user_nice(current, -5);
377 713
714 /* allocate ourselves an ID */
715 spin_lock_irq(&slow_work_queue_lock);
716 id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT);
717 BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT);
718 __set_bit(id, slow_work_ids);
719 slow_work_set_thread_pid(id, current->pid);
720 spin_unlock_irq(&slow_work_queue_lock);
721
722 sprintf(current->comm, "kslowd%03u", id);
723
378 for (;;) { 724 for (;;) {
379 vsmax = vslow_work_proportion; 725 vsmax = vslow_work_proportion;
380 vsmax *= atomic_read(&slow_work_thread_count); 726 vsmax *= atomic_read(&slow_work_thread_count);
@@ -395,7 +741,7 @@ static int slow_work_thread(void *_data)
395 vsmax *= atomic_read(&slow_work_thread_count); 741 vsmax *= atomic_read(&slow_work_thread_count);
396 vsmax /= 100; 742 vsmax /= 100;
397 743
398 if (slow_work_available(vsmax) && slow_work_execute()) { 744 if (slow_work_available(vsmax) && slow_work_execute(id)) {
399 cond_resched(); 745 cond_resched();
400 if (list_empty(&slow_work_queue) && 746 if (list_empty(&slow_work_queue) &&
401 list_empty(&vslow_work_queue) && 747 list_empty(&vslow_work_queue) &&
@@ -412,6 +758,11 @@ static int slow_work_thread(void *_data)
412 break; 758 break;
413 } 759 }
414 760
761 spin_lock_irq(&slow_work_queue_lock);
762 slow_work_set_thread_pid(id, 0);
763 __clear_bit(id, slow_work_ids);
764 spin_unlock_irq(&slow_work_queue_lock);
765
415 if (atomic_dec_and_test(&slow_work_thread_count)) 766 if (atomic_dec_and_test(&slow_work_thread_count))
416 complete_and_exit(&slow_work_last_thread_exited, 0); 767 complete_and_exit(&slow_work_last_thread_exited, 0);
417 return 0; 768 return 0;
@@ -427,21 +778,6 @@ static void slow_work_cull_timeout(unsigned long data)
427} 778}
428 779
429/* 780/*
430 * Get a reference on slow work thread starter
431 */
432static int slow_work_new_thread_get_ref(struct slow_work *work)
433{
434 return 0;
435}
436
437/*
438 * Drop a reference on slow work thread starter
439 */
440static void slow_work_new_thread_put_ref(struct slow_work *work)
441{
442}
443
444/*
445 * Start a new slow work thread 781 * Start a new slow work thread
446 */ 782 */
447static void slow_work_new_thread_execute(struct slow_work *work) 783static void slow_work_new_thread_execute(struct slow_work *work)
@@ -475,9 +811,11 @@ static void slow_work_new_thread_execute(struct slow_work *work)
475} 811}
476 812
477static const struct slow_work_ops slow_work_new_thread_ops = { 813static const struct slow_work_ops slow_work_new_thread_ops = {
478 .get_ref = slow_work_new_thread_get_ref, 814 .owner = THIS_MODULE,
479 .put_ref = slow_work_new_thread_put_ref,
480 .execute = slow_work_new_thread_execute, 815 .execute = slow_work_new_thread_execute,
816#ifdef CONFIG_SLOW_WORK_PROC
817 .desc = slow_work_new_thread_desc,
818#endif
481}; 819};
482 820
483/* 821/*
@@ -546,12 +884,13 @@ static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
546 884
547/** 885/**
548 * slow_work_register_user - Register a user of the facility 886 * slow_work_register_user - Register a user of the facility
887 * @module: The module about to make use of the facility
549 * 888 *
550 * Register a user of the facility, starting up the initial threads if there 889 * Register a user of the facility, starting up the initial threads if there
551 * aren't any other users at this point. This will return 0 if successful, or 890 * aren't any other users at this point. This will return 0 if successful, or
552 * an error if not. 891 * an error if not.
553 */ 892 */
554int slow_work_register_user(void) 893int slow_work_register_user(struct module *module)
555{ 894{
556 struct task_struct *p; 895 struct task_struct *p;
557 int loop; 896 int loop;
@@ -598,14 +937,79 @@ error:
598} 937}
599EXPORT_SYMBOL(slow_work_register_user); 938EXPORT_SYMBOL(slow_work_register_user);
600 939
940/*
941 * wait for all outstanding items from the calling module to complete
942 * - note that more items may be queued whilst we're waiting
943 */
944static void slow_work_wait_for_items(struct module *module)
945{
946 DECLARE_WAITQUEUE(myself, current);
947 struct slow_work *work;
948 int loop;
949
950 mutex_lock(&slow_work_unreg_sync_lock);
951 add_wait_queue(&slow_work_unreg_wq, &myself);
952
953 for (;;) {
954 spin_lock_irq(&slow_work_queue_lock);
955
956 /* first of all, we wait for the last queued item in each list
957 * to be processed */
958 list_for_each_entry_reverse(work, &vslow_work_queue, link) {
959 if (work->owner == module) {
960 set_current_state(TASK_UNINTERRUPTIBLE);
961 slow_work_unreg_work_item = work;
962 goto do_wait;
963 }
964 }
965 list_for_each_entry_reverse(work, &slow_work_queue, link) {
966 if (work->owner == module) {
967 set_current_state(TASK_UNINTERRUPTIBLE);
968 slow_work_unreg_work_item = work;
969 goto do_wait;
970 }
971 }
972
973 /* then we wait for the items being processed to finish */
974 slow_work_unreg_module = module;
975 smp_mb();
976 for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) {
977 if (slow_work_thread_processing[loop] == module)
978 goto do_wait;
979 }
980 spin_unlock_irq(&slow_work_queue_lock);
981 break; /* okay, we're done */
982
983 do_wait:
984 spin_unlock_irq(&slow_work_queue_lock);
985 schedule();
986 slow_work_unreg_work_item = NULL;
987 slow_work_unreg_module = NULL;
988 }
989
990 remove_wait_queue(&slow_work_unreg_wq, &myself);
991 mutex_unlock(&slow_work_unreg_sync_lock);
992}
993
601/** 994/**
602 * slow_work_unregister_user - Unregister a user of the facility 995 * slow_work_unregister_user - Unregister a user of the facility
996 * @module: The module whose items should be cleared
603 * 997 *
604 * Unregister a user of the facility, killing all the threads if this was the 998 * Unregister a user of the facility, killing all the threads if this was the
605 * last one. 999 * last one.
1000 *
1001 * This waits for all the work items belonging to the nominated module to go
1002 * away before proceeding.
606 */ 1003 */
607void slow_work_unregister_user(void) 1004void slow_work_unregister_user(struct module *module)
608{ 1005{
1006 /* first of all, wait for all outstanding items from the calling module
1007 * to complete */
1008 if (module)
1009 slow_work_wait_for_items(module);
1010
1011 /* then we can actually go about shutting down the facility if need
1012 * be */
609 mutex_lock(&slow_work_user_lock); 1013 mutex_lock(&slow_work_user_lock);
610 1014
611 BUG_ON(slow_work_user_count <= 0); 1015 BUG_ON(slow_work_user_count <= 0);
@@ -639,6 +1043,10 @@ static int __init init_slow_work(void)
639 if (slow_work_max_max_threads < nr_cpus * 2) 1043 if (slow_work_max_max_threads < nr_cpus * 2)
640 slow_work_max_max_threads = nr_cpus * 2; 1044 slow_work_max_max_threads = nr_cpus * 2;
641#endif 1045#endif
1046#ifdef CONFIG_SLOW_WORK_PROC
1047 proc_create("slow_work_rq", S_IFREG | 0400, NULL,
1048 &slow_work_runqueue_fops);
1049#endif
642 return 0; 1050 return 0;
643} 1051}
644 1052
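
Putting the module-awareness together, a user of the facility would now look roughly like this (a sketch; my_execute and the init/exit hooks are assumptions):

	static const struct slow_work_ops my_ops = {
		.owner	 = THIS_MODULE,
		.execute = my_execute,
	};

	static int __init my_init(void)
	{
		return slow_work_register_user(THIS_MODULE);
	}

	static void __exit my_exit(void)
	{
		/* blocks until none of this module's items are queued or running */
		slow_work_unregister_user(THIS_MODULE);
	}
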
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
new file mode 100644
index 000000000000..3c2f007f3ad6
--- /dev/null
+++ b/kernel/slow-work.h
@@ -0,0 +1,72 @@
1/* Slow work private definitions
2 *
3 * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of
13 * things to do */
14#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after
15 * OOM */
16
17#define SLOW_WORK_THREAD_LIMIT 255 /* abs maximum number of slow-work threads */
18
19/*
20 * slow-work.c
21 */
22#ifdef CONFIG_SLOW_WORK_PROC
23extern struct slow_work *slow_work_execs[];
24extern pid_t slow_work_pids[];
25extern rwlock_t slow_work_execs_lock;
26#endif
27
28extern struct list_head slow_work_queue;
29extern struct list_head vslow_work_queue;
30extern spinlock_t slow_work_queue_lock;
31
32/*
33 * slow-work-proc.c
34 */
35#ifdef CONFIG_SLOW_WORK_PROC
36extern const struct file_operations slow_work_runqueue_fops;
37
38extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
39#endif
40
41/*
42 * Helper functions
43 */
44static inline void slow_work_set_thread_pid(int id, pid_t pid)
45{
46#ifdef CONFIG_SLOW_WORK_PROC
47 slow_work_pids[id] = pid;
48#endif
49}
50
51static inline void slow_work_mark_time(struct slow_work *work)
52{
53#ifdef CONFIG_SLOW_WORK_PROC
54 work->mark = CURRENT_TIME;
55#endif
56}
57
58static inline void slow_work_begin_exec(int id, struct slow_work *work)
59{
60#ifdef CONFIG_SLOW_WORK_PROC
61 slow_work_execs[id] = work;
62#endif
63}
64
65static inline void slow_work_end_exec(int id, struct slow_work *work)
66{
67#ifdef CONFIG_SLOW_WORK_PROC
68 write_lock(&slow_work_execs_lock);
69 slow_work_execs[id] = NULL;
70 write_unlock(&slow_work_execs_lock);
71#endif
72}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 23abbd93cae1..92cdd9936e3d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -200,6 +200,9 @@ radix_tree_node_free(struct radix_tree_node *node)
200 * ensure that the addition of a single element in the tree cannot fail. On 200 * ensure that the addition of a single element in the tree cannot fail. On
201 * success, return zero, with preemption disabled. On error, return -ENOMEM 201 * success, return zero, with preemption disabled. On error, return -ENOMEM
202 * with preemption not disabled. 202 * with preemption not disabled.
203 *
204 * To make use of this facility, the radix tree must be initialised without
205 * __GFP_WAIT being passed to INIT_RADIX_TREE().
203 */ 206 */
204int radix_tree_preload(gfp_t gfp_mask) 207int radix_tree_preload(gfp_t gfp_mask)
205{ 208{
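
The comment added above describes the usual preload pattern; roughly, with my_tree, my_lock, index and item as assumed names (GFP_ATOMIC being one mask that lacks __GFP_WAIT):

	/* initialisation: no __GFP_WAIT in the mask handed to the tree itself */
	INIT_RADIX_TREE(&my_tree, GFP_ATOMIC);

	/* insertion path */
	ret = radix_tree_preload(GFP_KERNEL);	/* may sleep; leaves preemption off on success */
	if (ret < 0)
		return ret;
	spin_lock(&my_lock);
	ret = radix_tree_insert(&my_tree, index, item);
	spin_unlock(&my_lock);
	radix_tree_preload_end();		/* re-enables preemption */
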
@@ -543,7 +546,6 @@ out:
543} 546}
544EXPORT_SYMBOL(radix_tree_tag_clear); 547EXPORT_SYMBOL(radix_tree_tag_clear);
545 548
546#ifndef __KERNEL__ /* Only the test harness uses this at present */
547/** 549/**
548 * radix_tree_tag_get - get a tag on a radix tree node 550 * radix_tree_tag_get - get a tag on a radix tree node
549 * @root: radix tree root 551 * @root: radix tree root
@@ -606,7 +608,6 @@ int radix_tree_tag_get(struct radix_tree_root *root,
606 } 608 }
607} 609}
608EXPORT_SYMBOL(radix_tree_tag_get); 610EXPORT_SYMBOL(radix_tree_tag_get);
609#endif
610 611
611/** 612/**
612 * radix_tree_next_hole - find the next hole (not-present entry) 613 * radix_tree_next_hole - find the next hole (not-present entry)