author    | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-04-02 19:29:52 -0400
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2007-05-01 01:17:06 -0400
commit    | c63c7b051395368573779c8309aa5c990dcf2f96 (patch)
tree      | db54090eef99349d15b95fcd8c2620a2403d8db8 /fs/nfs/pagelist.c
parent    | 8b09bee3083897e375bd0bf9d60f48daedfab3e0 (diff)
NFS: Fix a race when doing NFS write coalescing
Currently we do write coalescing in a very inefficient manner: one pass in
generic_writepages() to lock the pages for writing, then another pass in
nfs_flush_mapping() and/or nfs_sync_mapping_wait() to gather the locked
pages for coalescing into RPC requests of size "wsize".
In fact, a deadlock is possible here, since we only start I/O on the
second pass: if the user signals the process while it is waiting in
nfs_sync_mapping_wait(), for instance, we may exit before starting I/O on
all the requests that were queued up, leaving pages locked with no I/O
scheduled to complete and unlock them.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
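The two functions removed below implemented the old gather-then-coalesce second pass. Their callers (in the parts of this commit outside fs/nfs/pagelist.c) now drive a struct nfs_pageio_descriptor directly, so coalescing and RPC submission happen during the one pass that locks the pages. A minimal sketch of that pattern follows: nfs_pageio_add_request() and nfs_pageio_complete() appear in the diff below, but the nfs_pageio_init() signature, the nfs_flush_one callback, and the pg_error field are assumptions about this kernel vintage, not code taken from this commit.

```c
#include <linux/list.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>

/*
 * Sketch only -- not the literal commit code. Each locked request is
 * fed straight into the descriptor, which dispatches a full-"wsize"
 * RPC as soon as one can be formed, instead of deferring all I/O to a
 * second pass over a gathered list.
 */
static int example_one_pass_flush(struct inode *inode,
				  struct list_head *locked_reqs)
{
	struct nfs_pageio_descriptor desc;

	/* Assumed initializer: bind the descriptor to the inode, the
	 * nfs_flush_one() send routine, and the mount's wsize limit. */
	nfs_pageio_init(&desc, inode, nfs_flush_one,
			NFS_SERVER(inode)->wsize, 0);

	while (!list_empty(locked_reqs)) {
		struct nfs_page *req = nfs_list_entry(locked_reqs->next);

		/* Dequeues req and coalesces it with its neighbours if
		 * they are contiguous and share RPC credentials; fires
		 * the RPC itself once "wsize" bytes have accumulated. */
		if (!nfs_pageio_add_request(&desc, req))
			return desc.pg_error;	/* assumed error field */
	}

	/* Send whatever partial RPC is still held by the descriptor. */
	nfs_pageio_complete(&desc);
	return 0;
}
```

Because nfs_pageio_add_request() already dequeues each request and sends full RPCs on its own, the deleted nfs_pageio_add_list() below had reduced to a bare loop around it, and nfs_scan_dirty()'s separate dirty-tag scan became unnecessary.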
Diffstat (limited to 'fs/nfs/pagelist.c')
-rw-r--r-- | fs/nfs/pagelist.c | 92 --
1 file changed, 0 insertions(+), 92 deletions(-)
```diff
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 094537ddd344..ea1a85df9ab1 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -17,7 +17,6 @@
 #include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
-#include <linux/writeback.h>
 
 #define NFS_PARANOIA 1
 
@@ -354,25 +353,6 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 }
 
 /**
- * nfs_pageio_add_list - Split coalesced requests out from a list.
- * @desc: destination io descriptor
- * @head: source list
- *
- * Moves a maximum of 'nmax' elements from one list to another.
- * The elements are checked to ensure that they form a contiguous set
- * of pages, and that the RPC credentials are the same.
- */
-void nfs_pageio_add_list(struct nfs_pageio_descriptor *desc,
-		struct list_head *head)
-{
-	while (!list_empty(head)) {
-		struct nfs_page *req = nfs_list_entry(head->next);
-		if (!nfs_pageio_add_request(desc, req))
-			break;
-	}
-}
-
-/**
  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
  * @desc: pointer to io descriptor
  */
@@ -383,78 +363,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
 
 #define NFS_SCAN_MAXENTRIES 16
 /**
- * nfs_scan_dirty - Scan the radix tree for dirty requests
- * @mapping: pointer to address space
- * @wbc: writeback_control structure
- * @dst: Destination list
- *
- * Moves elements from one of the inode request lists.
- * If the number of requests is set to 0, the entire address_space
- * starting at index idx_start, is scanned.
- * The requests are *not* checked to ensure that they form a contiguous set.
- * You must be holding the inode's req_lock when calling this function
- */
-long nfs_scan_dirty(struct address_space *mapping,
-		struct writeback_control *wbc,
-		struct list_head *dst)
-{
-	struct nfs_inode *nfsi = NFS_I(mapping->host);
-	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
-	struct nfs_page *req;
-	pgoff_t idx_start, idx_end;
-	long res = 0;
-	int found, i;
-
-	if (nfsi->ndirty == 0)
-		return 0;
-	if (wbc->range_cyclic) {
-		idx_start = 0;
-		idx_end = ULONG_MAX;
-	} else if (wbc->range_end == 0) {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = ULONG_MAX;
-	} else {
-		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
-		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
-	}
-
-	for (;;) {
-		unsigned int toscan = NFS_SCAN_MAXENTRIES;
-
-		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
-				(void **)&pgvec[0], idx_start, toscan,
-				NFS_PAGE_TAG_DIRTY);
-
-		/* Did we make progress? */
-		if (found <= 0)
-			break;
-
-		for (i = 0; i < found; i++) {
-			req = pgvec[i];
-			if (!wbc->range_cyclic && req->wb_index > idx_end)
-				goto out;
-
-			/* Try to lock request and mark it for writeback */
-			if (!nfs_set_page_writeback_locked(req))
-				goto next;
-			radix_tree_tag_clear(&nfsi->nfs_page_tree,
-					req->wb_index, NFS_PAGE_TAG_DIRTY);
-			nfsi->ndirty--;
-			nfs_list_remove_request(req);
-			nfs_list_add_request(req, dst);
-			res++;
-			if (res == LONG_MAX)
-				goto out;
-next:
-			idx_start = req->wb_index + 1;
-		}
-	}
-out:
-	WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
-	return res;
-}
-
-/**
  * nfs_scan_list - Scan a list for matching requests
  * @nfsi: NFS inode
  * @head: One of the NFS inode request lists
```