aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4filelayout.c109
-rw-r--r--fs/nfs/nfs4filelayout.h7
-rw-r--r--fs/nfs/pagelist.c61
-rw-r--r--fs/nfs/pnfs.h82
-rw-r--r--fs/nfs/write.c120
-rw-r--r--include/linux/nfs_fs.h6
-rw-r--r--include/linux/nfs_page.h13
9 files changed, 208 insertions, 194 deletions
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 70e25c9c5670..1a19f8d30c14 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1560,7 +1560,7 @@ static void init_once(void *foo)
1560 INIT_LIST_HEAD(&nfsi->open_files); 1560 INIT_LIST_HEAD(&nfsi->open_files);
1561 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); 1561 INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
1562 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); 1562 INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
1563 INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); 1563 INIT_LIST_HEAD(&nfsi->commit_list);
1564 nfsi->npages = 0; 1564 nfsi->npages = 0;
1565 nfsi->ncommit = 0; 1565 nfsi->ncommit = 0;
1566 atomic_set(&nfsi->silly_count, 1); 1566 atomic_set(&nfsi->silly_count, 1);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0c3648a947d1..04a914704e7b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -308,6 +308,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
308extern void nfs_readdata_release(struct nfs_read_data *rdata); 308extern void nfs_readdata_release(struct nfs_read_data *rdata);
309 309
310/* write.c */ 310/* write.c */
311extern int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
312 int max);
311extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, 313extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
312 struct list_head *head); 314 struct list_head *head);
313extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, 315extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 768f6f86c9f0..716fac6bc082 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -682,14 +682,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
682 int size = (fl->stripe_type == STRIPE_SPARSE) ? 682 int size = (fl->stripe_type == STRIPE_SPARSE) ?
683 fl->dsaddr->ds_num : fl->dsaddr->stripe_count; 683 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
684 684
685 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); 685 fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
686 if (!fl->commit_buckets) { 686 if (!fl->commit_buckets) {
687 filelayout_free_lseg(&fl->generic_hdr); 687 filelayout_free_lseg(&fl->generic_hdr);
688 return NULL; 688 return NULL;
689 } 689 }
690 fl->number_of_buckets = size; 690 fl->number_of_buckets = size;
691 for (i = 0; i < size; i++) 691 for (i = 0; i < size; i++) {
692 INIT_LIST_HEAD(&fl->commit_buckets[i]); 692 INIT_LIST_HEAD(&fl->commit_buckets[i].written);
693 INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
694 }
693 } 695 }
694 return &fl->generic_hdr; 696 return &fl->generic_hdr;
695} 697}
@@ -767,11 +769,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = {
767 .pg_doio = pnfs_generic_pg_writepages, 769 .pg_doio = pnfs_generic_pg_writepages,
768}; 770};
769 771
770static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
771{
772 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
773}
774
775static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) 772static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
776{ 773{
777 if (fl->stripe_type == STRIPE_SPARSE) 774 if (fl->stripe_type == STRIPE_SPARSE)
@@ -780,13 +777,39 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
780 return j; 777 return j;
781} 778}
782 779
783struct list_head *filelayout_choose_commit_list(struct nfs_page *req) 780/* The generic layer is about to remove the req from the commit list.
781 * If this will make the bucket empty, it will need to put the lseg reference.
782 * Note inode lock is held, so we can't do the put here.
783 */
784static struct pnfs_layout_segment *
785filelayout_remove_commit_req(struct nfs_page *req)
786{
787 if (list_is_singular(&req->wb_list)) {
788 struct inode *inode = req->wb_context->dentry->d_inode;
789 struct pnfs_layout_segment *lseg;
790
791 /* From here we can find the bucket, but for the moment,
792 * since there is only one relevant lseg...
793 */
794 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
795 if (lseg->pls_range.iomode == IOMODE_RW)
796 return lseg;
797 }
798 }
799 return NULL;
800}
801
802static struct list_head *
803filelayout_choose_commit_list(struct nfs_page *req,
804 struct pnfs_layout_segment *lseg)
784{ 805{
785 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
786 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 806 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
787 u32 i, j; 807 u32 i, j;
788 struct list_head *list; 808 struct list_head *list;
789 809
810 if (fl->commit_through_mds)
811 return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
812
790 /* Note that we are calling nfs4_fl_calc_j_index on each page 813 /* Note that we are calling nfs4_fl_calc_j_index on each page
791 * that ends up being committed to a data server. An attractive 814 * that ends up being committed to a data server. An attractive
792 * alternative is to add a field to nfs_write_data and nfs_page 815 * alternative is to add a field to nfs_write_data and nfs_page
@@ -796,9 +819,14 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
796 j = nfs4_fl_calc_j_index(lseg, 819 j = nfs4_fl_calc_j_index(lseg,
797 (loff_t)req->wb_index << PAGE_CACHE_SHIFT); 820 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
798 i = select_bucket_index(fl, j); 821 i = select_bucket_index(fl, j);
799 list = &fl->commit_buckets[i]; 822 list = &fl->commit_buckets[i].written;
800 if (list_empty(list)) { 823 if (list_empty(list)) {
801 /* Non-empty buckets hold a reference on the lseg */ 824 /* Non-empty buckets hold a reference on the lseg. That ref
825 * is normally transferred to the COMMIT call and released
826 * there. It could also be released if the last req is pulled
827 * off due to a rewrite, in which case it will be done in
828 * filelayout_remove_commit_req
829 */
802 get_lseg(lseg); 830 get_lseg(lseg);
803 } 831 }
804 return list; 832 return list;
@@ -860,18 +888,56 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
860/* 888/*
861 * This is only useful while we are using whole file layouts. 889 * This is only useful while we are using whole file layouts.
862 */ 890 */
863static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) 891static struct pnfs_layout_segment *
892find_only_write_lseg_locked(struct inode *inode)
864{ 893{
865 struct pnfs_layout_segment *lseg, *rv = NULL; 894 struct pnfs_layout_segment *lseg;
866 895
867 spin_lock(&inode->i_lock);
868 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) 896 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
869 if (lseg->pls_range.iomode == IOMODE_RW) 897 if (lseg->pls_range.iomode == IOMODE_RW)
870 rv = get_lseg(lseg); 898 return get_lseg(lseg);
899 return NULL;
900}
901
902static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
903{
904 struct pnfs_layout_segment *rv;
905
906 spin_lock(&inode->i_lock);
907 rv = find_only_write_lseg_locked(inode);
871 spin_unlock(&inode->i_lock); 908 spin_unlock(&inode->i_lock);
872 return rv; 909 return rv;
873} 910}
874 911
912/* Move reqs from written to committing lists, returning count of number moved.
913 * Note called with i_lock held.
914 */
915static int filelayout_scan_commit_lists(struct inode *inode, int max)
916{
917 struct pnfs_layout_segment *lseg;
918 struct nfs4_filelayout_segment *fl;
919 int i, rv = 0, cnt;
920
921 lseg = find_only_write_lseg_locked(inode);
922 if (!lseg)
923 return 0;
924 fl = FILELAYOUT_LSEG(lseg);
925 if (fl->commit_through_mds)
926 goto out_put;
927 for (i = 0; i < fl->number_of_buckets; i++) {
928 if (list_empty(&fl->commit_buckets[i].written))
929 continue;
930 cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written,
931 &fl->commit_buckets[i].committing,
932 max);
933 max -= cnt;
934 rv += cnt;
935 }
936out_put:
937 put_lseg(lseg);
938 return rv;
939}
940
875static int alloc_ds_commits(struct inode *inode, struct list_head *list) 941static int alloc_ds_commits(struct inode *inode, struct list_head *list)
876{ 942{
877 struct pnfs_layout_segment *lseg; 943 struct pnfs_layout_segment *lseg;
@@ -886,7 +952,7 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
886 return 0; 952 return 0;
887 fl = FILELAYOUT_LSEG(lseg); 953 fl = FILELAYOUT_LSEG(lseg);
888 for (i = 0; i < fl->number_of_buckets; i++) { 954 for (i = 0; i < fl->number_of_buckets; i++) {
889 if (list_empty(&fl->commit_buckets[i])) 955 if (list_empty(&fl->commit_buckets[i].committing))
890 continue; 956 continue;
891 data = nfs_commitdata_alloc(); 957 data = nfs_commitdata_alloc();
892 if (!data) 958 if (!data)
@@ -900,9 +966,9 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list)
900 966
901out_bad: 967out_bad:
902 for (j = i; j < fl->number_of_buckets; j++) { 968 for (j = i; j < fl->number_of_buckets; j++) {
903 if (list_empty(&fl->commit_buckets[i])) 969 if (list_empty(&fl->commit_buckets[i].committing))
904 continue; 970 continue;
905 nfs_retry_commit(&fl->commit_buckets[i], lseg); 971 nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
906 put_lseg(lseg); /* associated with emptying bucket */ 972 put_lseg(lseg); /* associated with emptying bucket */
907 } 973 }
908 put_lseg(lseg); 974 put_lseg(lseg);
@@ -937,7 +1003,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
937 nfs_initiate_commit(data, NFS_CLIENT(inode), 1003 nfs_initiate_commit(data, NFS_CLIENT(inode),
938 data->mds_ops, how); 1004 data->mds_ops, how);
939 } else { 1005 } else {
940 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); 1006 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
941 filelayout_initiate_commit(data, how); 1007 filelayout_initiate_commit(data, how);
942 } 1008 }
943 } 1009 }
@@ -967,8 +1033,9 @@ static struct pnfs_layoutdriver_type filelayout_type = {
967 .free_lseg = filelayout_free_lseg, 1033 .free_lseg = filelayout_free_lseg,
968 .pg_read_ops = &filelayout_pg_read_ops, 1034 .pg_read_ops = &filelayout_pg_read_ops,
969 .pg_write_ops = &filelayout_pg_write_ops, 1035 .pg_write_ops = &filelayout_pg_write_ops,
970 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
971 .choose_commit_list = filelayout_choose_commit_list, 1036 .choose_commit_list = filelayout_choose_commit_list,
1037 .remove_commit_req = filelayout_remove_commit_req,
1038 .scan_commit_lists = filelayout_scan_commit_lists,
972 .commit_pagelist = filelayout_commit_pagelist, 1039 .commit_pagelist = filelayout_commit_pagelist,
973 .read_pagelist = filelayout_read_pagelist, 1040 .read_pagelist = filelayout_read_pagelist,
974 .write_pagelist = filelayout_write_pagelist, 1041 .write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 2e42284253fa..21190bb1f5e3 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr {
74 struct nfs4_pnfs_ds *ds_list[1]; 74 struct nfs4_pnfs_ds *ds_list[1];
75}; 75};
76 76
77struct nfs4_fl_commit_bucket {
78 struct list_head written;
79 struct list_head committing;
80};
81
77struct nfs4_filelayout_segment { 82struct nfs4_filelayout_segment {
78 struct pnfs_layout_segment generic_hdr; 83 struct pnfs_layout_segment generic_hdr;
79 u32 stripe_type; 84 u32 stripe_type;
@@ -84,7 +89,7 @@ struct nfs4_filelayout_segment {
84 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 89 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
85 unsigned int num_fh; 90 unsigned int num_fh;
86 struct nfs_fh **fh_array; 91 struct nfs_fh **fh_array;
87 struct list_head *commit_buckets; /* Sort commits to ds */ 92 struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
88 int number_of_buckets; 93 int number_of_buckets;
89}; 94};
90 95
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index fc5b54b84f8f..d21fceaa9f62 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -396,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
396 } 396 }
397} 397}
398 398
399#define NFS_SCAN_MAXENTRIES 16
400/**
401 * nfs_scan_list - Scan a list for matching requests
402 * @nfsi: NFS inode
403 * @dst: Destination list
404 * @idx_start: lower bound of page->index to scan
405 * @npages: idx_start + npages sets the upper bound to scan.
406 * @tag: tag to scan for
407 *
408 * Moves elements from one of the inode request lists.
409 * If the number of requests is set to 0, the entire address_space
410 * starting at index idx_start, is scanned.
411 * The requests are *not* checked to ensure that they form a contiguous set.
412 * You must be holding the inode's i_lock when calling this function
413 */
414int nfs_scan_list(struct nfs_inode *nfsi,
415 struct list_head *dst, pgoff_t idx_start,
416 unsigned int npages, int tag)
417{
418 struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
419 struct nfs_page *req;
420 pgoff_t idx_end;
421 int found, i;
422 int res;
423 struct list_head *list;
424
425 res = 0;
426 if (npages == 0)
427 idx_end = ~0;
428 else
429 idx_end = idx_start + npages - 1;
430
431 for (;;) {
432 found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
433 (void **)&pgvec[0], idx_start,
434 NFS_SCAN_MAXENTRIES, tag);
435 if (found <= 0)
436 break;
437 for (i = 0; i < found; i++) {
438 req = pgvec[i];
439 if (req->wb_index > idx_end)
440 goto out;
441 idx_start = req->wb_index + 1;
442 if (nfs_lock_request_dontget(req)) {
443 kref_get(&req->wb_kref);
444 radix_tree_tag_clear(&nfsi->nfs_page_tree,
445 req->wb_index, tag);
446 list = pnfs_choose_commit_list(req, dst);
447 nfs_list_add_request(req, list);
448 res++;
449 if (res == INT_MAX)
450 goto out;
451 }
452 }
453 /* for latency reduction */
454 cond_resched_lock(&nfsi->vfs_inode.i_lock);
455 }
456out:
457 return res;
458}
459
460int __init nfs_init_nfspagecache(void) 399int __init nfs_init_nfspagecache(void)
461{ 400{
462 nfs_page_cachep = kmem_cache_create("nfs_page", 401 nfs_page_cachep = kmem_cache_create("nfs_page",
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8088d51f495e..ef92f676cf1e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type {
94 const struct nfs_pageio_ops *pg_read_ops; 94 const struct nfs_pageio_ops *pg_read_ops;
95 const struct nfs_pageio_ops *pg_write_ops; 95 const struct nfs_pageio_ops *pg_write_ops;
96 96
97 /* Returns true if layoutdriver wants to divert this request to 97 struct list_head * (*choose_commit_list) (struct nfs_page *req,
98 * driver's commit routine. 98 struct pnfs_layout_segment *lseg);
99 */ 99 struct pnfs_layout_segment *(*remove_commit_req) (struct nfs_page *req);
100 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); 100 int (*scan_commit_lists) (struct inode *inode, int max);
101 struct list_head * (*choose_commit_list) (struct nfs_page *req);
102 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); 101 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
103 102
104 /* 103 /*
@@ -262,20 +261,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
262 return nfss->pnfs_curr_ld != NULL; 261 return nfss->pnfs_curr_ld != NULL;
263} 262}
264 263
265static inline void
266pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
267{
268 if (lseg) {
269 struct pnfs_layoutdriver_type *ld;
270
271 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
272 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
273 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
274 req->wb_commit_lseg = get_lseg(lseg);
275 }
276 }
277}
278
279static inline int 264static inline int
280pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 265pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
281{ 266{
@@ -285,26 +270,38 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
285} 270}
286 271
287static inline struct list_head * 272static inline struct list_head *
288pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 273pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg)
289{ 274{
275 struct inode *inode = req->wb_context->dentry->d_inode;
290 struct list_head *rv; 276 struct list_head *rv;
291 277
292 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { 278 if (lseg && NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list)
293 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; 279 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req, lseg);
294 280 else
295 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); 281 rv = &NFS_I(inode)->commit_list;
296 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
297 /* matched by ref taken when PG_PNFS_COMMIT is set */
298 put_lseg(req->wb_commit_lseg);
299 } else
300 rv = mds;
301 return rv; 282 return rv;
302} 283}
303 284
304static inline void pnfs_clear_request_commit(struct nfs_page *req) 285static inline struct pnfs_layout_segment *
286pnfs_clear_request_commit(struct nfs_page *req)
305{ 287{
306 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) 288 struct inode *inode = req->wb_context->dentry->d_inode;
307 put_lseg(req->wb_commit_lseg); 289
290 if (NFS_SERVER(inode)->pnfs_curr_ld &&
291 NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req)
292 return NFS_SERVER(inode)->pnfs_curr_ld->remove_commit_req(req);
293 else
294 return NULL;
295}
296
297static inline int
298pnfs_scan_commit_lists(struct inode *inode, int max)
299{
300 if (NFS_SERVER(inode)->pnfs_curr_ld &&
301 NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists)
302 return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(inode, max);
303 else
304 return 0;
308} 305}
309 306
310/* Should the pNFS client commit and return the layout upon a setattr */ 307/* Should the pNFS client commit and return the layout upon a setattr */
@@ -400,11 +397,6 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
400 return false; 397 return false;
401} 398}
402 399
403static inline void
404pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
405{
406}
407
408static inline int 400static inline int
409pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) 401pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
410{ 402{
@@ -412,13 +404,23 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
412} 404}
413 405
414static inline struct list_head * 406static inline struct list_head *
415pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) 407pnfs_choose_commit_list(struct nfs_page *req, struct pnfs_layout_segment *lseg)
416{ 408{
417 return mds; 409 struct inode *inode = req->wb_context->dentry->d_inode;
410
411 return &NFS_I(inode)->commit_list;
418} 412}
419 413
420static inline void pnfs_clear_request_commit(struct nfs_page *req) 414static inline struct pnfs_layout_segment *
415pnfs_clear_request_commit(struct nfs_page *req)
421{ 416{
417 return NULL;
418}
419
420static inline int
421pnfs_scan_commit_lists(struct inode *inode, int max)
422{
423 return 0;
422} 424}
423 425
424static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) 426static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index fd8a4f07bc0c..a630ad65d64c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -375,21 +375,14 @@ out_err:
375/* 375/*
376 * Insert a write request into an inode 376 * Insert a write request into an inode
377 */ 377 */
378static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) 378static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
379{ 379{
380 struct nfs_inode *nfsi = NFS_I(inode); 380 struct nfs_inode *nfsi = NFS_I(inode);
381 int error;
382
383 error = radix_tree_preload(GFP_NOFS);
384 if (error != 0)
385 goto out;
386 381
387 /* Lock the request! */ 382 /* Lock the request! */
388 nfs_lock_request_dontget(req); 383 nfs_lock_request_dontget(req);
389 384
390 spin_lock(&inode->i_lock); 385 spin_lock(&inode->i_lock);
391 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
392 BUG_ON(error);
393 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) 386 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
394 inode->i_version++; 387 inode->i_version++;
395 set_bit(PG_MAPPED, &req->wb_flags); 388 set_bit(PG_MAPPED, &req->wb_flags);
@@ -398,11 +391,10 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
398 nfsi->npages++; 391 nfsi->npages++;
399 kref_get(&req->wb_kref); 392 kref_get(&req->wb_kref);
400 spin_unlock(&inode->i_lock); 393 spin_unlock(&inode->i_lock);
401 radix_tree_preload_end();
402out:
403 return error;
404} 394}
405 395
396static struct pnfs_layout_segment *nfs_clear_request_commit(struct nfs_page *req);
397
406/* 398/*
407 * Remove a write request from an inode 399 * Remove a write request from an inode
408 */ 400 */
@@ -410,16 +402,18 @@ static void nfs_inode_remove_request(struct nfs_page *req)
410{ 402{
411 struct inode *inode = req->wb_context->dentry->d_inode; 403 struct inode *inode = req->wb_context->dentry->d_inode;
412 struct nfs_inode *nfsi = NFS_I(inode); 404 struct nfs_inode *nfsi = NFS_I(inode);
405 struct pnfs_layout_segment *lseg;
413 406
414 BUG_ON (!NFS_WBACK_BUSY(req)); 407 BUG_ON (!NFS_WBACK_BUSY(req));
415 408
416 spin_lock(&inode->i_lock); 409 spin_lock(&inode->i_lock);
410 lseg = nfs_clear_request_commit(req);
417 set_page_private(req->wb_page, 0); 411 set_page_private(req->wb_page, 0);
418 ClearPagePrivate(req->wb_page); 412 ClearPagePrivate(req->wb_page);
419 clear_bit(PG_MAPPED, &req->wb_flags); 413 clear_bit(PG_MAPPED, &req->wb_flags);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
421 nfsi->npages--; 414 nfsi->npages--;
422 spin_unlock(&inode->i_lock); 415 spin_unlock(&inode->i_lock);
416 put_lseg(lseg);
423 nfs_release_request(req); 417 nfs_release_request(req);
424} 418}
425 419
@@ -438,31 +432,38 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
438{ 432{
439 struct inode *inode = req->wb_context->dentry->d_inode; 433 struct inode *inode = req->wb_context->dentry->d_inode;
440 struct nfs_inode *nfsi = NFS_I(inode); 434 struct nfs_inode *nfsi = NFS_I(inode);
435 struct list_head *clist;
441 436
437 clist = pnfs_choose_commit_list(req, lseg);
442 spin_lock(&inode->i_lock); 438 spin_lock(&inode->i_lock);
443 set_bit(PG_CLEAN, &(req)->wb_flags); 439 set_bit(PG_CLEAN, &(req)->wb_flags);
444 radix_tree_tag_set(&nfsi->nfs_page_tree, 440 nfs_list_add_request(req, clist);
445 req->wb_index,
446 NFS_PAGE_TAG_COMMIT);
447 nfsi->ncommit++; 441 nfsi->ncommit++;
448 spin_unlock(&inode->i_lock); 442 spin_unlock(&inode->i_lock);
449 pnfs_mark_request_commit(req, lseg);
450 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 443 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
451 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 444 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
452 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 445 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
453} 446}
454 447
455static int 448static void
449nfs_clear_page_commit(struct page *page)
450{
451 dec_zone_page_state(page, NR_UNSTABLE_NFS);
452 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
453}
454
455static struct pnfs_layout_segment *
456nfs_clear_request_commit(struct nfs_page *req) 456nfs_clear_request_commit(struct nfs_page *req)
457{ 457{
458 struct page *page = req->wb_page; 458 struct pnfs_layout_segment *lseg = NULL;
459 459
460 if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { 460 if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
461 dec_zone_page_state(page, NR_UNSTABLE_NFS); 461 nfs_clear_page_commit(req->wb_page);
462 dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); 462 lseg = pnfs_clear_request_commit(req);
463 return 1; 463 NFS_I(req->wb_context->dentry->d_inode)->ncommit--;
464 list_del(&req->wb_list);
464 } 465 }
465 return 0; 466 return lseg;
466} 467}
467 468
468static inline 469static inline
@@ -494,10 +495,10 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
494{ 495{
495} 496}
496 497
497static inline int 498static inline struct pnfs_layout_segment *
498nfs_clear_request_commit(struct nfs_page *req) 499nfs_clear_request_commit(struct nfs_page *req)
499{ 500{
500 return 0; 501 return NULL;
501} 502}
502 503
503static inline 504static inline
@@ -518,46 +519,67 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
518static int 519static int
519nfs_need_commit(struct nfs_inode *nfsi) 520nfs_need_commit(struct nfs_inode *nfsi)
520{ 521{
521 return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); 522 return nfsi->ncommit > 0;
522} 523}
523 524
525/* i_lock held by caller */
526int
527nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max)
528{
529 struct nfs_page *req, *tmp;
530 int ret = 0;
531
532 list_for_each_entry_safe(req, tmp, src, wb_list) {
533 if (nfs_lock_request_dontget(req)) {
534 kref_get(&req->wb_kref);
535 list_move_tail(&req->wb_list, dst);
536 clear_bit(PG_CLEAN, &(req)->wb_flags);
537 ret++;
538 if (ret == max)
539 break;
540 }
541 }
542 return ret;
543}
544EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
545
524/* 546/*
525 * nfs_scan_commit - Scan an inode for commit requests 547 * nfs_scan_commit - Scan an inode for commit requests
526 * @inode: NFS inode to scan 548 * @inode: NFS inode to scan
527 * @dst: destination list 549 * @dst: destination list
528 * @idx_start: lower bound of page->index to scan.
529 * @npages: idx_start + npages sets the upper bound to scan.
530 * 550 *
531 * Moves requests from the inode's 'commit' request list. 551 * Moves requests from the inode's 'commit' request list.
532 * The requests are *not* checked to ensure that they form a contiguous set. 552 * The requests are *not* checked to ensure that they form a contiguous set.
533 */ 553 */
534static int 554static int
535nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 555nfs_scan_commit(struct inode *inode, struct list_head *dst)
536{ 556{
537 struct nfs_inode *nfsi = NFS_I(inode); 557 struct nfs_inode *nfsi = NFS_I(inode);
538 int ret; 558 int ret = 0;
539
540 if (!nfs_need_commit(nfsi))
541 return 0;
542 559
543 spin_lock(&inode->i_lock); 560 spin_lock(&inode->i_lock);
544 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 561 if (nfsi->ncommit > 0) {
545 if (ret > 0) 562 int pnfs_ret;
563
564 ret = nfs_scan_commit_list(&nfsi->commit_list, dst, INT_MAX);
565 pnfs_ret = pnfs_scan_commit_lists(inode, INT_MAX - ret);
566 if (pnfs_ret) {
567 ret += pnfs_ret;
568 set_bit(NFS_INO_PNFS_COMMIT, &nfsi->flags);
569 }
546 nfsi->ncommit -= ret; 570 nfsi->ncommit -= ret;
571 }
547 spin_unlock(&inode->i_lock); 572 spin_unlock(&inode->i_lock);
548
549 if (nfs_need_commit(NFS_I(inode)))
550 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
551
552 return ret; 573 return ret;
553} 574}
575
554#else 576#else
555static inline int nfs_need_commit(struct nfs_inode *nfsi) 577static inline int nfs_need_commit(struct nfs_inode *nfsi)
556{ 578{
557 return 0; 579 return 0;
558} 580}
559 581
560static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) 582static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
561{ 583{
562 return 0; 584 return 0;
563} 585}
@@ -579,6 +601,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
579 unsigned int rqend; 601 unsigned int rqend;
580 unsigned int end; 602 unsigned int end;
581 int error; 603 int error;
604 struct pnfs_layout_segment *lseg = NULL;
582 605
583 if (!PagePrivate(page)) 606 if (!PagePrivate(page))
584 return NULL; 607 return NULL;
@@ -614,12 +637,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
614 spin_lock(&inode->i_lock); 637 spin_lock(&inode->i_lock);
615 } 638 }
616 639
617 if (nfs_clear_request_commit(req) && 640 lseg = nfs_clear_request_commit(req);
618 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
619 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
620 NFS_I(inode)->ncommit--;
621 pnfs_clear_request_commit(req);
622 }
623 641
624 /* Okay, the request matches. Update the region */ 642 /* Okay, the request matches. Update the region */
625 if (offset < req->wb_offset) { 643 if (offset < req->wb_offset) {
@@ -632,6 +650,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
632 req->wb_bytes = rqend - req->wb_offset; 650 req->wb_bytes = rqend - req->wb_offset;
633out_unlock: 651out_unlock:
634 spin_unlock(&inode->i_lock); 652 spin_unlock(&inode->i_lock);
653 put_lseg(lseg);
635 return req; 654 return req;
636out_flushme: 655out_flushme:
637 spin_unlock(&inode->i_lock); 656 spin_unlock(&inode->i_lock);
@@ -653,7 +672,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
653{ 672{
654 struct inode *inode = page->mapping->host; 673 struct inode *inode = page->mapping->host;
655 struct nfs_page *req; 674 struct nfs_page *req;
656 int error;
657 675
658 req = nfs_try_to_update_request(inode, page, offset, bytes); 676 req = nfs_try_to_update_request(inode, page, offset, bytes);
659 if (req != NULL) 677 if (req != NULL)
@@ -661,11 +679,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
661 req = nfs_create_request(ctx, inode, page, offset, bytes); 679 req = nfs_create_request(ctx, inode, page, offset, bytes);
662 if (IS_ERR(req)) 680 if (IS_ERR(req))
663 goto out; 681 goto out;
664 error = nfs_inode_add_request(inode, req); 682 nfs_inode_add_request(inode, req);
665 if (error != 0) {
666 nfs_release_request(req);
667 req = ERR_PTR(error);
668 }
669out: 683out:
670 return req; 684 return req;
671} 685}
@@ -1458,7 +1472,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1458 while (!list_empty(&data->pages)) { 1472 while (!list_empty(&data->pages)) {
1459 req = nfs_list_entry(data->pages.next); 1473 req = nfs_list_entry(data->pages.next);
1460 nfs_list_remove_request(req); 1474 nfs_list_remove_request(req);
1461 nfs_clear_request_commit(req); 1475 nfs_clear_page_commit(req->wb_page);
1462 1476
1463 dprintk("NFS: commit (%s/%lld %d@%lld)", 1477 dprintk("NFS: commit (%s/%lld %d@%lld)",
1464 req->wb_context->dentry->d_sb->s_id, 1478 req->wb_context->dentry->d_sb->s_id,
@@ -1515,7 +1529,7 @@ int nfs_commit_inode(struct inode *inode, int how)
1515 res = nfs_commit_set_lock(NFS_I(inode), may_wait); 1529 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1516 if (res <= 0) 1530 if (res <= 0)
1517 goto out_mark_dirty; 1531 goto out_mark_dirty;
1518 res = nfs_scan_commit(inode, &head, 0, 0); 1532 res = nfs_scan_commit(inode, &head);
1519 if (res) { 1533 if (res) {
1520 int error; 1534 int error;
1521 1535
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ce8e4361ad14..0a63ab2b5a76 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -171,13 +171,9 @@ struct nfs_inode {
171 */ 171 */
172 __be32 cookieverf[2]; 172 __be32 cookieverf[2];
173 173
174 /*
175 * This is the list of dirty unwritten pages.
176 */
177 struct radix_tree_root nfs_page_tree;
178
179 unsigned long npages; 174 unsigned long npages;
180 unsigned long ncommit; 175 unsigned long ncommit;
176 struct list_head commit_list;
181 177
182 /* Open contexts for shared mmap writes */ 178 /* Open contexts for shared mmap writes */
183 struct list_head open_files; 179 struct list_head open_files;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 65b563f0903a..50856e9c1e5f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -19,11 +19,6 @@
19#include <linux/kref.h> 19#include <linux/kref.h>
20 20
21/* 21/*
22 * Valid flags for the radix tree
23 */
24#define NFS_PAGE_TAG_COMMIT 1
25
26/*
27 * Valid flags for a dirty buffer 22 * Valid flags for a dirty buffer
28 */ 23 */
29enum { 24enum {
@@ -32,16 +27,12 @@ enum {
32 PG_CLEAN, 27 PG_CLEAN,
33 PG_NEED_COMMIT, 28 PG_NEED_COMMIT,
34 PG_NEED_RESCHED, 29 PG_NEED_RESCHED,
35 PG_PNFS_COMMIT,
36 PG_PARTIAL_READ_FAILED, 30 PG_PARTIAL_READ_FAILED,
37}; 31};
38 32
39struct nfs_inode; 33struct nfs_inode;
40struct nfs_page { 34struct nfs_page {
41 union { 35 struct list_head wb_list; /* Defines state of page: */
42 struct list_head wb_list; /* Defines state of page: */
43 struct pnfs_layout_segment *wb_commit_lseg; /* Used when PG_PNFS_COMMIT set */
44 };
45 struct page *wb_page; /* page to read in/write out */ 36 struct page *wb_page; /* page to read in/write out */
46 struct nfs_open_context *wb_context; /* File state context info */ 37 struct nfs_open_context *wb_context; /* File state context info */
47 struct nfs_lock_context *wb_lock_context; /* lock context info */ 38 struct nfs_lock_context *wb_lock_context; /* lock context info */
@@ -89,8 +80,6 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
89extern void nfs_release_request(struct nfs_page *req); 80extern void nfs_release_request(struct nfs_page *req);
90 81
91 82
92extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
93 pgoff_t idx_start, unsigned int npages, int tag);
94extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 83extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
95 struct inode *inode, 84 struct inode *inode,
96 const struct nfs_pageio_ops *pg_ops, 85 const struct nfs_pageio_ops *pg_ops,