aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Fred Isaman <iisaman@netapp.com> 2012-04-20 14:47:46 -0400
committer Trond Myklebust <Trond.Myklebust@netapp.com> 2012-04-27 14:10:37 -0400
commit 4db6e0b74c0f6dfc2f9c0690e8df512e3b635983 (patch)
tree 19d8a2a7051bdab220b0bdcf3da1e350a53ce428
parent 30dd374f6fc1b202db3a1b57b61afff1326bad92 (diff)
NFS: merge _full and _partial read rpc_ops
Decouple nfs_pgio_header and nfs_read_data, and have each of the (possibly multiple) nfs_read_data structures take a refcount on the nfs_pgio_header. For the moment, this keeps nfs_read_header as a way to preallocate a single nfs_read_data together with the nfs_pgio_header. The code doesn't need this, and would be prettier without it, but given the amount of churn I am already introducing, I didn't want to play with tuning new mempools. This also fixes a bug in pnfs_ld_handle_read_error: in the case of desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing the replay attempt to do nothing. Signed-off-by: Fred Isaman <iisaman@netapp.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--fs/nfs/direct.c10
-rw-r--r--fs/nfs/internal.h15
-rw-r--r--fs/nfs/nfs4filelayout.c1
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/pagelist.c24
-rw-r--r--fs/nfs/pnfs.c55
-rw-r--r--fs/nfs/read.c338
-rw-r--r--include/linux/nfs_page.h1
-rw-r--r--include/linux/nfs_xdr.h16
9 files changed, 252 insertions, 210 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0faba4cb531d..90b00ce42cbe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
319 bytes = min(rsize,count); 319 bytes = min(rsize,count);
320 320
321 result = -ENOMEM; 321 result = -ENOMEM;
322 rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes)); 322 rhdr = nfs_readhdr_alloc();
323 if (unlikely(!rhdr)) 323 if (unlikely(!rhdr))
324 break; 324 break;
325 data = &rhdr->rpc_data; 325 data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
326 if (!data) {
327 nfs_readhdr_free(&rhdr->header);
328 break;
329 }
330 data->header = &rhdr->header;
331 atomic_inc(&data->header->refcnt);
326 pages = &data->pages; 332 pages = &data->pages;
327 333
328 down_read(&current->mm->mmap_sem); 334 down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5c3d77fda560..33af5e51c0bb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -200,6 +200,7 @@ struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry)
200extern struct svc_version nfs4_callback_version1; 200extern struct svc_version nfs4_callback_version1;
201extern struct svc_version nfs4_callback_version4; 201extern struct svc_version nfs4_callback_version4;
202 202
203struct nfs_pageio_descriptor;
203/* pagelist.c */ 204/* pagelist.c */
204extern int __init nfs_init_nfspagecache(void); 205extern int __init nfs_init_nfspagecache(void);
205extern void nfs_destroy_nfspagecache(void); 206extern void nfs_destroy_nfspagecache(void);
@@ -211,6 +212,10 @@ extern void nfs_destroy_writepagecache(void);
211extern int __init nfs_init_directcache(void); 212extern int __init nfs_init_directcache(void);
212extern void nfs_destroy_directcache(void); 213extern void nfs_destroy_directcache(void);
213extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); 214extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
215extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
216 struct nfs_pgio_header *hdr,
217 void (*release)(struct nfs_pgio_header *hdr));
218void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
214 219
215/* nfs2xdr.c */ 220/* nfs2xdr.c */
216extern int nfs_stat_to_errno(enum nfs_stat); 221extern int nfs_stat_to_errno(enum nfs_stat);
@@ -295,17 +300,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
295extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 300extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
296#endif 301#endif
297 302
298struct nfs_pageio_descriptor;
299/* read.c */ 303/* read.c */
300extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages); 304extern void nfs_async_read_error(struct list_head *head);
305extern struct nfs_read_header *nfs_readhdr_alloc(void);
301extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); 306extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
307extern void nfs_read_completion(struct nfs_pgio_header *hdr);
308extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
309 unsigned int pagecount);
302extern int nfs_initiate_read(struct rpc_clnt *clnt, 310extern int nfs_initiate_read(struct rpc_clnt *clnt,
303 struct nfs_read_data *data, 311 struct nfs_read_data *data,
304 const struct rpc_call_ops *call_ops); 312 const struct rpc_call_ops *call_ops);
305extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 313extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
306extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, 314extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
307 struct list_head *head); 315 struct nfs_pgio_header *hdr);
308
309extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 316extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
310 struct inode *inode); 317 struct inode *inode);
311extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); 318extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index ad1d68013a5b..333e765f3ac2 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -227,7 +227,6 @@ static void filelayout_read_release(void *data)
227{ 227{
228 struct nfs_read_data *rdata = data; 228 struct nfs_read_data *rdata = data;
229 229
230 put_lseg(rdata->header->lseg);
231 rdata->header->mds_ops->rpc_release(data); 230 rdata->header->mds_ops->rpc_release(data);
232} 231}
233 232
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5375862075de..ce31ab22bc55 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3391,8 +3391,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3391 struct inode *inode = hdr->inode; 3391 struct inode *inode = hdr->inode;
3392 3392
3393 dprintk("%s Reset task for i/o through\n", __func__); 3393 dprintk("%s Reset task for i/o through\n", __func__);
3394 put_lseg(hdr->lseg);
3395 hdr->lseg = NULL;
3396 data->ds_clp = NULL; 3394 data->ds_clp = NULL;
3397 /* offsets will differ in the dense stripe case */ 3395 /* offsets will differ in the dense stripe case */
3398 data->args.offset = data->mds_offset; 3396 data->args.offset = data->mds_offset;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d349bd4c48db..cd4c038135a7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -39,6 +39,30 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
39 return p->pagevec != NULL; 39 return p->pagevec != NULL;
40} 40}
41 41
42void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
43 struct nfs_pgio_header *hdr,
44 void (*release)(struct nfs_pgio_header *hdr))
45{
46 hdr->req = nfs_list_entry(desc->pg_list.next);
47 hdr->inode = desc->pg_inode;
48 hdr->cred = hdr->req->wb_context->cred;
49 hdr->io_start = req_offset(hdr->req);
50 hdr->good_bytes = desc->pg_count;
51 hdr->release = release;
52}
53
54void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
55{
56 spin_lock(&hdr->lock);
57 if (pos < hdr->io_start + hdr->good_bytes) {
58 set_bit(NFS_IOHDR_ERROR, &hdr->flags);
59 clear_bit(NFS_IOHDR_EOF, &hdr->flags);
60 hdr->good_bytes = pos - hdr->io_start;
61 hdr->error = error;
62 }
63 spin_unlock(&hdr->lock);
64}
65
42static inline struct nfs_page * 66static inline struct nfs_page *
43nfs_page_alloc(void) 67nfs_page_alloc(void)
44{ 68{
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d705da427e6d..d1a91dbe7654 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
1333 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags); 1333 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
1334 pnfs_return_layout(hdr->inode); 1334 pnfs_return_layout(hdr->inode);
1335 } 1335 }
1336 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages); 1336 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
1337 data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
1338 &hdr->pages);
1337} 1339}
1338 1340
1339/* 1341/*
@@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
1348 hdr->mds_ops->rpc_call_done(&data->task, data); 1350 hdr->mds_ops->rpc_call_done(&data->task, data);
1349 } else 1351 } else
1350 pnfs_ld_handle_read_error(data); 1352 pnfs_ld_handle_read_error(data);
1351 put_lseg(hdr->lseg);
1352 hdr->mds_ops->rpc_release(data); 1353 hdr->mds_ops->rpc_release(data);
1353} 1354}
1354EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1355EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1359{ 1360{
1360 struct nfs_pgio_header *hdr = data->header; 1361 struct nfs_pgio_header *hdr = data->header;
1361 1362
1362 list_splice_tail_init(&hdr->pages, &desc->pg_list); 1363 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
1363 if (hdr->req && list_empty(&hdr->req->wb_list)) 1364 list_splice_tail_init(&hdr->pages, &desc->pg_list);
1364 nfs_list_add_request(hdr->req, &desc->pg_list); 1365 nfs_pageio_reset_read_mds(desc);
1365 nfs_pageio_reset_read_mds(desc); 1366 desc->pg_recoalesce = 1;
1366 desc->pg_recoalesce = 1; 1367 }
1367 nfs_readdata_release(data); 1368 nfs_readdata_release(data);
1368} 1369}
1369 1370
@@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1381 enum pnfs_try_status trypnfs; 1382 enum pnfs_try_status trypnfs;
1382 1383
1383 hdr->mds_ops = call_ops; 1384 hdr->mds_ops = call_ops;
1384 hdr->lseg = get_lseg(lseg);
1385 1385
1386 dprintk("%s: Reading ino:%lu %u@%llu\n", 1386 dprintk("%s: Reading ino:%lu %u@%llu\n",
1387 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1387 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
1388 1388
1389 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); 1389 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
1390 if (trypnfs == PNFS_NOT_ATTEMPTED) { 1390 if (trypnfs != PNFS_NOT_ATTEMPTED)
1391 put_lseg(hdr->lseg);
1392 hdr->lseg = NULL;
1393 } else {
1394 nfs_inc_stats(inode, NFSIOS_PNFS_READ); 1391 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
1395 }
1396 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); 1392 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
1397 return trypnfs; 1393 return trypnfs;
1398} 1394}
@@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1408 while (!list_empty(head)) { 1404 while (!list_empty(head)) {
1409 enum pnfs_try_status trypnfs; 1405 enum pnfs_try_status trypnfs;
1410 1406
1411 data = list_entry(head->next, struct nfs_read_data, list); 1407 data = list_first_entry(head, struct nfs_read_data, list);
1412 list_del_init(&data->list); 1408 list_del_init(&data->list);
1413 1409
1414 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); 1410 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1418 put_lseg(lseg); 1414 put_lseg(lseg);
1419} 1415}
1420 1416
1417static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1418{
1419 put_lseg(hdr->lseg);
1420 nfs_readhdr_free(hdr);
1421}
1422
1421int 1423int
1422pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 1424pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1423{ 1425{
1424 LIST_HEAD(head); 1426 struct nfs_read_header *rhdr;
1427 struct nfs_pgio_header *hdr;
1425 int ret; 1428 int ret;
1426 1429
1427 ret = nfs_generic_pagein(desc, &head); 1430 rhdr = nfs_readhdr_alloc();
1428 if (ret != 0) { 1431 if (!rhdr) {
1432 nfs_async_read_error(&desc->pg_list);
1433 ret = -ENOMEM;
1429 put_lseg(desc->pg_lseg); 1434 put_lseg(desc->pg_lseg);
1430 desc->pg_lseg = NULL; 1435 desc->pg_lseg = NULL;
1431 return ret; 1436 return ret;
1432 } 1437 }
1433 pnfs_do_multiple_reads(desc, &head); 1438 hdr = &rhdr->header;
1434 return 0; 1439 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1440 hdr->lseg = get_lseg(desc->pg_lseg);
1441 atomic_inc(&hdr->refcnt);
1442 ret = nfs_generic_pagein(desc, hdr);
1443 if (ret != 0) {
1444 put_lseg(desc->pg_lseg);
1445 desc->pg_lseg = NULL;
1446 set_bit(NFS_IOHDR_REDO, &hdr->flags);
1447 } else
1448 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
1449 if (atomic_dec_and_test(&hdr->refcnt))
1450 nfs_read_completion(hdr);
1451 return ret;
1435} 1452}
1436EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); 1453EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1437 1454
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f6ab30b5a462..c9633b2501bd 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,29 +30,49 @@
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 31
32static const struct nfs_pageio_ops nfs_pageio_read_ops; 32static const struct nfs_pageio_ops nfs_pageio_read_ops;
33static const struct rpc_call_ops nfs_read_partial_ops; 33static const struct rpc_call_ops nfs_read_common_ops;
34static const struct rpc_call_ops nfs_read_full_ops;
35 34
36static struct kmem_cache *nfs_rdata_cachep; 35static struct kmem_cache *nfs_rdata_cachep;
37 36
38struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount) 37struct nfs_read_header *nfs_readhdr_alloc()
39{ 38{
40 struct nfs_read_header *p; 39 struct nfs_read_header *rhdr;
41 40
42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 41 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
43 if (p) { 42 if (rhdr) {
44 struct nfs_pgio_header *hdr = &p->header; 43 struct nfs_pgio_header *hdr = &rhdr->header;
45 struct nfs_read_data *data = &p->rpc_data;
46 44
47 INIT_LIST_HEAD(&hdr->pages); 45 INIT_LIST_HEAD(&hdr->pages);
48 INIT_LIST_HEAD(&data->list); 46 INIT_LIST_HEAD(&hdr->rpc_list);
47 spin_lock_init(&hdr->lock);
48 atomic_set(&hdr->refcnt, 0);
49 }
50 return rhdr;
51}
52
53struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
54 unsigned int pagecount)
55{
56 struct nfs_read_data *data, *prealloc;
57
58 prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
59 if (prealloc->header == NULL)
60 data = prealloc;
61 else
62 data = kzalloc(sizeof(*data), GFP_KERNEL);
63 if (!data)
64 goto out;
65
66 if (nfs_pgarray_set(&data->pages, pagecount)) {
49 data->header = hdr; 67 data->header = hdr;
50 if (!nfs_pgarray_set(&data->pages, pagecount)) { 68 atomic_inc(&hdr->refcnt);
51 kmem_cache_free(nfs_rdata_cachep, p); 69 } else {
52 p = NULL; 70 if (data != prealloc)
53 } 71 kfree(data);
72 data = NULL;
54 } 73 }
55 return p; 74out:
75 return data;
56} 76}
57 77
58void nfs_readhdr_free(struct nfs_pgio_header *hdr) 78void nfs_readhdr_free(struct nfs_pgio_header *hdr)
@@ -64,10 +84,18 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
64 84
65void nfs_readdata_release(struct nfs_read_data *rdata) 85void nfs_readdata_release(struct nfs_read_data *rdata)
66{ 86{
87 struct nfs_pgio_header *hdr = rdata->header;
88 struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
89
67 put_nfs_open_context(rdata->args.context); 90 put_nfs_open_context(rdata->args.context);
68 if (rdata->pages.pagevec != rdata->pages.page_array) 91 if (rdata->pages.pagevec != rdata->pages.page_array)
69 kfree(rdata->pages.pagevec); 92 kfree(rdata->pages.pagevec);
70 nfs_readhdr_free(rdata->header); 93 if (rdata != &read_header->rpc_data)
94 kfree(rdata);
95 else
96 rdata->header = NULL;
97 if (atomic_dec_and_test(&hdr->refcnt))
98 nfs_read_completion(hdr);
71} 99}
72 100
73static 101static
@@ -79,35 +107,6 @@ int nfs_return_empty_page(struct page *page)
79 return 0; 107 return 0;
80} 108}
81 109
82static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
83{
84 unsigned int remainder = data->args.count - data->res.count;
85 unsigned int base = data->args.pgbase + data->res.count;
86 unsigned int pglen;
87 struct page **pages;
88
89 if (data->res.eof == 0 || remainder == 0)
90 return;
91 /*
92 * Note: "remainder" can never be negative, since we check for
93 * this in the XDR code.
94 */
95 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
96 base &= ~PAGE_CACHE_MASK;
97 pglen = PAGE_CACHE_SIZE - base;
98 for (;;) {
99 if (remainder <= pglen) {
100 zero_user(*pages, base, remainder);
101 break;
102 }
103 zero_user(*pages, base, pglen);
104 pages++;
105 remainder -= pglen;
106 pglen = PAGE_CACHE_SIZE;
107 base = 0;
108 }
109}
110
111void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 110void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
112 struct inode *inode) 111 struct inode *inode)
113{ 112{
@@ -170,6 +169,46 @@ static void nfs_readpage_release(struct nfs_page *req)
170 nfs_release_request(req); 169 nfs_release_request(req);
171} 170}
172 171
172/* Note io was page aligned */
173void nfs_read_completion(struct nfs_pgio_header *hdr)
174{
175 unsigned long bytes = 0;
176
177 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
178 goto out;
179 if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
180 while (!list_empty(&hdr->pages)) {
181 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
182 struct page *page = req->wb_page;
183
184 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
185 if (bytes > hdr->good_bytes)
186 zero_user(page, 0, PAGE_SIZE);
187 else if (hdr->good_bytes - bytes < PAGE_SIZE)
188 zero_user_segment(page,
189 hdr->good_bytes & ~PAGE_MASK,
190 PAGE_SIZE);
191 }
192 SetPageUptodate(page);
193 nfs_list_remove_request(req);
194 nfs_readpage_release(req);
195 bytes += PAGE_SIZE;
196 }
197 } else {
198 while (!list_empty(&hdr->pages)) {
199 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
200
201 bytes += req->wb_bytes;
202 if (bytes <= hdr->good_bytes)
203 SetPageUptodate(req->wb_page);
204 nfs_list_remove_request(req);
205 nfs_readpage_release(req);
206 }
207 }
208out:
209 hdr->release(hdr);
210}
211
173int nfs_initiate_read(struct rpc_clnt *clnt, 212int nfs_initiate_read(struct rpc_clnt *clnt,
174 struct nfs_read_data *data, 213 struct nfs_read_data *data,
175 const struct rpc_call_ops *call_ops) 214 const struct rpc_call_ops *call_ops)
@@ -214,16 +253,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
214/* 253/*
215 * Set up the NFS read request struct 254 * Set up the NFS read request struct
216 */ 255 */
217static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 256static void nfs_read_rpcsetup(struct nfs_read_data *data,
218 unsigned int count, unsigned int offset) 257 unsigned int count, unsigned int offset)
219{ 258{
220 struct inode *inode = data->header->inode; 259 struct nfs_page *req = data->header->req;
221
222 data->header->req = req;
223 data->header->inode = inode;
224 data->header->cred = req->wb_context->cred;
225 260
226 data->args.fh = NFS_FH(inode); 261 data->args.fh = NFS_FH(data->header->inode);
227 data->args.offset = req_offset(req) + offset; 262 data->args.offset = req_offset(req) + offset;
228 data->args.pgbase = req->wb_pgbase + offset; 263 data->args.pgbase = req->wb_pgbase + offset;
229 data->args.pages = data->pages.pagevec; 264 data->args.pages = data->pages.pagevec;
@@ -255,7 +290,7 @@ nfs_do_multiple_reads(struct list_head *head,
255 while (!list_empty(head)) { 290 while (!list_empty(head)) {
256 int ret2; 291 int ret2;
257 292
258 data = list_entry(head->next, struct nfs_read_data, list); 293 data = list_first_entry(head, struct nfs_read_data, list);
259 list_del_init(&data->list); 294 list_del_init(&data->list);
260 295
261 ret2 = nfs_do_read(data, call_ops); 296 ret2 = nfs_do_read(data, call_ops);
@@ -265,7 +300,7 @@ nfs_do_multiple_reads(struct list_head *head,
265 return ret; 300 return ret;
266} 301}
267 302
268static void 303void
269nfs_async_read_error(struct list_head *head) 304nfs_async_read_error(struct list_head *head)
270{ 305{
271 struct nfs_page *req; 306 struct nfs_page *req;
@@ -290,11 +325,11 @@ nfs_async_read_error(struct list_head *head)
290 * won't see the new data until our attribute cache is updated. This is more 325 * won't see the new data until our attribute cache is updated. This is more
291 * or less conventional NFS client behavior. 326 * or less conventional NFS client behavior.
292 */ 327 */
293static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 328static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
329 struct nfs_pgio_header *hdr)
294{ 330{
295 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 331 struct nfs_page *req = hdr->req;
296 struct page *page = req->wb_page; 332 struct page *page = req->wb_page;
297 struct nfs_read_header *rhdr;
298 struct nfs_read_data *data; 333 struct nfs_read_data *data;
299 size_t rsize = desc->pg_bsize, nbytes; 334 size_t rsize = desc->pg_bsize, nbytes;
300 unsigned int offset; 335 unsigned int offset;
@@ -302,85 +337,97 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
302 int ret = 0; 337 int ret = 0;
303 338
304 nfs_list_remove_request(req); 339 nfs_list_remove_request(req);
340 nfs_list_add_request(req, &hdr->pages);
305 341
306 offset = 0; 342 offset = 0;
307 nbytes = desc->pg_count; 343 nbytes = desc->pg_count;
308 do { 344 do {
309 size_t len = min(nbytes,rsize); 345 size_t len = min(nbytes,rsize);
310 346
311 rhdr = nfs_readhdr_alloc(1); 347 data = nfs_readdata_alloc(hdr, 1);
312 if (!rhdr) 348 if (!data)
313 goto out_bad; 349 goto out_bad;
314 data = &rhdr->rpc_data;
315 data->pages.pagevec[0] = page; 350 data->pages.pagevec[0] = page;
316 nfs_read_rpcsetup(req, data, len, offset); 351 nfs_read_rpcsetup(data, len, offset);
317 list_add(&data->list, res); 352 list_add(&data->list, &hdr->rpc_list);
318 requests++; 353 requests++;
319 nbytes -= len; 354 nbytes -= len;
320 offset += len; 355 offset += len;
321 } while(nbytes != 0); 356 } while(nbytes != 0);
322 atomic_set(&req->wb_complete, requests); 357 desc->pg_rpc_callops = &nfs_read_common_ops;
323 desc->pg_rpc_callops = &nfs_read_partial_ops;
324 return ret; 358 return ret;
325out_bad: 359out_bad:
326 while (!list_empty(res)) { 360 while (!list_empty(&hdr->rpc_list)) {
327 data = list_entry(res->next, struct nfs_read_data, list); 361 data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list);
328 list_del(&data->list); 362 list_del(&data->list);
329 nfs_readdata_release(data); 363 nfs_readdata_release(data);
330 } 364 }
331 nfs_readpage_release(req); 365 nfs_async_read_error(&hdr->pages);
332 return -ENOMEM; 366 return -ENOMEM;
333} 367}
334 368
335static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 369static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
370 struct nfs_pgio_header *hdr)
336{ 371{
337 struct nfs_page *req; 372 struct nfs_page *req;
338 struct page **pages; 373 struct page **pages;
339 struct nfs_read_header *rhdr; 374 struct nfs_read_data *data;
340 struct nfs_read_data *data;
341 struct list_head *head = &desc->pg_list; 375 struct list_head *head = &desc->pg_list;
342 int ret = 0; 376 int ret = 0;
343 377
344 rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base, 378 data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
345 desc->pg_count)); 379 desc->pg_count));
346 if (!rhdr) { 380 if (!data) {
347 nfs_async_read_error(head); 381 nfs_async_read_error(head);
348 ret = -ENOMEM; 382 ret = -ENOMEM;
349 goto out; 383 goto out;
350 } 384 }
351 385
352 data = &rhdr->rpc_data;
353 pages = data->pages.pagevec; 386 pages = data->pages.pagevec;
354 while (!list_empty(head)) { 387 while (!list_empty(head)) {
355 req = nfs_list_entry(head->next); 388 req = nfs_list_entry(head->next);
356 nfs_list_remove_request(req); 389 nfs_list_remove_request(req);
357 nfs_list_add_request(req, &rhdr->header.pages); 390 nfs_list_add_request(req, &hdr->pages);
358 *pages++ = req->wb_page; 391 *pages++ = req->wb_page;
359 } 392 }
360 req = nfs_list_entry(rhdr->header.pages.next);
361 393
362 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 394 nfs_read_rpcsetup(data, desc->pg_count, 0);
363 list_add(&data->list, res); 395 list_add(&data->list, &hdr->rpc_list);
364 desc->pg_rpc_callops = &nfs_read_full_ops; 396 desc->pg_rpc_callops = &nfs_read_common_ops;
365out: 397out:
366 return ret; 398 return ret;
367} 399}
368 400
369int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 401int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
402 struct nfs_pgio_header *hdr)
370{ 403{
371 if (desc->pg_bsize < PAGE_CACHE_SIZE) 404 if (desc->pg_bsize < PAGE_CACHE_SIZE)
372 return nfs_pagein_multi(desc, head); 405 return nfs_pagein_multi(desc, hdr);
373 return nfs_pagein_one(desc, head); 406 return nfs_pagein_one(desc, hdr);
374} 407}
375 408
376static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 409static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
377{ 410{
378 LIST_HEAD(head); 411 struct nfs_read_header *rhdr;
412 struct nfs_pgio_header *hdr;
379 int ret; 413 int ret;
380 414
381 ret = nfs_generic_pagein(desc, &head); 415 rhdr = nfs_readhdr_alloc();
416 if (!rhdr) {
417 nfs_async_read_error(&desc->pg_list);
418 return -ENOMEM;
419 }
420 hdr = &rhdr->header;
421 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
422 atomic_inc(&hdr->refcnt);
423 ret = nfs_generic_pagein(desc, hdr);
382 if (ret == 0) 424 if (ret == 0)
383 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 425 ret = nfs_do_multiple_reads(&hdr->rpc_list,
426 desc->pg_rpc_callops);
427 else
428 set_bit(NFS_IOHDR_REDO, &hdr->flags);
429 if (atomic_dec_and_test(&hdr->refcnt))
430 nfs_read_completion(hdr);
384 return ret; 431 return ret;
385} 432}
386 433
@@ -419,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
419 struct nfs_readargs *argp = &data->args; 466 struct nfs_readargs *argp = &data->args;
420 struct nfs_readres *resp = &data->res; 467 struct nfs_readres *resp = &data->res;
421 468
422 if (resp->eof || resp->count == argp->count)
423 return;
424
425 /* This is a short read! */ 469 /* This is a short read! */
426 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 470 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
427 /* Has the server at least made some progress? */ 471 /* Has the server at least made some progress? */
428 if (resp->count == 0) 472 if (resp->count == 0) {
473 nfs_set_pgio_error(data->header, -EIO, argp->offset);
429 return; 474 return;
430 475 }
431 /* Yes, so retry the read at the end of the data */ 476 /* Yes, so retry the read at the end of the data */
432 data->mds_offset += resp->count; 477 data->mds_offset += resp->count;
433 argp->offset += resp->count; 478 argp->offset += resp->count;
@@ -436,38 +481,34 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
436 rpc_restart_call_prepare(task); 481 rpc_restart_call_prepare(task);
437} 482}
438 483
439/* 484static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
440 * Handle a read reply that fills part of a page.
441 */
442static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
443{ 485{
444 struct nfs_read_data *data = calldata; 486 struct nfs_read_data *data = calldata;
445 487 struct nfs_pgio_header *hdr = data->header;
488
489 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
446 if (nfs_readpage_result(task, data) != 0) 490 if (nfs_readpage_result(task, data) != 0)
447 return; 491 return;
448 if (task->tk_status < 0) 492 if (task->tk_status < 0)
449 return; 493 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
450 494 else if (data->res.eof) {
451 nfs_readpage_truncate_uninitialised_page(data); 495 loff_t bound;
452 nfs_readpage_retry(task, data); 496
497 bound = data->args.offset + data->res.count;
498 spin_lock(&hdr->lock);
499 if (bound < hdr->io_start + hdr->good_bytes) {
500 set_bit(NFS_IOHDR_EOF, &hdr->flags);
501 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
502 hdr->good_bytes = bound - hdr->io_start;
503 }
504 spin_unlock(&hdr->lock);
505 } else if (data->res.count != data->args.count)
506 nfs_readpage_retry(task, data);
453} 507}
454 508
455static void nfs_readpage_release_partial(void *calldata) 509static void nfs_readpage_release_common(void *calldata)
456{ 510{
457 struct nfs_read_data *data = calldata; 511 nfs_readdata_release(calldata);
458 struct nfs_page *req = data->header->req;
459 struct page *page = req->wb_page;
460 int status = data->task.tk_status;
461
462 if (status < 0)
463 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
464
465 if (atomic_dec_and_test(&req->wb_complete)) {
466 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
467 SetPageUptodate(page);
468 nfs_readpage_release(req);
469 }
470 nfs_readdata_release(data);
471} 512}
472 513
473void nfs_read_prepare(struct rpc_task *task, void *calldata) 514void nfs_read_prepare(struct rpc_task *task, void *calldata)
@@ -476,75 +517,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
476 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); 517 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
477} 518}
478 519
479static const struct rpc_call_ops nfs_read_partial_ops = { 520static const struct rpc_call_ops nfs_read_common_ops = {
480 .rpc_call_prepare = nfs_read_prepare,
481 .rpc_call_done = nfs_readpage_result_partial,
482 .rpc_release = nfs_readpage_release_partial,
483};
484
485static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
486{
487 unsigned int count = data->res.count;
488 unsigned int base = data->args.pgbase;
489 struct page **pages;
490
491 if (data->res.eof)
492 count = data->args.count;
493 if (unlikely(count == 0))
494 return;
495 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
496 base &= ~PAGE_CACHE_MASK;
497 count += base;
498 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
499 SetPageUptodate(*pages);
500 if (count == 0)
501 return;
502 /* Was this a short read? */
503 if (data->res.eof || data->res.count == data->args.count)
504 SetPageUptodate(*pages);
505}
506
507/*
508 * This is the callback from RPC telling us whether a reply was
509 * received or some error occurred (timeout or socket shutdown).
510 */
511static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
512{
513 struct nfs_read_data *data = calldata;
514
515 if (nfs_readpage_result(task, data) != 0)
516 return;
517 if (task->tk_status < 0)
518 return;
519 /*
520 * Note: nfs_readpage_retry may change the values of
521 * data->args. In the multi-page case, we therefore need
522 * to ensure that we call nfs_readpage_set_pages_uptodate()
523 * first.
524 */
525 nfs_readpage_truncate_uninitialised_page(data);
526 nfs_readpage_set_pages_uptodate(data);
527 nfs_readpage_retry(task, data);
528}
529
530static void nfs_readpage_release_full(void *calldata)
531{
532 struct nfs_read_data *data = calldata;
533 struct nfs_pgio_header *hdr = data->header;
534
535 while (!list_empty(&hdr->pages)) {
536 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
537
538 nfs_list_remove_request(req);
539 nfs_readpage_release(req);
540 }
541 nfs_readdata_release(calldata);
542}
543
544static const struct rpc_call_ops nfs_read_full_ops = {
545 .rpc_call_prepare = nfs_read_prepare, 521 .rpc_call_prepare = nfs_read_prepare,
546 .rpc_call_done = nfs_readpage_result_full, 522 .rpc_call_done = nfs_readpage_result_common,
547 .rpc_release = nfs_readpage_release_full, 523 .rpc_release = nfs_readpage_release_common,
548}; 524};
549 525
550/* 526/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6bec17..5c520344d8ad 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
27 PG_CLEAN, 27 PG_CLEAN,
28 PG_NEED_COMMIT, 28 PG_NEED_COMMIT,
29 PG_NEED_RESCHED, 29 PG_NEED_RESCHED,
30 PG_PARTIAL_READ_FAILED,
31 PG_COMMIT_TO_DS, 30 PG_COMMIT_TO_DS,
32}; 31};
33 32
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e34beaf86e9c..164862148ba0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1187,14 +1187,30 @@ struct nfs_read_data {
1187 struct nfs_client *ds_clp; /* pNFS data server */ 1187 struct nfs_client *ds_clp; /* pNFS data server */
1188}; 1188};
1189 1189
1190/* used as flag bits in nfs_pgio_header */
1191enum {
1192 NFS_IOHDR_ERROR = 0,
1193 NFS_IOHDR_EOF,
1194 NFS_IOHDR_REDO,
1195};
1196
1190struct nfs_pgio_header { 1197struct nfs_pgio_header {
1191 struct inode *inode; 1198 struct inode *inode;
1192 struct rpc_cred *cred; 1199 struct rpc_cred *cred;
1193 struct list_head pages; 1200 struct list_head pages;
1201 struct list_head rpc_list;
1202 atomic_t refcnt;
1194 struct nfs_page *req; 1203 struct nfs_page *req;
1195 struct pnfs_layout_segment *lseg; 1204 struct pnfs_layout_segment *lseg;
1205 loff_t io_start;
1196 const struct rpc_call_ops *mds_ops; 1206 const struct rpc_call_ops *mds_ops;
1207 void (*release) (struct nfs_pgio_header *hdr);
1208 spinlock_t lock;
1209 /* fields protected by lock */
1197 int pnfs_error; 1210 int pnfs_error;
1211 int error; /* merge with pnfs_error */
1212 unsigned long good_bytes; /* boundary of good data */
1213 unsigned long flags;
1198}; 1214};
1199 1215
1200struct nfs_read_header { 1216struct nfs_read_header {