diff options
41 files changed, 1804 insertions, 1896 deletions
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 03192a66c143..4782e0840dcc 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
| @@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o | |||
| 29 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o | 29 | nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o |
| 30 | nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o | 30 | nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o |
| 31 | 31 | ||
| 32 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | 32 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ |
| 33 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | ||
| 34 | |||
| 35 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | 33 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ |
| 36 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | 34 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 65d849bdf77a..9b431f44fad9 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
| @@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
| 210 | SetPageUptodate(bvec->bv_page); | 210 | SetPageUptodate(bvec->bv_page); |
| 211 | 211 | ||
| 212 | if (err) { | 212 | if (err) { |
| 213 | struct nfs_read_data *rdata = par->data; | 213 | struct nfs_pgio_data *rdata = par->data; |
| 214 | struct nfs_pgio_header *header = rdata->header; | 214 | struct nfs_pgio_header *header = rdata->header; |
| 215 | 215 | ||
| 216 | if (!header->pnfs_error) | 216 | if (!header->pnfs_error) |
| @@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err) | |||
| 224 | static void bl_read_cleanup(struct work_struct *work) | 224 | static void bl_read_cleanup(struct work_struct *work) |
| 225 | { | 225 | { |
| 226 | struct rpc_task *task; | 226 | struct rpc_task *task; |
| 227 | struct nfs_read_data *rdata; | 227 | struct nfs_pgio_data *rdata; |
| 228 | dprintk("%s enter\n", __func__); | 228 | dprintk("%s enter\n", __func__); |
| 229 | task = container_of(work, struct rpc_task, u.tk_work); | 229 | task = container_of(work, struct rpc_task, u.tk_work); |
| 230 | rdata = container_of(task, struct nfs_read_data, task); | 230 | rdata = container_of(task, struct nfs_pgio_data, task); |
| 231 | pnfs_ld_read_done(rdata); | 231 | pnfs_ld_read_done(rdata); |
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | static void | 234 | static void |
| 235 | bl_end_par_io_read(void *data, int unused) | 235 | bl_end_par_io_read(void *data, int unused) |
| 236 | { | 236 | { |
| 237 | struct nfs_read_data *rdata = data; | 237 | struct nfs_pgio_data *rdata = data; |
| 238 | 238 | ||
| 239 | rdata->task.tk_status = rdata->header->pnfs_error; | 239 | rdata->task.tk_status = rdata->header->pnfs_error; |
| 240 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); | 240 | INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); |
| @@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused) | |||
| 242 | } | 242 | } |
| 243 | 243 | ||
| 244 | static enum pnfs_try_status | 244 | static enum pnfs_try_status |
| 245 | bl_read_pagelist(struct nfs_read_data *rdata) | 245 | bl_read_pagelist(struct nfs_pgio_data *rdata) |
| 246 | { | 246 | { |
| 247 | struct nfs_pgio_header *header = rdata->header; | 247 | struct nfs_pgio_header *header = rdata->header; |
| 248 | int i, hole; | 248 | int i, hole; |
| @@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) | |||
| 390 | } | 390 | } |
| 391 | 391 | ||
| 392 | if (unlikely(err)) { | 392 | if (unlikely(err)) { |
| 393 | struct nfs_write_data *data = par->data; | 393 | struct nfs_pgio_data *data = par->data; |
| 394 | struct nfs_pgio_header *header = data->header; | 394 | struct nfs_pgio_header *header = data->header; |
| 395 | 395 | ||
| 396 | if (!header->pnfs_error) | 396 | if (!header->pnfs_error) |
| @@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
| 405 | { | 405 | { |
| 406 | struct parallel_io *par = bio->bi_private; | 406 | struct parallel_io *par = bio->bi_private; |
| 407 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); | 407 | const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 408 | struct nfs_write_data *data = par->data; | 408 | struct nfs_pgio_data *data = par->data; |
| 409 | struct nfs_pgio_header *header = data->header; | 409 | struct nfs_pgio_header *header = data->header; |
| 410 | 410 | ||
| 411 | if (!uptodate) { | 411 | if (!uptodate) { |
| @@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err) | |||
| 423 | static void bl_write_cleanup(struct work_struct *work) | 423 | static void bl_write_cleanup(struct work_struct *work) |
| 424 | { | 424 | { |
| 425 | struct rpc_task *task; | 425 | struct rpc_task *task; |
| 426 | struct nfs_write_data *wdata; | 426 | struct nfs_pgio_data *wdata; |
| 427 | dprintk("%s enter\n", __func__); | 427 | dprintk("%s enter\n", __func__); |
| 428 | task = container_of(work, struct rpc_task, u.tk_work); | 428 | task = container_of(work, struct rpc_task, u.tk_work); |
| 429 | wdata = container_of(task, struct nfs_write_data, task); | 429 | wdata = container_of(task, struct nfs_pgio_data, task); |
| 430 | if (likely(!wdata->header->pnfs_error)) { | 430 | if (likely(!wdata->header->pnfs_error)) { |
| 431 | /* Marks for LAYOUTCOMMIT */ | 431 | /* Marks for LAYOUTCOMMIT */ |
| 432 | mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), | 432 | mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), |
| @@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work) | |||
| 438 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ | 438 | /* Called when last of bios associated with a bl_write_pagelist call finishes */ |
| 439 | static void bl_end_par_io_write(void *data, int num_se) | 439 | static void bl_end_par_io_write(void *data, int num_se) |
| 440 | { | 440 | { |
| 441 | struct nfs_write_data *wdata = data; | 441 | struct nfs_pgio_data *wdata = data; |
| 442 | 442 | ||
| 443 | if (unlikely(wdata->header->pnfs_error)) { | 443 | if (unlikely(wdata->header->pnfs_error)) { |
| 444 | bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, | 444 | bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, |
| @@ -673,7 +673,7 @@ check_page: | |||
| 673 | } | 673 | } |
| 674 | 674 | ||
| 675 | static enum pnfs_try_status | 675 | static enum pnfs_try_status |
| 676 | bl_write_pagelist(struct nfs_write_data *wdata, int sync) | 676 | bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) |
| 677 | { | 677 | { |
| 678 | struct nfs_pgio_header *header = wdata->header; | 678 | struct nfs_pgio_header *header = wdata->header; |
| 679 | int i, ret, npg_zero, pg_index, last = 0; | 679 | int i, ret, npg_zero, pg_index, last = 0; |
| @@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | |||
| 1189 | pnfs_generic_pg_init_read(pgio, req); | 1189 | pnfs_generic_pg_init_read(pgio, req); |
| 1190 | } | 1190 | } |
| 1191 | 1191 | ||
| 1192 | static bool | 1192 | /* |
| 1193 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | ||
| 1194 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | ||
| 1195 | */ | ||
| 1196 | static size_t | ||
| 1193 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1197 | bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 1194 | struct nfs_page *req) | 1198 | struct nfs_page *req) |
| 1195 | { | 1199 | { |
| 1196 | if (pgio->pg_dreq != NULL && | 1200 | if (pgio->pg_dreq != NULL && |
| 1197 | !is_aligned_req(req, SECTOR_SIZE)) | 1201 | !is_aligned_req(req, SECTOR_SIZE)) |
| 1198 | return false; | 1202 | return 0; |
| 1199 | 1203 | ||
| 1200 | return pnfs_generic_pg_test(pgio, prev, req); | 1204 | return pnfs_generic_pg_test(pgio, prev, req); |
| 1201 | } | 1205 | } |
| @@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | |||
| 1241 | } | 1245 | } |
| 1242 | } | 1246 | } |
| 1243 | 1247 | ||
| 1244 | static bool | 1248 | /* |
| 1249 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | ||
| 1250 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | ||
| 1251 | */ | ||
| 1252 | static size_t | ||
| 1245 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1253 | bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 1246 | struct nfs_page *req) | 1254 | struct nfs_page *req) |
| 1247 | { | 1255 | { |
| 1248 | if (pgio->pg_dreq != NULL && | 1256 | if (pgio->pg_dreq != NULL && |
| 1249 | !is_aligned_req(req, PAGE_CACHE_SIZE)) | 1257 | !is_aligned_req(req, PAGE_CACHE_SIZE)) |
| 1250 | return false; | 1258 | return 0; |
| 1251 | 1259 | ||
| 1252 | return pnfs_generic_pg_test(pgio, prev, req); | 1260 | return pnfs_generic_pg_test(pgio, prev, req); |
| 1253 | } | 1261 | } |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b8797ae6831f..4ad7bc388679 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
| @@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq) | |||
| 108 | return atomic_dec_and_test(&dreq->io_count); | 108 | return atomic_dec_and_test(&dreq->io_count); |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | /* | ||
| 112 | * nfs_direct_select_verf - select the right verifier | ||
| 113 | * @dreq - direct request possibly spanning multiple servers | ||
| 114 | * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs | ||
| 115 | * @ds_idx - index of data server in data server list, only valid if ds_clp set | ||
| 116 | * | ||
| 117 | * returns the correct verifier to use given the role of the server | ||
| 118 | */ | ||
| 119 | static struct nfs_writeverf * | ||
| 120 | nfs_direct_select_verf(struct nfs_direct_req *dreq, | ||
| 121 | struct nfs_client *ds_clp, | ||
| 122 | int ds_idx) | ||
| 123 | { | ||
| 124 | struct nfs_writeverf *verfp = &dreq->verf; | ||
| 125 | |||
| 126 | #ifdef CONFIG_NFS_V4_1 | ||
| 127 | if (ds_clp) { | ||
| 128 | /* pNFS is in use, use the DS verf */ | ||
| 129 | if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) | ||
| 130 | verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; | ||
| 131 | else | ||
| 132 | WARN_ON_ONCE(1); | ||
| 133 | } | ||
| 134 | #endif | ||
| 135 | return verfp; | ||
| 136 | } | ||
| 137 | |||
| 138 | |||
| 139 | /* | ||
| 140 | * nfs_direct_set_hdr_verf - set the write/commit verifier | ||
| 141 | * @dreq - direct request possibly spanning multiple servers | ||
| 142 | * @hdr - pageio header whose verifier is recorded as the "seen" verf | ||
| 143 | * | ||
| 144 | * Set the server's (MDS or DS) "seen" verifier | ||
| 145 | */ | ||
| 146 | static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, | ||
| 147 | struct nfs_pgio_header *hdr) | ||
| 148 | { | ||
| 149 | struct nfs_writeverf *verfp; | ||
| 150 | |||
| 151 | verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, | ||
| 152 | hdr->data->ds_idx); | ||
| 153 | WARN_ON_ONCE(verfp->committed >= 0); | ||
| 154 | memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | ||
| 155 | WARN_ON_ONCE(verfp->committed < 0); | ||
| 156 | } | ||
| 157 | |||
| 158 | /* | ||
| 159 | * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header | ||
| 160 | * @dreq - direct request possibly spanning multiple servers | ||
| 161 | * @hdr - pageio header to validate against previously seen verf | ||
| 162 | * | ||
| 163 | * If the server's "seen" verf is not yet initialized, set it from @hdr and | ||
| 164 | * return 0. Otherwise return the result of comparing @hdr->verf with the | ||
| 165 | * "seen" verf of the server used by @hdr (DS or MDS); non-zero means mismatch. | ||
| 166 | */ | ||
| 167 | static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, | ||
| 168 | struct nfs_pgio_header *hdr) | ||
| 169 | { | ||
| 170 | struct nfs_writeverf *verfp; | ||
| 171 | |||
| 172 | verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, | ||
| 173 | hdr->data->ds_idx); | ||
| 174 | if (verfp->committed < 0) { | ||
| 175 | nfs_direct_set_hdr_verf(dreq, hdr); | ||
| 176 | return 0; | ||
| 177 | } | ||
| 178 | return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); | ||
| 179 | } | ||
| 180 | |||
| 181 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | ||
| 182 | /* | ||
| 183 | * nfs_direct_cmp_commit_data_verf - compare verifier for commit data | ||
| 184 | * @dreq - direct request possibly spanning multiple servers | ||
| 185 | * @data - commit data to validate against previously seen verf | ||
| 186 | * | ||
| 187 | * returns result of comparison between @data->verf and the verf of | ||
| 188 | * the server used by @data (DS or MDS) | ||
| 189 | */ | ||
| 190 | static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, | ||
| 191 | struct nfs_commit_data *data) | ||
| 192 | { | ||
| 193 | struct nfs_writeverf *verfp; | ||
| 194 | |||
| 195 | verfp = nfs_direct_select_verf(dreq, data->ds_clp, | ||
| 196 | data->ds_commit_index); | ||
| 197 | WARN_ON_ONCE(verfp->committed < 0); | ||
| 198 | return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); | ||
| 199 | } | ||
| 200 | #endif | ||
| 201 | |||
| 111 | /** | 202 | /** |
| 112 | * nfs_direct_IO - NFS address space operation for direct I/O | 203 | * nfs_direct_IO - NFS address space operation for direct I/O |
| 113 | * @rw: direction (read or write) | 204 | * @rw: direction (read or write) |
| @@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) | |||
| 168 | kref_get(&dreq->kref); | 259 | kref_get(&dreq->kref); |
| 169 | init_completion(&dreq->completion); | 260 | init_completion(&dreq->completion); |
| 170 | INIT_LIST_HEAD(&dreq->mds_cinfo.list); | 261 | INIT_LIST_HEAD(&dreq->mds_cinfo.list); |
| 262 | dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ | ||
| 171 | INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); | 263 | INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); |
| 172 | spin_lock_init(&dreq->lock); | 264 | spin_lock_init(&dreq->lock); |
| 173 | 265 | ||
| @@ -380,8 +472,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de | |||
| 380 | struct nfs_page *req; | 472 | struct nfs_page *req; |
| 381 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); | 473 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); |
| 382 | /* XXX do we need to do the eof zeroing found in async_filler? */ | 474 | /* XXX do we need to do the eof zeroing found in async_filler? */ |
| 383 | req = nfs_create_request(dreq->ctx, dreq->inode, | 475 | req = nfs_create_request(dreq->ctx, pagevec[i], NULL, |
| 384 | pagevec[i], | ||
| 385 | pgbase, req_len); | 476 | pgbase, req_len); |
| 386 | if (IS_ERR(req)) { | 477 | if (IS_ERR(req)) { |
| 387 | result = PTR_ERR(req); | 478 | result = PTR_ERR(req); |
| @@ -424,7 +515,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 424 | size_t requested_bytes = 0; | 515 | size_t requested_bytes = 0; |
| 425 | unsigned long seg; | 516 | unsigned long seg; |
| 426 | 517 | ||
| 427 | NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, | 518 | nfs_pageio_init_read(&desc, dreq->inode, false, |
| 428 | &nfs_direct_read_completion_ops); | 519 | &nfs_direct_read_completion_ops); |
| 429 | get_dreq(dreq); | 520 | get_dreq(dreq); |
| 430 | desc.pg_dreq = dreq; | 521 | desc.pg_dreq = dreq; |
| @@ -564,7 +655,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) | |||
| 564 | dreq->count = 0; | 655 | dreq->count = 0; |
| 565 | get_dreq(dreq); | 656 | get_dreq(dreq); |
| 566 | 657 | ||
| 567 | NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, | 658 | nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, |
| 568 | &nfs_direct_write_completion_ops); | 659 | &nfs_direct_write_completion_ops); |
| 569 | desc.pg_dreq = dreq; | 660 | desc.pg_dreq = dreq; |
| 570 | 661 | ||
| @@ -603,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) | |||
| 603 | dprintk("NFS: %5u commit failed with error %d.\n", | 694 | dprintk("NFS: %5u commit failed with error %d.\n", |
| 604 | data->task.tk_pid, status); | 695 | data->task.tk_pid, status); |
| 605 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | 696 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
| 606 | } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { | 697 | } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) { |
| 607 | dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); | 698 | dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); |
| 608 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | 699 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; |
| 609 | } | 700 | } |
| @@ -750,8 +841,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d | |||
| 750 | struct nfs_page *req; | 841 | struct nfs_page *req; |
| 751 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); | 842 | unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); |
| 752 | 843 | ||
| 753 | req = nfs_create_request(dreq->ctx, dreq->inode, | 844 | req = nfs_create_request(dreq->ctx, pagevec[i], NULL, |
| 754 | pagevec[i], | ||
| 755 | pgbase, req_len); | 845 | pgbase, req_len); |
| 756 | if (IS_ERR(req)) { | 846 | if (IS_ERR(req)) { |
| 757 | result = PTR_ERR(req); | 847 | result = PTR_ERR(req); |
| @@ -813,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||
| 813 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) | 903 | if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) |
| 814 | bit = NFS_IOHDR_NEED_RESCHED; | 904 | bit = NFS_IOHDR_NEED_RESCHED; |
| 815 | else if (dreq->flags == 0) { | 905 | else if (dreq->flags == 0) { |
| 816 | memcpy(&dreq->verf, hdr->verf, | 906 | nfs_direct_set_hdr_verf(dreq, hdr); |
| 817 | sizeof(dreq->verf)); | ||
| 818 | bit = NFS_IOHDR_NEED_COMMIT; | 907 | bit = NFS_IOHDR_NEED_COMMIT; |
| 819 | dreq->flags = NFS_ODIRECT_DO_COMMIT; | 908 | dreq->flags = NFS_ODIRECT_DO_COMMIT; |
| 820 | } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { | 909 | } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { |
| 821 | if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { | 910 | if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { |
| 822 | dreq->flags = NFS_ODIRECT_RESCHED_WRITES; | 911 | dreq->flags = |
| 912 | NFS_ODIRECT_RESCHED_WRITES; | ||
| 823 | bit = NFS_IOHDR_NEED_RESCHED; | 913 | bit = NFS_IOHDR_NEED_RESCHED; |
| 824 | } else | 914 | } else |
| 825 | bit = NFS_IOHDR_NEED_COMMIT; | 915 | bit = NFS_IOHDR_NEED_COMMIT; |
| @@ -829,6 +919,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||
| 829 | spin_unlock(&dreq->lock); | 919 | spin_unlock(&dreq->lock); |
| 830 | 920 | ||
| 831 | while (!list_empty(&hdr->pages)) { | 921 | while (!list_empty(&hdr->pages)) { |
| 922 | bool do_destroy = true; | ||
| 923 | |||
| 832 | req = nfs_list_entry(hdr->pages.next); | 924 | req = nfs_list_entry(hdr->pages.next); |
| 833 | nfs_list_remove_request(req); | 925 | nfs_list_remove_request(req); |
| 834 | switch (bit) { | 926 | switch (bit) { |
| @@ -836,6 +928,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) | |||
| 836 | case NFS_IOHDR_NEED_COMMIT: | 928 | case NFS_IOHDR_NEED_COMMIT: |
| 837 | kref_get(&req->wb_kref); | 929 | kref_get(&req->wb_kref); |
| 838 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 930 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); |
| 931 | do_destroy = false; | ||
| 839 | } | 932 | } |
| 840 | nfs_unlock_and_release_request(req); | 933 | nfs_unlock_and_release_request(req); |
| 841 | } | 934 | } |
| @@ -874,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
| 874 | size_t requested_bytes = 0; | 967 | size_t requested_bytes = 0; |
| 875 | unsigned long seg; | 968 | unsigned long seg; |
| 876 | 969 | ||
| 877 | NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, | 970 | nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, |
| 878 | &nfs_direct_write_completion_ops); | 971 | &nfs_direct_write_completion_ops); |
| 879 | desc.pg_dreq = dreq; | 972 | desc.pg_dreq = dreq; |
| 880 | get_dreq(dreq); | 973 | get_dreq(dreq); |
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile new file mode 100644 index 000000000000..8516cdffb9e9 --- /dev/null +++ b/fs/nfs/filelayout/Makefile | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | # | ||
| 2 | # Makefile for the pNFS Files Layout Driver kernel module | ||
| 3 | # | ||
| 4 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | ||
| 5 | nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o | ||
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c index b9a35c05b60f..d2eba1c13b7e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/filelayout/filelayout.c | |||
| @@ -35,11 +35,11 @@ | |||
| 35 | 35 | ||
| 36 | #include <linux/sunrpc/metrics.h> | 36 | #include <linux/sunrpc/metrics.h> |
| 37 | 37 | ||
| 38 | #include "nfs4session.h" | 38 | #include "../nfs4session.h" |
| 39 | #include "internal.h" | 39 | #include "../internal.h" |
| 40 | #include "delegation.h" | 40 | #include "../delegation.h" |
| 41 | #include "nfs4filelayout.h" | 41 | #include "filelayout.h" |
| 42 | #include "nfs4trace.h" | 42 | #include "../nfs4trace.h" |
| 43 | 43 | ||
| 44 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 44 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
| 45 | 45 | ||
| @@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) | |||
| 84 | BUG(); | 84 | BUG(); |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | static void filelayout_reset_write(struct nfs_write_data *data) | 87 | static void filelayout_reset_write(struct nfs_pgio_data *data) |
| 88 | { | 88 | { |
| 89 | struct nfs_pgio_header *hdr = data->header; | 89 | struct nfs_pgio_header *hdr = data->header; |
| 90 | struct rpc_task *task = &data->task; | 90 | struct rpc_task *task = &data->task; |
| @@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data) | |||
| 105 | } | 105 | } |
| 106 | } | 106 | } |
| 107 | 107 | ||
| 108 | static void filelayout_reset_read(struct nfs_read_data *data) | 108 | static void filelayout_reset_read(struct nfs_pgio_data *data) |
| 109 | { | 109 | { |
| 110 | struct nfs_pgio_header *hdr = data->header; | 110 | struct nfs_pgio_header *hdr = data->header; |
| 111 | struct rpc_task *task = &data->task; | 111 | struct rpc_task *task = &data->task; |
| @@ -243,7 +243,7 @@ wait_on_recovery: | |||
| 243 | /* NFS_PROTO call done callback routines */ | 243 | /* NFS_PROTO call done callback routines */ |
| 244 | 244 | ||
| 245 | static int filelayout_read_done_cb(struct rpc_task *task, | 245 | static int filelayout_read_done_cb(struct rpc_task *task, |
| 246 | struct nfs_read_data *data) | 246 | struct nfs_pgio_data *data) |
| 247 | { | 247 | { |
| 248 | struct nfs_pgio_header *hdr = data->header; | 248 | struct nfs_pgio_header *hdr = data->header; |
| 249 | int err; | 249 | int err; |
| @@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, | |||
| 270 | * rfc5661 is not clear about which credential should be used. | 270 | * rfc5661 is not clear about which credential should be used. |
| 271 | */ | 271 | */ |
| 272 | static void | 272 | static void |
| 273 | filelayout_set_layoutcommit(struct nfs_write_data *wdata) | 273 | filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) |
| 274 | { | 274 | { |
| 275 | struct nfs_pgio_header *hdr = wdata->header; | 275 | struct nfs_pgio_header *hdr = wdata->header; |
| 276 | 276 | ||
| @@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata) | |||
| 279 | return; | 279 | return; |
| 280 | 280 | ||
| 281 | pnfs_set_layoutcommit(wdata); | 281 | pnfs_set_layoutcommit(wdata); |
| 282 | dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, | 282 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, |
| 283 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | 283 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); |
| 284 | } | 284 | } |
| 285 | 285 | ||
| @@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) | |||
| 305 | */ | 305 | */ |
| 306 | static void filelayout_read_prepare(struct rpc_task *task, void *data) | 306 | static void filelayout_read_prepare(struct rpc_task *task, void *data) |
| 307 | { | 307 | { |
| 308 | struct nfs_read_data *rdata = data; | 308 | struct nfs_pgio_data *rdata = data; |
| 309 | 309 | ||
| 310 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { | 310 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { |
| 311 | rpc_exit(task, -EIO); | 311 | rpc_exit(task, -EIO); |
| @@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) | |||
| 317 | rpc_exit(task, 0); | 317 | rpc_exit(task, 0); |
| 318 | return; | 318 | return; |
| 319 | } | 319 | } |
| 320 | rdata->read_done_cb = filelayout_read_done_cb; | 320 | rdata->pgio_done_cb = filelayout_read_done_cb; |
| 321 | 321 | ||
| 322 | if (nfs41_setup_sequence(rdata->ds_clp->cl_session, | 322 | if (nfs41_setup_sequence(rdata->ds_clp->cl_session, |
| 323 | &rdata->args.seq_args, | 323 | &rdata->args.seq_args, |
| @@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) | |||
| 331 | 331 | ||
| 332 | static void filelayout_read_call_done(struct rpc_task *task, void *data) | 332 | static void filelayout_read_call_done(struct rpc_task *task, void *data) |
| 333 | { | 333 | { |
| 334 | struct nfs_read_data *rdata = data; | 334 | struct nfs_pgio_data *rdata = data; |
| 335 | 335 | ||
| 336 | dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); | 336 | dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); |
| 337 | 337 | ||
| @@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) | |||
| 347 | 347 | ||
| 348 | static void filelayout_read_count_stats(struct rpc_task *task, void *data) | 348 | static void filelayout_read_count_stats(struct rpc_task *task, void *data) |
| 349 | { | 349 | { |
| 350 | struct nfs_read_data *rdata = data; | 350 | struct nfs_pgio_data *rdata = data; |
| 351 | 351 | ||
| 352 | rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); | 352 | rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); |
| 353 | } | 353 | } |
| 354 | 354 | ||
| 355 | static void filelayout_read_release(void *data) | 355 | static void filelayout_read_release(void *data) |
| 356 | { | 356 | { |
| 357 | struct nfs_read_data *rdata = data; | 357 | struct nfs_pgio_data *rdata = data; |
| 358 | struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; | 358 | struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; |
| 359 | 359 | ||
| 360 | filelayout_fenceme(lo->plh_inode, lo); | 360 | filelayout_fenceme(lo->plh_inode, lo); |
| @@ -363,7 +363,7 @@ static void filelayout_read_release(void *data) | |||
| 363 | } | 363 | } |
| 364 | 364 | ||
| 365 | static int filelayout_write_done_cb(struct rpc_task *task, | 365 | static int filelayout_write_done_cb(struct rpc_task *task, |
| 366 | struct nfs_write_data *data) | 366 | struct nfs_pgio_data *data) |
| 367 | { | 367 | { |
| 368 | struct nfs_pgio_header *hdr = data->header; | 368 | struct nfs_pgio_header *hdr = data->header; |
| 369 | int err; | 369 | int err; |
| @@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, | |||
| 419 | 419 | ||
| 420 | static void filelayout_write_prepare(struct rpc_task *task, void *data) | 420 | static void filelayout_write_prepare(struct rpc_task *task, void *data) |
| 421 | { | 421 | { |
| 422 | struct nfs_write_data *wdata = data; | 422 | struct nfs_pgio_data *wdata = data; |
| 423 | 423 | ||
| 424 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { | 424 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { |
| 425 | rpc_exit(task, -EIO); | 425 | rpc_exit(task, -EIO); |
| @@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) | |||
| 443 | 443 | ||
| 444 | static void filelayout_write_call_done(struct rpc_task *task, void *data) | 444 | static void filelayout_write_call_done(struct rpc_task *task, void *data) |
| 445 | { | 445 | { |
| 446 | struct nfs_write_data *wdata = data; | 446 | struct nfs_pgio_data *wdata = data; |
| 447 | 447 | ||
| 448 | if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && | 448 | if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && |
| 449 | task->tk_status == 0) { | 449 | task->tk_status == 0) { |
| @@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) | |||
| 457 | 457 | ||
| 458 | static void filelayout_write_count_stats(struct rpc_task *task, void *data) | 458 | static void filelayout_write_count_stats(struct rpc_task *task, void *data) |
| 459 | { | 459 | { |
| 460 | struct nfs_write_data *wdata = data; | 460 | struct nfs_pgio_data *wdata = data; |
| 461 | 461 | ||
| 462 | rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); | 462 | rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); |
| 463 | } | 463 | } |
| 464 | 464 | ||
| 465 | static void filelayout_write_release(void *data) | 465 | static void filelayout_write_release(void *data) |
| 466 | { | 466 | { |
| 467 | struct nfs_write_data *wdata = data; | 467 | struct nfs_pgio_data *wdata = data; |
| 468 | struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; | 468 | struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; |
| 469 | 469 | ||
| 470 | filelayout_fenceme(lo->plh_inode, lo); | 470 | filelayout_fenceme(lo->plh_inode, lo); |
| @@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { | |||
| 529 | }; | 529 | }; |
| 530 | 530 | ||
| 531 | static enum pnfs_try_status | 531 | static enum pnfs_try_status |
| 532 | filelayout_read_pagelist(struct nfs_read_data *data) | 532 | filelayout_read_pagelist(struct nfs_pgio_data *data) |
| 533 | { | 533 | { |
| 534 | struct nfs_pgio_header *hdr = data->header; | 534 | struct nfs_pgio_header *hdr = data->header; |
| 535 | struct pnfs_layout_segment *lseg = hdr->lseg; | 535 | struct pnfs_layout_segment *lseg = hdr->lseg; |
| @@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
| 560 | /* No multipath support. Use first DS */ | 560 | /* No multipath support. Use first DS */ |
| 561 | atomic_inc(&ds->ds_clp->cl_count); | 561 | atomic_inc(&ds->ds_clp->cl_count); |
| 562 | data->ds_clp = ds->ds_clp; | 562 | data->ds_clp = ds->ds_clp; |
| 563 | data->ds_idx = idx; | ||
| 563 | fh = nfs4_fl_select_ds_fh(lseg, j); | 564 | fh = nfs4_fl_select_ds_fh(lseg, j); |
| 564 | if (fh) | 565 | if (fh) |
| 565 | data->args.fh = fh; | 566 | data->args.fh = fh; |
| @@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data) | |||
| 568 | data->mds_offset = offset; | 569 | data->mds_offset = offset; |
| 569 | 570 | ||
| 570 | /* Perform an asynchronous read to ds */ | 571 | /* Perform an asynchronous read to ds */ |
| 571 | nfs_initiate_read(ds_clnt, data, | 572 | nfs_initiate_pgio(ds_clnt, data, |
| 572 | &filelayout_read_call_ops, RPC_TASK_SOFTCONN); | 573 | &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); |
| 573 | return PNFS_ATTEMPTED; | 574 | return PNFS_ATTEMPTED; |
| 574 | } | 575 | } |
| 575 | 576 | ||
| 576 | /* Perform async writes. */ | 577 | /* Perform async writes. */ |
| 577 | static enum pnfs_try_status | 578 | static enum pnfs_try_status |
| 578 | filelayout_write_pagelist(struct nfs_write_data *data, int sync) | 579 | filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) |
| 579 | { | 580 | { |
| 580 | struct nfs_pgio_header *hdr = data->header; | 581 | struct nfs_pgio_header *hdr = data->header; |
| 581 | struct pnfs_layout_segment *lseg = hdr->lseg; | 582 | struct pnfs_layout_segment *lseg = hdr->lseg; |
| @@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
| 600 | __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, | 601 | __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, |
| 601 | offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); | 602 | offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); |
| 602 | 603 | ||
| 603 | data->write_done_cb = filelayout_write_done_cb; | 604 | data->pgio_done_cb = filelayout_write_done_cb; |
| 604 | atomic_inc(&ds->ds_clp->cl_count); | 605 | atomic_inc(&ds->ds_clp->cl_count); |
| 605 | data->ds_clp = ds->ds_clp; | 606 | data->ds_clp = ds->ds_clp; |
| 607 | data->ds_idx = idx; | ||
| 606 | fh = nfs4_fl_select_ds_fh(lseg, j); | 608 | fh = nfs4_fl_select_ds_fh(lseg, j); |
| 607 | if (fh) | 609 | if (fh) |
| 608 | data->args.fh = fh; | 610 | data->args.fh = fh; |
| 609 | /* | 611 | |
| 610 | * Get the file offset on the dserver. Set the write offset to | ||
| 611 | * this offset and save the original offset. | ||
| 612 | */ | ||
| 613 | data->args.offset = filelayout_get_dserver_offset(lseg, offset); | 612 | data->args.offset = filelayout_get_dserver_offset(lseg, offset); |
| 614 | 613 | ||
| 615 | /* Perform an asynchronous write */ | 614 | /* Perform an asynchronous write */ |
| 616 | nfs_initiate_write(ds_clnt, data, | 615 | nfs_initiate_pgio(ds_clnt, data, |
| 617 | &filelayout_write_call_ops, sync, | 616 | &filelayout_write_call_ops, sync, |
| 618 | RPC_TASK_SOFTCONN); | 617 | RPC_TASK_SOFTCONN); |
| 619 | return PNFS_ATTEMPTED; | 618 | return PNFS_ATTEMPTED; |
| @@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 637 | struct nfs4_deviceid_node *d; | 636 | struct nfs4_deviceid_node *d; |
| 638 | struct nfs4_file_layout_dsaddr *dsaddr; | 637 | struct nfs4_file_layout_dsaddr *dsaddr; |
| 639 | int status = -EINVAL; | 638 | int status = -EINVAL; |
| 640 | struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); | ||
| 641 | 639 | ||
| 642 | dprintk("--> %s\n", __func__); | 640 | dprintk("--> %s\n", __func__); |
| 643 | 641 | ||
| @@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 655 | goto out; | 653 | goto out; |
| 656 | } | 654 | } |
| 657 | 655 | ||
| 658 | if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) { | 656 | if (!fl->stripe_unit) { |
| 659 | dprintk("%s Invalid stripe unit (%u)\n", | 657 | dprintk("%s Invalid stripe unit (%u)\n", |
| 660 | __func__, fl->stripe_unit); | 658 | __func__, fl->stripe_unit); |
| 661 | goto out; | 659 | goto out; |
| @@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, | |||
| 692 | goto out_put; | 690 | goto out_put; |
| 693 | } | 691 | } |
| 694 | 692 | ||
| 695 | if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { | ||
| 696 | dprintk("%s Stripe unit (%u) not aligned with rsize %u " | ||
| 697 | "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, | ||
| 698 | nfss->wsize); | ||
| 699 | } | ||
| 700 | |||
| 701 | status = 0; | 693 | status = 0; |
| 702 | out: | 694 | out: |
| 703 | dprintk("--> %s returns %d\n", __func__, status); | 695 | dprintk("--> %s returns %d\n", __func__, status); |
| @@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, | |||
| 850 | { | 842 | { |
| 851 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 843 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); |
| 852 | struct pnfs_commit_bucket *buckets; | 844 | struct pnfs_commit_bucket *buckets; |
| 853 | int size; | 845 | int size, i; |
| 854 | 846 | ||
| 855 | if (fl->commit_through_mds) | 847 | if (fl->commit_through_mds) |
| 856 | return 0; | 848 | return 0; |
| 857 | if (cinfo->ds->nbuckets != 0) { | 849 | |
| 850 | size = (fl->stripe_type == STRIPE_SPARSE) ? | ||
| 851 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; | ||
| 852 | |||
| 853 | if (cinfo->ds->nbuckets >= size) { | ||
| 858 | /* This assumes there is only one IOMODE_RW lseg. What | 854 | /* This assumes there is only one IOMODE_RW lseg. What |
| 859 | * we really want to do is have a layout_hdr level | 855 | * we really want to do is have a layout_hdr level |
| 860 | * dictionary of <multipath_list4, fh> keys, each | 856 | * dictionary of <multipath_list4, fh> keys, each |
| @@ -864,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, | |||
| 864 | return 0; | 860 | return 0; |
| 865 | } | 861 | } |
| 866 | 862 | ||
| 867 | size = (fl->stripe_type == STRIPE_SPARSE) ? | ||
| 868 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; | ||
| 869 | |||
| 870 | buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), | 863 | buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), |
| 871 | gfp_flags); | 864 | gfp_flags); |
| 872 | if (!buckets) | 865 | if (!buckets) |
| 873 | return -ENOMEM; | 866 | return -ENOMEM; |
| 874 | else { | 867 | for (i = 0; i < size; i++) { |
| 875 | int i; | 868 | INIT_LIST_HEAD(&buckets[i].written); |
| 869 | INIT_LIST_HEAD(&buckets[i].committing); | ||
| 870 | /* mark direct verifier as unset */ | ||
| 871 | buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; | ||
| 872 | } | ||
| 876 | 873 | ||
| 877 | spin_lock(cinfo->lock); | 874 | spin_lock(cinfo->lock); |
| 878 | if (cinfo->ds->nbuckets != 0) | 875 | if (cinfo->ds->nbuckets >= size) |
| 879 | kfree(buckets); | 876 | goto out; |
| 880 | else { | 877 | for (i = 0; i < cinfo->ds->nbuckets; i++) { |
| 881 | cinfo->ds->buckets = buckets; | 878 | list_splice(&cinfo->ds->buckets[i].written, |
| 882 | cinfo->ds->nbuckets = size; | 879 | &buckets[i].written); |
| 883 | for (i = 0; i < size; i++) { | 880 | list_splice(&cinfo->ds->buckets[i].committing, |
| 884 | INIT_LIST_HEAD(&buckets[i].written); | 881 | &buckets[i].committing); |
| 885 | INIT_LIST_HEAD(&buckets[i].committing); | 882 | buckets[i].direct_verf.committed = |
| 886 | } | 883 | cinfo->ds->buckets[i].direct_verf.committed; |
| 887 | } | 884 | buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; |
| 888 | spin_unlock(cinfo->lock); | 885 | buckets[i].clseg = cinfo->ds->buckets[i].clseg; |
| 889 | return 0; | ||
| 890 | } | 886 | } |
| 887 | swap(cinfo->ds->buckets, buckets); | ||
| 888 | cinfo->ds->nbuckets = size; | ||
| 889 | out: | ||
| 890 | spin_unlock(cinfo->lock); | ||
| 891 | kfree(buckets); | ||
| 892 | return 0; | ||
| 891 | } | 893 | } |
| 892 | 894 | ||
| 893 | static struct pnfs_layout_segment * | 895 | static struct pnfs_layout_segment * |
| @@ -915,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
| 915 | /* | 917 | /* |
| 916 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() | 918 | * filelayout_pg_test(). Called by nfs_can_coalesce_requests() |
| 917 | * | 919 | * |
| 918 | * return true : coalesce page | 920 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number |
| 919 | * return false : don't coalesce page | 921 | * of bytes (maximum @req->wb_bytes) that can be coalesced. |
| 920 | */ | 922 | */ |
| 921 | static bool | 923 | static size_t |
| 922 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 924 | filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 923 | struct nfs_page *req) | 925 | struct nfs_page *req) |
| 924 | { | 926 | { |
| 927 | unsigned int size; | ||
| 925 | u64 p_stripe, r_stripe; | 928 | u64 p_stripe, r_stripe; |
| 926 | u32 stripe_unit; | 929 | u32 stripe_offset; |
| 930 | u64 segment_offset = pgio->pg_lseg->pls_range.offset; | ||
| 931 | u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; | ||
| 927 | 932 | ||
| 928 | if (!pnfs_generic_pg_test(pgio, prev, req) || | 933 | /* calls nfs_generic_pg_test */ |
| 929 | !nfs_generic_pg_test(pgio, prev, req)) | 934 | size = pnfs_generic_pg_test(pgio, prev, req); |
| 930 | return false; | 935 | if (!size) |
| 936 | return 0; | ||
| 931 | 937 | ||
| 932 | p_stripe = (u64)req_offset(prev); | 938 | /* see if req and prev are in the same stripe */ |
| 933 | r_stripe = (u64)req_offset(req); | 939 | if (prev) { |
| 934 | stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; | 940 | p_stripe = (u64)req_offset(prev) - segment_offset; |
| 941 | r_stripe = (u64)req_offset(req) - segment_offset; | ||
| 942 | do_div(p_stripe, stripe_unit); | ||
| 943 | do_div(r_stripe, stripe_unit); | ||
| 935 | 944 | ||
| 936 | do_div(p_stripe, stripe_unit); | 945 | if (p_stripe != r_stripe) |
| 937 | do_div(r_stripe, stripe_unit); | 946 | return 0; |
| 947 | } | ||
| 938 | 948 | ||
| 939 | return (p_stripe == r_stripe); | 949 | /* calculate remaining bytes in the current stripe */ |
| 950 | div_u64_rem((u64)req_offset(req) - segment_offset, | ||
| 951 | stripe_unit, | ||
| 952 | &stripe_offset); | ||
| 953 | WARN_ON_ONCE(stripe_offset > stripe_unit); | ||
| 954 | if (stripe_offset >= stripe_unit) | ||
| 955 | return 0; | ||
| 956 | return min(stripe_unit - (unsigned int)stripe_offset, size); | ||
| 940 | } | 957 | } |
| 941 | 958 | ||
| 942 | static void | 959 | static void |
| 943 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, | 960 | filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, |
| 944 | struct nfs_page *req) | 961 | struct nfs_page *req) |
| 945 | { | 962 | { |
| 946 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 963 | if (!pgio->pg_lseg) |
| 947 | 964 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | |
| 948 | if (req->wb_offset != req->wb_pgbase) { | ||
| 949 | /* | ||
| 950 | * Handling unaligned pages is difficult, because have to | ||
| 951 | * somehow split a req in two in certain cases in the | ||
| 952 | * pg.test code. Avoid this by just not using pnfs | ||
| 953 | * in this case. | ||
| 954 | */ | ||
| 955 | nfs_pageio_reset_read_mds(pgio); | ||
| 956 | return; | ||
| 957 | } | ||
| 958 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
| 959 | req->wb_context, | 965 | req->wb_context, |
| 960 | 0, | 966 | 0, |
| 961 | NFS4_MAX_UINT64, | 967 | NFS4_MAX_UINT64, |
| @@ -973,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
| 973 | struct nfs_commit_info cinfo; | 979 | struct nfs_commit_info cinfo; |
| 974 | int status; | 980 | int status; |
| 975 | 981 | ||
| 976 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 982 | if (!pgio->pg_lseg) |
| 977 | 983 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | |
| 978 | if (req->wb_offset != req->wb_pgbase) | ||
| 979 | goto out_mds; | ||
| 980 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
| 981 | req->wb_context, | 984 | req->wb_context, |
| 982 | 0, | 985 | 0, |
| 983 | NFS4_MAX_UINT64, | 986 | NFS4_MAX_UINT64, |
| @@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req, | |||
| 1067 | */ | 1070 | */ |
| 1068 | j = nfs4_fl_calc_j_index(lseg, req_offset(req)); | 1071 | j = nfs4_fl_calc_j_index(lseg, req_offset(req)); |
| 1069 | i = select_bucket_index(fl, j); | 1072 | i = select_bucket_index(fl, j); |
| 1073 | spin_lock(cinfo->lock); | ||
| 1070 | buckets = cinfo->ds->buckets; | 1074 | buckets = cinfo->ds->buckets; |
| 1071 | list = &buckets[i].written; | 1075 | list = &buckets[i].written; |
| 1072 | if (list_empty(list)) { | 1076 | if (list_empty(list)) { |
| @@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req, | |||
| 1080 | } | 1084 | } |
| 1081 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); | 1085 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); |
| 1082 | cinfo->ds->nwritten++; | 1086 | cinfo->ds->nwritten++; |
| 1087 | spin_unlock(cinfo->lock); | ||
| 1083 | return list; | 1088 | return list; |
| 1084 | } | 1089 | } |
| 1085 | 1090 | ||
| @@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst, | |||
| 1176 | return ret; | 1181 | return ret; |
| 1177 | } | 1182 | } |
| 1178 | 1183 | ||
| 1184 | /* Note called with cinfo->lock held. */ | ||
| 1179 | static int | 1185 | static int |
| 1180 | filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, | 1186 | filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, |
| 1181 | struct nfs_commit_info *cinfo, | 1187 | struct nfs_commit_info *cinfo, |
| @@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, | |||
| 1220 | struct nfs_commit_info *cinfo) | 1226 | struct nfs_commit_info *cinfo) |
| 1221 | { | 1227 | { |
| 1222 | struct pnfs_commit_bucket *b; | 1228 | struct pnfs_commit_bucket *b; |
| 1229 | struct pnfs_layout_segment *freeme; | ||
| 1223 | int i; | 1230 | int i; |
| 1224 | 1231 | ||
| 1232 | restart: | ||
| 1225 | spin_lock(cinfo->lock); | 1233 | spin_lock(cinfo->lock); |
| 1226 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { | 1234 | for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { |
| 1227 | if (transfer_commit_list(&b->written, dst, cinfo, 0)) { | 1235 | if (transfer_commit_list(&b->written, dst, cinfo, 0)) { |
| 1228 | spin_unlock(cinfo->lock); | 1236 | freeme = b->wlseg; |
| 1229 | pnfs_put_lseg(b->wlseg); | ||
| 1230 | b->wlseg = NULL; | 1237 | b->wlseg = NULL; |
| 1231 | spin_lock(cinfo->lock); | 1238 | spin_unlock(cinfo->lock); |
| 1239 | pnfs_put_lseg(freeme); | ||
| 1240 | goto restart; | ||
| 1232 | } | 1241 | } |
| 1233 | } | 1242 | } |
| 1234 | cinfo->ds->nwritten = 0; | 1243 | cinfo->ds->nwritten = 0; |
| @@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) | |||
| 1243 | struct nfs_commit_data *data; | 1252 | struct nfs_commit_data *data; |
| 1244 | int i, j; | 1253 | int i, j; |
| 1245 | unsigned int nreq = 0; | 1254 | unsigned int nreq = 0; |
| 1255 | struct pnfs_layout_segment *freeme; | ||
| 1246 | 1256 | ||
| 1247 | fl_cinfo = cinfo->ds; | 1257 | fl_cinfo = cinfo->ds; |
| 1248 | bucket = fl_cinfo->buckets; | 1258 | bucket = fl_cinfo->buckets; |
| @@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) | |||
| 1253 | if (!data) | 1263 | if (!data) |
| 1254 | break; | 1264 | break; |
| 1255 | data->ds_commit_index = i; | 1265 | data->ds_commit_index = i; |
| 1266 | spin_lock(cinfo->lock); | ||
| 1256 | data->lseg = bucket->clseg; | 1267 | data->lseg = bucket->clseg; |
| 1257 | bucket->clseg = NULL; | 1268 | bucket->clseg = NULL; |
| 1269 | spin_unlock(cinfo->lock); | ||
| 1258 | list_add(&data->pages, list); | 1270 | list_add(&data->pages, list); |
| 1259 | nreq++; | 1271 | nreq++; |
| 1260 | } | 1272 | } |
| @@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) | |||
| 1264 | if (list_empty(&bucket->committing)) | 1276 | if (list_empty(&bucket->committing)) |
| 1265 | continue; | 1277 | continue; |
| 1266 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); | 1278 | nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); |
| 1267 | pnfs_put_lseg(bucket->clseg); | 1279 | spin_lock(cinfo->lock); |
| 1280 | freeme = bucket->clseg; | ||
| 1268 | bucket->clseg = NULL; | 1281 | bucket->clseg = NULL; |
| 1282 | spin_unlock(cinfo->lock); | ||
| 1283 | pnfs_put_lseg(freeme); | ||
| 1269 | } | 1284 | } |
| 1270 | /* Caller will clean up entries put on list */ | 1285 | /* Caller will clean up entries put on list */ |
| 1271 | return nreq; | 1286 | return nreq; |
| @@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | |||
| 1330 | struct nfs4_filelayout *flo; | 1345 | struct nfs4_filelayout *flo; |
| 1331 | 1346 | ||
| 1332 | flo = kzalloc(sizeof(*flo), gfp_flags); | 1347 | flo = kzalloc(sizeof(*flo), gfp_flags); |
| 1333 | return &flo->generic_hdr; | 1348 | return flo != NULL ? &flo->generic_hdr : NULL; |
| 1334 | } | 1349 | } |
| 1335 | 1350 | ||
| 1336 | static void | 1351 | static void |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h index cebd20e7e923..ffbddf2219ea 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/filelayout/filelayout.h | |||
| @@ -30,7 +30,7 @@ | |||
| 30 | #ifndef FS_NFS_NFS4FILELAYOUT_H | 30 | #ifndef FS_NFS_NFS4FILELAYOUT_H |
| 31 | #define FS_NFS_NFS4FILELAYOUT_H | 31 | #define FS_NFS_NFS4FILELAYOUT_H |
| 32 | 32 | ||
| 33 | #include "pnfs.h" | 33 | #include "../pnfs.h" |
| 34 | 34 | ||
| 35 | /* | 35 | /* |
| 36 | * Default data server connection timeout and retrans vaules. | 36 | * Default data server connection timeout and retrans vaules. |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index b9c61efe9660..44bf0140a4c7 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c | |||
| @@ -33,9 +33,9 @@ | |||
| 33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
| 34 | #include <linux/sunrpc/addr.h> | 34 | #include <linux/sunrpc/addr.h> |
| 35 | 35 | ||
| 36 | #include "internal.h" | 36 | #include "../internal.h" |
| 37 | #include "nfs4session.h" | 37 | #include "../nfs4session.h" |
| 38 | #include "nfs4filelayout.h" | 38 | #include "filelayout.h" |
| 39 | 39 | ||
| 40 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | 40 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
| 41 | 41 | ||
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 66984a9aafaa..b94f80420a58 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
| @@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
| 120 | 120 | ||
| 121 | security_d_instantiate(ret, inode); | 121 | security_d_instantiate(ret, inode); |
| 122 | spin_lock(&ret->d_lock); | 122 | spin_lock(&ret->d_lock); |
| 123 | if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { | 123 | if (IS_ROOT(ret) && !ret->d_fsdata && |
| 124 | !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { | ||
| 124 | ret->d_fsdata = name; | 125 | ret->d_fsdata = name; |
| 125 | name = NULL; | 126 | name = NULL; |
| 126 | } | 127 | } |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e6f7398d2b3c..c496f8a74639 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -1575,18 +1575,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1575 | inode->i_version = fattr->change_attr; | 1575 | inode->i_version = fattr->change_attr; |
| 1576 | } | 1576 | } |
| 1577 | } else if (server->caps & NFS_CAP_CHANGE_ATTR) | 1577 | } else if (server->caps & NFS_CAP_CHANGE_ATTR) |
| 1578 | invalid |= save_cache_validity; | 1578 | nfsi->cache_validity |= save_cache_validity; |
| 1579 | 1579 | ||
| 1580 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { | 1580 | if (fattr->valid & NFS_ATTR_FATTR_MTIME) { |
| 1581 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); | 1581 | memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); |
| 1582 | } else if (server->caps & NFS_CAP_MTIME) | 1582 | } else if (server->caps & NFS_CAP_MTIME) |
| 1583 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1583 | nfsi->cache_validity |= save_cache_validity & |
| 1584 | (NFS_INO_INVALID_ATTR | ||
| 1584 | | NFS_INO_REVAL_FORCED); | 1585 | | NFS_INO_REVAL_FORCED); |
| 1585 | 1586 | ||
| 1586 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { | 1587 | if (fattr->valid & NFS_ATTR_FATTR_CTIME) { |
| 1587 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); | 1588 | memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); |
| 1588 | } else if (server->caps & NFS_CAP_CTIME) | 1589 | } else if (server->caps & NFS_CAP_CTIME) |
| 1589 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1590 | nfsi->cache_validity |= save_cache_validity & |
| 1591 | (NFS_INO_INVALID_ATTR | ||
| 1590 | | NFS_INO_REVAL_FORCED); | 1592 | | NFS_INO_REVAL_FORCED); |
| 1591 | 1593 | ||
| 1592 | /* Check if our cached file size is stale */ | 1594 | /* Check if our cached file size is stale */ |
| @@ -1608,7 +1610,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1608 | (long long)new_isize); | 1610 | (long long)new_isize); |
| 1609 | } | 1611 | } |
| 1610 | } else | 1612 | } else |
| 1611 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1613 | nfsi->cache_validity |= save_cache_validity & |
| 1614 | (NFS_INO_INVALID_ATTR | ||
| 1612 | | NFS_INO_REVAL_PAGECACHE | 1615 | | NFS_INO_REVAL_PAGECACHE |
| 1613 | | NFS_INO_REVAL_FORCED); | 1616 | | NFS_INO_REVAL_FORCED); |
| 1614 | 1617 | ||
| @@ -1616,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1616 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) | 1619 | if (fattr->valid & NFS_ATTR_FATTR_ATIME) |
| 1617 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); | 1620 | memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); |
| 1618 | else if (server->caps & NFS_CAP_ATIME) | 1621 | else if (server->caps & NFS_CAP_ATIME) |
| 1619 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME | 1622 | nfsi->cache_validity |= save_cache_validity & |
| 1623 | (NFS_INO_INVALID_ATIME | ||
| 1620 | | NFS_INO_REVAL_FORCED); | 1624 | | NFS_INO_REVAL_FORCED); |
| 1621 | 1625 | ||
| 1622 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { | 1626 | if (fattr->valid & NFS_ATTR_FATTR_MODE) { |
| @@ -1627,7 +1631,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1627 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1631 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 1628 | } | 1632 | } |
| 1629 | } else if (server->caps & NFS_CAP_MODE) | 1633 | } else if (server->caps & NFS_CAP_MODE) |
| 1630 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1634 | nfsi->cache_validity |= save_cache_validity & |
| 1635 | (NFS_INO_INVALID_ATTR | ||
| 1631 | | NFS_INO_INVALID_ACCESS | 1636 | | NFS_INO_INVALID_ACCESS |
| 1632 | | NFS_INO_INVALID_ACL | 1637 | | NFS_INO_INVALID_ACL |
| 1633 | | NFS_INO_REVAL_FORCED); | 1638 | | NFS_INO_REVAL_FORCED); |
| @@ -1638,7 +1643,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1638 | inode->i_uid = fattr->uid; | 1643 | inode->i_uid = fattr->uid; |
| 1639 | } | 1644 | } |
| 1640 | } else if (server->caps & NFS_CAP_OWNER) | 1645 | } else if (server->caps & NFS_CAP_OWNER) |
| 1641 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1646 | nfsi->cache_validity |= save_cache_validity & |
| 1647 | (NFS_INO_INVALID_ATTR | ||
| 1642 | | NFS_INO_INVALID_ACCESS | 1648 | | NFS_INO_INVALID_ACCESS |
| 1643 | | NFS_INO_INVALID_ACL | 1649 | | NFS_INO_INVALID_ACL |
| 1644 | | NFS_INO_REVAL_FORCED); | 1650 | | NFS_INO_REVAL_FORCED); |
| @@ -1649,7 +1655,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1649 | inode->i_gid = fattr->gid; | 1655 | inode->i_gid = fattr->gid; |
| 1650 | } | 1656 | } |
| 1651 | } else if (server->caps & NFS_CAP_OWNER_GROUP) | 1657 | } else if (server->caps & NFS_CAP_OWNER_GROUP) |
| 1652 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1658 | nfsi->cache_validity |= save_cache_validity & |
| 1659 | (NFS_INO_INVALID_ATTR | ||
| 1653 | | NFS_INO_INVALID_ACCESS | 1660 | | NFS_INO_INVALID_ACCESS |
| 1654 | | NFS_INO_INVALID_ACL | 1661 | | NFS_INO_INVALID_ACL |
| 1655 | | NFS_INO_REVAL_FORCED); | 1662 | | NFS_INO_REVAL_FORCED); |
| @@ -1662,7 +1669,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1662 | set_nlink(inode, fattr->nlink); | 1669 | set_nlink(inode, fattr->nlink); |
| 1663 | } | 1670 | } |
| 1664 | } else if (server->caps & NFS_CAP_NLINK) | 1671 | } else if (server->caps & NFS_CAP_NLINK) |
| 1665 | invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR | 1672 | nfsi->cache_validity |= save_cache_validity & |
| 1673 | (NFS_INO_INVALID_ATTR | ||
| 1666 | | NFS_INO_REVAL_FORCED); | 1674 | | NFS_INO_REVAL_FORCED); |
| 1667 | 1675 | ||
| 1668 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { | 1676 | if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index dd8bfc2e2464..8b69cba1bb04 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -231,13 +231,20 @@ extern void nfs_destroy_writepagecache(void); | |||
| 231 | 231 | ||
| 232 | extern int __init nfs_init_directcache(void); | 232 | extern int __init nfs_init_directcache(void); |
| 233 | extern void nfs_destroy_directcache(void); | 233 | extern void nfs_destroy_directcache(void); |
| 234 | extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); | ||
| 235 | extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, | 234 | extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, |
| 236 | struct nfs_pgio_header *hdr, | 235 | struct nfs_pgio_header *hdr, |
| 237 | void (*release)(struct nfs_pgio_header *hdr)); | 236 | void (*release)(struct nfs_pgio_header *hdr)); |
| 238 | void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); | 237 | void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); |
| 239 | int nfs_iocounter_wait(struct nfs_io_counter *c); | 238 | int nfs_iocounter_wait(struct nfs_io_counter *c); |
| 240 | 239 | ||
| 240 | extern const struct nfs_pageio_ops nfs_pgio_rw_ops; | ||
| 241 | struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); | ||
| 242 | void nfs_rw_header_free(struct nfs_pgio_header *); | ||
| 243 | void nfs_pgio_data_release(struct nfs_pgio_data *); | ||
| 244 | int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); | ||
| 245 | int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, | ||
| 246 | const struct rpc_call_ops *, int, int); | ||
| 247 | |||
| 241 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) | 248 | static inline void nfs_iocounter_init(struct nfs_io_counter *c) |
| 242 | { | 249 | { |
| 243 | c->flags = 0; | 250 | c->flags = 0; |
| @@ -395,19 +402,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool | |||
| 395 | 402 | ||
| 396 | struct nfs_pgio_completion_ops; | 403 | struct nfs_pgio_completion_ops; |
| 397 | /* read.c */ | 404 | /* read.c */ |
| 398 | extern struct nfs_read_header *nfs_readhdr_alloc(void); | ||
| 399 | extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); | ||
| 400 | extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, | 405 | extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, |
| 401 | struct inode *inode, | 406 | struct inode *inode, bool force_mds, |
| 402 | const struct nfs_pgio_completion_ops *compl_ops); | 407 | const struct nfs_pgio_completion_ops *compl_ops); |
| 403 | extern int nfs_initiate_read(struct rpc_clnt *clnt, | ||
| 404 | struct nfs_read_data *data, | ||
| 405 | const struct rpc_call_ops *call_ops, int flags); | ||
| 406 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 408 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
| 407 | extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, | ||
| 408 | struct nfs_pgio_header *hdr); | ||
| 409 | extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); | 409 | extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); |
| 410 | extern void nfs_readdata_release(struct nfs_read_data *rdata); | ||
| 411 | 410 | ||
| 412 | /* super.c */ | 411 | /* super.c */ |
| 413 | void nfs_clone_super(struct super_block *, struct nfs_mount_info *); | 412 | void nfs_clone_super(struct super_block *, struct nfs_mount_info *); |
| @@ -422,19 +421,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | |||
| 422 | 421 | ||
| 423 | /* write.c */ | 422 | /* write.c */ |
| 424 | extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 423 | extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
| 425 | struct inode *inode, int ioflags, | 424 | struct inode *inode, int ioflags, bool force_mds, |
| 426 | const struct nfs_pgio_completion_ops *compl_ops); | 425 | const struct nfs_pgio_completion_ops *compl_ops); |
| 427 | extern struct nfs_write_header *nfs_writehdr_alloc(void); | ||
| 428 | extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); | ||
| 429 | extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
| 430 | struct nfs_pgio_header *hdr); | ||
| 431 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); | 426 | extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); |
| 432 | extern void nfs_writedata_release(struct nfs_write_data *wdata); | ||
| 433 | extern void nfs_commit_free(struct nfs_commit_data *p); | 427 | extern void nfs_commit_free(struct nfs_commit_data *p); |
| 434 | extern int nfs_initiate_write(struct rpc_clnt *clnt, | ||
| 435 | struct nfs_write_data *data, | ||
| 436 | const struct rpc_call_ops *call_ops, | ||
| 437 | int how, int flags); | ||
| 438 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 428 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
| 439 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); | 429 | extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); |
| 440 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, | 430 | extern int nfs_initiate_commit(struct rpc_clnt *clnt, |
| @@ -447,6 +437,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data, | |||
| 447 | struct nfs_commit_info *cinfo); | 437 | struct nfs_commit_info *cinfo); |
| 448 | int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, | 438 | int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, |
| 449 | struct nfs_commit_info *cinfo, int max); | 439 | struct nfs_commit_info *cinfo, int max); |
| 440 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); | ||
| 450 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, | 441 | int nfs_scan_commit(struct inode *inode, struct list_head *dst, |
| 451 | struct nfs_commit_info *cinfo); | 442 | struct nfs_commit_info *cinfo); |
| 452 | void nfs_mark_request_commit(struct nfs_page *req, | 443 | void nfs_mark_request_commit(struct nfs_page *req, |
| @@ -492,7 +483,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) | |||
| 492 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); | 483 | extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); |
| 493 | 484 | ||
| 494 | /* nfs4proc.c */ | 485 | /* nfs4proc.c */ |
| 495 | extern void __nfs4_read_done_cb(struct nfs_read_data *); | 486 | extern void __nfs4_read_done_cb(struct nfs_pgio_data *); |
| 496 | extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, | 487 | extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, |
| 497 | const struct rpc_timeout *timeparms, | 488 | const struct rpc_timeout *timeparms, |
| 498 | const char *ip_addr); | 489 | const char *ip_addr); |
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 62db136339ea..5f61b83f4a1c 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
| @@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | |||
| 103 | /* | 103 | /* |
| 104 | * typedef opaque nfsdata<>; | 104 | * typedef opaque nfsdata<>; |
| 105 | */ | 105 | */ |
| 106 | static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) | 106 | static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) |
| 107 | { | 107 | { |
| 108 | u32 recvd, count; | 108 | u32 recvd, count; |
| 109 | __be32 *p; | 109 | __be32 *p; |
| @@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, | |||
| 613 | * }; | 613 | * }; |
| 614 | */ | 614 | */ |
| 615 | static void encode_readargs(struct xdr_stream *xdr, | 615 | static void encode_readargs(struct xdr_stream *xdr, |
| 616 | const struct nfs_readargs *args) | 616 | const struct nfs_pgio_args *args) |
| 617 | { | 617 | { |
| 618 | u32 offset = args->offset; | 618 | u32 offset = args->offset; |
| 619 | u32 count = args->count; | 619 | u32 count = args->count; |
| @@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr, | |||
| 629 | 629 | ||
| 630 | static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, | 630 | static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, |
| 631 | struct xdr_stream *xdr, | 631 | struct xdr_stream *xdr, |
| 632 | const struct nfs_readargs *args) | 632 | const struct nfs_pgio_args *args) |
| 633 | { | 633 | { |
| 634 | encode_readargs(xdr, args); | 634 | encode_readargs(xdr, args); |
| 635 | prepare_reply_buffer(req, args->pages, args->pgbase, | 635 | prepare_reply_buffer(req, args->pages, args->pgbase, |
| @@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, | |||
| 649 | * }; | 649 | * }; |
| 650 | */ | 650 | */ |
| 651 | static void encode_writeargs(struct xdr_stream *xdr, | 651 | static void encode_writeargs(struct xdr_stream *xdr, |
| 652 | const struct nfs_writeargs *args) | 652 | const struct nfs_pgio_args *args) |
| 653 | { | 653 | { |
| 654 | u32 offset = args->offset; | 654 | u32 offset = args->offset; |
| 655 | u32 count = args->count; | 655 | u32 count = args->count; |
| @@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr, | |||
| 669 | 669 | ||
| 670 | static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, | 670 | static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, |
| 671 | struct xdr_stream *xdr, | 671 | struct xdr_stream *xdr, |
| 672 | const struct nfs_writeargs *args) | 672 | const struct nfs_pgio_args *args) |
| 673 | { | 673 | { |
| 674 | encode_writeargs(xdr, args); | 674 | encode_writeargs(xdr, args); |
| 675 | xdr->buf->flags |= XDRBUF_WRITE; | 675 | xdr->buf->flags |= XDRBUF_WRITE; |
| @@ -857,7 +857,7 @@ out_default: | |||
| 857 | * }; | 857 | * }; |
| 858 | */ | 858 | */ |
| 859 | static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, | 859 | static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 860 | struct nfs_readres *result) | 860 | struct nfs_pgio_res *result) |
| 861 | { | 861 | { |
| 862 | enum nfs_stat status; | 862 | enum nfs_stat status; |
| 863 | int error; | 863 | int error; |
| @@ -878,7 +878,7 @@ out_default: | |||
| 878 | } | 878 | } |
| 879 | 879 | ||
| 880 | static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, | 880 | static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 881 | struct nfs_writeres *result) | 881 | struct nfs_pgio_res *result) |
| 882 | { | 882 | { |
| 883 | /* All NFSv2 writes are "file sync" writes */ | 883 | /* All NFSv2 writes are "file sync" writes */ |
| 884 | result->verf->committed = NFS_FILE_SYNC; | 884 | result->verf->committed = NFS_FILE_SYNC; |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index db60149c4579..e7daa42bbc86 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 795 | return status; | 795 | return status; |
| 796 | } | 796 | } |
| 797 | 797 | ||
| 798 | static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | 798 | static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 799 | { | 799 | { |
| 800 | struct inode *inode = data->header->inode; | 800 | struct inode *inode = data->header->inode; |
| 801 | 801 | ||
| @@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
| 807 | return 0; | 807 | return 0; |
| 808 | } | 808 | } |
| 809 | 809 | ||
| 810 | static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 810 | static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 811 | { | 811 | { |
| 812 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; | 812 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; |
| 813 | } | 813 | } |
| 814 | 814 | ||
| 815 | static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 815 | static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
| 816 | { | 816 | { |
| 817 | rpc_call_start(task); | 817 | rpc_call_start(task); |
| 818 | return 0; | 818 | return 0; |
| 819 | } | 819 | } |
| 820 | 820 | ||
| 821 | static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | 821 | static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 822 | { | 822 | { |
| 823 | struct inode *inode = data->header->inode; | 823 | struct inode *inode = data->header->inode; |
| 824 | 824 | ||
| @@ -829,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 829 | return 0; | 829 | return 0; |
| 830 | } | 830 | } |
| 831 | 831 | ||
| 832 | static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 832 | static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 833 | { | 833 | { |
| 834 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; | 834 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; |
| 835 | } | 835 | } |
| 836 | 836 | ||
| 837 | static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
| 838 | { | ||
| 839 | rpc_call_start(task); | ||
| 840 | return 0; | ||
| 841 | } | ||
| 842 | |||
| 843 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 837 | static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
| 844 | { | 838 | { |
| 845 | rpc_call_start(task); | 839 | rpc_call_start(task); |
| @@ -946,13 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
| 946 | .fsinfo = nfs3_proc_fsinfo, | 940 | .fsinfo = nfs3_proc_fsinfo, |
| 947 | .pathconf = nfs3_proc_pathconf, | 941 | .pathconf = nfs3_proc_pathconf, |
| 948 | .decode_dirent = nfs3_decode_dirent, | 942 | .decode_dirent = nfs3_decode_dirent, |
| 943 | .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare, | ||
| 949 | .read_setup = nfs3_proc_read_setup, | 944 | .read_setup = nfs3_proc_read_setup, |
| 950 | .read_pageio_init = nfs_pageio_init_read, | ||
| 951 | .read_rpc_prepare = nfs3_proc_read_rpc_prepare, | ||
| 952 | .read_done = nfs3_read_done, | 945 | .read_done = nfs3_read_done, |
| 953 | .write_setup = nfs3_proc_write_setup, | 946 | .write_setup = nfs3_proc_write_setup, |
| 954 | .write_pageio_init = nfs_pageio_init_write, | ||
| 955 | .write_rpc_prepare = nfs3_proc_write_rpc_prepare, | ||
| 956 | .write_done = nfs3_write_done, | 947 | .write_done = nfs3_write_done, |
| 957 | .commit_setup = nfs3_proc_commit_setup, | 948 | .commit_setup = nfs3_proc_commit_setup, |
| 958 | .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, | 949 | .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index fa6d72131c19..8f4cbe7f4aa8 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
| @@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, | |||
| 953 | * }; | 953 | * }; |
| 954 | */ | 954 | */ |
| 955 | static void encode_read3args(struct xdr_stream *xdr, | 955 | static void encode_read3args(struct xdr_stream *xdr, |
| 956 | const struct nfs_readargs *args) | 956 | const struct nfs_pgio_args *args) |
| 957 | { | 957 | { |
| 958 | __be32 *p; | 958 | __be32 *p; |
| 959 | 959 | ||
| @@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr, | |||
| 966 | 966 | ||
| 967 | static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, | 967 | static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, |
| 968 | struct xdr_stream *xdr, | 968 | struct xdr_stream *xdr, |
| 969 | const struct nfs_readargs *args) | 969 | const struct nfs_pgio_args *args) |
| 970 | { | 970 | { |
| 971 | encode_read3args(xdr, args); | 971 | encode_read3args(xdr, args); |
| 972 | prepare_reply_buffer(req, args->pages, args->pgbase, | 972 | prepare_reply_buffer(req, args->pages, args->pgbase, |
| @@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, | |||
| 992 | * }; | 992 | * }; |
| 993 | */ | 993 | */ |
| 994 | static void encode_write3args(struct xdr_stream *xdr, | 994 | static void encode_write3args(struct xdr_stream *xdr, |
| 995 | const struct nfs_writeargs *args) | 995 | const struct nfs_pgio_args *args) |
| 996 | { | 996 | { |
| 997 | __be32 *p; | 997 | __be32 *p; |
| 998 | 998 | ||
| @@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr, | |||
| 1008 | 1008 | ||
| 1009 | static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, | 1009 | static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, |
| 1010 | struct xdr_stream *xdr, | 1010 | struct xdr_stream *xdr, |
| 1011 | const struct nfs_writeargs *args) | 1011 | const struct nfs_pgio_args *args) |
| 1012 | { | 1012 | { |
| 1013 | encode_write3args(xdr, args); | 1013 | encode_write3args(xdr, args); |
| 1014 | xdr->buf->flags |= XDRBUF_WRITE; | 1014 | xdr->buf->flags |= XDRBUF_WRITE; |
| @@ -1589,7 +1589,7 @@ out_default: | |||
| 1589 | * }; | 1589 | * }; |
| 1590 | */ | 1590 | */ |
| 1591 | static int decode_read3resok(struct xdr_stream *xdr, | 1591 | static int decode_read3resok(struct xdr_stream *xdr, |
| 1592 | struct nfs_readres *result) | 1592 | struct nfs_pgio_res *result) |
| 1593 | { | 1593 | { |
| 1594 | u32 eof, count, ocount, recvd; | 1594 | u32 eof, count, ocount, recvd; |
| 1595 | __be32 *p; | 1595 | __be32 *p; |
| @@ -1625,7 +1625,7 @@ out_overflow: | |||
| 1625 | } | 1625 | } |
| 1626 | 1626 | ||
| 1627 | static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, | 1627 | static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 1628 | struct nfs_readres *result) | 1628 | struct nfs_pgio_res *result) |
| 1629 | { | 1629 | { |
| 1630 | enum nfs_stat status; | 1630 | enum nfs_stat status; |
| 1631 | int error; | 1631 | int error; |
| @@ -1673,7 +1673,7 @@ out_status: | |||
| 1673 | * }; | 1673 | * }; |
| 1674 | */ | 1674 | */ |
| 1675 | static int decode_write3resok(struct xdr_stream *xdr, | 1675 | static int decode_write3resok(struct xdr_stream *xdr, |
| 1676 | struct nfs_writeres *result) | 1676 | struct nfs_pgio_res *result) |
| 1677 | { | 1677 | { |
| 1678 | __be32 *p; | 1678 | __be32 *p; |
| 1679 | 1679 | ||
| @@ -1697,7 +1697,7 @@ out_eio: | |||
| 1697 | } | 1697 | } |
| 1698 | 1698 | ||
| 1699 | static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, | 1699 | static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 1700 | struct nfs_writeres *result) | 1700 | struct nfs_pgio_res *result) |
| 1701 | { | 1701 | { |
| 1702 | enum nfs_stat status; | 1702 | enum nfs_stat status; |
| 1703 | int error; | 1703 | int error; |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1d1badbe53c..f63cb87cd730 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
| @@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, | |||
| 337 | */ | 337 | */ |
| 338 | static inline void | 338 | static inline void |
| 339 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, | 339 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, |
| 340 | struct rpc_message *msg, struct nfs_write_data *wdata) | 340 | struct rpc_message *msg, struct nfs_pgio_data *wdata) |
| 341 | { | 341 | { |
| 342 | if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && | 342 | if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && |
| 343 | !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) | 343 | !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) |
| @@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, | |||
| 369 | 369 | ||
| 370 | static inline void | 370 | static inline void |
| 371 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, | 371 | nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, |
| 372 | struct rpc_message *msg, struct nfs_write_data *wdata) | 372 | struct rpc_message *msg, struct nfs_pgio_data *wdata) |
| 373 | { | 373 | { |
| 374 | } | 374 | } |
| 375 | #endif /* CONFIG_NFS_V4_1 */ | 375 | #endif /* CONFIG_NFS_V4_1 */ |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8de3407e0360..464db9dd6318 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
| @@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 100 | break; | 100 | break; |
| 101 | mutex_lock(&inode->i_mutex); | 101 | mutex_lock(&inode->i_mutex); |
| 102 | ret = nfs_file_fsync_commit(file, start, end, datasync); | 102 | ret = nfs_file_fsync_commit(file, start, end, datasync); |
| 103 | if (!ret && !datasync) | 103 | if (!ret) |
| 104 | /* application has asked for meta-data sync */ | ||
| 105 | ret = pnfs_layoutcommit_inode(inode, true); | 104 | ret = pnfs_layoutcommit_inode(inode, true); |
| 106 | mutex_unlock(&inode->i_mutex); | 105 | mutex_unlock(&inode->i_mutex); |
| 107 | /* | 106 | /* |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7f55fed8dc64..285ad5334018 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) | |||
| 2027 | return status; | 2027 | return status; |
| 2028 | } | 2028 | } |
| 2029 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) | 2029 | if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) |
| 2030 | _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); | 2030 | nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); |
| 2031 | return 0; | 2031 | return 0; |
| 2032 | } | 2032 | } |
| 2033 | 2033 | ||
| @@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err) | |||
| 4033 | return false; | 4033 | return false; |
| 4034 | } | 4034 | } |
| 4035 | 4035 | ||
| 4036 | void __nfs4_read_done_cb(struct nfs_read_data *data) | 4036 | void __nfs4_read_done_cb(struct nfs_pgio_data *data) |
| 4037 | { | 4037 | { |
| 4038 | nfs_invalidate_atime(data->header->inode); | 4038 | nfs_invalidate_atime(data->header->inode); |
| 4039 | } | 4039 | } |
| 4040 | 4040 | ||
| 4041 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | 4041 | static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) |
| 4042 | { | 4042 | { |
| 4043 | struct nfs_server *server = NFS_SERVER(data->header->inode); | 4043 | struct nfs_server *server = NFS_SERVER(data->header->inode); |
| 4044 | 4044 | ||
| @@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) | |||
| 4055 | } | 4055 | } |
| 4056 | 4056 | ||
| 4057 | static bool nfs4_read_stateid_changed(struct rpc_task *task, | 4057 | static bool nfs4_read_stateid_changed(struct rpc_task *task, |
| 4058 | struct nfs_readargs *args) | 4058 | struct nfs_pgio_args *args) |
| 4059 | { | 4059 | { |
| 4060 | 4060 | ||
| 4061 | if (!nfs4_error_stateid_expired(task->tk_status) || | 4061 | if (!nfs4_error_stateid_expired(task->tk_status) || |
| @@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, | |||
| 4068 | return true; | 4068 | return true; |
| 4069 | } | 4069 | } |
| 4070 | 4070 | ||
| 4071 | static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | 4071 | static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 4072 | { | 4072 | { |
| 4073 | 4073 | ||
| 4074 | dprintk("--> %s\n", __func__); | 4074 | dprintk("--> %s\n", __func__); |
| @@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
| 4077 | return -EAGAIN; | 4077 | return -EAGAIN; |
| 4078 | if (nfs4_read_stateid_changed(task, &data->args)) | 4078 | if (nfs4_read_stateid_changed(task, &data->args)) |
| 4079 | return -EAGAIN; | 4079 | return -EAGAIN; |
| 4080 | return data->read_done_cb ? data->read_done_cb(task, data) : | 4080 | return data->pgio_done_cb ? data->pgio_done_cb(task, data) : |
| 4081 | nfs4_read_done_cb(task, data); | 4081 | nfs4_read_done_cb(task, data); |
| 4082 | } | 4082 | } |
| 4083 | 4083 | ||
| 4084 | static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 4084 | static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 4085 | { | 4085 | { |
| 4086 | data->timestamp = jiffies; | 4086 | data->timestamp = jiffies; |
| 4087 | data->read_done_cb = nfs4_read_done_cb; | 4087 | data->pgio_done_cb = nfs4_read_done_cb; |
| 4088 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; | 4088 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; |
| 4089 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | 4089 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); |
| 4090 | } | 4090 | } |
| 4091 | 4091 | ||
| 4092 | static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 4092 | static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
| 4093 | { | 4093 | { |
| 4094 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | 4094 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), |
| 4095 | &data->args.seq_args, | 4095 | &data->args.seq_args, |
| @@ -4097,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat | |||
| 4097 | task)) | 4097 | task)) |
| 4098 | return 0; | 4098 | return 0; |
| 4099 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | 4099 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, |
| 4100 | data->args.lock_context, FMODE_READ) == -EIO) | 4100 | data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) |
| 4101 | return -EIO; | 4101 | return -EIO; |
| 4102 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | 4102 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) |
| 4103 | return -EIO; | 4103 | return -EIO; |
| 4104 | return 0; | 4104 | return 0; |
| 4105 | } | 4105 | } |
| 4106 | 4106 | ||
| 4107 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) | 4107 | static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) |
| 4108 | { | 4108 | { |
| 4109 | struct inode *inode = data->header->inode; | 4109 | struct inode *inode = data->header->inode; |
| 4110 | 4110 | ||
| @@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data | |||
| 4121 | } | 4121 | } |
| 4122 | 4122 | ||
| 4123 | static bool nfs4_write_stateid_changed(struct rpc_task *task, | 4123 | static bool nfs4_write_stateid_changed(struct rpc_task *task, |
| 4124 | struct nfs_writeargs *args) | 4124 | struct nfs_pgio_args *args) |
| 4125 | { | 4125 | { |
| 4126 | 4126 | ||
| 4127 | if (!nfs4_error_stateid_expired(task->tk_status) || | 4127 | if (!nfs4_error_stateid_expired(task->tk_status) || |
| @@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, | |||
| 4134 | return true; | 4134 | return true; |
| 4135 | } | 4135 | } |
| 4136 | 4136 | ||
| 4137 | static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) | 4137 | static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 4138 | { | 4138 | { |
| 4139 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | 4139 | if (!nfs4_sequence_done(task, &data->res.seq_res)) |
| 4140 | return -EAGAIN; | 4140 | return -EAGAIN; |
| 4141 | if (nfs4_write_stateid_changed(task, &data->args)) | 4141 | if (nfs4_write_stateid_changed(task, &data->args)) |
| 4142 | return -EAGAIN; | 4142 | return -EAGAIN; |
| 4143 | return data->write_done_cb ? data->write_done_cb(task, data) : | 4143 | return data->pgio_done_cb ? data->pgio_done_cb(task, data) : |
| 4144 | nfs4_write_done_cb(task, data); | 4144 | nfs4_write_done_cb(task, data); |
| 4145 | } | 4145 | } |
| 4146 | 4146 | ||
| 4147 | static | 4147 | static |
| 4148 | bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) | 4148 | bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) |
| 4149 | { | 4149 | { |
| 4150 | const struct nfs_pgio_header *hdr = data->header; | 4150 | const struct nfs_pgio_header *hdr = data->header; |
| 4151 | 4151 | ||
| @@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) | |||
| 4158 | return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; | 4158 | return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; |
| 4159 | } | 4159 | } |
| 4160 | 4160 | ||
| 4161 | static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 4161 | static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 4162 | { | 4162 | { |
| 4163 | struct nfs_server *server = NFS_SERVER(data->header->inode); | 4163 | struct nfs_server *server = NFS_SERVER(data->header->inode); |
| 4164 | 4164 | ||
| @@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
| 4168 | } else | 4168 | } else |
| 4169 | data->args.bitmask = server->cache_consistency_bitmask; | 4169 | data->args.bitmask = server->cache_consistency_bitmask; |
| 4170 | 4170 | ||
| 4171 | if (!data->write_done_cb) | 4171 | if (!data->pgio_done_cb) |
| 4172 | data->write_done_cb = nfs4_write_done_cb; | 4172 | data->pgio_done_cb = nfs4_write_done_cb; |
| 4173 | data->res.server = server; | 4173 | data->res.server = server; |
| 4174 | data->timestamp = jiffies; | 4174 | data->timestamp = jiffies; |
| 4175 | 4175 | ||
| @@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
| 4177 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); | 4177 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); |
| 4178 | } | 4178 | } |
| 4179 | 4179 | ||
| 4180 | static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
| 4181 | { | ||
| 4182 | if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), | ||
| 4183 | &data->args.seq_args, | ||
| 4184 | &data->res.seq_res, | ||
| 4185 | task)) | ||
| 4186 | return 0; | ||
| 4187 | if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, | ||
| 4188 | data->args.lock_context, FMODE_WRITE) == -EIO) | ||
| 4189 | return -EIO; | ||
| 4190 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) | ||
| 4191 | return -EIO; | ||
| 4192 | return 0; | ||
| 4193 | } | ||
| 4194 | |||
| 4195 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 4180 | static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
| 4196 | { | 4181 | { |
| 4197 | nfs4_setup_sequence(NFS_SERVER(data->inode), | 4182 | nfs4_setup_sequence(NFS_SERVER(data->inode), |
| @@ -8432,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
| 8432 | .pathconf = nfs4_proc_pathconf, | 8417 | .pathconf = nfs4_proc_pathconf, |
| 8433 | .set_capabilities = nfs4_server_capabilities, | 8418 | .set_capabilities = nfs4_server_capabilities, |
| 8434 | .decode_dirent = nfs4_decode_dirent, | 8419 | .decode_dirent = nfs4_decode_dirent, |
| 8420 | .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, | ||
| 8435 | .read_setup = nfs4_proc_read_setup, | 8421 | .read_setup = nfs4_proc_read_setup, |
| 8436 | .read_pageio_init = pnfs_pageio_init_read, | ||
| 8437 | .read_rpc_prepare = nfs4_proc_read_rpc_prepare, | ||
| 8438 | .read_done = nfs4_read_done, | 8422 | .read_done = nfs4_read_done, |
| 8439 | .write_setup = nfs4_proc_write_setup, | 8423 | .write_setup = nfs4_proc_write_setup, |
| 8440 | .write_pageio_init = pnfs_pageio_init_write, | ||
| 8441 | .write_rpc_prepare = nfs4_proc_write_rpc_prepare, | ||
| 8442 | .write_done = nfs4_write_done, | 8424 | .write_done = nfs4_write_done, |
| 8443 | .commit_setup = nfs4_proc_commit_setup, | 8425 | .commit_setup = nfs4_proc_commit_setup, |
| 8444 | .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, | 8426 | .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c0583b9bef71..848f6853c59e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
| @@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |||
| 1456 | * server that doesn't support a grace period. | 1456 | * server that doesn't support a grace period. |
| 1457 | */ | 1457 | */ |
| 1458 | spin_lock(&sp->so_lock); | 1458 | spin_lock(&sp->so_lock); |
| 1459 | write_seqcount_begin(&sp->so_reclaim_seqcount); | 1459 | raw_write_seqcount_begin(&sp->so_reclaim_seqcount); |
| 1460 | restart: | 1460 | restart: |
| 1461 | list_for_each_entry(state, &sp->so_states, open_states) { | 1461 | list_for_each_entry(state, &sp->so_states, open_states) { |
| 1462 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | 1462 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) |
| @@ -1519,13 +1519,13 @@ restart: | |||
| 1519 | spin_lock(&sp->so_lock); | 1519 | spin_lock(&sp->so_lock); |
| 1520 | goto restart; | 1520 | goto restart; |
| 1521 | } | 1521 | } |
| 1522 | write_seqcount_end(&sp->so_reclaim_seqcount); | 1522 | raw_write_seqcount_end(&sp->so_reclaim_seqcount); |
| 1523 | spin_unlock(&sp->so_lock); | 1523 | spin_unlock(&sp->so_lock); |
| 1524 | return 0; | 1524 | return 0; |
| 1525 | out_err: | 1525 | out_err: |
| 1526 | nfs4_put_open_state(state); | 1526 | nfs4_put_open_state(state); |
| 1527 | spin_lock(&sp->so_lock); | 1527 | spin_lock(&sp->so_lock); |
| 1528 | write_seqcount_end(&sp->so_reclaim_seqcount); | 1528 | raw_write_seqcount_end(&sp->so_reclaim_seqcount); |
| 1529 | spin_unlock(&sp->so_lock); | 1529 | spin_unlock(&sp->so_lock); |
| 1530 | return status; | 1530 | return status; |
| 1531 | } | 1531 | } |
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 849cf146db30..0a744f3a86f6 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h | |||
| @@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); | |||
| 932 | 932 | ||
| 933 | DECLARE_EVENT_CLASS(nfs4_read_event, | 933 | DECLARE_EVENT_CLASS(nfs4_read_event, |
| 934 | TP_PROTO( | 934 | TP_PROTO( |
| 935 | const struct nfs_read_data *data, | 935 | const struct nfs_pgio_data *data, |
| 936 | int error | 936 | int error |
| 937 | ), | 937 | ), |
| 938 | 938 | ||
| @@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, | |||
| 972 | #define DEFINE_NFS4_READ_EVENT(name) \ | 972 | #define DEFINE_NFS4_READ_EVENT(name) \ |
| 973 | DEFINE_EVENT(nfs4_read_event, name, \ | 973 | DEFINE_EVENT(nfs4_read_event, name, \ |
| 974 | TP_PROTO( \ | 974 | TP_PROTO( \ |
| 975 | const struct nfs_read_data *data, \ | 975 | const struct nfs_pgio_data *data, \ |
| 976 | int error \ | 976 | int error \ |
| 977 | ), \ | 977 | ), \ |
| 978 | TP_ARGS(data, error)) | 978 | TP_ARGS(data, error)) |
| @@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); | |||
| 983 | 983 | ||
| 984 | DECLARE_EVENT_CLASS(nfs4_write_event, | 984 | DECLARE_EVENT_CLASS(nfs4_write_event, |
| 985 | TP_PROTO( | 985 | TP_PROTO( |
| 986 | const struct nfs_write_data *data, | 986 | const struct nfs_pgio_data *data, |
| 987 | int error | 987 | int error |
| 988 | ), | 988 | ), |
| 989 | 989 | ||
| @@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, | |||
| 1024 | #define DEFINE_NFS4_WRITE_EVENT(name) \ | 1024 | #define DEFINE_NFS4_WRITE_EVENT(name) \ |
| 1025 | DEFINE_EVENT(nfs4_write_event, name, \ | 1025 | DEFINE_EVENT(nfs4_write_event, name, \ |
| 1026 | TP_PROTO( \ | 1026 | TP_PROTO( \ |
| 1027 | const struct nfs_write_data *data, \ | 1027 | const struct nfs_pgio_data *data, \ |
| 1028 | int error \ | 1028 | int error \ |
| 1029 | ), \ | 1029 | ), \ |
| 1030 | TP_ARGS(data, error)) | 1030 | TP_ARGS(data, error)) |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 73ce8d4fe2c8..939ae606cfa4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
| @@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) | |||
| 1556 | encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); | 1556 | encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); |
| 1557 | } | 1557 | } |
| 1558 | 1558 | ||
| 1559 | static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) | 1559 | static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args, |
| 1560 | struct compound_hdr *hdr) | ||
| 1560 | { | 1561 | { |
| 1561 | __be32 *p; | 1562 | __be32 *p; |
| 1562 | 1563 | ||
| @@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 | |||
| 1701 | encode_nfs4_verifier(xdr, &arg->confirm); | 1702 | encode_nfs4_verifier(xdr, &arg->confirm); |
| 1702 | } | 1703 | } |
| 1703 | 1704 | ||
| 1704 | static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) | 1705 | static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args, |
| 1706 | struct compound_hdr *hdr) | ||
| 1705 | { | 1707 | { |
| 1706 | __be32 *p; | 1708 | __be32 *p; |
| 1707 | 1709 | ||
| @@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
| 2451 | * Encode a READ request | 2453 | * Encode a READ request |
| 2452 | */ | 2454 | */ |
| 2453 | static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, | 2455 | static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 2454 | struct nfs_readargs *args) | 2456 | struct nfs_pgio_args *args) |
| 2455 | { | 2457 | { |
| 2456 | struct compound_hdr hdr = { | 2458 | struct compound_hdr hdr = { |
| 2457 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2459 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
| @@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
| 2513 | * Encode a WRITE request | 2515 | * Encode a WRITE request |
| 2514 | */ | 2516 | */ |
| 2515 | static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, | 2517 | static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, |
| 2516 | struct nfs_writeargs *args) | 2518 | struct nfs_pgio_args *args) |
| 2517 | { | 2519 | { |
| 2518 | struct compound_hdr hdr = { | 2520 | struct compound_hdr hdr = { |
| 2519 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | 2521 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), |
| @@ -5085,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) | |||
| 5085 | return decode_op_hdr(xdr, OP_PUTROOTFH); | 5087 | return decode_op_hdr(xdr, OP_PUTROOTFH); |
| 5086 | } | 5088 | } |
| 5087 | 5089 | ||
| 5088 | static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) | 5090 | static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, |
| 5091 | struct nfs_pgio_res *res) | ||
| 5089 | { | 5092 | { |
| 5090 | __be32 *p; | 5093 | __be32 *p; |
| 5091 | uint32_t count, eof, recvd; | 5094 | uint32_t count, eof, recvd; |
| @@ -5339,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr) | |||
| 5339 | return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); | 5342 | return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); |
| 5340 | } | 5343 | } |
| 5341 | 5344 | ||
| 5342 | static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) | 5345 | static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) |
| 5343 | { | 5346 | { |
| 5344 | __be32 *p; | 5347 | __be32 *p; |
| 5345 | int status; | 5348 | int status; |
| @@ -6636,7 +6639,7 @@ out: | |||
| 6636 | * Decode Read response | 6639 | * Decode Read response |
| 6637 | */ | 6640 | */ |
| 6638 | static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | 6641 | static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, |
| 6639 | struct nfs_readres *res) | 6642 | struct nfs_pgio_res *res) |
| 6640 | { | 6643 | { |
| 6641 | struct compound_hdr hdr; | 6644 | struct compound_hdr hdr; |
| 6642 | int status; | 6645 | int status; |
| @@ -6661,7 +6664,7 @@ out: | |||
| 6661 | * Decode WRITE response | 6664 | * Decode WRITE response |
| 6662 | */ | 6665 | */ |
| 6663 | static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | 6666 | static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, |
| 6664 | struct nfs_writeres *res) | 6667 | struct nfs_pgio_res *res) |
| 6665 | { | 6668 | { |
| 6666 | struct compound_hdr hdr; | 6669 | struct compound_hdr hdr; |
| 6667 | int status; | 6670 | int status; |
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5457745dd4f1..611320753db2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
| @@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private) | |||
| 439 | objlayout_read_done(&objios->oir, status, objios->sync); | 439 | objlayout_read_done(&objios->oir, status, objios->sync); |
| 440 | } | 440 | } |
| 441 | 441 | ||
| 442 | int objio_read_pagelist(struct nfs_read_data *rdata) | 442 | int objio_read_pagelist(struct nfs_pgio_data *rdata) |
| 443 | { | 443 | { |
| 444 | struct nfs_pgio_header *hdr = rdata->header; | 444 | struct nfs_pgio_header *hdr = rdata->header; |
| 445 | struct objio_state *objios; | 445 | struct objio_state *objios; |
| @@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private) | |||
| 487 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) | 487 | static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) |
| 488 | { | 488 | { |
| 489 | struct objio_state *objios = priv; | 489 | struct objio_state *objios = priv; |
| 490 | struct nfs_write_data *wdata = objios->oir.rpcdata; | 490 | struct nfs_pgio_data *wdata = objios->oir.rpcdata; |
| 491 | struct address_space *mapping = wdata->header->inode->i_mapping; | 491 | struct address_space *mapping = wdata->header->inode->i_mapping; |
| 492 | pgoff_t index = offset / PAGE_SIZE; | 492 | pgoff_t index = offset / PAGE_SIZE; |
| 493 | struct page *page; | 493 | struct page *page; |
| @@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = { | |||
| 531 | .put_page = &__r4w_put_page, | 531 | .put_page = &__r4w_put_page, |
| 532 | }; | 532 | }; |
| 533 | 533 | ||
| 534 | int objio_write_pagelist(struct nfs_write_data *wdata, int how) | 534 | int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) |
| 535 | { | 535 | { |
| 536 | struct nfs_pgio_header *hdr = wdata->header; | 536 | struct nfs_pgio_header *hdr = wdata->header; |
| 537 | struct objio_state *objios; | 537 | struct objio_state *objios; |
| @@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how) | |||
| 564 | return 0; | 564 | return 0; |
| 565 | } | 565 | } |
| 566 | 566 | ||
| 567 | static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, | 567 | /* |
| 568 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number | ||
| 569 | * of bytes (maximum @req->wb_bytes) that can be coalesced. | ||
| 570 | */ | ||
| 571 | static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, | ||
| 568 | struct nfs_page *prev, struct nfs_page *req) | 572 | struct nfs_page *prev, struct nfs_page *req) |
| 569 | { | 573 | { |
| 570 | if (!pnfs_generic_pg_test(pgio, prev, req)) | 574 | unsigned int size; |
| 571 | return false; | 575 | |
| 576 | size = pnfs_generic_pg_test(pgio, prev, req); | ||
| 577 | |||
| 578 | if (!size || pgio->pg_count + req->wb_bytes > | ||
| 579 | (unsigned long)pgio->pg_layout_private) | ||
| 580 | return 0; | ||
| 572 | 581 | ||
| 573 | return pgio->pg_count + req->wb_bytes <= | 582 | return min(size, req->wb_bytes); |
| 574 | (unsigned long)pgio->pg_layout_private; | ||
| 575 | } | 583 | } |
| 576 | 584 | ||
| 577 | static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) | 585 | static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index e4f9cbfec67b..765d3f54e986 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
| @@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | |||
| 53 | struct objlayout *objlay; | 53 | struct objlayout *objlay; |
| 54 | 54 | ||
| 55 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | 55 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); |
| 56 | if (objlay) { | 56 | if (!objlay) |
| 57 | spin_lock_init(&objlay->lock); | 57 | return NULL; |
| 58 | INIT_LIST_HEAD(&objlay->err_list); | 58 | spin_lock_init(&objlay->lock); |
| 59 | } | 59 | INIT_LIST_HEAD(&objlay->err_list); |
| 60 | dprintk("%s: Return %p\n", __func__, objlay); | 60 | dprintk("%s: Return %p\n", __func__, objlay); |
| 61 | return &objlay->pnfs_layout; | 61 | return &objlay->pnfs_layout; |
| 62 | } | 62 | } |
| @@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, | |||
| 229 | static void _rpc_read_complete(struct work_struct *work) | 229 | static void _rpc_read_complete(struct work_struct *work) |
| 230 | { | 230 | { |
| 231 | struct rpc_task *task; | 231 | struct rpc_task *task; |
| 232 | struct nfs_read_data *rdata; | 232 | struct nfs_pgio_data *rdata; |
| 233 | 233 | ||
| 234 | dprintk("%s enter\n", __func__); | 234 | dprintk("%s enter\n", __func__); |
| 235 | task = container_of(work, struct rpc_task, u.tk_work); | 235 | task = container_of(work, struct rpc_task, u.tk_work); |
| 236 | rdata = container_of(task, struct nfs_read_data, task); | 236 | rdata = container_of(task, struct nfs_pgio_data, task); |
| 237 | 237 | ||
| 238 | pnfs_ld_read_done(rdata); | 238 | pnfs_ld_read_done(rdata); |
| 239 | } | 239 | } |
| @@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work) | |||
| 241 | void | 241 | void |
| 242 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | 242 | objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
| 243 | { | 243 | { |
| 244 | struct nfs_read_data *rdata = oir->rpcdata; | 244 | struct nfs_pgio_data *rdata = oir->rpcdata; |
| 245 | 245 | ||
| 246 | oir->status = rdata->task.tk_status = status; | 246 | oir->status = rdata->task.tk_status = status; |
| 247 | if (status >= 0) | 247 | if (status >= 0) |
| @@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
| 266 | * Perform sync or async reads. | 266 | * Perform sync or async reads. |
| 267 | */ | 267 | */ |
| 268 | enum pnfs_try_status | 268 | enum pnfs_try_status |
| 269 | objlayout_read_pagelist(struct nfs_read_data *rdata) | 269 | objlayout_read_pagelist(struct nfs_pgio_data *rdata) |
| 270 | { | 270 | { |
| 271 | struct nfs_pgio_header *hdr = rdata->header; | 271 | struct nfs_pgio_header *hdr = rdata->header; |
| 272 | struct inode *inode = hdr->inode; | 272 | struct inode *inode = hdr->inode; |
| @@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) | |||
| 312 | static void _rpc_write_complete(struct work_struct *work) | 312 | static void _rpc_write_complete(struct work_struct *work) |
| 313 | { | 313 | { |
| 314 | struct rpc_task *task; | 314 | struct rpc_task *task; |
| 315 | struct nfs_write_data *wdata; | 315 | struct nfs_pgio_data *wdata; |
| 316 | 316 | ||
| 317 | dprintk("%s enter\n", __func__); | 317 | dprintk("%s enter\n", __func__); |
| 318 | task = container_of(work, struct rpc_task, u.tk_work); | 318 | task = container_of(work, struct rpc_task, u.tk_work); |
| 319 | wdata = container_of(task, struct nfs_write_data, task); | 319 | wdata = container_of(task, struct nfs_pgio_data, task); |
| 320 | 320 | ||
| 321 | pnfs_ld_write_done(wdata); | 321 | pnfs_ld_write_done(wdata); |
| 322 | } | 322 | } |
| @@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work) | |||
| 324 | void | 324 | void |
| 325 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | 325 | objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) |
| 326 | { | 326 | { |
| 327 | struct nfs_write_data *wdata = oir->rpcdata; | 327 | struct nfs_pgio_data *wdata = oir->rpcdata; |
| 328 | 328 | ||
| 329 | oir->status = wdata->task.tk_status = status; | 329 | oir->status = wdata->task.tk_status = status; |
| 330 | if (status >= 0) { | 330 | if (status >= 0) { |
| @@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) | |||
| 351 | * Perform sync or async writes. | 351 | * Perform sync or async writes. |
| 352 | */ | 352 | */ |
| 353 | enum pnfs_try_status | 353 | enum pnfs_try_status |
| 354 | objlayout_write_pagelist(struct nfs_write_data *wdata, | 354 | objlayout_write_pagelist(struct nfs_pgio_data *wdata, |
| 355 | int how) | 355 | int how) |
| 356 | { | 356 | { |
| 357 | struct nfs_pgio_header *hdr = wdata->header; | 357 | struct nfs_pgio_header *hdr = wdata->header; |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 87aa1dec6120..01e041029a6c 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
| @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); | |||
| 119 | */ | 119 | */ |
| 120 | extern void objio_free_result(struct objlayout_io_res *oir); | 120 | extern void objio_free_result(struct objlayout_io_res *oir); |
| 121 | 121 | ||
| 122 | extern int objio_read_pagelist(struct nfs_read_data *rdata); | 122 | extern int objio_read_pagelist(struct nfs_pgio_data *rdata); |
| 123 | extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); | 123 | extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); |
| 124 | 124 | ||
| 125 | /* | 125 | /* |
| 126 | * callback API | 126 | * callback API |
| @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( | |||
| 168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); | 168 | extern void objlayout_free_lseg(struct pnfs_layout_segment *); |
| 169 | 169 | ||
| 170 | extern enum pnfs_try_status objlayout_read_pagelist( | 170 | extern enum pnfs_try_status objlayout_read_pagelist( |
| 171 | struct nfs_read_data *); | 171 | struct nfs_pgio_data *); |
| 172 | 172 | ||
| 173 | extern enum pnfs_try_status objlayout_write_pagelist( | 173 | extern enum pnfs_try_status objlayout_write_pagelist( |
| 174 | struct nfs_write_data *, | 174 | struct nfs_pgio_data *, |
| 175 | int how); | 175 | int how); |
| 176 | 176 | ||
| 177 | extern void objlayout_encode_layoutcommit( | 177 | extern void objlayout_encode_layoutcommit( |
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 03ed984ab4d8..b6ee3a6ee96d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
| @@ -24,9 +24,14 @@ | |||
| 24 | #include "internal.h" | 24 | #include "internal.h" |
| 25 | #include "pnfs.h" | 25 | #include "pnfs.h" |
| 26 | 26 | ||
| 27 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | ||
| 28 | |||
| 27 | static struct kmem_cache *nfs_page_cachep; | 29 | static struct kmem_cache *nfs_page_cachep; |
| 30 | static const struct rpc_call_ops nfs_pgio_common_ops; | ||
| 31 | |||
| 32 | static void nfs_free_request(struct nfs_page *); | ||
| 28 | 33 | ||
| 29 | bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) | 34 | static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) |
| 30 | { | 35 | { |
| 31 | p->npages = pagecount; | 36 | p->npages = pagecount; |
| 32 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 37 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
| @@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c) | |||
| 133 | return __nfs_iocounter_wait(c); | 138 | return __nfs_iocounter_wait(c); |
| 134 | } | 139 | } |
| 135 | 140 | ||
| 141 | static int nfs_wait_bit_uninterruptible(void *word) | ||
| 142 | { | ||
| 143 | io_schedule(); | ||
| 144 | return 0; | ||
| 145 | } | ||
| 146 | |||
| 147 | /* | ||
| 148 | * nfs_page_group_lock - lock the head of the page group | ||
| 149 | * @req - request in group that is to be locked | ||
| 150 | * | ||
| 151 | * this lock must be held if modifying the page group list | ||
| 152 | */ | ||
| 153 | void | ||
| 154 | nfs_page_group_lock(struct nfs_page *req) | ||
| 155 | { | ||
| 156 | struct nfs_page *head = req->wb_head; | ||
| 157 | |||
| 158 | WARN_ON_ONCE(head != head->wb_head); | ||
| 159 | |||
| 160 | wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, | ||
| 161 | nfs_wait_bit_uninterruptible, | ||
| 162 | TASK_UNINTERRUPTIBLE); | ||
| 163 | } | ||
| 164 | |||
| 165 | /* | ||
| 166 | * nfs_page_group_unlock - unlock the head of the page group | ||
| 167 | * @req - request in group that is to be unlocked | ||
| 168 | */ | ||
| 169 | void | ||
| 170 | nfs_page_group_unlock(struct nfs_page *req) | ||
| 171 | { | ||
| 172 | struct nfs_page *head = req->wb_head; | ||
| 173 | |||
| 174 | WARN_ON_ONCE(head != head->wb_head); | ||
| 175 | |||
| 176 | smp_mb__before_atomic(); | ||
| 177 | clear_bit(PG_HEADLOCK, &head->wb_flags); | ||
| 178 | smp_mb__after_atomic(); | ||
| 179 | wake_up_bit(&head->wb_flags, PG_HEADLOCK); | ||
| 180 | } | ||
| 181 | |||
| 182 | /* | ||
| 183 | * nfs_page_group_sync_on_bit_locked | ||
| 184 | * | ||
| 185 | * must be called with page group lock held | ||
| 186 | */ | ||
| 187 | static bool | ||
| 188 | nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) | ||
| 189 | { | ||
| 190 | struct nfs_page *head = req->wb_head; | ||
| 191 | struct nfs_page *tmp; | ||
| 192 | |||
| 193 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); | ||
| 194 | WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); | ||
| 195 | |||
| 196 | tmp = req->wb_this_page; | ||
| 197 | while (tmp != req) { | ||
| 198 | if (!test_bit(bit, &tmp->wb_flags)) | ||
| 199 | return false; | ||
| 200 | tmp = tmp->wb_this_page; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* true! reset all bits */ | ||
| 204 | tmp = req; | ||
| 205 | do { | ||
| 206 | clear_bit(bit, &tmp->wb_flags); | ||
| 207 | tmp = tmp->wb_this_page; | ||
| 208 | } while (tmp != req); | ||
| 209 | |||
| 210 | return true; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* | ||
| 214 | * nfs_page_group_sync_on_bit - set bit on current request, but only | ||
| 215 | * return true if the bit is set for all requests in page group | ||
| 216 | * @req - request in page group | ||
| 217 | * @bit - PG_* bit that is used to sync page group | ||
| 218 | */ | ||
| 219 | bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) | ||
| 220 | { | ||
| 221 | bool ret; | ||
| 222 | |||
| 223 | nfs_page_group_lock(req); | ||
| 224 | ret = nfs_page_group_sync_on_bit_locked(req, bit); | ||
| 225 | nfs_page_group_unlock(req); | ||
| 226 | |||
| 227 | return ret; | ||
| 228 | } | ||
| 229 | |||
| 230 | /* | ||
| 231 | * nfs_page_group_init - Initialize the page group linkage for @req | ||
| 232 | * @req - a new nfs request | ||
| 233 | * @prev - the previous request in page group, or NULL if @req is the first | ||
| 234 | * or only request in the group (the head). | ||
| 235 | */ | ||
| 236 | static inline void | ||
| 237 | nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) | ||
| 238 | { | ||
| 239 | WARN_ON_ONCE(prev == req); | ||
| 240 | |||
| 241 | if (!prev) { | ||
| 242 | req->wb_head = req; | ||
| 243 | req->wb_this_page = req; | ||
| 244 | } else { | ||
| 245 | WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); | ||
| 246 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); | ||
| 247 | req->wb_head = prev->wb_head; | ||
| 248 | req->wb_this_page = prev->wb_this_page; | ||
| 249 | prev->wb_this_page = req; | ||
| 250 | |||
| 251 | /* grab extra ref if head request has extra ref from | ||
| 252 | * the write/commit path to handle handoff between write | ||
| 253 | * and commit lists */ | ||
| 254 | if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) | ||
| 255 | kref_get(&req->wb_kref); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 | /* | ||
| 260 | * nfs_page_group_destroy - sync the destruction of page groups | ||
| 261 | * @kref - kref embedded in the request that no longer needs the page group | ||
| 262 | * | ||
| 263 | * releases the page group reference from each member once all | ||
| 264 | * members have called this function. | ||
| 265 | */ | ||
| 266 | static void | ||
| 267 | nfs_page_group_destroy(struct kref *kref) | ||
| 268 | { | ||
| 269 | struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); | ||
| 270 | struct nfs_page *tmp, *next; | ||
| 271 | |||
| 272 | if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) | ||
| 273 | return; | ||
| 274 | |||
| 275 | tmp = req; | ||
| 276 | do { | ||
| 277 | next = tmp->wb_this_page; | ||
| 278 | /* unlink and free */ | ||
| 279 | tmp->wb_this_page = tmp; | ||
| 280 | tmp->wb_head = tmp; | ||
| 281 | nfs_free_request(tmp); | ||
| 282 | tmp = next; | ||
| 283 | } while (tmp != req); | ||
| 284 | } | ||
| 285 | |||
| 136 | /** | 286 | /** |
| 137 | * nfs_create_request - Create an NFS read/write request. | 287 | * nfs_create_request - Create an NFS read/write request. |
| 138 | * @ctx: open context to use | 288 | * @ctx: open context to use |
| 139 | * @inode: inode to which the request is attached | ||
| 140 | * @page: page to write | 289 | * @page: page to write |
| 290 | * @last: last nfs request created for this page group or NULL if head | ||
| 141 | * @offset: starting offset within the page for the write | 291 | * @offset: starting offset within the page for the write |
| 142 | * @count: number of bytes to read/write | 292 | * @count: number of bytes to read/write |
| 143 | * | 293 | * |
| @@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c) | |||
| 146 | * User should ensure it is safe to sleep in this function. | 296 | * User should ensure it is safe to sleep in this function. |
| 147 | */ | 297 | */ |
| 148 | struct nfs_page * | 298 | struct nfs_page * |
| 149 | nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | 299 | nfs_create_request(struct nfs_open_context *ctx, struct page *page, |
| 150 | struct page *page, | 300 | struct nfs_page *last, unsigned int offset, |
| 151 | unsigned int offset, unsigned int count) | 301 | unsigned int count) |
| 152 | { | 302 | { |
| 153 | struct nfs_page *req; | 303 | struct nfs_page *req; |
| 154 | struct nfs_lock_context *l_ctx; | 304 | struct nfs_lock_context *l_ctx; |
| @@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, | |||
| 180 | req->wb_bytes = count; | 330 | req->wb_bytes = count; |
| 181 | req->wb_context = get_nfs_open_context(ctx); | 331 | req->wb_context = get_nfs_open_context(ctx); |
| 182 | kref_init(&req->wb_kref); | 332 | kref_init(&req->wb_kref); |
| 333 | nfs_page_group_init(req, last); | ||
| 183 | return req; | 334 | return req; |
| 184 | } | 335 | } |
| 185 | 336 | ||
| @@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req) | |||
| 237 | } | 388 | } |
| 238 | } | 389 | } |
| 239 | 390 | ||
| 240 | |||
| 241 | /** | 391 | /** |
| 242 | * nfs_release_request - Release the count on an NFS read/write request | 392 | * nfs_release_request - Release the count on an NFS read/write request |
| 243 | * @req: request to release | 393 | * @req: request to release |
| 244 | * | 394 | * |
| 245 | * Note: Should never be called with the spinlock held! | 395 | * Note: Should never be called with the spinlock held! |
| 246 | */ | 396 | */ |
| 247 | static void nfs_free_request(struct kref *kref) | 397 | static void nfs_free_request(struct nfs_page *req) |
| 248 | { | 398 | { |
| 249 | struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); | 399 | WARN_ON_ONCE(req->wb_this_page != req); |
| 400 | |||
| 401 | /* extra debug: make sure no sync bits are still set */ | ||
| 402 | WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); | ||
| 403 | WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); | ||
| 404 | WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); | ||
| 405 | WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags)); | ||
| 406 | WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags)); | ||
| 250 | 407 | ||
| 251 | /* Release struct file and open context */ | 408 | /* Release struct file and open context */ |
| 252 | nfs_clear_request(req); | 409 | nfs_clear_request(req); |
| @@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref) | |||
| 255 | 412 | ||
| 256 | void nfs_release_request(struct nfs_page *req) | 413 | void nfs_release_request(struct nfs_page *req) |
| 257 | { | 414 | { |
| 258 | kref_put(&req->wb_kref, nfs_free_request); | 415 | kref_put(&req->wb_kref, nfs_page_group_destroy); |
| 259 | } | ||
| 260 | |||
| 261 | static int nfs_wait_bit_uninterruptible(void *word) | ||
| 262 | { | ||
| 263 | io_schedule(); | ||
| 264 | return 0; | ||
| 265 | } | 416 | } |
| 266 | 417 | ||
| 267 | /** | 418 | /** |
| @@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req) | |||
| 279 | TASK_UNINTERRUPTIBLE); | 430 | TASK_UNINTERRUPTIBLE); |
| 280 | } | 431 | } |
| 281 | 432 | ||
| 282 | bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) | 433 | /* |
| 434 | * nfs_generic_pg_test - determine if requests can be coalesced | ||
| 435 | * @desc: pointer to descriptor | ||
| 436 | * @prev: previous request in desc, or NULL | ||
| 437 | * @req: this request | ||
| 438 | * | ||
| 439 | * Returns zero if @req cannot be coalesced into @desc, otherwise it returns | ||
| 440 | * the number of bytes (at most @req->wb_bytes) that can be coalesced. | ||
| 441 | */ | ||
| 442 | size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | ||
| 443 | struct nfs_page *prev, struct nfs_page *req) | ||
| 283 | { | 444 | { |
| 284 | /* | 445 | if (desc->pg_count > desc->pg_bsize) { |
| 285 | * FIXME: ideally we should be able to coalesce all requests | 446 | /* should never happen */ |
| 286 | * that are not block boundary aligned, but currently this | 447 | WARN_ON_ONCE(1); |
| 287 | * is problematic for the case of bsize < PAGE_CACHE_SIZE, | ||
| 288 | * since nfs_flush_multi and nfs_pagein_multi assume you | ||
| 289 | * can have only one struct nfs_page. | ||
| 290 | */ | ||
| 291 | if (desc->pg_bsize < PAGE_SIZE) | ||
| 292 | return 0; | 448 | return 0; |
| 449 | } | ||
| 293 | 450 | ||
| 294 | return desc->pg_count + req->wb_bytes <= desc->pg_bsize; | 451 | return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); |
| 295 | } | 452 | } |
| 296 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); | 453 | EXPORT_SYMBOL_GPL(nfs_generic_pg_test); |
| 297 | 454 | ||
| 455 | static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) | ||
| 456 | { | ||
| 457 | return container_of(hdr, struct nfs_rw_header, header); | ||
| 458 | } | ||
| 459 | |||
| 460 | /** | ||
| 461 | * nfs_rw_header_alloc - Allocate a header for a read or write | ||
| 462 | * @ops: Read or write function vector | ||
| 463 | */ | ||
| 464 | struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) | ||
| 465 | { | ||
| 466 | struct nfs_rw_header *header = ops->rw_alloc_header(); | ||
| 467 | |||
| 468 | if (header) { | ||
| 469 | struct nfs_pgio_header *hdr = &header->header; | ||
| 470 | |||
| 471 | INIT_LIST_HEAD(&hdr->pages); | ||
| 472 | spin_lock_init(&hdr->lock); | ||
| 473 | atomic_set(&hdr->refcnt, 0); | ||
| 474 | hdr->rw_ops = ops; | ||
| 475 | } | ||
| 476 | return header; | ||
| 477 | } | ||
| 478 | EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); | ||
| 479 | |||
| 480 | /* | ||
| 481 | * nfs_rw_header_free - Free a read or write header | ||
| 482 | * @hdr: The header to free | ||
| 483 | */ | ||
| 484 | void nfs_rw_header_free(struct nfs_pgio_header *hdr) | ||
| 485 | { | ||
| 486 | hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); | ||
| 487 | } | ||
| 488 | EXPORT_SYMBOL_GPL(nfs_rw_header_free); | ||
| 489 | |||
| 490 | /** | ||
| 491 | * nfs_pgio_data_alloc - Allocate pageio data | ||
| 492 | * @hdr: The header making a request | ||
| 493 | * @pagecount: Number of pages to create | ||
| 494 | */ | ||
| 495 | static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, | ||
| 496 | unsigned int pagecount) | ||
| 497 | { | ||
| 498 | struct nfs_pgio_data *data, *prealloc; | ||
| 499 | |||
| 500 | prealloc = &NFS_RW_HEADER(hdr)->rpc_data; | ||
| 501 | if (prealloc->header == NULL) | ||
| 502 | data = prealloc; | ||
| 503 | else | ||
| 504 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 505 | if (!data) | ||
| 506 | goto out; | ||
| 507 | |||
| 508 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
| 509 | data->header = hdr; | ||
| 510 | atomic_inc(&hdr->refcnt); | ||
| 511 | } else { | ||
| 512 | if (data != prealloc) | ||
| 513 | kfree(data); | ||
| 514 | data = NULL; | ||
| 515 | } | ||
| 516 | out: | ||
| 517 | return data; | ||
| 518 | } | ||
| 519 | |||
| 520 | /** | ||
| 521 | * nfs_pgio_data_release - Properly free pageio data | ||
| 522 | * @data: The data to release | ||
| 523 | */ | ||
| 524 | void nfs_pgio_data_release(struct nfs_pgio_data *data) | ||
| 525 | { | ||
| 526 | struct nfs_pgio_header *hdr = data->header; | ||
| 527 | struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); | ||
| 528 | |||
| 529 | put_nfs_open_context(data->args.context); | ||
| 530 | if (data->pages.pagevec != data->pages.page_array) | ||
| 531 | kfree(data->pages.pagevec); | ||
| 532 | if (data == &pageio_header->rpc_data) { | ||
| 533 | data->header = NULL; | ||
| 534 | data = NULL; | ||
| 535 | } | ||
| 536 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 537 | hdr->completion_ops->completion(hdr); | ||
| 538 | /* Note: we only free the rpc_task after callbacks are done. | ||
| 539 | * See the comment in rpc_free_task() for why | ||
| 540 | */ | ||
| 541 | kfree(data); | ||
| 542 | } | ||
| 543 | EXPORT_SYMBOL_GPL(nfs_pgio_data_release); | ||
| 544 | |||
| 545 | /** | ||
| 546 | * nfs_pgio_rpcsetup - Set up arguments for a pageio call | ||
| 547 | * @data: The pageio data | ||
| 548 | * @count: Number of bytes to read or write | ||
| 549 | * @offset: Initial offset | ||
| 550 | * @how: How to commit data (writes only) | ||
| 551 | * @cinfo: Commit information for the call (writes only) | ||
| 552 | */ | ||
| 553 | static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, | ||
| 554 | unsigned int count, unsigned int offset, | ||
| 555 | int how, struct nfs_commit_info *cinfo) | ||
| 556 | { | ||
| 557 | struct nfs_page *req = data->header->req; | ||
| 558 | |||
| 559 | /* Set up the RPC argument and reply structs | ||
| 560 | * NB: take care not to mess about with data->commit et al. */ | ||
| 561 | |||
| 562 | data->args.fh = NFS_FH(data->header->inode); | ||
| 563 | data->args.offset = req_offset(req) + offset; | ||
| 564 | /* pnfs_set_layoutcommit needs this */ | ||
| 565 | data->mds_offset = data->args.offset; | ||
| 566 | data->args.pgbase = req->wb_pgbase + offset; | ||
| 567 | data->args.pages = data->pages.pagevec; | ||
| 568 | data->args.count = count; | ||
| 569 | data->args.context = get_nfs_open_context(req->wb_context); | ||
| 570 | data->args.lock_context = req->wb_lock_context; | ||
| 571 | data->args.stable = NFS_UNSTABLE; | ||
| 572 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | ||
| 573 | case 0: | ||
| 574 | break; | ||
| 575 | case FLUSH_COND_STABLE: | ||
| 576 | if (nfs_reqs_to_commit(cinfo)) | ||
| 577 | break; | ||
| 578 | default: | ||
| 579 | data->args.stable = NFS_FILE_SYNC; | ||
| 580 | } | ||
| 581 | |||
| 582 | data->res.fattr = &data->fattr; | ||
| 583 | data->res.count = count; | ||
| 584 | data->res.eof = 0; | ||
| 585 | data->res.verf = &data->verf; | ||
| 586 | nfs_fattr_init(&data->fattr); | ||
| 587 | } | ||
| 588 | |||
| 589 | /** | ||
| 590 | * nfs_pgio_prepare - Prepare pageio data to go over the wire | ||
| 591 | * @task: The current task | ||
| 592 | * @calldata: pageio data to prepare | ||
| 593 | */ | ||
| 594 | static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) | ||
| 595 | { | ||
| 596 | struct nfs_pgio_data *data = calldata; | ||
| 597 | int err; | ||
| 598 | err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); | ||
| 599 | if (err) | ||
| 600 | rpc_exit(task, err); | ||
| 601 | } | ||
| 602 | |||
| 603 | int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, | ||
| 604 | const struct rpc_call_ops *call_ops, int how, int flags) | ||
| 605 | { | ||
| 606 | struct rpc_task *task; | ||
| 607 | struct rpc_message msg = { | ||
| 608 | .rpc_argp = &data->args, | ||
| 609 | .rpc_resp = &data->res, | ||
| 610 | .rpc_cred = data->header->cred, | ||
| 611 | }; | ||
| 612 | struct rpc_task_setup task_setup_data = { | ||
| 613 | .rpc_client = clnt, | ||
| 614 | .task = &data->task, | ||
| 615 | .rpc_message = &msg, | ||
| 616 | .callback_ops = call_ops, | ||
| 617 | .callback_data = data, | ||
| 618 | .workqueue = nfsiod_workqueue, | ||
| 619 | .flags = RPC_TASK_ASYNC | flags, | ||
| 620 | }; | ||
| 621 | int ret = 0; | ||
| 622 | |||
| 623 | data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); | ||
| 624 | |||
| 625 | dprintk("NFS: %5u initiated pgio call " | ||
| 626 | "(req %s/%llu, %u bytes @ offset %llu)\n", | ||
| 627 | data->task.tk_pid, | ||
| 628 | data->header->inode->i_sb->s_id, | ||
| 629 | (unsigned long long)NFS_FILEID(data->header->inode), | ||
| 630 | data->args.count, | ||
| 631 | (unsigned long long)data->args.offset); | ||
| 632 | |||
| 633 | task = rpc_run_task(&task_setup_data); | ||
| 634 | if (IS_ERR(task)) { | ||
| 635 | ret = PTR_ERR(task); | ||
| 636 | goto out; | ||
| 637 | } | ||
| 638 | if (how & FLUSH_SYNC) { | ||
| 639 | ret = rpc_wait_for_completion_task(task); | ||
| 640 | if (ret == 0) | ||
| 641 | ret = task->tk_status; | ||
| 642 | } | ||
| 643 | rpc_put_task(task); | ||
| 644 | out: | ||
| 645 | return ret; | ||
| 646 | } | ||
| 647 | EXPORT_SYMBOL_GPL(nfs_initiate_pgio); | ||
| 648 | |||
| 649 | /** | ||
| 650 | * nfs_pgio_error - Clean up from a pageio error | ||
| 651 | * @desc: IO descriptor | ||
| 652 | * @hdr: pageio header | ||
| 653 | */ | ||
| 654 | static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | ||
| 655 | struct nfs_pgio_header *hdr) | ||
| 656 | { | ||
| 657 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
| 658 | nfs_pgio_data_release(hdr->data); | ||
| 659 | hdr->data = NULL; | ||
| 660 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 661 | return -ENOMEM; | ||
| 662 | } | ||
| 663 | |||
| 664 | /** | ||
| 665 | * nfs_pgio_release - Release pageio data | ||
| 666 | * @calldata: The pageio data to release | ||
| 667 | */ | ||
| 668 | static void nfs_pgio_release(void *calldata) | ||
| 669 | { | ||
| 670 | struct nfs_pgio_data *data = calldata; | ||
| 671 | if (data->header->rw_ops->rw_release) | ||
| 672 | data->header->rw_ops->rw_release(data); | ||
| 673 | nfs_pgio_data_release(data); | ||
| 674 | } | ||
| 675 | |||
| 298 | /** | 676 | /** |
| 299 | * nfs_pageio_init - initialise a page io descriptor | 677 | * nfs_pageio_init - initialise a page io descriptor |
| 300 | * @desc: pointer to descriptor | 678 | * @desc: pointer to descriptor |
| @@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
| 307 | struct inode *inode, | 685 | struct inode *inode, |
| 308 | const struct nfs_pageio_ops *pg_ops, | 686 | const struct nfs_pageio_ops *pg_ops, |
| 309 | const struct nfs_pgio_completion_ops *compl_ops, | 687 | const struct nfs_pgio_completion_ops *compl_ops, |
| 688 | const struct nfs_rw_ops *rw_ops, | ||
| 310 | size_t bsize, | 689 | size_t bsize, |
| 311 | int io_flags) | 690 | int io_flags) |
| 312 | { | 691 | { |
| @@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
| 320 | desc->pg_inode = inode; | 699 | desc->pg_inode = inode; |
| 321 | desc->pg_ops = pg_ops; | 700 | desc->pg_ops = pg_ops; |
| 322 | desc->pg_completion_ops = compl_ops; | 701 | desc->pg_completion_ops = compl_ops; |
| 702 | desc->pg_rw_ops = rw_ops; | ||
| 323 | desc->pg_ioflags = io_flags; | 703 | desc->pg_ioflags = io_flags; |
| 324 | desc->pg_error = 0; | 704 | desc->pg_error = 0; |
| 325 | desc->pg_lseg = NULL; | 705 | desc->pg_lseg = NULL; |
| @@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
| 328 | } | 708 | } |
| 329 | EXPORT_SYMBOL_GPL(nfs_pageio_init); | 709 | EXPORT_SYMBOL_GPL(nfs_pageio_init); |
| 330 | 710 | ||
| 711 | /** | ||
| 712 | * nfs_pgio_result - Basic pageio error handling | ||
| 713 | * @task: The task that ran | ||
| 714 | * @calldata: Pageio data to check | ||
| 715 | */ | ||
| 716 | static void nfs_pgio_result(struct rpc_task *task, void *calldata) | ||
| 717 | { | ||
| 718 | struct nfs_pgio_data *data = calldata; | ||
| 719 | struct inode *inode = data->header->inode; | ||
| 720 | |||
| 721 | dprintk("NFS: %s: %5u, (status %d)\n", __func__, | ||
| 722 | task->tk_pid, task->tk_status); | ||
| 723 | |||
| 724 | if (data->header->rw_ops->rw_done(task, data, inode) != 0) | ||
| 725 | return; | ||
| 726 | if (task->tk_status < 0) | ||
| 727 | nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); | ||
| 728 | else | ||
| 729 | data->header->rw_ops->rw_result(task, data); | ||
| 730 | } | ||
| 731 | |||
| 732 | /* | ||
| 733 | * Create an RPC task for the given read or write request and kick it. | ||
| 734 | * The page must have been locked by the caller. | ||
| 735 | * | ||
| 736 | * It may happen that the page we're passed is not marked dirty. | ||
| 737 | * This is the case if nfs_updatepage detects a conflicting request | ||
| 738 | * that has been written but not committed. | ||
| 739 | */ | ||
| 740 | int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, | ||
| 741 | struct nfs_pgio_header *hdr) | ||
| 742 | { | ||
| 743 | struct nfs_page *req; | ||
| 744 | struct page **pages; | ||
| 745 | struct nfs_pgio_data *data; | ||
| 746 | struct list_head *head = &desc->pg_list; | ||
| 747 | struct nfs_commit_info cinfo; | ||
| 748 | |||
| 749 | data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
| 750 | desc->pg_count)); | ||
| 751 | if (!data) | ||
| 752 | return nfs_pgio_error(desc, hdr); | ||
| 753 | |||
| 754 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
| 755 | pages = data->pages.pagevec; | ||
| 756 | while (!list_empty(head)) { | ||
| 757 | req = nfs_list_entry(head->next); | ||
| 758 | nfs_list_remove_request(req); | ||
| 759 | nfs_list_add_request(req, &hdr->pages); | ||
| 760 | *pages++ = req->wb_page; | ||
| 761 | } | ||
| 762 | |||
| 763 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
| 764 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | ||
| 765 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
| 766 | |||
| 767 | /* Set up the argument struct */ | ||
| 768 | nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | ||
| 769 | hdr->data = data; | ||
| 770 | desc->pg_rpc_callops = &nfs_pgio_common_ops; | ||
| 771 | return 0; | ||
| 772 | } | ||
| 773 | EXPORT_SYMBOL_GPL(nfs_generic_pgio); | ||
| 774 | |||
| 775 | static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) | ||
| 776 | { | ||
| 777 | struct nfs_rw_header *rw_hdr; | ||
| 778 | struct nfs_pgio_header *hdr; | ||
| 779 | int ret; | ||
| 780 | |||
| 781 | rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); | ||
| 782 | if (!rw_hdr) { | ||
| 783 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 784 | return -ENOMEM; | ||
| 785 | } | ||
| 786 | hdr = &rw_hdr->header; | ||
| 787 | nfs_pgheader_init(desc, hdr, nfs_rw_header_free); | ||
| 788 | atomic_inc(&hdr->refcnt); | ||
| 789 | ret = nfs_generic_pgio(desc, hdr); | ||
| 790 | if (ret == 0) | ||
| 791 | ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), | ||
| 792 | hdr->data, desc->pg_rpc_callops, | ||
| 793 | desc->pg_ioflags, 0); | ||
| 794 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 795 | hdr->completion_ops->completion(hdr); | ||
| 796 | return ret; | ||
| 797 | } | ||
| 798 | |||
| 331 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, | 799 | static bool nfs_match_open_context(const struct nfs_open_context *ctx1, |
| 332 | const struct nfs_open_context *ctx2) | 800 | const struct nfs_open_context *ctx2) |
| 333 | { | 801 | { |
| @@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
| 356 | struct nfs_page *req, | 824 | struct nfs_page *req, |
| 357 | struct nfs_pageio_descriptor *pgio) | 825 | struct nfs_pageio_descriptor *pgio) |
| 358 | { | 826 | { |
| 359 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) | 827 | size_t size; |
| 360 | return false; | 828 | |
| 361 | if (req->wb_context->dentry->d_inode->i_flock != NULL && | 829 | if (prev) { |
| 362 | !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) | 830 | if (!nfs_match_open_context(req->wb_context, prev->wb_context)) |
| 363 | return false; | 831 | return false; |
| 364 | if (req->wb_pgbase != 0) | 832 | if (req->wb_context->dentry->d_inode->i_flock != NULL && |
| 365 | return false; | 833 | !nfs_match_lock_context(req->wb_lock_context, |
| 366 | if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) | 834 | prev->wb_lock_context)) |
| 367 | return false; | 835 | return false; |
| 368 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) | 836 | if (req_offset(req) != req_offset(prev) + prev->wb_bytes) |
| 369 | return false; | 837 | return false; |
| 370 | return pgio->pg_ops->pg_test(pgio, prev, req); | 838 | } |
| 839 | size = pgio->pg_ops->pg_test(pgio, prev, req); | ||
| 840 | WARN_ON_ONCE(size > req->wb_bytes); | ||
| 841 | if (size && size < req->wb_bytes) | ||
| 842 | req->wb_bytes = size; | ||
| 843 | return size > 0; | ||
| 371 | } | 844 | } |
| 372 | 845 | ||
| 373 | /** | 846 | /** |
| @@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, | |||
| 381 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, | 854 | static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, |
| 382 | struct nfs_page *req) | 855 | struct nfs_page *req) |
| 383 | { | 856 | { |
| 857 | struct nfs_page *prev = NULL; | ||
| 384 | if (desc->pg_count != 0) { | 858 | if (desc->pg_count != 0) { |
| 385 | struct nfs_page *prev; | ||
| 386 | |||
| 387 | prev = nfs_list_entry(desc->pg_list.prev); | 859 | prev = nfs_list_entry(desc->pg_list.prev); |
| 388 | if (!nfs_can_coalesce_requests(prev, req, desc)) | ||
| 389 | return 0; | ||
| 390 | } else { | 860 | } else { |
| 391 | if (desc->pg_ops->pg_init) | 861 | if (desc->pg_ops->pg_init) |
| 392 | desc->pg_ops->pg_init(desc, req); | 862 | desc->pg_ops->pg_init(desc, req); |
| 393 | desc->pg_base = req->wb_pgbase; | 863 | desc->pg_base = req->wb_pgbase; |
| 394 | } | 864 | } |
| 865 | if (!nfs_can_coalesce_requests(prev, req, desc)) | ||
| 866 | return 0; | ||
| 395 | nfs_list_remove_request(req); | 867 | nfs_list_remove_request(req); |
| 396 | nfs_list_add_request(req, &desc->pg_list); | 868 | nfs_list_add_request(req, &desc->pg_list); |
| 397 | desc->pg_count += req->wb_bytes; | 869 | desc->pg_count += req->wb_bytes; |
| @@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) | |||
| 421 | * @desc: destination io descriptor | 893 | * @desc: destination io descriptor |
| 422 | * @req: request | 894 | * @req: request |
| 423 | * | 895 | * |
| 896 | * This may split a request into subrequests which are all part of the | ||
| 897 | * same page group. | ||
| 898 | * | ||
| 424 | * Returns true if the request 'req' was successfully coalesced into the | 899 | * Returns true if the request 'req' was successfully coalesced into the |
| 425 | * existing list of pages 'desc'. | 900 | * existing list of pages 'desc'. |
| 426 | */ | 901 | */ |
| 427 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | 902 | static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, |
| 428 | struct nfs_page *req) | 903 | struct nfs_page *req) |
| 429 | { | 904 | { |
| 430 | while (!nfs_pageio_do_add_request(desc, req)) { | 905 | struct nfs_page *subreq; |
| 431 | desc->pg_moreio = 1; | 906 | unsigned int bytes_left = 0; |
| 432 | nfs_pageio_doio(desc); | 907 | unsigned int offset, pgbase; |
| 433 | if (desc->pg_error < 0) | 908 | |
| 434 | return 0; | 909 | nfs_page_group_lock(req); |
| 435 | desc->pg_moreio = 0; | 910 | |
| 436 | if (desc->pg_recoalesce) | 911 | subreq = req; |
| 437 | return 0; | 912 | bytes_left = subreq->wb_bytes; |
| 438 | } | 913 | offset = subreq->wb_offset; |
| 914 | pgbase = subreq->wb_pgbase; | ||
| 915 | |||
| 916 | do { | ||
| 917 | if (!nfs_pageio_do_add_request(desc, subreq)) { | ||
| 918 | /* make sure pg_test call(s) did nothing */ | ||
| 919 | WARN_ON_ONCE(subreq->wb_bytes != bytes_left); | ||
| 920 | WARN_ON_ONCE(subreq->wb_offset != offset); | ||
| 921 | WARN_ON_ONCE(subreq->wb_pgbase != pgbase); | ||
| 922 | |||
| 923 | nfs_page_group_unlock(req); | ||
| 924 | desc->pg_moreio = 1; | ||
| 925 | nfs_pageio_doio(desc); | ||
| 926 | if (desc->pg_error < 0) | ||
| 927 | return 0; | ||
| 928 | desc->pg_moreio = 0; | ||
| 929 | if (desc->pg_recoalesce) | ||
| 930 | return 0; | ||
| 931 | /* retry add_request for this subreq */ | ||
| 932 | nfs_page_group_lock(req); | ||
| 933 | continue; | ||
| 934 | } | ||
| 935 | |||
| 936 | /* check for buggy pg_test call(s) */ | ||
| 937 | WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); | ||
| 938 | WARN_ON_ONCE(subreq->wb_bytes > bytes_left); | ||
| 939 | WARN_ON_ONCE(subreq->wb_bytes == 0); | ||
| 940 | |||
| 941 | bytes_left -= subreq->wb_bytes; | ||
| 942 | offset += subreq->wb_bytes; | ||
| 943 | pgbase += subreq->wb_bytes; | ||
| 944 | |||
| 945 | if (bytes_left) { | ||
| 946 | subreq = nfs_create_request(req->wb_context, | ||
| 947 | req->wb_page, | ||
| 948 | subreq, pgbase, bytes_left); | ||
| 949 | if (IS_ERR(subreq)) | ||
| 950 | goto err_ptr; | ||
| 951 | nfs_lock_request(subreq); | ||
| 952 | subreq->wb_offset = offset; | ||
| 953 | subreq->wb_index = req->wb_index; | ||
| 954 | } | ||
| 955 | } while (bytes_left > 0); | ||
| 956 | |||
| 957 | nfs_page_group_unlock(req); | ||
| 439 | return 1; | 958 | return 1; |
| 959 | err_ptr: | ||
| 960 | desc->pg_error = PTR_ERR(subreq); | ||
| 961 | nfs_page_group_unlock(req); | ||
| 962 | return 0; | ||
| 440 | } | 963 | } |
| 441 | 964 | ||
| 442 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) | 965 | static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) |
| @@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void) | |||
| 535 | kmem_cache_destroy(nfs_page_cachep); | 1058 | kmem_cache_destroy(nfs_page_cachep); |
| 536 | } | 1059 | } |
| 537 | 1060 | ||
| 1061 | static const struct rpc_call_ops nfs_pgio_common_ops = { | ||
| 1062 | .rpc_call_prepare = nfs_pgio_prepare, | ||
| 1063 | .rpc_call_done = nfs_pgio_result, | ||
| 1064 | .rpc_release = nfs_pgio_release, | ||
| 1065 | }; | ||
| 1066 | |||
| 1067 | const struct nfs_pageio_ops nfs_pgio_rw_ops = { | ||
| 1068 | .pg_test = nfs_generic_pg_test, | ||
| 1069 | .pg_doio = nfs_generic_pg_pgios, | ||
| 1070 | }; | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fd9536e494bc..6fdcd233d6f7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
| @@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r | |||
| 1388 | 1388 | ||
| 1389 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1389 | WARN_ON_ONCE(pgio->pg_lseg != NULL); |
| 1390 | 1390 | ||
| 1391 | if (req->wb_offset != req->wb_pgbase) { | ||
| 1392 | nfs_pageio_reset_read_mds(pgio); | ||
| 1393 | return; | ||
| 1394 | } | ||
| 1395 | |||
| 1396 | if (pgio->pg_dreq == NULL) | 1391 | if (pgio->pg_dreq == NULL) |
| 1397 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); | 1392 | rd_size = i_size_read(pgio->pg_inode) - req_offset(req); |
| 1398 | else | 1393 | else |
| @@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
| 1417 | { | 1412 | { |
| 1418 | WARN_ON_ONCE(pgio->pg_lseg != NULL); | 1413 | WARN_ON_ONCE(pgio->pg_lseg != NULL); |
| 1419 | 1414 | ||
| 1420 | if (req->wb_offset != req->wb_pgbase) { | ||
| 1421 | nfs_pageio_reset_write_mds(pgio); | ||
| 1422 | return; | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | 1415 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, |
| 1426 | req->wb_context, | 1416 | req->wb_context, |
| 1427 | req_offset(req), | 1417 | req_offset(req), |
| @@ -1434,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | |||
| 1434 | } | 1424 | } |
| 1435 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); | 1425 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); |
| 1436 | 1426 | ||
| 1437 | void | 1427 | /* |
| 1438 | pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, | 1428 | * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number |
| 1439 | const struct nfs_pgio_completion_ops *compl_ops) | 1429 | * of bytes (maximum @req->wb_bytes) that can be coalesced. |
| 1440 | { | 1430 | */ |
| 1441 | struct nfs_server *server = NFS_SERVER(inode); | 1431 | size_t |
| 1442 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | ||
| 1443 | |||
| 1444 | if (ld == NULL) | ||
| 1445 | nfs_pageio_init_read(pgio, inode, compl_ops); | ||
| 1446 | else | ||
| 1447 | nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); | ||
| 1448 | } | ||
| 1449 | |||
| 1450 | void | ||
| 1451 | pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, | ||
| 1452 | int ioflags, | ||
| 1453 | const struct nfs_pgio_completion_ops *compl_ops) | ||
| 1454 | { | ||
| 1455 | struct nfs_server *server = NFS_SERVER(inode); | ||
| 1456 | struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; | ||
| 1457 | |||
| 1458 | if (ld == NULL) | ||
| 1459 | nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); | ||
| 1460 | else | ||
| 1461 | nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); | ||
| 1462 | } | ||
| 1463 | |||
| 1464 | bool | ||
| 1465 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | 1432 | pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, |
| 1466 | struct nfs_page *req) | 1433 | struct nfs_page *req) |
| 1467 | { | 1434 | { |
| 1468 | if (pgio->pg_lseg == NULL) | 1435 | unsigned int size; |
| 1469 | return nfs_generic_pg_test(pgio, prev, req); | 1436 | u64 seg_end, req_start, seg_left; |
| 1437 | |||
| 1438 | size = nfs_generic_pg_test(pgio, prev, req); | ||
| 1439 | if (!size) | ||
| 1440 | return 0; | ||
| 1470 | 1441 | ||
| 1471 | /* | 1442 | /* |
| 1472 | * Test if a nfs_page is fully contained in the pnfs_layout_range. | 1443 | * 'size' contains the number of bytes left in the current page (up |
| 1473 | * Note that this test makes several assumptions: | 1444 | * to the original size asked for in @req->wb_bytes). |
| 1474 | * - that the previous nfs_page in the struct nfs_pageio_descriptor | 1445 | * |
| 1475 | * is known to lie within the range. | 1446 | * Calculate how many bytes are left in the layout segment |
| 1476 | * - that the nfs_page being tested is known to be contiguous with the | 1447 | * and if there are less bytes than 'size', return that instead. |
| 1477 | * previous nfs_page. | ||
| 1478 | * - Layout ranges are page aligned, so we only have to test the | ||
| 1479 | * start offset of the request. | ||
| 1480 | * | 1448 | * |
| 1481 | * Please also note that 'end_offset' is actually the offset of the | 1449 | * Please also note that 'end_offset' is actually the offset of the |
| 1482 | * first byte that lies outside the pnfs_layout_range. FIXME? | 1450 | * first byte that lies outside the pnfs_layout_range. FIXME? |
| 1483 | * | 1451 | * |
| 1484 | */ | 1452 | */ |
| 1485 | return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, | 1453 | if (pgio->pg_lseg) { |
| 1486 | pgio->pg_lseg->pls_range.length); | 1454 | seg_end = end_offset(pgio->pg_lseg->pls_range.offset, |
| 1455 | pgio->pg_lseg->pls_range.length); | ||
| 1456 | req_start = req_offset(req); | ||
| 1457 | WARN_ON_ONCE(req_start > seg_end); | ||
| 1458 | /* start of request is past the last byte of this segment */ | ||
| 1459 | if (req_start >= seg_end) | ||
| 1460 | return 0; | ||
| 1461 | |||
| 1462 | /* adjust 'size' iff there are fewer bytes left in the | ||
| 1463 | * segment than what nfs_generic_pg_test returned */ | ||
| 1464 | seg_left = seg_end - req_start; | ||
| 1465 | if (seg_left < size) | ||
| 1466 | size = (unsigned int)seg_left; | ||
| 1467 | } | ||
| 1468 | |||
| 1469 | return size; | ||
| 1487 | } | 1470 | } |
| 1488 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); | 1471 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); |
| 1489 | 1472 | ||
| @@ -1496,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, | |||
| 1496 | LIST_HEAD(failed); | 1479 | LIST_HEAD(failed); |
| 1497 | 1480 | ||
| 1498 | /* Resend all requests through the MDS */ | 1481 | /* Resend all requests through the MDS */ |
| 1499 | nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); | 1482 | nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); |
| 1500 | pgio.pg_dreq = dreq; | 1483 | pgio.pg_dreq = dreq; |
| 1501 | while (!list_empty(head)) { | 1484 | while (!list_empty(head)) { |
| 1502 | struct nfs_page *req = nfs_list_entry(head->next); | 1485 | struct nfs_page *req = nfs_list_entry(head->next); |
| @@ -1519,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, | |||
| 1519 | } | 1502 | } |
| 1520 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); | 1503 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); |
| 1521 | 1504 | ||
| 1522 | static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | 1505 | static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) |
| 1523 | { | 1506 | { |
| 1524 | struct nfs_pgio_header *hdr = data->header; | 1507 | struct nfs_pgio_header *hdr = data->header; |
| 1525 | 1508 | ||
| @@ -1538,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) | |||
| 1538 | /* | 1521 | /* |
| 1539 | * Called by non rpc-based layout drivers | 1522 | * Called by non rpc-based layout drivers |
| 1540 | */ | 1523 | */ |
| 1541 | void pnfs_ld_write_done(struct nfs_write_data *data) | 1524 | void pnfs_ld_write_done(struct nfs_pgio_data *data) |
| 1542 | { | 1525 | { |
| 1543 | struct nfs_pgio_header *hdr = data->header; | 1526 | struct nfs_pgio_header *hdr = data->header; |
| 1544 | 1527 | ||
| @@ -1554,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); | |||
| 1554 | 1537 | ||
| 1555 | static void | 1538 | static void |
| 1556 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | 1539 | pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, |
| 1557 | struct nfs_write_data *data) | 1540 | struct nfs_pgio_data *data) |
| 1558 | { | 1541 | { |
| 1559 | struct nfs_pgio_header *hdr = data->header; | 1542 | struct nfs_pgio_header *hdr = data->header; |
| 1560 | 1543 | ||
| @@ -1563,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | |||
| 1563 | nfs_pageio_reset_write_mds(desc); | 1546 | nfs_pageio_reset_write_mds(desc); |
| 1564 | desc->pg_recoalesce = 1; | 1547 | desc->pg_recoalesce = 1; |
| 1565 | } | 1548 | } |
| 1566 | nfs_writedata_release(data); | 1549 | nfs_pgio_data_release(data); |
| 1567 | } | 1550 | } |
| 1568 | 1551 | ||
| 1569 | static enum pnfs_try_status | 1552 | static enum pnfs_try_status |
| 1570 | pnfs_try_to_write_data(struct nfs_write_data *wdata, | 1553 | pnfs_try_to_write_data(struct nfs_pgio_data *wdata, |
| 1571 | const struct rpc_call_ops *call_ops, | 1554 | const struct rpc_call_ops *call_ops, |
| 1572 | struct pnfs_layout_segment *lseg, | 1555 | struct pnfs_layout_segment *lseg, |
| 1573 | int how) | 1556 | int how) |
| @@ -1589,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, | |||
| 1589 | } | 1572 | } |
| 1590 | 1573 | ||
| 1591 | static void | 1574 | static void |
| 1592 | pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) | 1575 | pnfs_do_write(struct nfs_pageio_descriptor *desc, |
| 1576 | struct nfs_pgio_header *hdr, int how) | ||
| 1593 | { | 1577 | { |
| 1594 | struct nfs_write_data *data; | 1578 | struct nfs_pgio_data *data = hdr->data; |
| 1595 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 1579 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
| 1596 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 1580 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
| 1581 | enum pnfs_try_status trypnfs; | ||
| 1597 | 1582 | ||
| 1598 | desc->pg_lseg = NULL; | 1583 | desc->pg_lseg = NULL; |
| 1599 | while (!list_empty(head)) { | 1584 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); |
| 1600 | enum pnfs_try_status trypnfs; | 1585 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
| 1601 | 1586 | pnfs_write_through_mds(desc, data); | |
| 1602 | data = list_first_entry(head, struct nfs_write_data, list); | ||
| 1603 | list_del_init(&data->list); | ||
| 1604 | |||
| 1605 | trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); | ||
| 1606 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
| 1607 | pnfs_write_through_mds(desc, data); | ||
| 1608 | } | ||
| 1609 | pnfs_put_lseg(lseg); | 1587 | pnfs_put_lseg(lseg); |
| 1610 | } | 1588 | } |
| 1611 | 1589 | ||
| 1612 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) | 1590 | static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) |
| 1613 | { | 1591 | { |
| 1614 | pnfs_put_lseg(hdr->lseg); | 1592 | pnfs_put_lseg(hdr->lseg); |
| 1615 | nfs_writehdr_free(hdr); | 1593 | nfs_rw_header_free(hdr); |
| 1616 | } | 1594 | } |
| 1617 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); | 1595 | EXPORT_SYMBOL_GPL(pnfs_writehdr_free); |
| 1618 | 1596 | ||
| 1619 | int | 1597 | int |
| 1620 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | 1598 | pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) |
| 1621 | { | 1599 | { |
| 1622 | struct nfs_write_header *whdr; | 1600 | struct nfs_rw_header *whdr; |
| 1623 | struct nfs_pgio_header *hdr; | 1601 | struct nfs_pgio_header *hdr; |
| 1624 | int ret; | 1602 | int ret; |
| 1625 | 1603 | ||
| 1626 | whdr = nfs_writehdr_alloc(); | 1604 | whdr = nfs_rw_header_alloc(desc->pg_rw_ops); |
| 1627 | if (!whdr) { | 1605 | if (!whdr) { |
| 1628 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1606 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
| 1629 | pnfs_put_lseg(desc->pg_lseg); | 1607 | pnfs_put_lseg(desc->pg_lseg); |
| @@ -1634,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | |||
| 1634 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); | 1612 | nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); |
| 1635 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 1613 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
| 1636 | atomic_inc(&hdr->refcnt); | 1614 | atomic_inc(&hdr->refcnt); |
| 1637 | ret = nfs_generic_flush(desc, hdr); | 1615 | ret = nfs_generic_pgio(desc, hdr); |
| 1638 | if (ret != 0) { | 1616 | if (ret != 0) { |
| 1639 | pnfs_put_lseg(desc->pg_lseg); | 1617 | pnfs_put_lseg(desc->pg_lseg); |
| 1640 | desc->pg_lseg = NULL; | 1618 | desc->pg_lseg = NULL; |
| 1641 | } else | 1619 | } else |
| 1642 | pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); | 1620 | pnfs_do_write(desc, hdr, desc->pg_ioflags); |
| 1643 | if (atomic_dec_and_test(&hdr->refcnt)) | 1621 | if (atomic_dec_and_test(&hdr->refcnt)) |
| 1644 | hdr->completion_ops->completion(hdr); | 1622 | hdr->completion_ops->completion(hdr); |
| 1645 | return ret; | 1623 | return ret; |
| @@ -1655,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, | |||
| 1655 | LIST_HEAD(failed); | 1633 | LIST_HEAD(failed); |
| 1656 | 1634 | ||
| 1657 | /* Resend all requests through the MDS */ | 1635 | /* Resend all requests through the MDS */ |
| 1658 | nfs_pageio_init_read(&pgio, inode, compl_ops); | 1636 | nfs_pageio_init_read(&pgio, inode, true, compl_ops); |
| 1659 | pgio.pg_dreq = dreq; | 1637 | pgio.pg_dreq = dreq; |
| 1660 | while (!list_empty(head)) { | 1638 | while (!list_empty(head)) { |
| 1661 | struct nfs_page *req = nfs_list_entry(head->next); | 1639 | struct nfs_page *req = nfs_list_entry(head->next); |
| @@ -1674,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, | |||
| 1674 | } | 1652 | } |
| 1675 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); | 1653 | EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); |
| 1676 | 1654 | ||
| 1677 | static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | 1655 | static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) |
| 1678 | { | 1656 | { |
| 1679 | struct nfs_pgio_header *hdr = data->header; | 1657 | struct nfs_pgio_header *hdr = data->header; |
| 1680 | 1658 | ||
| @@ -1693,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) | |||
| 1693 | /* | 1671 | /* |
| 1694 | * Called by non rpc-based layout drivers | 1672 | * Called by non rpc-based layout drivers |
| 1695 | */ | 1673 | */ |
| 1696 | void pnfs_ld_read_done(struct nfs_read_data *data) | 1674 | void pnfs_ld_read_done(struct nfs_pgio_data *data) |
| 1697 | { | 1675 | { |
| 1698 | struct nfs_pgio_header *hdr = data->header; | 1676 | struct nfs_pgio_header *hdr = data->header; |
| 1699 | 1677 | ||
| @@ -1709,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); | |||
| 1709 | 1687 | ||
| 1710 | static void | 1688 | static void |
| 1711 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | 1689 | pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, |
| 1712 | struct nfs_read_data *data) | 1690 | struct nfs_pgio_data *data) |
| 1713 | { | 1691 | { |
| 1714 | struct nfs_pgio_header *hdr = data->header; | 1692 | struct nfs_pgio_header *hdr = data->header; |
| 1715 | 1693 | ||
| @@ -1718,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | |||
| 1718 | nfs_pageio_reset_read_mds(desc); | 1696 | nfs_pageio_reset_read_mds(desc); |
| 1719 | desc->pg_recoalesce = 1; | 1697 | desc->pg_recoalesce = 1; |
| 1720 | } | 1698 | } |
| 1721 | nfs_readdata_release(data); | 1699 | nfs_pgio_data_release(data); |
| 1722 | } | 1700 | } |
| 1723 | 1701 | ||
| 1724 | /* | 1702 | /* |
| 1725 | * Call the appropriate parallel I/O subsystem read function. | 1703 | * Call the appropriate parallel I/O subsystem read function. |
| 1726 | */ | 1704 | */ |
| 1727 | static enum pnfs_try_status | 1705 | static enum pnfs_try_status |
| 1728 | pnfs_try_to_read_data(struct nfs_read_data *rdata, | 1706 | pnfs_try_to_read_data(struct nfs_pgio_data *rdata, |
| 1729 | const struct rpc_call_ops *call_ops, | 1707 | const struct rpc_call_ops *call_ops, |
| 1730 | struct pnfs_layout_segment *lseg) | 1708 | struct pnfs_layout_segment *lseg) |
| 1731 | { | 1709 | { |
| @@ -1747,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
| 1747 | } | 1725 | } |
| 1748 | 1726 | ||
| 1749 | static void | 1727 | static void |
| 1750 | pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) | 1728 | pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) |
| 1751 | { | 1729 | { |
| 1752 | struct nfs_read_data *data; | 1730 | struct nfs_pgio_data *data = hdr->data; |
| 1753 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; | 1731 | const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; |
| 1754 | struct pnfs_layout_segment *lseg = desc->pg_lseg; | 1732 | struct pnfs_layout_segment *lseg = desc->pg_lseg; |
| 1733 | enum pnfs_try_status trypnfs; | ||
| 1755 | 1734 | ||
| 1756 | desc->pg_lseg = NULL; | 1735 | desc->pg_lseg = NULL; |
| 1757 | while (!list_empty(head)) { | 1736 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); |
| 1758 | enum pnfs_try_status trypnfs; | 1737 | if (trypnfs == PNFS_NOT_ATTEMPTED) |
| 1759 | 1738 | pnfs_read_through_mds(desc, data); | |
| 1760 | data = list_first_entry(head, struct nfs_read_data, list); | ||
| 1761 | list_del_init(&data->list); | ||
| 1762 | |||
| 1763 | trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); | ||
| 1764 | if (trypnfs == PNFS_NOT_ATTEMPTED) | ||
| 1765 | pnfs_read_through_mds(desc, data); | ||
| 1766 | } | ||
| 1767 | pnfs_put_lseg(lseg); | 1739 | pnfs_put_lseg(lseg); |
| 1768 | } | 1740 | } |
| 1769 | 1741 | ||
| 1770 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) | 1742 | static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) |
| 1771 | { | 1743 | { |
| 1772 | pnfs_put_lseg(hdr->lseg); | 1744 | pnfs_put_lseg(hdr->lseg); |
| 1773 | nfs_readhdr_free(hdr); | 1745 | nfs_rw_header_free(hdr); |
| 1774 | } | 1746 | } |
| 1775 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); | 1747 | EXPORT_SYMBOL_GPL(pnfs_readhdr_free); |
| 1776 | 1748 | ||
| 1777 | int | 1749 | int |
| 1778 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | 1750 | pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) |
| 1779 | { | 1751 | { |
| 1780 | struct nfs_read_header *rhdr; | 1752 | struct nfs_rw_header *rhdr; |
| 1781 | struct nfs_pgio_header *hdr; | 1753 | struct nfs_pgio_header *hdr; |
| 1782 | int ret; | 1754 | int ret; |
| 1783 | 1755 | ||
| 1784 | rhdr = nfs_readhdr_alloc(); | 1756 | rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); |
| 1785 | if (!rhdr) { | 1757 | if (!rhdr) { |
| 1786 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | 1758 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); |
| 1787 | ret = -ENOMEM; | 1759 | ret = -ENOMEM; |
| @@ -1793,12 +1765,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | |||
| 1793 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); | 1765 | nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); |
| 1794 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); | 1766 | hdr->lseg = pnfs_get_lseg(desc->pg_lseg); |
| 1795 | atomic_inc(&hdr->refcnt); | 1767 | atomic_inc(&hdr->refcnt); |
| 1796 | ret = nfs_generic_pagein(desc, hdr); | 1768 | ret = nfs_generic_pgio(desc, hdr); |
| 1797 | if (ret != 0) { | 1769 | if (ret != 0) { |
| 1798 | pnfs_put_lseg(desc->pg_lseg); | 1770 | pnfs_put_lseg(desc->pg_lseg); |
| 1799 | desc->pg_lseg = NULL; | 1771 | desc->pg_lseg = NULL; |
| 1800 | } else | 1772 | } else |
| 1801 | pnfs_do_multiple_reads(desc, &hdr->rpc_list); | 1773 | pnfs_do_read(desc, hdr); |
| 1802 | if (atomic_dec_and_test(&hdr->refcnt)) | 1774 | if (atomic_dec_and_test(&hdr->refcnt)) |
| 1803 | hdr->completion_ops->completion(hdr); | 1775 | hdr->completion_ops->completion(hdr); |
| 1804 | return ret; | 1776 | return ret; |
| @@ -1848,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | |||
| 1848 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); | 1820 | EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); |
| 1849 | 1821 | ||
| 1850 | void | 1822 | void |
| 1851 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | 1823 | pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) |
| 1852 | { | 1824 | { |
| 1853 | struct nfs_pgio_header *hdr = wdata->header; | 1825 | struct nfs_pgio_header *hdr = wdata->header; |
| 1854 | struct inode *inode = hdr->inode; | 1826 | struct inode *inode = hdr->inode; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c3058a076596..4fb309a2b4c4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
| @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { | |||
| 113 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 113 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
| 114 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 114 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
| 115 | */ | 115 | */ |
| 116 | enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); | 116 | enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); |
| 117 | enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); | 117 | enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); |
| 118 | 118 | ||
| 119 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); | 119 | void (*free_deviceid_node) (struct nfs4_deviceid_node *); |
| 120 | 120 | ||
| @@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); | |||
| 180 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); | 180 | void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); |
| 181 | void pnfs_put_lseg(struct pnfs_layout_segment *lseg); | 181 | void pnfs_put_lseg(struct pnfs_layout_segment *lseg); |
| 182 | 182 | ||
| 183 | void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, | ||
| 184 | const struct nfs_pgio_completion_ops *); | ||
| 185 | void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, | ||
| 186 | int, const struct nfs_pgio_completion_ops *); | ||
| 187 | |||
| 188 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); | 183 | void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); |
| 189 | void unset_pnfs_layoutdriver(struct nfs_server *); | 184 | void unset_pnfs_layoutdriver(struct nfs_server *); |
| 190 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); | 185 | void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); |
| @@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); | |||
| 192 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, | 187 | void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, |
| 193 | struct nfs_page *req, u64 wb_size); | 188 | struct nfs_page *req, u64 wb_size); |
| 194 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); | 189 | int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); |
| 195 | bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); | 190 | size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, |
| 191 | struct nfs_page *prev, struct nfs_page *req); | ||
| 196 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); | 192 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); |
| 197 | struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); | 193 | struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); |
| 198 | void pnfs_free_lseg_list(struct list_head *tmp_list); | 194 | void pnfs_free_lseg_list(struct list_head *tmp_list); |
| @@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino); | |||
| 217 | void pnfs_roc_release(struct inode *ino); | 213 | void pnfs_roc_release(struct inode *ino); |
| 218 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 214 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
| 219 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); | 215 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); |
| 220 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | 216 | void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); |
| 221 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); | 217 | void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); |
| 222 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | 218 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); |
| 223 | int _pnfs_return_layout(struct inode *); | 219 | int _pnfs_return_layout(struct inode *); |
| 224 | int pnfs_commit_and_return_layout(struct inode *); | 220 | int pnfs_commit_and_return_layout(struct inode *); |
| 225 | void pnfs_ld_write_done(struct nfs_write_data *); | 221 | void pnfs_ld_write_done(struct nfs_pgio_data *); |
| 226 | void pnfs_ld_read_done(struct nfs_read_data *); | 222 | void pnfs_ld_read_done(struct nfs_pgio_data *); |
| 227 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | 223 | struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, |
| 228 | struct nfs_open_context *ctx, | 224 | struct nfs_open_context *ctx, |
| 229 | loff_t pos, | 225 | loff_t pos, |
| @@ -461,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | |||
| 461 | { | 457 | { |
| 462 | } | 458 | } |
| 463 | 459 | ||
| 464 | static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, | ||
| 465 | const struct nfs_pgio_completion_ops *compl_ops) | ||
| 466 | { | ||
| 467 | nfs_pageio_init_read(pgio, inode, compl_ops); | ||
| 468 | } | ||
| 469 | |||
| 470 | static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, | ||
| 471 | const struct nfs_pgio_completion_ops *compl_ops) | ||
| 472 | { | ||
| 473 | nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); | ||
| 474 | } | ||
| 475 | |||
| 476 | static inline int | 460 | static inline int |
| 477 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, | 461 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, |
| 478 | struct nfs_commit_info *cinfo) | 462 | struct nfs_commit_info *cinfo) |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e55ce9e8b034..c171ce1a8a30 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 578 | return 0; | 578 | return 0; |
| 579 | } | 579 | } |
| 580 | 580 | ||
| 581 | static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | 581 | static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 582 | { | 582 | { |
| 583 | struct inode *inode = data->header->inode; | 583 | struct inode *inode = data->header->inode; |
| 584 | 584 | ||
| @@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) | |||
| 594 | return 0; | 594 | return 0; |
| 595 | } | 595 | } |
| 596 | 596 | ||
| 597 | static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) | 597 | static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 598 | { | 598 | { |
| 599 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; | 599 | msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; |
| 600 | } | 600 | } |
| 601 | 601 | ||
| 602 | static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) | 602 | static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) |
| 603 | { | 603 | { |
| 604 | rpc_call_start(task); | 604 | rpc_call_start(task); |
| 605 | return 0; | 605 | return 0; |
| 606 | } | 606 | } |
| 607 | 607 | ||
| 608 | static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | 608 | static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) |
| 609 | { | 609 | { |
| 610 | struct inode *inode = data->header->inode; | 610 | struct inode *inode = data->header->inode; |
| 611 | 611 | ||
| @@ -614,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 614 | return 0; | 614 | return 0; |
| 615 | } | 615 | } |
| 616 | 616 | ||
| 617 | static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) | 617 | static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) |
| 618 | { | 618 | { |
| 619 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ | 619 | /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ |
| 620 | data->args.stable = NFS_FILE_SYNC; | 620 | data->args.stable = NFS_FILE_SYNC; |
| 621 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; | 621 | msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; |
| 622 | } | 622 | } |
| 623 | 623 | ||
| 624 | static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) | ||
| 625 | { | ||
| 626 | rpc_call_start(task); | ||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | |||
| 630 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) | 624 | static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) |
| 631 | { | 625 | { |
| 632 | BUG(); | 626 | BUG(); |
| @@ -734,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
| 734 | .fsinfo = nfs_proc_fsinfo, | 728 | .fsinfo = nfs_proc_fsinfo, |
| 735 | .pathconf = nfs_proc_pathconf, | 729 | .pathconf = nfs_proc_pathconf, |
| 736 | .decode_dirent = nfs2_decode_dirent, | 730 | .decode_dirent = nfs2_decode_dirent, |
| 731 | .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare, | ||
| 737 | .read_setup = nfs_proc_read_setup, | 732 | .read_setup = nfs_proc_read_setup, |
| 738 | .read_pageio_init = nfs_pageio_init_read, | ||
| 739 | .read_rpc_prepare = nfs_proc_read_rpc_prepare, | ||
| 740 | .read_done = nfs_read_done, | 733 | .read_done = nfs_read_done, |
| 741 | .write_setup = nfs_proc_write_setup, | 734 | .write_setup = nfs_proc_write_setup, |
| 742 | .write_pageio_init = nfs_pageio_init_write, | ||
| 743 | .write_rpc_prepare = nfs_proc_write_rpc_prepare, | ||
| 744 | .write_done = nfs_write_done, | 735 | .write_done = nfs_write_done, |
| 745 | .commit_setup = nfs_proc_commit_setup, | 736 | .commit_setup = nfs_proc_commit_setup, |
| 746 | .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, | 737 | .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 411aedda14bb..e818a475ca64 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
| @@ -24,85 +24,24 @@ | |||
| 24 | #include "internal.h" | 24 | #include "internal.h" |
| 25 | #include "iostat.h" | 25 | #include "iostat.h" |
| 26 | #include "fscache.h" | 26 | #include "fscache.h" |
| 27 | #include "pnfs.h" | ||
| 27 | 28 | ||
| 28 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
| 29 | 30 | ||
| 30 | static const struct nfs_pageio_ops nfs_pageio_read_ops; | ||
| 31 | static const struct rpc_call_ops nfs_read_common_ops; | ||
| 32 | static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; | 31 | static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; |
| 32 | static const struct nfs_rw_ops nfs_rw_read_ops; | ||
| 33 | 33 | ||
| 34 | static struct kmem_cache *nfs_rdata_cachep; | 34 | static struct kmem_cache *nfs_rdata_cachep; |
| 35 | 35 | ||
| 36 | struct nfs_read_header *nfs_readhdr_alloc(void) | 36 | static struct nfs_rw_header *nfs_readhdr_alloc(void) |
| 37 | { | 37 | { |
| 38 | struct nfs_read_header *rhdr; | 38 | return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); |
| 39 | |||
| 40 | rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); | ||
| 41 | if (rhdr) { | ||
| 42 | struct nfs_pgio_header *hdr = &rhdr->header; | ||
| 43 | |||
| 44 | INIT_LIST_HEAD(&hdr->pages); | ||
| 45 | INIT_LIST_HEAD(&hdr->rpc_list); | ||
| 46 | spin_lock_init(&hdr->lock); | ||
| 47 | atomic_set(&hdr->refcnt, 0); | ||
| 48 | } | ||
| 49 | return rhdr; | ||
| 50 | } | 39 | } |
| 51 | EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); | ||
| 52 | 40 | ||
| 53 | static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, | 41 | static void nfs_readhdr_free(struct nfs_rw_header *rhdr) |
| 54 | unsigned int pagecount) | ||
| 55 | { | 42 | { |
| 56 | struct nfs_read_data *data, *prealloc; | ||
| 57 | |||
| 58 | prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; | ||
| 59 | if (prealloc->header == NULL) | ||
| 60 | data = prealloc; | ||
| 61 | else | ||
| 62 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 63 | if (!data) | ||
| 64 | goto out; | ||
| 65 | |||
| 66 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
| 67 | data->header = hdr; | ||
| 68 | atomic_inc(&hdr->refcnt); | ||
| 69 | } else { | ||
| 70 | if (data != prealloc) | ||
| 71 | kfree(data); | ||
| 72 | data = NULL; | ||
| 73 | } | ||
| 74 | out: | ||
| 75 | return data; | ||
| 76 | } | ||
| 77 | |||
| 78 | void nfs_readhdr_free(struct nfs_pgio_header *hdr) | ||
| 79 | { | ||
| 80 | struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); | ||
| 81 | |||
| 82 | kmem_cache_free(nfs_rdata_cachep, rhdr); | 43 | kmem_cache_free(nfs_rdata_cachep, rhdr); |
| 83 | } | 44 | } |
| 84 | EXPORT_SYMBOL_GPL(nfs_readhdr_free); | ||
| 85 | |||
| 86 | void nfs_readdata_release(struct nfs_read_data *rdata) | ||
| 87 | { | ||
| 88 | struct nfs_pgio_header *hdr = rdata->header; | ||
| 89 | struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); | ||
| 90 | |||
| 91 | put_nfs_open_context(rdata->args.context); | ||
| 92 | if (rdata->pages.pagevec != rdata->pages.page_array) | ||
| 93 | kfree(rdata->pages.pagevec); | ||
| 94 | if (rdata == &read_header->rpc_data) { | ||
| 95 | rdata->header = NULL; | ||
| 96 | rdata = NULL; | ||
| 97 | } | ||
| 98 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 99 | hdr->completion_ops->completion(hdr); | ||
| 100 | /* Note: we only free the rpc_task after callbacks are done. | ||
| 101 | * See the comment in rpc_free_task() for why | ||
| 102 | */ | ||
| 103 | kfree(rdata); | ||
| 104 | } | ||
| 105 | EXPORT_SYMBOL_GPL(nfs_readdata_release); | ||
| 106 | 45 | ||
| 107 | static | 46 | static |
| 108 | int nfs_return_empty_page(struct page *page) | 47 | int nfs_return_empty_page(struct page *page) |
| @@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page) | |||
| 114 | } | 53 | } |
| 115 | 54 | ||
| 116 | void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, | 55 | void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, |
| 117 | struct inode *inode, | 56 | struct inode *inode, bool force_mds, |
| 118 | const struct nfs_pgio_completion_ops *compl_ops) | 57 | const struct nfs_pgio_completion_ops *compl_ops) |
| 119 | { | 58 | { |
| 120 | nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, | 59 | struct nfs_server *server = NFS_SERVER(inode); |
| 121 | NFS_SERVER(inode)->rsize, 0); | 60 | const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; |
| 61 | |||
| 62 | #ifdef CONFIG_NFS_V4_1 | ||
| 63 | if (server->pnfs_curr_ld && !force_mds) | ||
| 64 | pg_ops = server->pnfs_curr_ld->pg_read_ops; | ||
| 65 | #endif | ||
| 66 | nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, | ||
| 67 | server->rsize, 0); | ||
| 122 | } | 68 | } |
| 123 | EXPORT_SYMBOL_GPL(nfs_pageio_init_read); | 69 | EXPORT_SYMBOL_GPL(nfs_pageio_init_read); |
| 124 | 70 | ||
| 125 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) | 71 | void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) |
| 126 | { | 72 | { |
| 127 | pgio->pg_ops = &nfs_pageio_read_ops; | 73 | pgio->pg_ops = &nfs_pgio_rw_ops; |
| 128 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; | 74 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; |
| 129 | } | 75 | } |
| 130 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); | 76 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); |
| @@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
| 139 | len = nfs_page_length(page); | 85 | len = nfs_page_length(page); |
| 140 | if (len == 0) | 86 | if (len == 0) |
| 141 | return nfs_return_empty_page(page); | 87 | return nfs_return_empty_page(page); |
| 142 | new = nfs_create_request(ctx, inode, page, 0, len); | 88 | new = nfs_create_request(ctx, page, NULL, 0, len); |
| 143 | if (IS_ERR(new)) { | 89 | if (IS_ERR(new)) { |
| 144 | unlock_page(page); | 90 | unlock_page(page); |
| 145 | return PTR_ERR(new); | 91 | return PTR_ERR(new); |
| @@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
| 147 | if (len < PAGE_CACHE_SIZE) | 93 | if (len < PAGE_CACHE_SIZE) |
| 148 | zero_user_segment(page, len, PAGE_CACHE_SIZE); | 94 | zero_user_segment(page, len, PAGE_CACHE_SIZE); |
| 149 | 95 | ||
| 150 | NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); | 96 | nfs_pageio_init_read(&pgio, inode, false, |
| 97 | &nfs_async_read_completion_ops); | ||
| 151 | nfs_pageio_add_request(&pgio, new); | 98 | nfs_pageio_add_request(&pgio, new); |
| 152 | nfs_pageio_complete(&pgio); | 99 | nfs_pageio_complete(&pgio); |
| 153 | NFS_I(inode)->read_io += pgio.pg_bytes_written; | 100 | NFS_I(inode)->read_io += pgio.pg_bytes_written; |
| @@ -158,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
| 158 | { | 105 | { |
| 159 | struct inode *d_inode = req->wb_context->dentry->d_inode; | 106 | struct inode *d_inode = req->wb_context->dentry->d_inode; |
| 160 | 107 | ||
| 161 | if (PageUptodate(req->wb_page)) | 108 | dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, |
| 162 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | 109 | (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, |
| 110 | (long long)req_offset(req)); | ||
| 163 | 111 | ||
| 164 | unlock_page(req->wb_page); | 112 | if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { |
| 113 | if (PageUptodate(req->wb_page)) | ||
| 114 | nfs_readpage_to_fscache(d_inode, req->wb_page, 0); | ||
| 115 | |||
| 116 | unlock_page(req->wb_page); | ||
| 117 | } | ||
| 165 | 118 | ||
| 166 | dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", | 119 | dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", |
| 167 | req->wb_context->dentry->d_inode->i_sb->s_id, | 120 | req->wb_context->dentry->d_inode->i_sb->s_id, |
| @@ -171,7 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req) | |||
| 171 | nfs_release_request(req); | 124 | nfs_release_request(req); |
| 172 | } | 125 | } |
| 173 | 126 | ||
| 174 | /* Note io was page aligned */ | 127 | static void nfs_page_group_set_uptodate(struct nfs_page *req) |
| 128 | { | ||
| 129 | if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) | ||
| 130 | SetPageUptodate(req->wb_page); | ||
| 131 | } | ||
| 132 | |||
| 175 | static void nfs_read_completion(struct nfs_pgio_header *hdr) | 133 | static void nfs_read_completion(struct nfs_pgio_header *hdr) |
| 176 | { | 134 | { |
| 177 | unsigned long bytes = 0; | 135 | unsigned long bytes = 0; |
| @@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) | |||
| 181 | while (!list_empty(&hdr->pages)) { | 139 | while (!list_empty(&hdr->pages)) { |
| 182 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); | 140 | struct nfs_page *req = nfs_list_entry(hdr->pages.next); |
| 183 | struct page *page = req->wb_page; | 141 | struct page *page = req->wb_page; |
| 142 | unsigned long start = req->wb_pgbase; | ||
| 143 | unsigned long end = req->wb_pgbase + req->wb_bytes; | ||
| 184 | 144 | ||
| 185 | if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { | 145 | if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { |
| 186 | if (bytes > hdr->good_bytes) | 146 | /* note: regions of the page not covered by a |
| 187 | zero_user(page, 0, PAGE_SIZE); | 147 | * request are zeroed in nfs_readpage_async / |
| 188 | else if (hdr->good_bytes - bytes < PAGE_SIZE) | 148 | * readpage_async_filler */ |
| 189 | zero_user_segment(page, | 149 | if (bytes > hdr->good_bytes) { |
| 190 | hdr->good_bytes & ~PAGE_MASK, | 150 | /* nothing in this request was good, so zero |
| 191 | PAGE_SIZE); | 151 | * the full extent of the request */ |
| 152 | zero_user_segment(page, start, end); | ||
| 153 | |||
| 154 | } else if (hdr->good_bytes - bytes < req->wb_bytes) { | ||
| 155 | /* part of this request has good bytes, but | ||
| 156 | * not all. zero the bad bytes */ | ||
| 157 | start += hdr->good_bytes - bytes; | ||
| 158 | WARN_ON(start < req->wb_pgbase); | ||
| 159 | zero_user_segment(page, start, end); | ||
| 160 | } | ||
| 192 | } | 161 | } |
| 193 | bytes += req->wb_bytes; | 162 | bytes += req->wb_bytes; |
| 194 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { | 163 | if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { |
| 195 | if (bytes <= hdr->good_bytes) | 164 | if (bytes <= hdr->good_bytes) |
| 196 | SetPageUptodate(page); | 165 | nfs_page_group_set_uptodate(req); |
| 197 | } else | 166 | } else |
| 198 | SetPageUptodate(page); | 167 | nfs_page_group_set_uptodate(req); |
| 199 | nfs_list_remove_request(req); | 168 | nfs_list_remove_request(req); |
| 200 | nfs_readpage_release(req); | 169 | nfs_readpage_release(req); |
| 201 | } | 170 | } |
| @@ -203,95 +172,14 @@ out: | |||
| 203 | hdr->release(hdr); | 172 | hdr->release(hdr); |
| 204 | } | 173 | } |
| 205 | 174 | ||
| 206 | int nfs_initiate_read(struct rpc_clnt *clnt, | 175 | static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, |
| 207 | struct nfs_read_data *data, | 176 | struct rpc_task_setup *task_setup_data, int how) |
| 208 | const struct rpc_call_ops *call_ops, int flags) | ||
| 209 | { | 177 | { |
| 210 | struct inode *inode = data->header->inode; | 178 | struct inode *inode = data->header->inode; |
| 211 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; | 179 | int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; |
| 212 | struct rpc_task *task; | ||
| 213 | struct rpc_message msg = { | ||
| 214 | .rpc_argp = &data->args, | ||
| 215 | .rpc_resp = &data->res, | ||
| 216 | .rpc_cred = data->header->cred, | ||
| 217 | }; | ||
| 218 | struct rpc_task_setup task_setup_data = { | ||
| 219 | .task = &data->task, | ||
| 220 | .rpc_client = clnt, | ||
| 221 | .rpc_message = &msg, | ||
| 222 | .callback_ops = call_ops, | ||
| 223 | .callback_data = data, | ||
| 224 | .workqueue = nfsiod_workqueue, | ||
| 225 | .flags = RPC_TASK_ASYNC | swap_flags | flags, | ||
| 226 | }; | ||
| 227 | 180 | ||
| 228 | /* Set up the initial task struct. */ | 181 | task_setup_data->flags |= swap_flags; |
| 229 | NFS_PROTO(inode)->read_setup(data, &msg); | 182 | NFS_PROTO(inode)->read_setup(data, msg); |
| 230 | |||
| 231 | dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ " | ||
| 232 | "offset %llu)\n", | ||
| 233 | data->task.tk_pid, | ||
| 234 | inode->i_sb->s_id, | ||
| 235 | (unsigned long long)NFS_FILEID(inode), | ||
| 236 | data->args.count, | ||
| 237 | (unsigned long long)data->args.offset); | ||
| 238 | |||
| 239 | task = rpc_run_task(&task_setup_data); | ||
| 240 | if (IS_ERR(task)) | ||
| 241 | return PTR_ERR(task); | ||
| 242 | rpc_put_task(task); | ||
| 243 | return 0; | ||
| 244 | } | ||
| 245 | EXPORT_SYMBOL_GPL(nfs_initiate_read); | ||
| 246 | |||
| 247 | /* | ||
| 248 | * Set up the NFS read request struct | ||
| 249 | */ | ||
| 250 | static void nfs_read_rpcsetup(struct nfs_read_data *data, | ||
| 251 | unsigned int count, unsigned int offset) | ||
| 252 | { | ||
| 253 | struct nfs_page *req = data->header->req; | ||
| 254 | |||
| 255 | data->args.fh = NFS_FH(data->header->inode); | ||
| 256 | data->args.offset = req_offset(req) + offset; | ||
| 257 | data->args.pgbase = req->wb_pgbase + offset; | ||
| 258 | data->args.pages = data->pages.pagevec; | ||
| 259 | data->args.count = count; | ||
| 260 | data->args.context = get_nfs_open_context(req->wb_context); | ||
| 261 | data->args.lock_context = req->wb_lock_context; | ||
| 262 | |||
| 263 | data->res.fattr = &data->fattr; | ||
| 264 | data->res.count = count; | ||
| 265 | data->res.eof = 0; | ||
| 266 | nfs_fattr_init(&data->fattr); | ||
| 267 | } | ||
| 268 | |||
| 269 | static int nfs_do_read(struct nfs_read_data *data, | ||
| 270 | const struct rpc_call_ops *call_ops) | ||
| 271 | { | ||
| 272 | struct inode *inode = data->header->inode; | ||
| 273 | |||
| 274 | return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); | ||
| 275 | } | ||
| 276 | |||
| 277 | static int | ||
| 278 | nfs_do_multiple_reads(struct list_head *head, | ||
| 279 | const struct rpc_call_ops *call_ops) | ||
| 280 | { | ||
| 281 | struct nfs_read_data *data; | ||
| 282 | int ret = 0; | ||
| 283 | |||
| 284 | while (!list_empty(head)) { | ||
| 285 | int ret2; | ||
| 286 | |||
| 287 | data = list_first_entry(head, struct nfs_read_data, list); | ||
| 288 | list_del_init(&data->list); | ||
| 289 | |||
| 290 | ret2 = nfs_do_read(data, call_ops); | ||
| 291 | if (ret == 0) | ||
| 292 | ret = ret2; | ||
| 293 | } | ||
| 294 | return ret; | ||
| 295 | } | 183 | } |
| 296 | 184 | ||
| 297 | static void | 185 | static void |
| @@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { | |||
| 311 | .completion = nfs_read_completion, | 199 | .completion = nfs_read_completion, |
| 312 | }; | 200 | }; |
| 313 | 201 | ||
| 314 | static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, | ||
| 315 | struct nfs_pgio_header *hdr) | ||
| 316 | { | ||
| 317 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
| 318 | while (!list_empty(&hdr->rpc_list)) { | ||
| 319 | struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, | ||
| 320 | struct nfs_read_data, list); | ||
| 321 | list_del(&data->list); | ||
| 322 | nfs_readdata_release(data); | ||
| 323 | } | ||
| 324 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 325 | } | ||
| 326 | |||
| 327 | /* | ||
| 328 | * Generate multiple requests to fill a single page. | ||
| 329 | * | ||
| 330 | * We optimize to reduce the number of read operations on the wire. If we | ||
| 331 | * detect that we're reading a page, or an area of a page, that is past the | ||
| 332 | * end of file, we do not generate NFS read operations but just clear the | ||
| 333 | * parts of the page that would have come back zero from the server anyway. | ||
| 334 | * | ||
| 335 | * We rely on the cached value of i_size to make this determination; another | ||
| 336 | * client can fill pages on the server past our cached end-of-file, but we | ||
| 337 | * won't see the new data until our attribute cache is updated. This is more | ||
| 338 | * or less conventional NFS client behavior. | ||
| 339 | */ | ||
| 340 | static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, | ||
| 341 | struct nfs_pgio_header *hdr) | ||
| 342 | { | ||
| 343 | struct nfs_page *req = hdr->req; | ||
| 344 | struct page *page = req->wb_page; | ||
| 345 | struct nfs_read_data *data; | ||
| 346 | size_t rsize = desc->pg_bsize, nbytes; | ||
| 347 | unsigned int offset; | ||
| 348 | |||
| 349 | offset = 0; | ||
| 350 | nbytes = desc->pg_count; | ||
| 351 | do { | ||
| 352 | size_t len = min(nbytes,rsize); | ||
| 353 | |||
| 354 | data = nfs_readdata_alloc(hdr, 1); | ||
| 355 | if (!data) { | ||
| 356 | nfs_pagein_error(desc, hdr); | ||
| 357 | return -ENOMEM; | ||
| 358 | } | ||
| 359 | data->pages.pagevec[0] = page; | ||
| 360 | nfs_read_rpcsetup(data, len, offset); | ||
| 361 | list_add(&data->list, &hdr->rpc_list); | ||
| 362 | nbytes -= len; | ||
| 363 | offset += len; | ||
| 364 | } while (nbytes != 0); | ||
| 365 | |||
| 366 | nfs_list_remove_request(req); | ||
| 367 | nfs_list_add_request(req, &hdr->pages); | ||
| 368 | desc->pg_rpc_callops = &nfs_read_common_ops; | ||
| 369 | return 0; | ||
| 370 | } | ||
| 371 | |||
| 372 | static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, | ||
| 373 | struct nfs_pgio_header *hdr) | ||
| 374 | { | ||
| 375 | struct nfs_page *req; | ||
| 376 | struct page **pages; | ||
| 377 | struct nfs_read_data *data; | ||
| 378 | struct list_head *head = &desc->pg_list; | ||
| 379 | |||
| 380 | data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
| 381 | desc->pg_count)); | ||
| 382 | if (!data) { | ||
| 383 | nfs_pagein_error(desc, hdr); | ||
| 384 | return -ENOMEM; | ||
| 385 | } | ||
| 386 | |||
| 387 | pages = data->pages.pagevec; | ||
| 388 | while (!list_empty(head)) { | ||
| 389 | req = nfs_list_entry(head->next); | ||
| 390 | nfs_list_remove_request(req); | ||
| 391 | nfs_list_add_request(req, &hdr->pages); | ||
| 392 | *pages++ = req->wb_page; | ||
| 393 | } | ||
| 394 | |||
| 395 | nfs_read_rpcsetup(data, desc->pg_count, 0); | ||
| 396 | list_add(&data->list, &hdr->rpc_list); | ||
| 397 | desc->pg_rpc_callops = &nfs_read_common_ops; | ||
| 398 | return 0; | ||
| 399 | } | ||
| 400 | |||
| 401 | int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, | ||
| 402 | struct nfs_pgio_header *hdr) | ||
| 403 | { | ||
| 404 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
| 405 | return nfs_pagein_multi(desc, hdr); | ||
| 406 | return nfs_pagein_one(desc, hdr); | ||
| 407 | } | ||
| 408 | EXPORT_SYMBOL_GPL(nfs_generic_pagein); | ||
| 409 | |||
| 410 | static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | ||
| 411 | { | ||
| 412 | struct nfs_read_header *rhdr; | ||
| 413 | struct nfs_pgio_header *hdr; | ||
| 414 | int ret; | ||
| 415 | |||
| 416 | rhdr = nfs_readhdr_alloc(); | ||
| 417 | if (!rhdr) { | ||
| 418 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 419 | return -ENOMEM; | ||
| 420 | } | ||
| 421 | hdr = &rhdr->header; | ||
| 422 | nfs_pgheader_init(desc, hdr, nfs_readhdr_free); | ||
| 423 | atomic_inc(&hdr->refcnt); | ||
| 424 | ret = nfs_generic_pagein(desc, hdr); | ||
| 425 | if (ret == 0) | ||
| 426 | ret = nfs_do_multiple_reads(&hdr->rpc_list, | ||
| 427 | desc->pg_rpc_callops); | ||
| 428 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 429 | hdr->completion_ops->completion(hdr); | ||
| 430 | return ret; | ||
| 431 | } | ||
| 432 | |||
| 433 | static const struct nfs_pageio_ops nfs_pageio_read_ops = { | ||
| 434 | .pg_test = nfs_generic_pg_test, | ||
| 435 | .pg_doio = nfs_generic_pg_readpages, | ||
| 436 | }; | ||
| 437 | |||
| 438 | /* | 202 | /* |
| 439 | * This is the callback from RPC telling us whether a reply was | 203 | * This is the callback from RPC telling us whether a reply was |
| 440 | * received or some error occurred (timeout or socket shutdown). | 204 | * received or some error occurred (timeout or socket shutdown). |
| 441 | */ | 205 | */ |
| 442 | int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | 206 | static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, |
| 207 | struct inode *inode) | ||
| 443 | { | 208 | { |
| 444 | struct inode *inode = data->header->inode; | 209 | int status = NFS_PROTO(inode)->read_done(task, data); |
| 445 | int status; | ||
| 446 | |||
| 447 | dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, | ||
| 448 | task->tk_status); | ||
| 449 | |||
| 450 | status = NFS_PROTO(inode)->read_done(task, data); | ||
| 451 | if (status != 0) | 210 | if (status != 0) |
| 452 | return status; | 211 | return status; |
| 453 | 212 | ||
| @@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) | |||
| 460 | return 0; | 219 | return 0; |
| 461 | } | 220 | } |
| 462 | 221 | ||
| 463 | static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) | 222 | static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) |
| 464 | { | 223 | { |
| 465 | struct nfs_readargs *argp = &data->args; | 224 | struct nfs_pgio_args *argp = &data->args; |
| 466 | struct nfs_readres *resp = &data->res; | 225 | struct nfs_pgio_res *resp = &data->res; |
| 467 | 226 | ||
| 468 | /* This is a short read! */ | 227 | /* This is a short read! */ |
| 469 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); | 228 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); |
| @@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data | |||
| 480 | rpc_restart_call_prepare(task); | 239 | rpc_restart_call_prepare(task); |
| 481 | } | 240 | } |
| 482 | 241 | ||
| 483 | static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) | 242 | static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) |
| 484 | { | 243 | { |
| 485 | struct nfs_read_data *data = calldata; | ||
| 486 | struct nfs_pgio_header *hdr = data->header; | 244 | struct nfs_pgio_header *hdr = data->header; |
| 487 | 245 | ||
| 488 | /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ | 246 | if (data->res.eof) { |
| 489 | if (nfs_readpage_result(task, data) != 0) | ||
| 490 | return; | ||
| 491 | if (task->tk_status < 0) | ||
| 492 | nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); | ||
| 493 | else if (data->res.eof) { | ||
| 494 | loff_t bound; | 247 | loff_t bound; |
| 495 | 248 | ||
| 496 | bound = data->args.offset + data->res.count; | 249 | bound = data->args.offset + data->res.count; |
| @@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) | |||
| 505 | nfs_readpage_retry(task, data); | 258 | nfs_readpage_retry(task, data); |
| 506 | } | 259 | } |
| 507 | 260 | ||
| 508 | static void nfs_readpage_release_common(void *calldata) | ||
| 509 | { | ||
| 510 | nfs_readdata_release(calldata); | ||
| 511 | } | ||
| 512 | |||
| 513 | void nfs_read_prepare(struct rpc_task *task, void *calldata) | ||
| 514 | { | ||
| 515 | struct nfs_read_data *data = calldata; | ||
| 516 | int err; | ||
| 517 | err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); | ||
| 518 | if (err) | ||
| 519 | rpc_exit(task, err); | ||
| 520 | } | ||
| 521 | |||
| 522 | static const struct rpc_call_ops nfs_read_common_ops = { | ||
| 523 | .rpc_call_prepare = nfs_read_prepare, | ||
| 524 | .rpc_call_done = nfs_readpage_result_common, | ||
| 525 | .rpc_release = nfs_readpage_release_common, | ||
| 526 | }; | ||
| 527 | |||
| 528 | /* | 261 | /* |
| 529 | * Read a page over NFS. | 262 | * Read a page over NFS. |
| 530 | * We read the page synchronously in the following case: | 263 | * We read the page synchronously in the following case: |
| @@ -592,7 +325,6 @@ static int | |||
| 592 | readpage_async_filler(void *data, struct page *page) | 325 | readpage_async_filler(void *data, struct page *page) |
| 593 | { | 326 | { |
| 594 | struct nfs_readdesc *desc = (struct nfs_readdesc *)data; | 327 | struct nfs_readdesc *desc = (struct nfs_readdesc *)data; |
| 595 | struct inode *inode = page_file_mapping(page)->host; | ||
| 596 | struct nfs_page *new; | 328 | struct nfs_page *new; |
| 597 | unsigned int len; | 329 | unsigned int len; |
| 598 | int error; | 330 | int error; |
| @@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page) | |||
| 601 | if (len == 0) | 333 | if (len == 0) |
| 602 | return nfs_return_empty_page(page); | 334 | return nfs_return_empty_page(page); |
| 603 | 335 | ||
| 604 | new = nfs_create_request(desc->ctx, inode, page, 0, len); | 336 | new = nfs_create_request(desc->ctx, page, NULL, 0, len); |
| 605 | if (IS_ERR(new)) | 337 | if (IS_ERR(new)) |
| 606 | goto out_error; | 338 | goto out_error; |
| 607 | 339 | ||
| @@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
| 654 | if (ret == 0) | 386 | if (ret == 0) |
| 655 | goto read_complete; /* all pages were read */ | 387 | goto read_complete; /* all pages were read */ |
| 656 | 388 | ||
| 657 | NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); | 389 | nfs_pageio_init_read(&pgio, inode, false, |
| 390 | &nfs_async_read_completion_ops); | ||
| 658 | 391 | ||
| 659 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); | 392 | ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); |
| 660 | 393 | ||
| @@ -671,7 +404,7 @@ out: | |||
| 671 | int __init nfs_init_readpagecache(void) | 404 | int __init nfs_init_readpagecache(void) |
| 672 | { | 405 | { |
| 673 | nfs_rdata_cachep = kmem_cache_create("nfs_read_data", | 406 | nfs_rdata_cachep = kmem_cache_create("nfs_read_data", |
| 674 | sizeof(struct nfs_read_header), | 407 | sizeof(struct nfs_rw_header), |
| 675 | 0, SLAB_HWCACHE_ALIGN, | 408 | 0, SLAB_HWCACHE_ALIGN, |
| 676 | NULL); | 409 | NULL); |
| 677 | if (nfs_rdata_cachep == NULL) | 410 | if (nfs_rdata_cachep == NULL) |
| @@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void) | |||
| 684 | { | 417 | { |
| 685 | kmem_cache_destroy(nfs_rdata_cachep); | 418 | kmem_cache_destroy(nfs_rdata_cachep); |
| 686 | } | 419 | } |
| 420 | |||
| 421 | static const struct nfs_rw_ops nfs_rw_read_ops = { | ||
| 422 | .rw_mode = FMODE_READ, | ||
| 423 | .rw_alloc_header = nfs_readhdr_alloc, | ||
| 424 | .rw_free_header = nfs_readhdr_free, | ||
| 425 | .rw_done = nfs_readpage_done, | ||
| 426 | .rw_result = nfs_readpage_result, | ||
| 427 | .rw_initiate = nfs_initiate_read, | ||
| 428 | }; | ||
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2cb56943e232..084af1060d79 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -2180,11 +2180,23 @@ out_no_address: | |||
| 2180 | return -EINVAL; | 2180 | return -EINVAL; |
| 2181 | } | 2181 | } |
| 2182 | 2182 | ||
| 2183 | #define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | ||
| 2184 | | NFS_MOUNT_SECURE \ | ||
| 2185 | | NFS_MOUNT_TCP \ | ||
| 2186 | | NFS_MOUNT_VER3 \ | ||
| 2187 | | NFS_MOUNT_KERBEROS \ | ||
| 2188 | | NFS_MOUNT_NONLM \ | ||
| 2189 | | NFS_MOUNT_BROKEN_SUID \ | ||
| 2190 | | NFS_MOUNT_STRICTLOCK \ | ||
| 2191 | | NFS_MOUNT_UNSHARED \ | ||
| 2192 | | NFS_MOUNT_NORESVPORT \ | ||
| 2193 | | NFS_MOUNT_LEGACY_INTERFACE) | ||
| 2194 | |||
| 2183 | static int | 2195 | static int |
| 2184 | nfs_compare_remount_data(struct nfs_server *nfss, | 2196 | nfs_compare_remount_data(struct nfs_server *nfss, |
| 2185 | struct nfs_parsed_mount_data *data) | 2197 | struct nfs_parsed_mount_data *data) |
| 2186 | { | 2198 | { |
| 2187 | if (data->flags != nfss->flags || | 2199 | if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || |
| 2188 | data->rsize != nfss->rsize || | 2200 | data->rsize != nfss->rsize || |
| 2189 | data->wsize != nfss->wsize || | 2201 | data->wsize != nfss->wsize || |
| 2190 | data->version != nfss->nfs_client->rpc_ops->version || | 2202 | data->version != nfss->nfs_client->rpc_ops->version || |
| @@ -2248,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
| 2248 | data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; | 2260 | data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; |
| 2249 | data->version = nfsvers; | 2261 | data->version = nfsvers; |
| 2250 | data->minorversion = nfss->nfs_client->cl_minorversion; | 2262 | data->minorversion = nfss->nfs_client->cl_minorversion; |
| 2263 | data->net = current->nsproxy->net_ns; | ||
| 2251 | memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, | 2264 | memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, |
| 2252 | data->nfs_server.addrlen); | 2265 | data->nfs_server.addrlen); |
| 2253 | 2266 | ||
| @@ -2347,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) | |||
| 2347 | nfs_initialise_sb(sb); | 2360 | nfs_initialise_sb(sb); |
| 2348 | } | 2361 | } |
| 2349 | 2362 | ||
| 2350 | #define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | ||
| 2351 | | NFS_MOUNT_SECURE \ | ||
| 2352 | | NFS_MOUNT_TCP \ | ||
| 2353 | | NFS_MOUNT_VER3 \ | ||
| 2354 | | NFS_MOUNT_KERBEROS \ | ||
| 2355 | | NFS_MOUNT_NONLM \ | ||
| 2356 | | NFS_MOUNT_BROKEN_SUID \ | ||
| 2357 | | NFS_MOUNT_STRICTLOCK \ | ||
| 2358 | | NFS_MOUNT_UNSHARED \ | ||
| 2359 | | NFS_MOUNT_NORESVPORT \ | ||
| 2360 | | NFS_MOUNT_LEGACY_INTERFACE) | ||
| 2361 | |||
| 2362 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) | 2363 | static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) |
| 2363 | { | 2364 | { |
| 2364 | const struct nfs_server *a = s->s_fs_info; | 2365 | const struct nfs_server *a = s->s_fs_info; |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ffb9459f180b..3ee5af4e738e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -42,10 +42,10 @@ | |||
| 42 | * Local function declarations | 42 | * Local function declarations |
| 43 | */ | 43 | */ |
| 44 | static void nfs_redirty_request(struct nfs_page *req); | 44 | static void nfs_redirty_request(struct nfs_page *req); |
| 45 | static const struct rpc_call_ops nfs_write_common_ops; | ||
| 46 | static const struct rpc_call_ops nfs_commit_ops; | 45 | static const struct rpc_call_ops nfs_commit_ops; |
| 47 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; | 46 | static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; |
| 48 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; | 47 | static const struct nfs_commit_completion_ops nfs_commit_completion_ops; |
| 48 | static const struct nfs_rw_ops nfs_rw_write_ops; | ||
| 49 | 49 | ||
| 50 | static struct kmem_cache *nfs_wdata_cachep; | 50 | static struct kmem_cache *nfs_wdata_cachep; |
| 51 | static mempool_t *nfs_wdata_mempool; | 51 | static mempool_t *nfs_wdata_mempool; |
| @@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p) | |||
| 70 | } | 70 | } |
| 71 | EXPORT_SYMBOL_GPL(nfs_commit_free); | 71 | EXPORT_SYMBOL_GPL(nfs_commit_free); |
| 72 | 72 | ||
| 73 | struct nfs_write_header *nfs_writehdr_alloc(void) | 73 | static struct nfs_rw_header *nfs_writehdr_alloc(void) |
| 74 | { | 74 | { |
| 75 | struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); | 75 | struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); |
| 76 | |||
| 77 | if (p) { | ||
| 78 | struct nfs_pgio_header *hdr = &p->header; | ||
| 79 | 76 | ||
| 77 | if (p) | ||
| 80 | memset(p, 0, sizeof(*p)); | 78 | memset(p, 0, sizeof(*p)); |
| 81 | INIT_LIST_HEAD(&hdr->pages); | ||
| 82 | INIT_LIST_HEAD(&hdr->rpc_list); | ||
| 83 | spin_lock_init(&hdr->lock); | ||
| 84 | atomic_set(&hdr->refcnt, 0); | ||
| 85 | hdr->verf = &p->verf; | ||
| 86 | } | ||
| 87 | return p; | 79 | return p; |
| 88 | } | 80 | } |
| 89 | EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); | ||
| 90 | |||
| 91 | static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, | ||
| 92 | unsigned int pagecount) | ||
| 93 | { | ||
| 94 | struct nfs_write_data *data, *prealloc; | ||
| 95 | |||
| 96 | prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; | ||
| 97 | if (prealloc->header == NULL) | ||
| 98 | data = prealloc; | ||
| 99 | else | ||
| 100 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
| 101 | if (!data) | ||
| 102 | goto out; | ||
| 103 | |||
| 104 | if (nfs_pgarray_set(&data->pages, pagecount)) { | ||
| 105 | data->header = hdr; | ||
| 106 | atomic_inc(&hdr->refcnt); | ||
| 107 | } else { | ||
| 108 | if (data != prealloc) | ||
| 109 | kfree(data); | ||
| 110 | data = NULL; | ||
| 111 | } | ||
| 112 | out: | ||
| 113 | return data; | ||
| 114 | } | ||
| 115 | 81 | ||
| 116 | void nfs_writehdr_free(struct nfs_pgio_header *hdr) | 82 | static void nfs_writehdr_free(struct nfs_rw_header *whdr) |
| 117 | { | 83 | { |
| 118 | struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); | ||
| 119 | mempool_free(whdr, nfs_wdata_mempool); | 84 | mempool_free(whdr, nfs_wdata_mempool); |
| 120 | } | 85 | } |
| 121 | EXPORT_SYMBOL_GPL(nfs_writehdr_free); | ||
| 122 | |||
| 123 | void nfs_writedata_release(struct nfs_write_data *wdata) | ||
| 124 | { | ||
| 125 | struct nfs_pgio_header *hdr = wdata->header; | ||
| 126 | struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); | ||
| 127 | |||
| 128 | put_nfs_open_context(wdata->args.context); | ||
| 129 | if (wdata->pages.pagevec != wdata->pages.page_array) | ||
| 130 | kfree(wdata->pages.pagevec); | ||
| 131 | if (wdata == &write_header->rpc_data) { | ||
| 132 | wdata->header = NULL; | ||
| 133 | wdata = NULL; | ||
| 134 | } | ||
| 135 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 136 | hdr->completion_ops->completion(hdr); | ||
| 137 | /* Note: we only free the rpc_task after callbacks are done. | ||
| 138 | * See the comment in rpc_free_task() for why | ||
| 139 | */ | ||
| 140 | kfree(wdata); | ||
| 141 | } | ||
| 142 | EXPORT_SYMBOL_GPL(nfs_writedata_release); | ||
| 143 | 86 | ||
| 144 | static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) | 87 | static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) |
| 145 | { | 88 | { |
| @@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page) | |||
| 211 | nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); | 154 | nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); |
| 212 | } | 155 | } |
| 213 | 156 | ||
| 157 | /* | ||
| 158 | * nfs_page_group_search_locked | ||
| 159 | * @head - head request of page group | ||
| 160 | * @page_offset - offset into page | ||
| 161 | * | ||
| 162 | * Search page group with head @head to find a request that contains the | ||
| 163 | * page offset @page_offset. | ||
| 164 | * | ||
| 165 | * Returns a pointer to the first matching nfs request, or NULL if no | ||
| 166 | * match is found. | ||
| 167 | * | ||
| 168 | * Must be called with the page group lock held | ||
| 169 | */ | ||
| 170 | static struct nfs_page * | ||
| 171 | nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) | ||
| 172 | { | ||
| 173 | struct nfs_page *req; | ||
| 174 | |||
| 175 | WARN_ON_ONCE(head != head->wb_head); | ||
| 176 | WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); | ||
| 177 | |||
| 178 | req = head; | ||
| 179 | do { | ||
| 180 | if (page_offset >= req->wb_pgbase && | ||
| 181 | page_offset < (req->wb_pgbase + req->wb_bytes)) | ||
| 182 | return req; | ||
| 183 | |||
| 184 | req = req->wb_this_page; | ||
| 185 | } while (req != head); | ||
| 186 | |||
| 187 | return NULL; | ||
| 188 | } | ||
| 189 | |||
| 190 | /* | ||
| 191 | * nfs_page_group_covers_page | ||
| 192 | * @req - a request belonging to the page group | ||
| 193 | * | ||
| 194 | * Return true if the page group that @req belongs to (searched from | ||
| 195 | * @req->wb_head) covers the whole page, return false otherwise | ||
| 196 | */ | ||
| 197 | static bool nfs_page_group_covers_page(struct nfs_page *req) | ||
| 198 | { | ||
| 199 | struct nfs_page *tmp; | ||
| 200 | unsigned int pos = 0; | ||
| 201 | unsigned int len = nfs_page_length(req->wb_page); | ||
| 202 | |||
| 203 | nfs_page_group_lock(req); | ||
| 204 | |||
| 205 | do { | ||
| 206 | tmp = nfs_page_group_search_locked(req->wb_head, pos); | ||
| 207 | if (tmp) { | ||
| 208 | /* no way this should happen */ | ||
| 209 | WARN_ON_ONCE(tmp->wb_pgbase != pos); | ||
| 210 | pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); | ||
| 211 | } | ||
| 212 | } while (tmp && pos < len); | ||
| 213 | |||
| 214 | nfs_page_group_unlock(req); | ||
| 215 | WARN_ON_ONCE(pos > len); | ||
| 216 | return pos == len; | ||
| 217 | } | ||
| 218 | |||
| 214 | /* We can set the PG_uptodate flag if we see that a write request | 219 | /* We can set the PG_uptodate flag if we see that a write request |
| 215 | * covers the full page. | 220 | * covers the full page. |
| 216 | */ | 221 | */ |
| 217 | static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) | 222 | static void nfs_mark_uptodate(struct nfs_page *req) |
| 218 | { | 223 | { |
| 219 | if (PageUptodate(page)) | 224 | if (PageUptodate(req->wb_page)) |
| 220 | return; | ||
| 221 | if (base != 0) | ||
| 222 | return; | 225 | return; |
| 223 | if (count != nfs_page_length(page)) | 226 | if (!nfs_page_group_covers_page(req)) |
| 224 | return; | 227 | return; |
| 225 | SetPageUptodate(page); | 228 | SetPageUptodate(req->wb_page); |
| 226 | } | 229 | } |
| 227 | 230 | ||
| 228 | static int wb_priority(struct writeback_control *wbc) | 231 | static int wb_priority(struct writeback_control *wbc) |
| @@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page) | |||
| 258 | } | 261 | } |
| 259 | } | 262 | } |
| 260 | 263 | ||
| 261 | static void nfs_end_page_writeback(struct page *page) | 264 | static void nfs_end_page_writeback(struct nfs_page *req) |
| 262 | { | 265 | { |
| 263 | struct inode *inode = page_file_mapping(page)->host; | 266 | struct inode *inode = page_file_mapping(req->wb_page)->host; |
| 264 | struct nfs_server *nfss = NFS_SERVER(inode); | 267 | struct nfs_server *nfss = NFS_SERVER(inode); |
| 265 | 268 | ||
| 266 | end_page_writeback(page); | 269 | if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) |
| 270 | return; | ||
| 271 | |||
| 272 | end_page_writeback(req->wb_page); | ||
| 267 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) | 273 | if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) |
| 268 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); | 274 | clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); |
| 269 | } | 275 | } |
| @@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc | |||
| 354 | struct nfs_pageio_descriptor pgio; | 360 | struct nfs_pageio_descriptor pgio; |
| 355 | int err; | 361 | int err; |
| 356 | 362 | ||
| 357 | NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, | 363 | nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), |
| 358 | page->mapping->host, | 364 | false, &nfs_async_write_completion_ops); |
| 359 | wb_priority(wbc), | ||
| 360 | &nfs_async_write_completion_ops); | ||
| 361 | err = nfs_do_writepage(page, wbc, &pgio); | 365 | err = nfs_do_writepage(page, wbc, &pgio); |
| 362 | nfs_pageio_complete(&pgio); | 366 | nfs_pageio_complete(&pgio); |
| 363 | if (err < 0) | 367 | if (err < 0) |
| @@ -400,7 +404,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
| 400 | 404 | ||
| 401 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); | 405 | nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); |
| 402 | 406 | ||
| 403 | NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); | 407 | nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, |
| 408 | &nfs_async_write_completion_ops); | ||
| 404 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); | 409 | err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); |
| 405 | nfs_pageio_complete(&pgio); | 410 | nfs_pageio_complete(&pgio); |
| 406 | 411 | ||
| @@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
| 425 | { | 430 | { |
| 426 | struct nfs_inode *nfsi = NFS_I(inode); | 431 | struct nfs_inode *nfsi = NFS_I(inode); |
| 427 | 432 | ||
| 433 | WARN_ON_ONCE(req->wb_this_page != req); | ||
| 434 | |||
| 428 | /* Lock the request! */ | 435 | /* Lock the request! */ |
| 429 | nfs_lock_request(req); | 436 | nfs_lock_request(req); |
| 430 | 437 | ||
| @@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
| 441 | set_page_private(req->wb_page, (unsigned long)req); | 448 | set_page_private(req->wb_page, (unsigned long)req); |
| 442 | } | 449 | } |
| 443 | nfsi->npages++; | 450 | nfsi->npages++; |
| 451 | set_bit(PG_INODE_REF, &req->wb_flags); | ||
| 444 | kref_get(&req->wb_kref); | 452 | kref_get(&req->wb_kref); |
| 445 | spin_unlock(&inode->i_lock); | 453 | spin_unlock(&inode->i_lock); |
| 446 | } | 454 | } |
| @@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req) | |||
| 452 | { | 460 | { |
| 453 | struct inode *inode = req->wb_context->dentry->d_inode; | 461 | struct inode *inode = req->wb_context->dentry->d_inode; |
| 454 | struct nfs_inode *nfsi = NFS_I(inode); | 462 | struct nfs_inode *nfsi = NFS_I(inode); |
| 463 | struct nfs_page *head; | ||
| 455 | 464 | ||
| 456 | spin_lock(&inode->i_lock); | 465 | if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { |
| 457 | if (likely(!PageSwapCache(req->wb_page))) { | 466 | head = req->wb_head; |
| 458 | set_page_private(req->wb_page, 0); | 467 | |
| 459 | ClearPagePrivate(req->wb_page); | 468 | spin_lock(&inode->i_lock); |
| 460 | clear_bit(PG_MAPPED, &req->wb_flags); | 469 | if (likely(!PageSwapCache(head->wb_page))) { |
| 470 | set_page_private(head->wb_page, 0); | ||
| 471 | ClearPagePrivate(head->wb_page); | ||
| 472 | clear_bit(PG_MAPPED, &head->wb_flags); | ||
| 473 | } | ||
| 474 | nfsi->npages--; | ||
| 475 | spin_unlock(&inode->i_lock); | ||
| 461 | } | 476 | } |
| 462 | nfsi->npages--; | ||
| 463 | spin_unlock(&inode->i_lock); | ||
| 464 | nfs_release_request(req); | 477 | nfs_release_request(req); |
| 465 | } | 478 | } |
| 466 | 479 | ||
| @@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
| 583 | } | 596 | } |
| 584 | 597 | ||
| 585 | static inline | 598 | static inline |
| 586 | int nfs_write_need_commit(struct nfs_write_data *data) | 599 | int nfs_write_need_commit(struct nfs_pgio_data *data) |
| 587 | { | 600 | { |
| 588 | if (data->verf.committed == NFS_DATA_SYNC) | 601 | if (data->verf.committed == NFS_DATA_SYNC) |
| 589 | return data->header->lseg == NULL; | 602 | return data->header->lseg == NULL; |
| @@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
| 614 | } | 627 | } |
| 615 | 628 | ||
| 616 | static inline | 629 | static inline |
| 617 | int nfs_write_need_commit(struct nfs_write_data *data) | 630 | int nfs_write_need_commit(struct nfs_pgio_data *data) |
| 618 | { | 631 | { |
| 619 | return 0; | 632 | return 0; |
| 620 | } | 633 | } |
| @@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
| 625 | { | 638 | { |
| 626 | struct nfs_commit_info cinfo; | 639 | struct nfs_commit_info cinfo; |
| 627 | unsigned long bytes = 0; | 640 | unsigned long bytes = 0; |
| 641 | bool do_destroy; | ||
| 628 | 642 | ||
| 629 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) | 643 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) |
| 630 | goto out; | 644 | goto out; |
| @@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) | |||
| 645 | goto next; | 659 | goto next; |
| 646 | } | 660 | } |
| 647 | if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { | 661 | if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { |
| 648 | memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); | 662 | memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); |
| 649 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); | 663 | nfs_mark_request_commit(req, hdr->lseg, &cinfo); |
| 650 | goto next; | 664 | goto next; |
| 651 | } | 665 | } |
| @@ -653,7 +667,8 @@ remove_req: | |||
| 653 | nfs_inode_remove_request(req); | 667 | nfs_inode_remove_request(req); |
| 654 | next: | 668 | next: |
| 655 | nfs_unlock_request(req); | 669 | nfs_unlock_request(req); |
| 656 | nfs_end_page_writeback(req->wb_page); | 670 | nfs_end_page_writeback(req); |
| 671 | do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); | ||
| 657 | nfs_release_request(req); | 672 | nfs_release_request(req); |
| 658 | } | 673 | } |
| 659 | out: | 674 | out: |
| @@ -661,7 +676,7 @@ out: | |||
| 661 | } | 676 | } |
| 662 | 677 | ||
| 663 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 678 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) |
| 664 | static unsigned long | 679 | unsigned long |
| 665 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 680 | nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
| 666 | { | 681 | { |
| 667 | return cinfo->mds->ncommit; | 682 | return cinfo->mds->ncommit; |
| @@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, | |||
| 718 | } | 733 | } |
| 719 | 734 | ||
| 720 | #else | 735 | #else |
| 721 | static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) | 736 | unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) |
| 722 | { | 737 | { |
| 723 | return 0; | 738 | return 0; |
| 724 | } | 739 | } |
| @@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
| 758 | if (req == NULL) | 773 | if (req == NULL) |
| 759 | goto out_unlock; | 774 | goto out_unlock; |
| 760 | 775 | ||
| 776 | /* should be handled by nfs_flush_incompatible */ | ||
| 777 | WARN_ON_ONCE(req->wb_head != req); | ||
| 778 | WARN_ON_ONCE(req->wb_this_page != req); | ||
| 779 | |||
| 761 | rqend = req->wb_offset + req->wb_bytes; | 780 | rqend = req->wb_offset + req->wb_bytes; |
| 762 | /* | 781 | /* |
| 763 | * Tell the caller to flush out the request if | 782 | * Tell the caller to flush out the request if |
| @@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, | |||
| 819 | req = nfs_try_to_update_request(inode, page, offset, bytes); | 838 | req = nfs_try_to_update_request(inode, page, offset, bytes); |
| 820 | if (req != NULL) | 839 | if (req != NULL) |
| 821 | goto out; | 840 | goto out; |
| 822 | req = nfs_create_request(ctx, inode, page, offset, bytes); | 841 | req = nfs_create_request(ctx, page, NULL, offset, bytes); |
| 823 | if (IS_ERR(req)) | 842 | if (IS_ERR(req)) |
| 824 | goto out; | 843 | goto out; |
| 825 | nfs_inode_add_request(inode, req); | 844 | nfs_inode_add_request(inode, req); |
| @@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, | |||
| 837 | return PTR_ERR(req); | 856 | return PTR_ERR(req); |
| 838 | /* Update file length */ | 857 | /* Update file length */ |
| 839 | nfs_grow_file(page, offset, count); | 858 | nfs_grow_file(page, offset, count); |
| 840 | nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); | 859 | nfs_mark_uptodate(req); |
| 841 | nfs_mark_request_dirty(req); | 860 | nfs_mark_request_dirty(req); |
| 842 | nfs_unlock_and_release_request(req); | 861 | nfs_unlock_and_release_request(req); |
| 843 | return 0; | 862 | return 0; |
| @@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) | |||
| 863 | return 0; | 882 | return 0; |
| 864 | l_ctx = req->wb_lock_context; | 883 | l_ctx = req->wb_lock_context; |
| 865 | do_flush = req->wb_page != page || req->wb_context != ctx; | 884 | do_flush = req->wb_page != page || req->wb_context != ctx; |
| 885 | /* for now, flush if more than 1 request in page_group */ | ||
| 886 | do_flush |= req->wb_this_page != req; | ||
| 866 | if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { | 887 | if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { |
| 867 | do_flush |= l_ctx->lockowner.l_owner != current->files | 888 | do_flush |= l_ctx->lockowner.l_owner != current->files |
| 868 | || l_ctx->lockowner.l_pid != current->tgid; | 889 | || l_ctx->lockowner.l_pid != current->tgid; |
| @@ -990,126 +1011,17 @@ static int flush_task_priority(int how) | |||
| 990 | return RPC_PRIORITY_NORMAL; | 1011 | return RPC_PRIORITY_NORMAL; |
| 991 | } | 1012 | } |
| 992 | 1013 | ||
| 993 | int nfs_initiate_write(struct rpc_clnt *clnt, | 1014 | static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, |
| 994 | struct nfs_write_data *data, | 1015 | struct rpc_task_setup *task_setup_data, int how) |
| 995 | const struct rpc_call_ops *call_ops, | ||
| 996 | int how, int flags) | ||
| 997 | { | 1016 | { |
| 998 | struct inode *inode = data->header->inode; | 1017 | struct inode *inode = data->header->inode; |
| 999 | int priority = flush_task_priority(how); | 1018 | int priority = flush_task_priority(how); |
| 1000 | struct rpc_task *task; | ||
| 1001 | struct rpc_message msg = { | ||
| 1002 | .rpc_argp = &data->args, | ||
| 1003 | .rpc_resp = &data->res, | ||
| 1004 | .rpc_cred = data->header->cred, | ||
| 1005 | }; | ||
| 1006 | struct rpc_task_setup task_setup_data = { | ||
| 1007 | .rpc_client = clnt, | ||
| 1008 | .task = &data->task, | ||
| 1009 | .rpc_message = &msg, | ||
| 1010 | .callback_ops = call_ops, | ||
| 1011 | .callback_data = data, | ||
| 1012 | .workqueue = nfsiod_workqueue, | ||
| 1013 | .flags = RPC_TASK_ASYNC | flags, | ||
| 1014 | .priority = priority, | ||
| 1015 | }; | ||
| 1016 | int ret = 0; | ||
| 1017 | |||
| 1018 | /* Set up the initial task struct. */ | ||
| 1019 | NFS_PROTO(inode)->write_setup(data, &msg); | ||
| 1020 | 1019 | ||
| 1021 | dprintk("NFS: %5u initiated write call " | 1020 | task_setup_data->priority = priority; |
| 1022 | "(req %s/%llu, %u bytes @ offset %llu)\n", | 1021 | NFS_PROTO(inode)->write_setup(data, msg); |
| 1023 | data->task.tk_pid, | ||
| 1024 | inode->i_sb->s_id, | ||
| 1025 | (unsigned long long)NFS_FILEID(inode), | ||
| 1026 | data->args.count, | ||
| 1027 | (unsigned long long)data->args.offset); | ||
| 1028 | 1022 | ||
| 1029 | nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, | 1023 | nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, |
| 1030 | &task_setup_data.rpc_client, &msg, data); | 1024 | &task_setup_data->rpc_client, msg, data); |
| 1031 | |||
| 1032 | task = rpc_run_task(&task_setup_data); | ||
| 1033 | if (IS_ERR(task)) { | ||
| 1034 | ret = PTR_ERR(task); | ||
| 1035 | goto out; | ||
| 1036 | } | ||
| 1037 | if (how & FLUSH_SYNC) { | ||
| 1038 | ret = rpc_wait_for_completion_task(task); | ||
| 1039 | if (ret == 0) | ||
| 1040 | ret = task->tk_status; | ||
| 1041 | } | ||
| 1042 | rpc_put_task(task); | ||
| 1043 | out: | ||
| 1044 | return ret; | ||
| 1045 | } | ||
| 1046 | EXPORT_SYMBOL_GPL(nfs_initiate_write); | ||
| 1047 | |||
| 1048 | /* | ||
| 1049 | * Set up the argument/result storage required for the RPC call. | ||
| 1050 | */ | ||
| 1051 | static void nfs_write_rpcsetup(struct nfs_write_data *data, | ||
| 1052 | unsigned int count, unsigned int offset, | ||
| 1053 | int how, struct nfs_commit_info *cinfo) | ||
| 1054 | { | ||
| 1055 | struct nfs_page *req = data->header->req; | ||
| 1056 | |||
| 1057 | /* Set up the RPC argument and reply structs | ||
| 1058 | * NB: take care not to mess about with data->commit et al. */ | ||
| 1059 | |||
| 1060 | data->args.fh = NFS_FH(data->header->inode); | ||
| 1061 | data->args.offset = req_offset(req) + offset; | ||
| 1062 | /* pnfs_set_layoutcommit needs this */ | ||
| 1063 | data->mds_offset = data->args.offset; | ||
| 1064 | data->args.pgbase = req->wb_pgbase + offset; | ||
| 1065 | data->args.pages = data->pages.pagevec; | ||
| 1066 | data->args.count = count; | ||
| 1067 | data->args.context = get_nfs_open_context(req->wb_context); | ||
| 1068 | data->args.lock_context = req->wb_lock_context; | ||
| 1069 | data->args.stable = NFS_UNSTABLE; | ||
| 1070 | switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { | ||
| 1071 | case 0: | ||
| 1072 | break; | ||
| 1073 | case FLUSH_COND_STABLE: | ||
| 1074 | if (nfs_reqs_to_commit(cinfo)) | ||
| 1075 | break; | ||
| 1076 | default: | ||
| 1077 | data->args.stable = NFS_FILE_SYNC; | ||
| 1078 | } | ||
| 1079 | |||
| 1080 | data->res.fattr = &data->fattr; | ||
| 1081 | data->res.count = count; | ||
| 1082 | data->res.verf = &data->verf; | ||
| 1083 | nfs_fattr_init(&data->fattr); | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | static int nfs_do_write(struct nfs_write_data *data, | ||
| 1087 | const struct rpc_call_ops *call_ops, | ||
| 1088 | int how) | ||
| 1089 | { | ||
| 1090 | struct inode *inode = data->header->inode; | ||
| 1091 | |||
| 1092 | return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | static int nfs_do_multiple_writes(struct list_head *head, | ||
| 1096 | const struct rpc_call_ops *call_ops, | ||
| 1097 | int how) | ||
| 1098 | { | ||
| 1099 | struct nfs_write_data *data; | ||
| 1100 | int ret = 0; | ||
| 1101 | |||
| 1102 | while (!list_empty(head)) { | ||
| 1103 | int ret2; | ||
| 1104 | |||
| 1105 | data = list_first_entry(head, struct nfs_write_data, list); | ||
| 1106 | list_del_init(&data->list); | ||
| 1107 | |||
| 1108 | ret2 = nfs_do_write(data, call_ops, how); | ||
| 1109 | if (ret == 0) | ||
| 1110 | ret = ret2; | ||
| 1111 | } | ||
| 1112 | return ret; | ||
| 1113 | } | 1025 | } |
| 1114 | 1026 | ||
| 1115 | /* If a nfs_flush_* function fails, it should remove reqs from @head and | 1027 | /* If a nfs_flush_* function fails, it should remove reqs from @head and |
| @@ -1120,7 +1032,7 @@ static void nfs_redirty_request(struct nfs_page *req) | |||
| 1120 | { | 1032 | { |
| 1121 | nfs_mark_request_dirty(req); | 1033 | nfs_mark_request_dirty(req); |
| 1122 | nfs_unlock_request(req); | 1034 | nfs_unlock_request(req); |
| 1123 | nfs_end_page_writeback(req->wb_page); | 1035 | nfs_end_page_writeback(req); |
| 1124 | nfs_release_request(req); | 1036 | nfs_release_request(req); |
| 1125 | } | 1037 | } |
| 1126 | 1038 | ||
| @@ -1140,173 +1052,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { | |||
| 1140 | .completion = nfs_write_completion, | 1052 | .completion = nfs_write_completion, |
| 1141 | }; | 1053 | }; |
| 1142 | 1054 | ||
| 1143 | static void nfs_flush_error(struct nfs_pageio_descriptor *desc, | ||
| 1144 | struct nfs_pgio_header *hdr) | ||
| 1145 | { | ||
| 1146 | set_bit(NFS_IOHDR_REDO, &hdr->flags); | ||
| 1147 | while (!list_empty(&hdr->rpc_list)) { | ||
| 1148 | struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, | ||
| 1149 | struct nfs_write_data, list); | ||
| 1150 | list_del(&data->list); | ||
| 1151 | nfs_writedata_release(data); | ||
| 1152 | } | ||
| 1153 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 1154 | } | ||
| 1155 | |||
| 1156 | /* | ||
| 1157 | * Generate multiple small requests to write out a single | ||
| 1158 | * contiguous dirty area on one page. | ||
| 1159 | */ | ||
| 1160 | static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, | ||
| 1161 | struct nfs_pgio_header *hdr) | ||
| 1162 | { | ||
| 1163 | struct nfs_page *req = hdr->req; | ||
| 1164 | struct page *page = req->wb_page; | ||
| 1165 | struct nfs_write_data *data; | ||
| 1166 | size_t wsize = desc->pg_bsize, nbytes; | ||
| 1167 | unsigned int offset; | ||
| 1168 | int requests = 0; | ||
| 1169 | struct nfs_commit_info cinfo; | ||
| 1170 | |||
| 1171 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
| 1172 | |||
| 1173 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
| 1174 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || | ||
| 1175 | desc->pg_count > wsize)) | ||
| 1176 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
| 1177 | |||
| 1178 | |||
| 1179 | offset = 0; | ||
| 1180 | nbytes = desc->pg_count; | ||
| 1181 | do { | ||
| 1182 | size_t len = min(nbytes, wsize); | ||
| 1183 | |||
| 1184 | data = nfs_writedata_alloc(hdr, 1); | ||
| 1185 | if (!data) { | ||
| 1186 | nfs_flush_error(desc, hdr); | ||
| 1187 | return -ENOMEM; | ||
| 1188 | } | ||
| 1189 | data->pages.pagevec[0] = page; | ||
| 1190 | nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); | ||
| 1191 | list_add(&data->list, &hdr->rpc_list); | ||
| 1192 | requests++; | ||
| 1193 | nbytes -= len; | ||
| 1194 | offset += len; | ||
| 1195 | } while (nbytes != 0); | ||
| 1196 | nfs_list_remove_request(req); | ||
| 1197 | nfs_list_add_request(req, &hdr->pages); | ||
| 1198 | desc->pg_rpc_callops = &nfs_write_common_ops; | ||
| 1199 | return 0; | ||
| 1200 | } | ||
| 1201 | |||
| 1202 | /* | ||
| 1203 | * Create an RPC task for the given write request and kick it. | ||
| 1204 | * The page must have been locked by the caller. | ||
| 1205 | * | ||
| 1206 | * It may happen that the page we're passed is not marked dirty. | ||
| 1207 | * This is the case if nfs_updatepage detects a conflicting request | ||
| 1208 | * that has been written but not committed. | ||
| 1209 | */ | ||
| 1210 | static int nfs_flush_one(struct nfs_pageio_descriptor *desc, | ||
| 1211 | struct nfs_pgio_header *hdr) | ||
| 1212 | { | ||
| 1213 | struct nfs_page *req; | ||
| 1214 | struct page **pages; | ||
| 1215 | struct nfs_write_data *data; | ||
| 1216 | struct list_head *head = &desc->pg_list; | ||
| 1217 | struct nfs_commit_info cinfo; | ||
| 1218 | |||
| 1219 | data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, | ||
| 1220 | desc->pg_count)); | ||
| 1221 | if (!data) { | ||
| 1222 | nfs_flush_error(desc, hdr); | ||
| 1223 | return -ENOMEM; | ||
| 1224 | } | ||
| 1225 | |||
| 1226 | nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); | ||
| 1227 | pages = data->pages.pagevec; | ||
| 1228 | while (!list_empty(head)) { | ||
| 1229 | req = nfs_list_entry(head->next); | ||
| 1230 | nfs_list_remove_request(req); | ||
| 1231 | nfs_list_add_request(req, &hdr->pages); | ||
| 1232 | *pages++ = req->wb_page; | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
| 1236 | (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) | ||
| 1237 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
| 1238 | |||
| 1239 | /* Set up the argument struct */ | ||
| 1240 | nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); | ||
| 1241 | list_add(&data->list, &hdr->rpc_list); | ||
| 1242 | desc->pg_rpc_callops = &nfs_write_common_ops; | ||
| 1243 | return 0; | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | int nfs_generic_flush(struct nfs_pageio_descriptor *desc, | ||
| 1247 | struct nfs_pgio_header *hdr) | ||
| 1248 | { | ||
| 1249 | if (desc->pg_bsize < PAGE_CACHE_SIZE) | ||
| 1250 | return nfs_flush_multi(desc, hdr); | ||
| 1251 | return nfs_flush_one(desc, hdr); | ||
| 1252 | } | ||
| 1253 | EXPORT_SYMBOL_GPL(nfs_generic_flush); | ||
| 1254 | |||
| 1255 | static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) | ||
| 1256 | { | ||
| 1257 | struct nfs_write_header *whdr; | ||
| 1258 | struct nfs_pgio_header *hdr; | ||
| 1259 | int ret; | ||
| 1260 | |||
| 1261 | whdr = nfs_writehdr_alloc(); | ||
| 1262 | if (!whdr) { | ||
| 1263 | desc->pg_completion_ops->error_cleanup(&desc->pg_list); | ||
| 1264 | return -ENOMEM; | ||
| 1265 | } | ||
| 1266 | hdr = &whdr->header; | ||
| 1267 | nfs_pgheader_init(desc, hdr, nfs_writehdr_free); | ||
| 1268 | atomic_inc(&hdr->refcnt); | ||
| 1269 | ret = nfs_generic_flush(desc, hdr); | ||
| 1270 | if (ret == 0) | ||
| 1271 | ret = nfs_do_multiple_writes(&hdr->rpc_list, | ||
| 1272 | desc->pg_rpc_callops, | ||
| 1273 | desc->pg_ioflags); | ||
| 1274 | if (atomic_dec_and_test(&hdr->refcnt)) | ||
| 1275 | hdr->completion_ops->completion(hdr); | ||
| 1276 | return ret; | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | static const struct nfs_pageio_ops nfs_pageio_write_ops = { | ||
| 1280 | .pg_test = nfs_generic_pg_test, | ||
| 1281 | .pg_doio = nfs_generic_pg_writepages, | ||
| 1282 | }; | ||
| 1283 | |||
| 1284 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, | 1055 | void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, |
| 1285 | struct inode *inode, int ioflags, | 1056 | struct inode *inode, int ioflags, bool force_mds, |
| 1286 | const struct nfs_pgio_completion_ops *compl_ops) | 1057 | const struct nfs_pgio_completion_ops *compl_ops) |
| 1287 | { | 1058 | { |
| 1288 | nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, | 1059 | struct nfs_server *server = NFS_SERVER(inode); |
| 1289 | NFS_SERVER(inode)->wsize, ioflags); | 1060 | const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; |
| 1061 | |||
| 1062 | #ifdef CONFIG_NFS_V4_1 | ||
| 1063 | if (server->pnfs_curr_ld && !force_mds) | ||
| 1064 | pg_ops = server->pnfs_curr_ld->pg_write_ops; | ||
| 1065 | #endif | ||
| 1066 | nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, | ||
| 1067 | server->wsize, ioflags); | ||
| 1290 | } | 1068 | } |
| 1291 | EXPORT_SYMBOL_GPL(nfs_pageio_init_write); | 1069 | EXPORT_SYMBOL_GPL(nfs_pageio_init_write); |
| 1292 | 1070 | ||
| 1293 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) | 1071 | void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) |
| 1294 | { | 1072 | { |
| 1295 | pgio->pg_ops = &nfs_pageio_write_ops; | 1073 | pgio->pg_ops = &nfs_pgio_rw_ops; |
| 1296 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; | 1074 | pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; |
| 1297 | } | 1075 | } |
| 1298 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); | 1076 | EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); |
| 1299 | 1077 | ||
| 1300 | 1078 | ||
| 1301 | void nfs_write_prepare(struct rpc_task *task, void *calldata) | ||
| 1302 | { | ||
| 1303 | struct nfs_write_data *data = calldata; | ||
| 1304 | int err; | ||
| 1305 | err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); | ||
| 1306 | if (err) | ||
| 1307 | rpc_exit(task, err); | ||
| 1308 | } | ||
| 1309 | |||
| 1310 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) | 1079 | void nfs_commit_prepare(struct rpc_task *task, void *calldata) |
| 1311 | { | 1080 | { |
| 1312 | struct nfs_commit_data *data = calldata; | 1081 | struct nfs_commit_data *data = calldata; |
| @@ -1314,23 +1083,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) | |||
| 1314 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); | 1083 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); |
| 1315 | } | 1084 | } |
| 1316 | 1085 | ||
| 1317 | /* | 1086 | static void nfs_writeback_release_common(struct nfs_pgio_data *data) |
| 1318 | * Handle a write reply that flushes a whole page. | ||
| 1319 | * | ||
| 1320 | * FIXME: There is an inherent race with invalidate_inode_pages and | ||
| 1321 | * writebacks since the page->count is kept > 1 for as long | ||
| 1322 | * as the page has a write request pending. | ||
| 1323 | */ | ||
| 1324 | static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) | ||
| 1325 | { | ||
| 1326 | struct nfs_write_data *data = calldata; | ||
| 1327 | |||
| 1328 | nfs_writeback_done(task, data); | ||
| 1329 | } | ||
| 1330 | |||
| 1331 | static void nfs_writeback_release_common(void *calldata) | ||
| 1332 | { | 1087 | { |
| 1333 | struct nfs_write_data *data = calldata; | ||
| 1334 | struct nfs_pgio_header *hdr = data->header; | 1088 | struct nfs_pgio_header *hdr = data->header; |
| 1335 | int status = data->task.tk_status; | 1089 | int status = data->task.tk_status; |
| 1336 | 1090 | ||
| @@ -1339,34 +1093,46 @@ static void nfs_writeback_release_common(void *calldata) | |||
| 1339 | if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) | 1093 | if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) |
| 1340 | ; /* Do nothing */ | 1094 | ; /* Do nothing */ |
| 1341 | else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) | 1095 | else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) |
| 1342 | memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); | 1096 | memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); |
| 1343 | else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) | 1097 | else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) |
| 1344 | set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); | 1098 | set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); |
| 1345 | spin_unlock(&hdr->lock); | 1099 | spin_unlock(&hdr->lock); |
| 1346 | } | 1100 | } |
| 1347 | nfs_writedata_release(data); | ||
| 1348 | } | 1101 | } |
| 1349 | 1102 | ||
| 1350 | static const struct rpc_call_ops nfs_write_common_ops = { | 1103 | /* |
| 1351 | .rpc_call_prepare = nfs_write_prepare, | 1104 | * Special version of should_remove_suid() that ignores capabilities. |
| 1352 | .rpc_call_done = nfs_writeback_done_common, | 1105 | */ |
| 1353 | .rpc_release = nfs_writeback_release_common, | 1106 | static int nfs_should_remove_suid(const struct inode *inode) |
| 1354 | }; | 1107 | { |
| 1108 | umode_t mode = inode->i_mode; | ||
| 1109 | int kill = 0; | ||
| 1110 | |||
| 1111 | /* suid always must be killed */ | ||
| 1112 | if (unlikely(mode & S_ISUID)) | ||
| 1113 | kill = ATTR_KILL_SUID; | ||
| 1355 | 1114 | ||
| 1115 | /* | ||
| 1116 | * sgid without any exec bits is just a mandatory locking mark; leave | ||
| 1117 | * it alone. If some exec bits are set, it's a real sgid; kill it. | ||
| 1118 | */ | ||
| 1119 | if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) | ||
| 1120 | kill |= ATTR_KILL_SGID; | ||
| 1121 | |||
| 1122 | if (unlikely(kill && S_ISREG(mode))) | ||
| 1123 | return kill; | ||
| 1124 | |||
| 1125 | return 0; | ||
| 1126 | } | ||
| 1356 | 1127 | ||
| 1357 | /* | 1128 | /* |
| 1358 | * This function is called when the WRITE call is complete. | 1129 | * This function is called when the WRITE call is complete. |
| 1359 | */ | 1130 | */ |
| 1360 | void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | 1131 | static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, |
| 1132 | struct inode *inode) | ||
| 1361 | { | 1133 | { |
| 1362 | struct nfs_writeargs *argp = &data->args; | ||
| 1363 | struct nfs_writeres *resp = &data->res; | ||
| 1364 | struct inode *inode = data->header->inode; | ||
| 1365 | int status; | 1134 | int status; |
| 1366 | 1135 | ||
| 1367 | dprintk("NFS: %5u nfs_writeback_done (status %d)\n", | ||
| 1368 | task->tk_pid, task->tk_status); | ||
| 1369 | |||
| 1370 | /* | 1136 | /* |
| 1371 | * ->write_done will attempt to use post-op attributes to detect | 1137 | * ->write_done will attempt to use post-op attributes to detect |
| 1372 | * conflicting writes by other clients. A strict interpretation | 1138 | * conflicting writes by other clients. A strict interpretation |
| @@ -1376,11 +1142,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 1376 | */ | 1142 | */ |
| 1377 | status = NFS_PROTO(inode)->write_done(task, data); | 1143 | status = NFS_PROTO(inode)->write_done(task, data); |
| 1378 | if (status != 0) | 1144 | if (status != 0) |
| 1379 | return; | 1145 | return status; |
| 1380 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); | 1146 | nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); |
| 1381 | 1147 | ||
| 1382 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) | 1148 | #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) |
| 1383 | if (resp->verf->committed < argp->stable && task->tk_status >= 0) { | 1149 | if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { |
| 1384 | /* We tried a write call, but the server did not | 1150 | /* We tried a write call, but the server did not |
| 1385 | * commit data to stable storage even though we | 1151 | * commit data to stable storage even though we |
| 1386 | * requested it. | 1152 | * requested it. |
| @@ -1396,18 +1162,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
| 1396 | dprintk("NFS: faulty NFS server %s:" | 1162 | dprintk("NFS: faulty NFS server %s:" |
| 1397 | " (committed = %d) != (stable = %d)\n", | 1163 | " (committed = %d) != (stable = %d)\n", |
| 1398 | NFS_SERVER(inode)->nfs_client->cl_hostname, | 1164 | NFS_SERVER(inode)->nfs_client->cl_hostname, |
| 1399 | resp->verf->committed, argp->stable); | 1165 | data->res.verf->committed, data->args.stable); |
| 1400 | complain = jiffies + 300 * HZ; | 1166 | complain = jiffies + 300 * HZ; |
| 1401 | } | 1167 | } |
| 1402 | } | 1168 | } |
| 1403 | #endif | 1169 | #endif |
| 1404 | if (task->tk_status < 0) | 1170 | |
| 1405 | nfs_set_pgio_error(data->header, task->tk_status, argp->offset); | 1171 | /* Deal with the suid/sgid bit corner case */ |
| 1406 | else if (resp->count < argp->count) { | 1172 | if (nfs_should_remove_suid(inode)) |
| 1173 | nfs_mark_for_revalidate(inode); | ||
| 1174 | return 0; | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | /* | ||
| 1178 | * This function is called when the WRITE call is complete. | ||
| 1179 | */ | ||
| 1180 | static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) | ||
| 1181 | { | ||
| 1182 | struct nfs_pgio_args *argp = &data->args; | ||
| 1183 | struct nfs_pgio_res *resp = &data->res; | ||
| 1184 | |||
| 1185 | if (resp->count < argp->count) { | ||
| 1407 | static unsigned long complain; | 1186 | static unsigned long complain; |
| 1408 | 1187 | ||
| 1409 | /* This a short write! */ | 1188 | /* This a short write! */ |
| 1410 | nfs_inc_stats(inode, NFSIOS_SHORTWRITE); | 1189 | nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); |
| 1411 | 1190 | ||
| 1412 | /* Has the server at least made some progress? */ | 1191 | /* Has the server at least made some progress? */ |
| 1413 | if (resp->count == 0) { | 1192 | if (resp->count == 0) { |
| @@ -1874,7 +1653,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, | |||
| 1874 | int __init nfs_init_writepagecache(void) | 1653 | int __init nfs_init_writepagecache(void) |
| 1875 | { | 1654 | { |
| 1876 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", | 1655 | nfs_wdata_cachep = kmem_cache_create("nfs_write_data", |
| 1877 | sizeof(struct nfs_write_header), | 1656 | sizeof(struct nfs_rw_header), |
| 1878 | 0, SLAB_HWCACHE_ALIGN, | 1657 | 0, SLAB_HWCACHE_ALIGN, |
| 1879 | NULL); | 1658 | NULL); |
| 1880 | if (nfs_wdata_cachep == NULL) | 1659 | if (nfs_wdata_cachep == NULL) |
| @@ -1936,3 +1715,12 @@ void nfs_destroy_writepagecache(void) | |||
| 1936 | kmem_cache_destroy(nfs_wdata_cachep); | 1715 | kmem_cache_destroy(nfs_wdata_cachep); |
| 1937 | } | 1716 | } |
| 1938 | 1717 | ||
| 1718 | static const struct nfs_rw_ops nfs_rw_write_ops = { | ||
| 1719 | .rw_mode = FMODE_WRITE, | ||
| 1720 | .rw_alloc_header = nfs_writehdr_alloc, | ||
| 1721 | .rw_free_header = nfs_writehdr_free, | ||
| 1722 | .rw_release = nfs_writeback_release_common, | ||
| 1723 | .rw_done = nfs_writeback_done, | ||
| 1724 | .rw_result = nfs_writeback_result, | ||
| 1725 | .rw_initiate = nfs_initiate_write, | ||
| 1726 | }; | ||
diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 3e794c12e90a..610af5155ef2 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h | |||
| @@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc | |||
| 46 | enum nfs3_stable_how { | 46 | enum nfs3_stable_how { |
| 47 | NFS_UNSTABLE = 0, | 47 | NFS_UNSTABLE = 0, |
| 48 | NFS_DATA_SYNC = 1, | 48 | NFS_DATA_SYNC = 1, |
| 49 | NFS_FILE_SYNC = 2 | 49 | NFS_FILE_SYNC = 2, |
| 50 | |||
| 51 | /* used by direct.c to mark verf as invalid */ | ||
| 52 | NFS_INVALID_STABLE_HOW = -1 | ||
| 50 | }; | 53 | }; |
| 51 | #endif /* _LINUX_NFS_H */ | 54 | #endif /* _LINUX_NFS_H */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fa6918b0f829..919576b8e2cf 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
| @@ -520,7 +520,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); | |||
| 520 | extern int nfs_writepages(struct address_space *, struct writeback_control *); | 520 | extern int nfs_writepages(struct address_space *, struct writeback_control *); |
| 521 | extern int nfs_flush_incompatible(struct file *file, struct page *page); | 521 | extern int nfs_flush_incompatible(struct file *file, struct page *page); |
| 522 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); | 522 | extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); |
| 523 | extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | ||
| 524 | 523 | ||
| 525 | /* | 524 | /* |
| 526 | * Try to write back everything synchronously (but check the | 525 | * Try to write back everything synchronously (but check the |
| @@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode) | |||
| 553 | extern int nfs_readpage(struct file *, struct page *); | 552 | extern int nfs_readpage(struct file *, struct page *); |
| 554 | extern int nfs_readpages(struct file *, struct address_space *, | 553 | extern int nfs_readpages(struct file *, struct address_space *, |
| 555 | struct list_head *, unsigned); | 554 | struct list_head *, unsigned); |
| 556 | extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); | ||
| 557 | extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, | 555 | extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, |
| 558 | struct page *); | 556 | struct page *); |
| 559 | 557 | ||
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 92ce5783b707..7d9096d95d4a 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h | |||
| @@ -22,12 +22,17 @@ | |||
| 22 | * Valid flags for a dirty buffer | 22 | * Valid flags for a dirty buffer |
| 23 | */ | 23 | */ |
| 24 | enum { | 24 | enum { |
| 25 | PG_BUSY = 0, | 25 | PG_BUSY = 0, /* nfs_{un}lock_request */ |
| 26 | PG_MAPPED, | 26 | PG_MAPPED, /* page private set for buffered io */ |
| 27 | PG_CLEAN, | 27 | PG_CLEAN, /* write succeeded */ |
| 28 | PG_NEED_COMMIT, | 28 | PG_COMMIT_TO_DS, /* used by pnfs layouts */ |
| 29 | PG_NEED_RESCHED, | 29 | PG_INODE_REF, /* extra ref held by inode (head req only) */ |
| 30 | PG_COMMIT_TO_DS, | 30 | PG_HEADLOCK, /* page group lock of wb_head */ |
| 31 | PG_TEARDOWN, /* page group sync for destroy */ | ||
| 32 | PG_UNLOCKPAGE, /* page group sync bit in read path */ | ||
| 33 | PG_UPTODATE, /* page group sync bit in read path */ | ||
| 34 | PG_WB_END, /* page group sync bit in write path */ | ||
| 35 | PG_REMOVE, /* page group sync bit in write path */ | ||
| 31 | }; | 36 | }; |
| 32 | 37 | ||
| 33 | struct nfs_inode; | 38 | struct nfs_inode; |
| @@ -43,15 +48,29 @@ struct nfs_page { | |||
| 43 | struct kref wb_kref; /* reference count */ | 48 | struct kref wb_kref; /* reference count */ |
| 44 | unsigned long wb_flags; | 49 | unsigned long wb_flags; |
| 45 | struct nfs_write_verifier wb_verf; /* Commit cookie */ | 50 | struct nfs_write_verifier wb_verf; /* Commit cookie */ |
| 51 | struct nfs_page *wb_this_page; /* list of reqs for this page */ | ||
| 52 | struct nfs_page *wb_head; /* head pointer for req list */ | ||
| 46 | }; | 53 | }; |
| 47 | 54 | ||
| 48 | struct nfs_pageio_descriptor; | 55 | struct nfs_pageio_descriptor; |
| 49 | struct nfs_pageio_ops { | 56 | struct nfs_pageio_ops { |
| 50 | void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); | 57 | void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); |
| 51 | bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 58 | size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, |
| 59 | struct nfs_page *); | ||
| 52 | int (*pg_doio)(struct nfs_pageio_descriptor *); | 60 | int (*pg_doio)(struct nfs_pageio_descriptor *); |
| 53 | }; | 61 | }; |
| 54 | 62 | ||
| 63 | struct nfs_rw_ops { | ||
| 64 | const fmode_t rw_mode; | ||
| 65 | struct nfs_rw_header *(*rw_alloc_header)(void); | ||
| 66 | void (*rw_free_header)(struct nfs_rw_header *); | ||
| 67 | void (*rw_release)(struct nfs_pgio_data *); | ||
| 68 | int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); | ||
| 69 | void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); | ||
| 70 | void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, | ||
| 71 | struct rpc_task_setup *, int); | ||
| 72 | }; | ||
| 73 | |||
| 55 | struct nfs_pageio_descriptor { | 74 | struct nfs_pageio_descriptor { |
| 56 | struct list_head pg_list; | 75 | struct list_head pg_list; |
| 57 | unsigned long pg_bytes_written; | 76 | unsigned long pg_bytes_written; |
| @@ -63,6 +82,7 @@ struct nfs_pageio_descriptor { | |||
| 63 | 82 | ||
| 64 | struct inode *pg_inode; | 83 | struct inode *pg_inode; |
| 65 | const struct nfs_pageio_ops *pg_ops; | 84 | const struct nfs_pageio_ops *pg_ops; |
| 85 | const struct nfs_rw_ops *pg_rw_ops; | ||
| 66 | int pg_ioflags; | 86 | int pg_ioflags; |
| 67 | int pg_error; | 87 | int pg_error; |
| 68 | const struct rpc_call_ops *pg_rpc_callops; | 88 | const struct rpc_call_ops *pg_rpc_callops; |
| @@ -75,29 +95,33 @@ struct nfs_pageio_descriptor { | |||
| 75 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) | 95 | #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) |
| 76 | 96 | ||
| 77 | extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, | 97 | extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, |
| 78 | struct inode *inode, | ||
| 79 | struct page *page, | 98 | struct page *page, |
| 99 | struct nfs_page *last, | ||
| 80 | unsigned int offset, | 100 | unsigned int offset, |
| 81 | unsigned int count); | 101 | unsigned int count); |
| 82 | extern void nfs_release_request(struct nfs_page *req); | 102 | extern void nfs_release_request(struct nfs_page *); |
| 83 | 103 | ||
| 84 | 104 | ||
| 85 | extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | 105 | extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, |
| 86 | struct inode *inode, | 106 | struct inode *inode, |
| 87 | const struct nfs_pageio_ops *pg_ops, | 107 | const struct nfs_pageio_ops *pg_ops, |
| 88 | const struct nfs_pgio_completion_ops *compl_ops, | 108 | const struct nfs_pgio_completion_ops *compl_ops, |
| 109 | const struct nfs_rw_ops *rw_ops, | ||
| 89 | size_t bsize, | 110 | size_t bsize, |
| 90 | int how); | 111 | int how); |
| 91 | extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, | 112 | extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, |
| 92 | struct nfs_page *); | 113 | struct nfs_page *); |
| 93 | extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); | 114 | extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); |
| 94 | extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); | 115 | extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); |
| 95 | extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, | 116 | extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, |
| 96 | struct nfs_page *prev, | 117 | struct nfs_page *prev, |
| 97 | struct nfs_page *req); | 118 | struct nfs_page *req); |
| 98 | extern int nfs_wait_on_request(struct nfs_page *); | 119 | extern int nfs_wait_on_request(struct nfs_page *); |
| 99 | extern void nfs_unlock_request(struct nfs_page *req); | 120 | extern void nfs_unlock_request(struct nfs_page *req); |
| 100 | extern void nfs_unlock_and_release_request(struct nfs_page *req); | 121 | extern void nfs_unlock_and_release_request(struct nfs_page *); |
| 122 | extern void nfs_page_group_lock(struct nfs_page *); | ||
| 123 | extern void nfs_page_group_unlock(struct nfs_page *); | ||
| 124 | extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); | ||
| 101 | 125 | ||
| 102 | /* | 126 | /* |
| 103 | * Lock the page of an asynchronous request | 127 | * Lock the page of an asynchronous request |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6fb5b2335b59..9a1396e70310 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
| @@ -489,31 +489,21 @@ struct nfs4_delegreturnres { | |||
| 489 | }; | 489 | }; |
| 490 | 490 | ||
| 491 | /* | 491 | /* |
| 492 | * Arguments to the read call. | 492 | * Arguments to the write call. |
| 493 | */ | 493 | */ |
| 494 | struct nfs_readargs { | 494 | struct nfs_write_verifier { |
| 495 | struct nfs4_sequence_args seq_args; | 495 | char data[8]; |
| 496 | struct nfs_fh * fh; | ||
| 497 | struct nfs_open_context *context; | ||
| 498 | struct nfs_lock_context *lock_context; | ||
| 499 | nfs4_stateid stateid; | ||
| 500 | __u64 offset; | ||
| 501 | __u32 count; | ||
| 502 | unsigned int pgbase; | ||
| 503 | struct page ** pages; | ||
| 504 | }; | 496 | }; |
| 505 | 497 | ||
| 506 | struct nfs_readres { | 498 | struct nfs_writeverf { |
| 507 | struct nfs4_sequence_res seq_res; | 499 | struct nfs_write_verifier verifier; |
| 508 | struct nfs_fattr * fattr; | 500 | enum nfs3_stable_how committed; |
| 509 | __u32 count; | ||
| 510 | int eof; | ||
| 511 | }; | 501 | }; |
| 512 | 502 | ||
| 513 | /* | 503 | /* |
| 514 | * Arguments to the write call. | 504 | * Arguments shared by the read and write call. |
| 515 | */ | 505 | */ |
| 516 | struct nfs_writeargs { | 506 | struct nfs_pgio_args { |
| 517 | struct nfs4_sequence_args seq_args; | 507 | struct nfs4_sequence_args seq_args; |
| 518 | struct nfs_fh * fh; | 508 | struct nfs_fh * fh; |
| 519 | struct nfs_open_context *context; | 509 | struct nfs_open_context *context; |
| @@ -521,27 +511,20 @@ struct nfs_writeargs { | |||
| 521 | nfs4_stateid stateid; | 511 | nfs4_stateid stateid; |
| 522 | __u64 offset; | 512 | __u64 offset; |
| 523 | __u32 count; | 513 | __u32 count; |
| 524 | enum nfs3_stable_how stable; | ||
| 525 | unsigned int pgbase; | 514 | unsigned int pgbase; |
| 526 | struct page ** pages; | 515 | struct page ** pages; |
| 527 | const u32 * bitmask; | 516 | const u32 * bitmask; /* used by write */ |
| 528 | }; | 517 | enum nfs3_stable_how stable; /* used by write */ |
| 529 | |||
| 530 | struct nfs_write_verifier { | ||
| 531 | char data[8]; | ||
| 532 | }; | 518 | }; |
| 533 | 519 | ||
| 534 | struct nfs_writeverf { | 520 | struct nfs_pgio_res { |
| 535 | struct nfs_write_verifier verifier; | ||
| 536 | enum nfs3_stable_how committed; | ||
| 537 | }; | ||
| 538 | |||
| 539 | struct nfs_writeres { | ||
| 540 | struct nfs4_sequence_res seq_res; | 521 | struct nfs4_sequence_res seq_res; |
| 541 | struct nfs_fattr * fattr; | 522 | struct nfs_fattr * fattr; |
| 542 | struct nfs_writeverf * verf; | ||
| 543 | __u32 count; | 523 | __u32 count; |
| 544 | const struct nfs_server *server; | 524 | int eof; /* used by read */ |
| 525 | struct nfs_writeverf * verf; /* used by write */ | ||
| 526 | const struct nfs_server *server; /* used by write */ | ||
| 527 | |||
| 545 | }; | 528 | }; |
| 546 | 529 | ||
| 547 | /* | 530 | /* |
| @@ -1129,6 +1112,7 @@ struct pnfs_commit_bucket { | |||
| 1129 | struct list_head committing; | 1112 | struct list_head committing; |
| 1130 | struct pnfs_layout_segment *wlseg; | 1113 | struct pnfs_layout_segment *wlseg; |
| 1131 | struct pnfs_layout_segment *clseg; | 1114 | struct pnfs_layout_segment *clseg; |
| 1115 | struct nfs_writeverf direct_verf; | ||
| 1132 | }; | 1116 | }; |
| 1133 | 1117 | ||
| 1134 | struct pnfs_ds_commit_info { | 1118 | struct pnfs_ds_commit_info { |
| @@ -1264,20 +1248,6 @@ struct nfs_page_array { | |||
| 1264 | struct page *page_array[NFS_PAGEVEC_SIZE]; | 1248 | struct page *page_array[NFS_PAGEVEC_SIZE]; |
| 1265 | }; | 1249 | }; |
| 1266 | 1250 | ||
| 1267 | struct nfs_read_data { | ||
| 1268 | struct nfs_pgio_header *header; | ||
| 1269 | struct list_head list; | ||
| 1270 | struct rpc_task task; | ||
| 1271 | struct nfs_fattr fattr; /* fattr storage */ | ||
| 1272 | struct nfs_readargs args; | ||
| 1273 | struct nfs_readres res; | ||
| 1274 | unsigned long timestamp; /* For lease renewal */ | ||
| 1275 | int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); | ||
| 1276 | __u64 mds_offset; | ||
| 1277 | struct nfs_page_array pages; | ||
| 1278 | struct nfs_client *ds_clp; /* pNFS data server */ | ||
| 1279 | }; | ||
| 1280 | |||
| 1281 | /* used as flag bits in nfs_pgio_header */ | 1251 | /* used as flag bits in nfs_pgio_header */ |
| 1282 | enum { | 1252 | enum { |
| 1283 | NFS_IOHDR_ERROR = 0, | 1253 | NFS_IOHDR_ERROR = 0, |
| @@ -1287,19 +1257,22 @@ enum { | |||
| 1287 | NFS_IOHDR_NEED_RESCHED, | 1257 | NFS_IOHDR_NEED_RESCHED, |
| 1288 | }; | 1258 | }; |
| 1289 | 1259 | ||
| 1260 | struct nfs_pgio_data; | ||
| 1261 | |||
| 1290 | struct nfs_pgio_header { | 1262 | struct nfs_pgio_header { |
| 1291 | struct inode *inode; | 1263 | struct inode *inode; |
| 1292 | struct rpc_cred *cred; | 1264 | struct rpc_cred *cred; |
| 1293 | struct list_head pages; | 1265 | struct list_head pages; |
| 1294 | struct list_head rpc_list; | 1266 | struct nfs_pgio_data *data; |
| 1295 | atomic_t refcnt; | 1267 | atomic_t refcnt; |
| 1296 | struct nfs_page *req; | 1268 | struct nfs_page *req; |
| 1297 | struct nfs_writeverf *verf; | 1269 | struct nfs_writeverf verf; /* Used for writes */ |
| 1298 | struct pnfs_layout_segment *lseg; | 1270 | struct pnfs_layout_segment *lseg; |
| 1299 | loff_t io_start; | 1271 | loff_t io_start; |
| 1300 | const struct rpc_call_ops *mds_ops; | 1272 | const struct rpc_call_ops *mds_ops; |
| 1301 | void (*release) (struct nfs_pgio_header *hdr); | 1273 | void (*release) (struct nfs_pgio_header *hdr); |
| 1302 | const struct nfs_pgio_completion_ops *completion_ops; | 1274 | const struct nfs_pgio_completion_ops *completion_ops; |
| 1275 | const struct nfs_rw_ops *rw_ops; | ||
| 1303 | struct nfs_direct_req *dreq; | 1276 | struct nfs_direct_req *dreq; |
| 1304 | void *layout_private; | 1277 | void *layout_private; |
| 1305 | spinlock_t lock; | 1278 | spinlock_t lock; |
| @@ -1310,30 +1283,24 @@ struct nfs_pgio_header { | |||
| 1310 | unsigned long flags; | 1283 | unsigned long flags; |
| 1311 | }; | 1284 | }; |
| 1312 | 1285 | ||
| 1313 | struct nfs_read_header { | 1286 | struct nfs_pgio_data { |
| 1314 | struct nfs_pgio_header header; | ||
| 1315 | struct nfs_read_data rpc_data; | ||
| 1316 | }; | ||
| 1317 | |||
| 1318 | struct nfs_write_data { | ||
| 1319 | struct nfs_pgio_header *header; | 1287 | struct nfs_pgio_header *header; |
| 1320 | struct list_head list; | ||
| 1321 | struct rpc_task task; | 1288 | struct rpc_task task; |
| 1322 | struct nfs_fattr fattr; | 1289 | struct nfs_fattr fattr; |
| 1323 | struct nfs_writeverf verf; | 1290 | struct nfs_writeverf verf; /* Used for writes */ |
| 1324 | struct nfs_writeargs args; /* argument struct */ | 1291 | struct nfs_pgio_args args; /* argument struct */ |
| 1325 | struct nfs_writeres res; /* result struct */ | 1292 | struct nfs_pgio_res res; /* result struct */ |
| 1326 | unsigned long timestamp; /* For lease renewal */ | 1293 | unsigned long timestamp; /* For lease renewal */ |
| 1327 | int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); | 1294 | int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); |
| 1328 | __u64 mds_offset; /* Filelayout dense stripe */ | 1295 | __u64 mds_offset; /* Filelayout dense stripe */ |
| 1329 | struct nfs_page_array pages; | 1296 | struct nfs_page_array pages; |
| 1330 | struct nfs_client *ds_clp; /* pNFS data server */ | 1297 | struct nfs_client *ds_clp; /* pNFS data server */ |
| 1298 | int ds_idx; /* ds index if ds_clp is set */ | ||
| 1331 | }; | 1299 | }; |
| 1332 | 1300 | ||
| 1333 | struct nfs_write_header { | 1301 | struct nfs_rw_header { |
| 1334 | struct nfs_pgio_header header; | 1302 | struct nfs_pgio_header header; |
| 1335 | struct nfs_write_data rpc_data; | 1303 | struct nfs_pgio_data rpc_data; |
| 1336 | struct nfs_writeverf verf; | ||
| 1337 | }; | 1304 | }; |
| 1338 | 1305 | ||
| 1339 | struct nfs_mds_commit_info { | 1306 | struct nfs_mds_commit_info { |
| @@ -1465,16 +1432,11 @@ struct nfs_rpc_ops { | |||
| 1465 | struct nfs_pathconf *); | 1432 | struct nfs_pathconf *); |
| 1466 | int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); | 1433 | int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); |
| 1467 | int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); | 1434 | int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); |
| 1468 | void (*read_setup) (struct nfs_read_data *, struct rpc_message *); | 1435 | int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); |
| 1469 | void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, | 1436 | void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); |
| 1470 | const struct nfs_pgio_completion_ops *); | 1437 | int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); |
| 1471 | int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *); | 1438 | void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); |
| 1472 | int (*read_done) (struct rpc_task *, struct nfs_read_data *); | 1439 | int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); |
| 1473 | void (*write_setup) (struct nfs_write_data *, struct rpc_message *); | ||
| 1474 | void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int, | ||
| 1475 | const struct nfs_pgio_completion_ops *); | ||
| 1476 | int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *); | ||
| 1477 | int (*write_done) (struct rpc_task *, struct nfs_write_data *); | ||
| 1478 | void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); | 1440 | void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); |
| 1479 | void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); | 1441 | void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); |
| 1480 | int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); | 1442 | int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); |
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3876f0f1dfd3..fcbfe8783243 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
| @@ -24,6 +24,12 @@ | |||
| 24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) | 24 | #define RPC_MAX_SLOT_TABLE_LIMIT (65536U) |
| 25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT | 25 | #define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT |
| 26 | 26 | ||
| 27 | #define RPC_CWNDSHIFT (8U) | ||
| 28 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
| 29 | #define RPC_INITCWND RPC_CWNDSCALE | ||
| 30 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
| 31 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
| 32 | |||
| 27 | /* | 33 | /* |
| 28 | * This describes a timeout strategy | 34 | * This describes a timeout strategy |
| 29 | */ | 35 | */ |
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 27ce26240932..92d5ab99fbf3 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
| @@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor) | |||
| 218 | 218 | ||
| 219 | spin_lock(&registered_mechs_lock); | 219 | spin_lock(&registered_mechs_lock); |
| 220 | list_for_each_entry(pos, &registered_mechs, gm_list) { | 220 | list_for_each_entry(pos, &registered_mechs, gm_list) { |
| 221 | if (!mech_supports_pseudoflavor(pos, pseudoflavor)) { | 221 | if (!mech_supports_pseudoflavor(pos, pseudoflavor)) |
| 222 | module_put(pos->gm_owner); | ||
| 223 | continue; | 222 | continue; |
| 224 | } | ||
| 225 | if (try_module_get(pos->gm_owner)) | 223 | if (try_module_get(pos->gm_owner)) |
| 226 | gm = pos; | 224 | gm = pos; |
| 227 | break; | 225 | break; |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 25578afe1548..c0365c14b858 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
| @@ -832,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work) | |||
| 832 | * @size: requested byte size | 832 | * @size: requested byte size |
| 833 | * | 833 | * |
| 834 | * To prevent rpciod from hanging, this allocator never sleeps, | 834 | * To prevent rpciod from hanging, this allocator never sleeps, |
| 835 | * returning NULL if the request cannot be serviced immediately. | 835 | * returning NULL and suppressing warning if the request cannot be serviced |
| 836 | * immediately. | ||
| 836 | * The caller can arrange to sleep in a way that is safe for rpciod. | 837 | * The caller can arrange to sleep in a way that is safe for rpciod. |
| 837 | * | 838 | * |
| 838 | * Most requests are 'small' (under 2KiB) and can be serviced from a | 839 | * Most requests are 'small' (under 2KiB) and can be serviced from a |
| @@ -845,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work) | |||
| 845 | void *rpc_malloc(struct rpc_task *task, size_t size) | 846 | void *rpc_malloc(struct rpc_task *task, size_t size) |
| 846 | { | 847 | { |
| 847 | struct rpc_buffer *buf; | 848 | struct rpc_buffer *buf; |
| 848 | gfp_t gfp = GFP_NOWAIT; | 849 | gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; |
| 849 | 850 | ||
| 850 | if (RPC_IS_SWAPPER(task)) | 851 | if (RPC_IS_SWAPPER(task)) |
| 851 | gfp |= __GFP_MEMALLOC; | 852 | gfp |= __GFP_MEMALLOC; |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 89d051de6b3e..c3b2b3369e52 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
| @@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt); | |||
| 71 | static DEFINE_SPINLOCK(xprt_list_lock); | 71 | static DEFINE_SPINLOCK(xprt_list_lock); |
| 72 | static LIST_HEAD(xprt_list); | 72 | static LIST_HEAD(xprt_list); |
| 73 | 73 | ||
| 74 | /* | ||
| 75 | * The transport code maintains an estimate on the maximum number of out- | ||
| 76 | * standing RPC requests, using a smoothed version of the congestion | ||
| 77 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
| 78 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
| 79 | * halved; otherwise, it is incremented by 1/cwnd when | ||
| 80 | * | ||
| 81 | * - a reply is received and | ||
| 82 | * - a full number of requests are outstanding and | ||
| 83 | * - the congestion window hasn't been updated recently. | ||
| 84 | */ | ||
| 85 | #define RPC_CWNDSHIFT (8U) | ||
| 86 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) | ||
| 87 | #define RPC_INITCWND RPC_CWNDSCALE | ||
| 88 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) | ||
| 89 | |||
| 90 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) | ||
| 91 | |||
| 92 | /** | 74 | /** |
| 93 | * xprt_register_transport - register a transport implementation | 75 | * xprt_register_transport - register a transport implementation |
| 94 | * @transport: transport to register | 76 | * @transport: transport to register |
| @@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); | |||
| 446 | * @task: recently completed RPC request used to adjust window | 428 | * @task: recently completed RPC request used to adjust window |
| 447 | * @result: result code of completed RPC request | 429 | * @result: result code of completed RPC request |
| 448 | * | 430 | * |
| 449 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 431 | * The transport code maintains an estimate on the maximum number of out- |
| 432 | * standing RPC requests, using a smoothed version of the congestion | ||
| 433 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
| 434 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
| 435 | * halved; otherwise, it is incremented by 1/cwnd when | ||
| 436 | * | ||
| 437 | * - a reply is received and | ||
| 438 | * - a full number of requests are outstanding and | ||
| 439 | * - the congestion window hasn't been updated recently. | ||
| 450 | */ | 440 | */ |
| 451 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) | 441 | void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result) |
| 452 | { | 442 | { |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 96ead526b125..693966d3f33b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
| @@ -78,8 +78,7 @@ static const char transfertypes[][12] = { | |||
| 78 | * elements. Segments are then coalesced when registered, if possible | 78 | * elements. Segments are then coalesced when registered, if possible |
| 79 | * within the selected memreg mode. | 79 | * within the selected memreg mode. |
| 80 | * | 80 | * |
| 81 | * Note, this routine is never called if the connection's memory | 81 | * Returns positive number of segments converted, or a negative errno. |
| 82 | * registration strategy is 0 (bounce buffers). | ||
| 83 | */ | 82 | */ |
| 84 | 83 | ||
| 85 | static int | 84 | static int |
| @@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 102 | page_base = xdrbuf->page_base & ~PAGE_MASK; | 101 | page_base = xdrbuf->page_base & ~PAGE_MASK; |
| 103 | p = 0; | 102 | p = 0; |
| 104 | while (len && n < nsegs) { | 103 | while (len && n < nsegs) { |
| 104 | if (!ppages[p]) { | ||
| 105 | /* alloc the pagelist for receiving buffer */ | ||
| 106 | ppages[p] = alloc_page(GFP_ATOMIC); | ||
| 107 | if (!ppages[p]) | ||
| 108 | return -ENOMEM; | ||
| 109 | } | ||
| 105 | seg[n].mr_page = ppages[p]; | 110 | seg[n].mr_page = ppages[p]; |
| 106 | seg[n].mr_offset = (void *)(unsigned long) page_base; | 111 | seg[n].mr_offset = (void *)(unsigned long) page_base; |
| 107 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); | 112 | seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); |
| 108 | BUG_ON(seg[n].mr_len > PAGE_SIZE); | 113 | if (seg[n].mr_len > PAGE_SIZE) |
| 114 | return -EIO; | ||
| 109 | len -= seg[n].mr_len; | 115 | len -= seg[n].mr_len; |
| 110 | ++n; | 116 | ++n; |
| 111 | ++p; | 117 | ++p; |
| @@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 114 | 120 | ||
| 115 | /* Message overflows the seg array */ | 121 | /* Message overflows the seg array */ |
| 116 | if (len && n == nsegs) | 122 | if (len && n == nsegs) |
| 117 | return 0; | 123 | return -EIO; |
| 118 | 124 | ||
| 119 | if (xdrbuf->tail[0].iov_len) { | 125 | if (xdrbuf->tail[0].iov_len) { |
| 120 | /* the rpcrdma protocol allows us to omit any trailing | 126 | /* the rpcrdma protocol allows us to omit any trailing |
| @@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 123 | return n; | 129 | return n; |
| 124 | if (n == nsegs) | 130 | if (n == nsegs) |
| 125 | /* Tail remains, but we're out of segments */ | 131 | /* Tail remains, but we're out of segments */ |
| 126 | return 0; | 132 | return -EIO; |
| 127 | seg[n].mr_page = NULL; | 133 | seg[n].mr_page = NULL; |
| 128 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; | 134 | seg[n].mr_offset = xdrbuf->tail[0].iov_base; |
| 129 | seg[n].mr_len = xdrbuf->tail[0].iov_len; | 135 | seg[n].mr_len = xdrbuf->tail[0].iov_len; |
| @@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, | |||
| 164 | * Reply chunk (a counted array): | 170 | * Reply chunk (a counted array): |
| 165 | * N elements: | 171 | * N elements: |
| 166 | * 1 - N - HLOO - HLOO - ... - HLOO | 172 | * 1 - N - HLOO - HLOO - ... - HLOO |
| 173 | * | ||
| 174 | * Returns positive RPC/RDMA header size, or negative errno. | ||
| 167 | */ | 175 | */ |
| 168 | 176 | ||
| 169 | static unsigned int | 177 | static ssize_t |
| 170 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | 178 | rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, |
| 171 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) | 179 | struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) |
| 172 | { | 180 | { |
| 173 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 181 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 174 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); | 182 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); |
| 175 | int nsegs, nchunks = 0; | 183 | int n, nsegs, nchunks = 0; |
| 176 | unsigned int pos; | 184 | unsigned int pos; |
| 177 | struct rpcrdma_mr_seg *seg = req->rl_segments; | 185 | struct rpcrdma_mr_seg *seg = req->rl_segments; |
| 178 | struct rpcrdma_read_chunk *cur_rchunk = NULL; | 186 | struct rpcrdma_read_chunk *cur_rchunk = NULL; |
| @@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 198 | pos = target->head[0].iov_len; | 206 | pos = target->head[0].iov_len; |
| 199 | 207 | ||
| 200 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); | 208 | nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); |
| 201 | if (nsegs == 0) | 209 | if (nsegs < 0) |
| 202 | return 0; | 210 | return nsegs; |
| 203 | 211 | ||
| 204 | do { | 212 | do { |
| 205 | /* bind/register the memory, then build chunk from result. */ | 213 | n = rpcrdma_register_external(seg, nsegs, |
| 206 | int n = rpcrdma_register_external(seg, nsegs, | ||
| 207 | cur_wchunk != NULL, r_xprt); | 214 | cur_wchunk != NULL, r_xprt); |
| 208 | if (n <= 0) | 215 | if (n <= 0) |
| 209 | goto out; | 216 | goto out; |
| @@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 248 | /* success. all failures return above */ | 255 | /* success. all failures return above */ |
| 249 | req->rl_nchunks = nchunks; | 256 | req->rl_nchunks = nchunks; |
| 250 | 257 | ||
| 251 | BUG_ON(nchunks == 0); | ||
| 252 | BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) | ||
| 253 | && (nchunks > 3)); | ||
| 254 | |||
| 255 | /* | 258 | /* |
| 256 | * finish off header. If write, marshal discrim and nchunks. | 259 | * finish off header. If write, marshal discrim and nchunks. |
| 257 | */ | 260 | */ |
| @@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
| 278 | out: | 281 | out: |
| 279 | for (pos = 0; nchunks--;) | 282 | for (pos = 0; nchunks--;) |
| 280 | pos += rpcrdma_deregister_external( | 283 | pos += rpcrdma_deregister_external( |
| 281 | &req->rl_segments[pos], r_xprt, NULL); | 284 | &req->rl_segments[pos], r_xprt); |
| 282 | return 0; | 285 | return n; |
| 283 | } | 286 | } |
| 284 | 287 | ||
| 285 | /* | 288 | /* |
| @@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) | |||
| 361 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. | 364 | * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. |
| 362 | * [2] -- optional padding. | 365 | * [2] -- optional padding. |
| 363 | * [3] -- if padded, header only in [1] and data here. | 366 | * [3] -- if padded, header only in [1] and data here. |
| 367 | * | ||
| 368 | * Returns zero on success, otherwise a negative errno. | ||
| 364 | */ | 369 | */ |
| 365 | 370 | ||
| 366 | int | 371 | int |
| @@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 370 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 375 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 371 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 376 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 372 | char *base; | 377 | char *base; |
| 373 | size_t hdrlen, rpclen, padlen; | 378 | size_t rpclen, padlen; |
| 379 | ssize_t hdrlen; | ||
| 374 | enum rpcrdma_chunktype rtype, wtype; | 380 | enum rpcrdma_chunktype rtype, wtype; |
| 375 | struct rpcrdma_msg *headerp; | 381 | struct rpcrdma_msg *headerp; |
| 376 | 382 | ||
| @@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 441 | /* The following simplification is not true forever */ | 447 | /* The following simplification is not true forever */ |
| 442 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) | 448 | if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) |
| 443 | wtype = rpcrdma_noch; | 449 | wtype = rpcrdma_noch; |
| 444 | BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); | 450 | if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { |
| 445 | 451 | dprintk("RPC: %s: cannot marshal multiple chunk lists\n", | |
| 446 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && | 452 | __func__); |
| 447 | (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { | 453 | return -EIO; |
| 448 | /* forced to "pure inline"? */ | ||
| 449 | dprintk("RPC: %s: too much data (%d/%d) for inline\n", | ||
| 450 | __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); | ||
| 451 | return -1; | ||
| 452 | } | 454 | } |
| 453 | 455 | ||
| 454 | hdrlen = 28; /*sizeof *headerp;*/ | 456 | hdrlen = 28; /*sizeof *headerp;*/ |
| @@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 474 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; | 476 | headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; |
| 475 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; | 477 | headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; |
| 476 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ | 478 | hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ |
| 477 | BUG_ON(wtype != rpcrdma_noch); | 479 | if (wtype != rpcrdma_noch) { |
| 478 | 480 | dprintk("RPC: %s: invalid chunk list\n", | |
| 481 | __func__); | ||
| 482 | return -EIO; | ||
| 483 | } | ||
| 479 | } else { | 484 | } else { |
| 480 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; | 485 | headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; |
| 481 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; | 486 | headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; |
| @@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 492 | * on receive. Therefore, we request a reply chunk | 497 | * on receive. Therefore, we request a reply chunk |
| 493 | * for non-writes wherever feasible and efficient. | 498 | * for non-writes wherever feasible and efficient. |
| 494 | */ | 499 | */ |
| 495 | if (wtype == rpcrdma_noch && | 500 | if (wtype == rpcrdma_noch) |
| 496 | r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) | ||
| 497 | wtype = rpcrdma_replych; | 501 | wtype = rpcrdma_replych; |
| 498 | } | 502 | } |
| 499 | } | 503 | } |
| @@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) | |||
| 511 | hdrlen = rpcrdma_create_chunks(rqst, | 515 | hdrlen = rpcrdma_create_chunks(rqst, |
| 512 | &rqst->rq_rcv_buf, headerp, wtype); | 516 | &rqst->rq_rcv_buf, headerp, wtype); |
| 513 | } | 517 | } |
| 514 | 518 | if (hdrlen < 0) | |
| 515 | if (hdrlen == 0) | 519 | return hdrlen; |
| 516 | return -1; | ||
| 517 | 520 | ||
| 518 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" | 521 | dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" |
| 519 | " headerp 0x%p base 0x%p lkey 0x%x\n", | 522 | " headerp 0x%p base 0x%p lkey 0x%x\n", |
| @@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
| 680 | rqst->rq_private_buf = rqst->rq_rcv_buf; | 683 | rqst->rq_private_buf = rqst->rq_rcv_buf; |
| 681 | } | 684 | } |
| 682 | 685 | ||
| 683 | /* | ||
| 684 | * This function is called when an async event is posted to | ||
| 685 | * the connection which changes the connection state. All it | ||
| 686 | * does at this point is mark the connection up/down, the rpc | ||
| 687 | * timers do the rest. | ||
| 688 | */ | ||
| 689 | void | 686 | void |
| 690 | rpcrdma_conn_func(struct rpcrdma_ep *ep) | 687 | rpcrdma_connect_worker(struct work_struct *work) |
| 691 | { | 688 | { |
| 689 | struct rpcrdma_ep *ep = | ||
| 690 | container_of(work, struct rpcrdma_ep, rep_connect_worker.work); | ||
| 692 | struct rpc_xprt *xprt = ep->rep_xprt; | 691 | struct rpc_xprt *xprt = ep->rep_xprt; |
| 693 | 692 | ||
| 694 | spin_lock_bh(&xprt->transport_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
| @@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) | |||
| 705 | } | 704 | } |
| 706 | 705 | ||
| 707 | /* | 706 | /* |
| 708 | * This function is called when memory window unbind which we are waiting | 707 | * This function is called when an async event is posted to |
| 709 | * for completes. Just use rr_func (zeroed by upcall) to signal completion. | 708 | * the connection which changes the connection state. All it |
| 709 | * does at this point is mark the connection up/down, the rpc | ||
| 710 | * timers do the rest. | ||
| 710 | */ | 711 | */ |
| 711 | static void | 712 | void |
| 712 | rpcrdma_unbind_func(struct rpcrdma_rep *rep) | 713 | rpcrdma_conn_func(struct rpcrdma_ep *ep) |
| 713 | { | 714 | { |
| 714 | wake_up(&rep->rr_unbind); | 715 | schedule_delayed_work(&ep->rep_connect_worker, 0); |
| 715 | } | 716 | } |
| 716 | 717 | ||
| 717 | /* | 718 | /* |
| @@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
| 728 | struct rpc_xprt *xprt = rep->rr_xprt; | 729 | struct rpc_xprt *xprt = rep->rr_xprt; |
| 729 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 730 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 730 | __be32 *iptr; | 731 | __be32 *iptr; |
| 731 | int i, rdmalen, status; | 732 | int rdmalen, status; |
| 733 | unsigned long cwnd; | ||
| 732 | 734 | ||
| 733 | /* Check status. If bad, signal disconnect and return rep to pool */ | 735 | /* Check status. If bad, signal disconnect and return rep to pool */ |
| 734 | if (rep->rr_len == ~0U) { | 736 | if (rep->rr_len == ~0U) { |
| @@ -783,6 +785,7 @@ repost: | |||
| 783 | 785 | ||
| 784 | /* from here on, the reply is no longer an orphan */ | 786 | /* from here on, the reply is no longer an orphan */ |
| 785 | req->rl_reply = rep; | 787 | req->rl_reply = rep; |
| 788 | xprt->reestablish_timeout = 0; | ||
| 786 | 789 | ||
| 787 | /* check for expected message types */ | 790 | /* check for expected message types */ |
| 788 | /* The order of some of these tests is important. */ | 791 | /* The order of some of these tests is important. */ |
| @@ -857,26 +860,10 @@ badheader: | |||
| 857 | break; | 860 | break; |
| 858 | } | 861 | } |
| 859 | 862 | ||
| 860 | /* If using mw bind, start the deregister process now. */ | 863 | cwnd = xprt->cwnd; |
| 861 | /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ | 864 | xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; |
| 862 | if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { | 865 | if (xprt->cwnd > cwnd) |
| 863 | case RPCRDMA_MEMWINDOWS: | 866 | xprt_release_rqst_cong(rqst->rq_task); |
| 864 | for (i = 0; req->rl_nchunks-- > 1;) | ||
| 865 | i += rpcrdma_deregister_external( | ||
| 866 | &req->rl_segments[i], r_xprt, NULL); | ||
| 867 | /* Optionally wait (not here) for unbinds to complete */ | ||
| 868 | rep->rr_func = rpcrdma_unbind_func; | ||
| 869 | (void) rpcrdma_deregister_external(&req->rl_segments[i], | ||
| 870 | r_xprt, rep); | ||
| 871 | break; | ||
| 872 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 873 | for (i = 0; req->rl_nchunks--;) | ||
| 874 | i += rpcrdma_deregister_external(&req->rl_segments[i], | ||
| 875 | r_xprt, NULL); | ||
| 876 | break; | ||
| 877 | default: | ||
| 878 | break; | ||
| 879 | } | ||
| 880 | 867 | ||
| 881 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", | 868 | dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", |
| 882 | __func__, xprt, rqst, status); | 869 | __func__, xprt, rqst, status); |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1eb9c468d0c9..66f91f0d071a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = { | |||
| 149 | 149 | ||
| 150 | #endif | 150 | #endif |
| 151 | 151 | ||
| 152 | #define RPCRDMA_BIND_TO (60U * HZ) | ||
| 153 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | ||
| 154 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | ||
| 155 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | ||
| 156 | |||
| 152 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | 157 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ |
| 153 | 158 | ||
| 154 | static void | 159 | static void |
| @@ -229,7 +234,6 @@ static void | |||
| 229 | xprt_rdma_destroy(struct rpc_xprt *xprt) | 234 | xprt_rdma_destroy(struct rpc_xprt *xprt) |
| 230 | { | 235 | { |
| 231 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 236 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 232 | int rc; | ||
| 233 | 237 | ||
| 234 | dprintk("RPC: %s: called\n", __func__); | 238 | dprintk("RPC: %s: called\n", __func__); |
| 235 | 239 | ||
| @@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) | |||
| 238 | xprt_clear_connected(xprt); | 242 | xprt_clear_connected(xprt); |
| 239 | 243 | ||
| 240 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); | 244 | rpcrdma_buffer_destroy(&r_xprt->rx_buf); |
| 241 | rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); | 245 | rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); |
| 242 | if (rc) | ||
| 243 | dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", | ||
| 244 | __func__, rc); | ||
| 245 | rpcrdma_ia_close(&r_xprt->rx_ia); | 246 | rpcrdma_ia_close(&r_xprt->rx_ia); |
| 246 | 247 | ||
| 247 | xprt_rdma_free_addresses(xprt); | 248 | xprt_rdma_free_addresses(xprt); |
| @@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args) | |||
| 289 | 290 | ||
| 290 | /* 60 second timeout, no retries */ | 291 | /* 60 second timeout, no retries */ |
| 291 | xprt->timeout = &xprt_rdma_default_timeout; | 292 | xprt->timeout = &xprt_rdma_default_timeout; |
| 292 | xprt->bind_timeout = (60U * HZ); | 293 | xprt->bind_timeout = RPCRDMA_BIND_TO; |
| 293 | xprt->reestablish_timeout = (5U * HZ); | 294 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
| 294 | xprt->idle_timeout = (5U * 60 * HZ); | 295 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; |
| 295 | 296 | ||
| 296 | xprt->resvport = 0; /* privileged port not needed */ | 297 | xprt->resvport = 0; /* privileged port not needed */ |
| 297 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ | 298 | xprt->tsh_size = 0; /* RPC-RDMA handles framing */ |
| @@ -391,7 +392,7 @@ out4: | |||
| 391 | xprt_rdma_free_addresses(xprt); | 392 | xprt_rdma_free_addresses(xprt); |
| 392 | rc = -EINVAL; | 393 | rc = -EINVAL; |
| 393 | out3: | 394 | out3: |
| 394 | (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); | 395 | rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); |
| 395 | out2: | 396 | out2: |
| 396 | rpcrdma_ia_close(&new_xprt->rx_ia); | 397 | rpcrdma_ia_close(&new_xprt->rx_ia); |
| 397 | out1: | 398 | out1: |
| @@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 436 | schedule_delayed_work(&r_xprt->rdma_connect, | 437 | schedule_delayed_work(&r_xprt->rdma_connect, |
| 437 | xprt->reestablish_timeout); | 438 | xprt->reestablish_timeout); |
| 438 | xprt->reestablish_timeout <<= 1; | 439 | xprt->reestablish_timeout <<= 1; |
| 439 | if (xprt->reestablish_timeout > (30 * HZ)) | 440 | if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) |
| 440 | xprt->reestablish_timeout = (30 * HZ); | 441 | xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; |
| 441 | else if (xprt->reestablish_timeout < (5 * HZ)) | 442 | else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) |
| 442 | xprt->reestablish_timeout = (5 * HZ); | 443 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; |
| 443 | } else { | 444 | } else { |
| 444 | schedule_delayed_work(&r_xprt->rdma_connect, 0); | 445 | schedule_delayed_work(&r_xprt->rdma_connect, 0); |
| 445 | if (!RPC_IS_ASYNC(task)) | 446 | if (!RPC_IS_ASYNC(task)) |
| @@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 447 | } | 448 | } |
| 448 | } | 449 | } |
| 449 | 450 | ||
| 450 | static int | ||
| 451 | xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 452 | { | ||
| 453 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
| 454 | int credits = atomic_read(&r_xprt->rx_buf.rb_credits); | ||
| 455 | |||
| 456 | /* == RPC_CWNDSCALE @ init, but *after* setup */ | ||
| 457 | if (r_xprt->rx_buf.rb_cwndscale == 0UL) { | ||
| 458 | r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; | ||
| 459 | dprintk("RPC: %s: cwndscale %lu\n", __func__, | ||
| 460 | r_xprt->rx_buf.rb_cwndscale); | ||
| 461 | BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); | ||
| 462 | } | ||
| 463 | xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; | ||
| 464 | return xprt_reserve_xprt_cong(xprt, task); | ||
| 465 | } | ||
| 466 | |||
| 467 | /* | 451 | /* |
| 468 | * The RDMA allocate/free functions need the task structure as a place | 452 | * The RDMA allocate/free functions need the task structure as a place |
| 469 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv | 453 | * to hide the struct rpcrdma_req, which is necessary for the actual send/recv |
| @@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 479 | struct rpcrdma_req *req, *nreq; | 463 | struct rpcrdma_req *req, *nreq; |
| 480 | 464 | ||
| 481 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); | 465 | req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); |
| 482 | BUG_ON(NULL == req); | 466 | if (req == NULL) |
| 467 | return NULL; | ||
| 483 | 468 | ||
| 484 | if (size > req->rl_size) { | 469 | if (size > req->rl_size) { |
| 485 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " | 470 | dprintk("RPC: %s: size %zd too large for buffer[%zd]: " |
| @@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 503 | * If the allocation or registration fails, the RPC framework | 488 | * If the allocation or registration fails, the RPC framework |
| 504 | * will (doggedly) retry. | 489 | * will (doggedly) retry. |
| 505 | */ | 490 | */ |
| 506 | if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == | ||
| 507 | RPCRDMA_BOUNCEBUFFERS) { | ||
| 508 | /* forced to "pure inline" */ | ||
| 509 | dprintk("RPC: %s: too much data (%zd) for inline " | ||
| 510 | "(r/w max %d/%d)\n", __func__, size, | ||
| 511 | rpcx_to_rdmad(xprt).inline_rsize, | ||
| 512 | rpcx_to_rdmad(xprt).inline_wsize); | ||
| 513 | size = req->rl_size; | ||
| 514 | rpc_exit(task, -EIO); /* fail the operation */ | ||
| 515 | rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; | ||
| 516 | goto out; | ||
| 517 | } | ||
| 518 | if (task->tk_flags & RPC_TASK_SWAPPER) | 491 | if (task->tk_flags & RPC_TASK_SWAPPER) |
| 519 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); | 492 | nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); |
| 520 | else | 493 | else |
| @@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
| 543 | req = nreq; | 516 | req = nreq; |
| 544 | } | 517 | } |
| 545 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); | 518 | dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); |
| 546 | out: | ||
| 547 | req->rl_connect_cookie = 0; /* our reserved value */ | 519 | req->rl_connect_cookie = 0; /* our reserved value */ |
| 548 | return req->rl_xdr_buf; | 520 | return req->rl_xdr_buf; |
| 549 | 521 | ||
| @@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer) | |||
| 579 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); | 551 | __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); |
| 580 | 552 | ||
| 581 | /* | 553 | /* |
| 582 | * Finish the deregistration. When using mw bind, this was | 554 | * Finish the deregistration. The process is considered |
| 583 | * begun in rpcrdma_reply_handler(). In all other modes, we | ||
| 584 | * do it here, in thread context. The process is considered | ||
| 585 | * complete when the rr_func vector becomes NULL - this | 555 | * complete when the rr_func vector becomes NULL - this |
| 586 | * was put in place during rpcrdma_reply_handler() - the wait | 556 | * was put in place during rpcrdma_reply_handler() - the wait |
| 587 | * call below will not block if the dereg is "done". If | 557 | * call below will not block if the dereg is "done". If |
| @@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer) | |||
| 590 | for (i = 0; req->rl_nchunks;) { | 560 | for (i = 0; req->rl_nchunks;) { |
| 591 | --req->rl_nchunks; | 561 | --req->rl_nchunks; |
| 592 | i += rpcrdma_deregister_external( | 562 | i += rpcrdma_deregister_external( |
| 593 | &req->rl_segments[i], r_xprt, NULL); | 563 | &req->rl_segments[i], r_xprt); |
| 594 | } | ||
| 595 | |||
| 596 | if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { | ||
| 597 | rep->rr_func = NULL; /* abandon the callback */ | ||
| 598 | req->rl_reply = NULL; | ||
| 599 | } | 564 | } |
| 600 | 565 | ||
| 601 | if (req->rl_iov.length == 0) { /* see allocate above */ | 566 | if (req->rl_iov.length == 0) { /* see allocate above */ |
| @@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 630 | struct rpc_xprt *xprt = rqst->rq_xprt; | 595 | struct rpc_xprt *xprt = rqst->rq_xprt; |
| 631 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); | 596 | struct rpcrdma_req *req = rpcr_to_rdmar(rqst); |
| 632 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 597 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
| 598 | int rc; | ||
| 633 | 599 | ||
| 634 | /* marshal the send itself */ | 600 | if (req->rl_niovs == 0) { |
| 635 | if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { | 601 | rc = rpcrdma_marshal_req(rqst); |
| 636 | r_xprt->rx_stats.failed_marshal_count++; | 602 | if (rc < 0) |
| 637 | dprintk("RPC: %s: rpcrdma_marshal_req failed\n", | 603 | goto failed_marshal; |
| 638 | __func__); | ||
| 639 | return -EIO; | ||
| 640 | } | 604 | } |
| 641 | 605 | ||
| 642 | if (req->rl_reply == NULL) /* e.g. reconnection */ | 606 | if (req->rl_reply == NULL) /* e.g. reconnection */ |
| @@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
| 660 | rqst->rq_bytes_sent = 0; | 624 | rqst->rq_bytes_sent = 0; |
| 661 | return 0; | 625 | return 0; |
| 662 | 626 | ||
| 627 | failed_marshal: | ||
| 628 | r_xprt->rx_stats.failed_marshal_count++; | ||
| 629 | dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", | ||
| 630 | __func__, rc); | ||
| 631 | if (rc == -EIO) | ||
| 632 | return -EIO; | ||
| 663 | drop_connection: | 633 | drop_connection: |
| 664 | xprt_disconnect_done(xprt); | 634 | xprt_disconnect_done(xprt); |
| 665 | return -ENOTCONN; /* implies disconnect */ | 635 | return -ENOTCONN; /* implies disconnect */ |
| @@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
| 705 | */ | 675 | */ |
| 706 | 676 | ||
| 707 | static struct rpc_xprt_ops xprt_rdma_procs = { | 677 | static struct rpc_xprt_ops xprt_rdma_procs = { |
| 708 | .reserve_xprt = xprt_rdma_reserve_xprt, | 678 | .reserve_xprt = xprt_reserve_xprt_cong, |
| 709 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ | 679 | .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ |
| 710 | .alloc_slot = xprt_alloc_slot, | 680 | .alloc_slot = xprt_alloc_slot, |
| 711 | .release_request = xprt_release_rqst_cong, /* ditto */ | 681 | .release_request = xprt_release_rqst_cong, /* ditto */ |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 93726560eaa8..13dbd1c389ff 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
| @@ -48,8 +48,8 @@ | |||
| 48 | */ | 48 | */ |
| 49 | 49 | ||
| 50 | #include <linux/interrupt.h> | 50 | #include <linux/interrupt.h> |
| 51 | #include <linux/pci.h> /* for Tavor hack below */ | ||
| 52 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
| 52 | #include <asm/bitops.h> | ||
| 53 | 53 | ||
| 54 | #include "xprt_rdma.h" | 54 | #include "xprt_rdma.h" |
| 55 | 55 | ||
| @@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) | |||
| 142 | } | 142 | } |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | static inline | 145 | static void |
| 146 | void rpcrdma_event_process(struct ib_wc *wc) | 146 | rpcrdma_sendcq_process_wc(struct ib_wc *wc) |
| 147 | { | 147 | { |
| 148 | struct rpcrdma_mw *frmr; | 148 | struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
| 149 | struct rpcrdma_rep *rep = | ||
| 150 | (struct rpcrdma_rep *)(unsigned long) wc->wr_id; | ||
| 151 | 149 | ||
| 152 | dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", | 150 | dprintk("RPC: %s: frmr %p status %X opcode %d\n", |
| 153 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | 151 | __func__, frmr, wc->status, wc->opcode); |
| 154 | 152 | ||
| 155 | if (!rep) /* send or bind completion that we don't care about */ | 153 | if (wc->wr_id == 0ULL) |
| 156 | return; | 154 | return; |
| 157 | 155 | if (wc->status != IB_WC_SUCCESS) | |
| 158 | if (IB_WC_SUCCESS != wc->status) { | ||
| 159 | dprintk("RPC: %s: WC opcode %d status %X, connection lost\n", | ||
| 160 | __func__, wc->opcode, wc->status); | ||
| 161 | rep->rr_len = ~0U; | ||
| 162 | if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV) | ||
| 163 | rpcrdma_schedule_tasklet(rep); | ||
| 164 | return; | 156 | return; |
| 165 | } | ||
| 166 | 157 | ||
| 167 | switch (wc->opcode) { | 158 | if (wc->opcode == IB_WC_FAST_REG_MR) |
| 168 | case IB_WC_FAST_REG_MR: | ||
| 169 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
| 170 | frmr->r.frmr.state = FRMR_IS_VALID; | 159 | frmr->r.frmr.state = FRMR_IS_VALID; |
| 171 | break; | 160 | else if (wc->opcode == IB_WC_LOCAL_INV) |
| 172 | case IB_WC_LOCAL_INV: | ||
| 173 | frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | ||
| 174 | frmr->r.frmr.state = FRMR_IS_INVALID; | 161 | frmr->r.frmr.state = FRMR_IS_INVALID; |
| 175 | break; | ||
| 176 | case IB_WC_RECV: | ||
| 177 | rep->rr_len = wc->byte_len; | ||
| 178 | ib_dma_sync_single_for_cpu( | ||
| 179 | rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
| 180 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
| 181 | /* Keep (only) the most recent credits, after check validity */ | ||
| 182 | if (rep->rr_len >= 16) { | ||
| 183 | struct rpcrdma_msg *p = | ||
| 184 | (struct rpcrdma_msg *) rep->rr_base; | ||
| 185 | unsigned int credits = ntohl(p->rm_credit); | ||
| 186 | if (credits == 0) { | ||
| 187 | dprintk("RPC: %s: server" | ||
| 188 | " dropped credits to 0!\n", __func__); | ||
| 189 | /* don't deadlock */ | ||
| 190 | credits = 1; | ||
| 191 | } else if (credits > rep->rr_buffer->rb_max_requests) { | ||
| 192 | dprintk("RPC: %s: server" | ||
| 193 | " over-crediting: %d (%d)\n", | ||
| 194 | __func__, credits, | ||
| 195 | rep->rr_buffer->rb_max_requests); | ||
| 196 | credits = rep->rr_buffer->rb_max_requests; | ||
| 197 | } | ||
| 198 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
| 199 | } | ||
| 200 | /* fall through */ | ||
| 201 | case IB_WC_BIND_MW: | ||
| 202 | rpcrdma_schedule_tasklet(rep); | ||
| 203 | break; | ||
| 204 | default: | ||
| 205 | dprintk("RPC: %s: unexpected WC event %X\n", | ||
| 206 | __func__, wc->opcode); | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | } | 162 | } |
| 210 | 163 | ||
| 211 | static inline int | 164 | static int |
| 212 | rpcrdma_cq_poll(struct ib_cq *cq) | 165 | rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) |
| 213 | { | 166 | { |
| 214 | struct ib_wc wc; | 167 | struct ib_wc *wcs; |
| 215 | int rc; | 168 | int budget, count, rc; |
| 216 | 169 | ||
| 217 | for (;;) { | 170 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; |
| 218 | rc = ib_poll_cq(cq, 1, &wc); | 171 | do { |
| 219 | if (rc < 0) { | 172 | wcs = ep->rep_send_wcs; |
| 220 | dprintk("RPC: %s: ib_poll_cq failed %i\n", | 173 | |
| 221 | __func__, rc); | 174 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); |
| 175 | if (rc <= 0) | ||
| 222 | return rc; | 176 | return rc; |
| 223 | } | ||
| 224 | if (rc == 0) | ||
| 225 | break; | ||
| 226 | 177 | ||
| 227 | rpcrdma_event_process(&wc); | 178 | count = rc; |
| 179 | while (count-- > 0) | ||
| 180 | rpcrdma_sendcq_process_wc(wcs++); | ||
| 181 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
| 182 | return 0; | ||
| 183 | } | ||
| 184 | |||
| 185 | /* | ||
| 186 | * Handle send, fast_reg_mr, and local_inv completions. | ||
| 187 | * | ||
| 188 | * Send events are typically suppressed and thus do not result | ||
| 189 | * in an upcall. Occasionally one is signaled, however. This | ||
| 190 | * prevents the provider's completion queue from wrapping and | ||
| 191 | * losing a completion. | ||
| 192 | */ | ||
| 193 | static void | ||
| 194 | rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) | ||
| 195 | { | ||
| 196 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
| 197 | int rc; | ||
| 198 | |||
| 199 | rc = rpcrdma_sendcq_poll(cq, ep); | ||
| 200 | if (rc) { | ||
| 201 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
| 202 | __func__, rc); | ||
| 203 | return; | ||
| 228 | } | 204 | } |
| 229 | 205 | ||
| 206 | rc = ib_req_notify_cq(cq, | ||
| 207 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); | ||
| 208 | if (rc == 0) | ||
| 209 | return; | ||
| 210 | if (rc < 0) { | ||
| 211 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 212 | __func__, rc); | ||
| 213 | return; | ||
| 214 | } | ||
| 215 | |||
| 216 | rpcrdma_sendcq_poll(cq, ep); | ||
| 217 | } | ||
| 218 | |||
| 219 | static void | ||
| 220 | rpcrdma_recvcq_process_wc(struct ib_wc *wc) | ||
| 221 | { | ||
| 222 | struct rpcrdma_rep *rep = | ||
| 223 | (struct rpcrdma_rep *)(unsigned long)wc->wr_id; | ||
| 224 | |||
| 225 | dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", | ||
| 226 | __func__, rep, wc->status, wc->opcode, wc->byte_len); | ||
| 227 | |||
| 228 | if (wc->status != IB_WC_SUCCESS) { | ||
| 229 | rep->rr_len = ~0U; | ||
| 230 | goto out_schedule; | ||
| 231 | } | ||
| 232 | if (wc->opcode != IB_WC_RECV) | ||
| 233 | return; | ||
| 234 | |||
| 235 | rep->rr_len = wc->byte_len; | ||
| 236 | ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, | ||
| 237 | rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); | ||
| 238 | |||
| 239 | if (rep->rr_len >= 16) { | ||
| 240 | struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; | ||
| 241 | unsigned int credits = ntohl(p->rm_credit); | ||
| 242 | |||
| 243 | if (credits == 0) | ||
| 244 | credits = 1; /* don't deadlock */ | ||
| 245 | else if (credits > rep->rr_buffer->rb_max_requests) | ||
| 246 | credits = rep->rr_buffer->rb_max_requests; | ||
| 247 | atomic_set(&rep->rr_buffer->rb_credits, credits); | ||
| 248 | } | ||
| 249 | |||
| 250 | out_schedule: | ||
| 251 | rpcrdma_schedule_tasklet(rep); | ||
| 252 | } | ||
| 253 | |||
| 254 | static int | ||
| 255 | rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) | ||
| 256 | { | ||
| 257 | struct ib_wc *wcs; | ||
| 258 | int budget, count, rc; | ||
| 259 | |||
| 260 | budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; | ||
| 261 | do { | ||
| 262 | wcs = ep->rep_recv_wcs; | ||
| 263 | |||
| 264 | rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); | ||
| 265 | if (rc <= 0) | ||
| 266 | return rc; | ||
| 267 | |||
| 268 | count = rc; | ||
| 269 | while (count-- > 0) | ||
| 270 | rpcrdma_recvcq_process_wc(wcs++); | ||
| 271 | } while (rc == RPCRDMA_POLLSIZE && --budget); | ||
| 230 | return 0; | 272 | return 0; |
| 231 | } | 273 | } |
| 232 | 274 | ||
| 233 | /* | 275 | /* |
| 234 | * rpcrdma_cq_event_upcall | 276 | * Handle receive completions. |
| 235 | * | 277 | * |
| 236 | * This upcall handles recv, send, bind and unbind events. | ||
| 237 | * It is reentrant but processes single events in order to maintain | 278 | * It is reentrant but processes single events in order to maintain |
| 238 | * ordering of receives to keep server credits. | 279 | * ordering of receives to keep server credits. |
| 239 | * | 280 | * |
| @@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq) | |||
| 242 | * connection shutdown. That is, the structures required for | 283 | * connection shutdown. That is, the structures required for |
| 243 | * the completion of the reply handler must remain intact until | 284 | * the completion of the reply handler must remain intact until |
| 244 | * all memory has been reclaimed. | 285 | * all memory has been reclaimed. |
| 245 | * | ||
| 246 | * Note that send events are suppressed and do not result in an upcall. | ||
| 247 | */ | 286 | */ |
| 248 | static void | 287 | static void |
| 249 | rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) | 288 | rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) |
| 250 | { | 289 | { |
| 290 | struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; | ||
| 251 | int rc; | 291 | int rc; |
| 252 | 292 | ||
| 253 | rc = rpcrdma_cq_poll(cq); | 293 | rc = rpcrdma_recvcq_poll(cq, ep); |
| 254 | if (rc) | 294 | if (rc) { |
| 295 | dprintk("RPC: %s: ib_poll_cq failed: %i\n", | ||
| 296 | __func__, rc); | ||
| 255 | return; | 297 | return; |
| 298 | } | ||
| 256 | 299 | ||
| 257 | rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | 300 | rc = ib_req_notify_cq(cq, |
| 258 | if (rc) { | 301 | IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); |
| 259 | dprintk("RPC: %s: ib_req_notify_cq failed %i\n", | 302 | if (rc == 0) |
| 303 | return; | ||
| 304 | if (rc < 0) { | ||
| 305 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 260 | __func__, rc); | 306 | __func__, rc); |
| 261 | return; | 307 | return; |
| 262 | } | 308 | } |
| 263 | 309 | ||
| 264 | rpcrdma_cq_poll(cq); | 310 | rpcrdma_recvcq_poll(cq, ep); |
| 265 | } | 311 | } |
| 266 | 312 | ||
| 267 | #ifdef RPC_DEBUG | 313 | #ifdef RPC_DEBUG |
| @@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 493 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | 539 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; |
| 494 | } | 540 | } |
| 495 | 541 | ||
| 496 | switch (memreg) { | 542 | if (memreg == RPCRDMA_FRMR) { |
| 497 | case RPCRDMA_MEMWINDOWS: | ||
| 498 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 499 | if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { | ||
| 500 | dprintk("RPC: %s: MEMWINDOWS registration " | ||
| 501 | "specified but not supported by adapter, " | ||
| 502 | "using slower RPCRDMA_REGISTER\n", | ||
| 503 | __func__); | ||
| 504 | memreg = RPCRDMA_REGISTER; | ||
| 505 | } | ||
| 506 | break; | ||
| 507 | case RPCRDMA_MTHCAFMR: | ||
| 508 | if (!ia->ri_id->device->alloc_fmr) { | ||
| 509 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 510 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 511 | "specified but not supported by adapter, " | ||
| 512 | "using riskier RPCRDMA_ALLPHYSICAL\n", | ||
| 513 | __func__); | ||
| 514 | memreg = RPCRDMA_ALLPHYSICAL; | ||
| 515 | #else | ||
| 516 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 517 | "specified but not supported by adapter, " | ||
| 518 | "using slower RPCRDMA_REGISTER\n", | ||
| 519 | __func__); | ||
| 520 | memreg = RPCRDMA_REGISTER; | ||
| 521 | #endif | ||
| 522 | } | ||
| 523 | break; | ||
| 524 | case RPCRDMA_FRMR: | ||
| 525 | /* Requires both frmr reg and local dma lkey */ | 543 | /* Requires both frmr reg and local dma lkey */ |
| 526 | if ((devattr.device_cap_flags & | 544 | if ((devattr.device_cap_flags & |
| 527 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != | 545 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != |
| 528 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { | 546 | (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { |
| 529 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 530 | dprintk("RPC: %s: FRMR registration " | 547 | dprintk("RPC: %s: FRMR registration " |
| 531 | "specified but not supported by adapter, " | 548 | "not supported by HCA\n", __func__); |
| 532 | "using riskier RPCRDMA_ALLPHYSICAL\n", | 549 | memreg = RPCRDMA_MTHCAFMR; |
| 533 | __func__); | 550 | } else { |
| 551 | /* Mind the ia limit on FRMR page list depth */ | ||
| 552 | ia->ri_max_frmr_depth = min_t(unsigned int, | ||
| 553 | RPCRDMA_MAX_DATA_SEGS, | ||
| 554 | devattr.max_fast_reg_page_list_len); | ||
| 555 | } | ||
| 556 | } | ||
| 557 | if (memreg == RPCRDMA_MTHCAFMR) { | ||
| 558 | if (!ia->ri_id->device->alloc_fmr) { | ||
| 559 | dprintk("RPC: %s: MTHCAFMR registration " | ||
| 560 | "not supported by HCA\n", __func__); | ||
| 561 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 534 | memreg = RPCRDMA_ALLPHYSICAL; | 562 | memreg = RPCRDMA_ALLPHYSICAL; |
| 535 | #else | 563 | #else |
| 536 | dprintk("RPC: %s: FRMR registration " | 564 | rc = -ENOMEM; |
| 537 | "specified but not supported by adapter, " | 565 | goto out2; |
| 538 | "using slower RPCRDMA_REGISTER\n", | ||
| 539 | __func__); | ||
| 540 | memreg = RPCRDMA_REGISTER; | ||
| 541 | #endif | 566 | #endif |
| 542 | } | 567 | } |
| 543 | break; | ||
| 544 | } | 568 | } |
| 545 | 569 | ||
| 546 | /* | 570 | /* |
| @@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 552 | * adapter. | 576 | * adapter. |
| 553 | */ | 577 | */ |
| 554 | switch (memreg) { | 578 | switch (memreg) { |
| 555 | case RPCRDMA_BOUNCEBUFFERS: | ||
| 556 | case RPCRDMA_REGISTER: | ||
| 557 | case RPCRDMA_FRMR: | 579 | case RPCRDMA_FRMR: |
| 558 | break; | 580 | break; |
| 559 | #if RPCRDMA_PERSISTENT_REGISTRATION | 581 | #if RPCRDMA_PERSISTENT_REGISTRATION |
| @@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
| 563 | IB_ACCESS_REMOTE_READ; | 585 | IB_ACCESS_REMOTE_READ; |
| 564 | goto register_setup; | 586 | goto register_setup; |
| 565 | #endif | 587 | #endif |
| 566 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 567 | case RPCRDMA_MEMWINDOWS: | ||
| 568 | mem_priv = IB_ACCESS_LOCAL_WRITE | | ||
| 569 | IB_ACCESS_MW_BIND; | ||
| 570 | goto register_setup; | ||
| 571 | case RPCRDMA_MTHCAFMR: | 588 | case RPCRDMA_MTHCAFMR: |
| 572 | if (ia->ri_have_dma_lkey) | 589 | if (ia->ri_have_dma_lkey) |
| 573 | break; | 590 | break; |
| 574 | mem_priv = IB_ACCESS_LOCAL_WRITE; | 591 | mem_priv = IB_ACCESS_LOCAL_WRITE; |
| 592 | #if RPCRDMA_PERSISTENT_REGISTRATION | ||
| 575 | register_setup: | 593 | register_setup: |
| 594 | #endif | ||
| 576 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); | 595 | ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); |
| 577 | if (IS_ERR(ia->ri_bind_mem)) { | 596 | if (IS_ERR(ia->ri_bind_mem)) { |
| 578 | printk(KERN_ALERT "%s: ib_get_dma_mr for " | 597 | printk(KERN_ALERT "%s: ib_get_dma_mr for " |
| 579 | "phys register failed with %lX\n\t" | 598 | "phys register failed with %lX\n", |
| 580 | "Will continue with degraded performance\n", | ||
| 581 | __func__, PTR_ERR(ia->ri_bind_mem)); | 599 | __func__, PTR_ERR(ia->ri_bind_mem)); |
| 582 | memreg = RPCRDMA_REGISTER; | 600 | rc = -ENOMEM; |
| 583 | ia->ri_bind_mem = NULL; | 601 | goto out2; |
| 584 | } | 602 | } |
| 585 | break; | 603 | break; |
| 586 | default: | 604 | default: |
| 587 | printk(KERN_ERR "%s: invalid memory registration mode %d\n", | 605 | printk(KERN_ERR "RPC: Unsupported memory " |
| 588 | __func__, memreg); | 606 | "registration mode: %d\n", memreg); |
| 589 | rc = -EINVAL; | 607 | rc = -ENOMEM; |
| 590 | goto out2; | 608 | goto out2; |
| 591 | } | 609 | } |
| 592 | dprintk("RPC: %s: memory registration strategy is %d\n", | 610 | dprintk("RPC: %s: memory registration strategy is %d\n", |
| @@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 640 | struct rpcrdma_create_data_internal *cdata) | 658 | struct rpcrdma_create_data_internal *cdata) |
| 641 | { | 659 | { |
| 642 | struct ib_device_attr devattr; | 660 | struct ib_device_attr devattr; |
| 661 | struct ib_cq *sendcq, *recvcq; | ||
| 643 | int rc, err; | 662 | int rc, err; |
| 644 | 663 | ||
| 645 | rc = ib_query_device(ia->ri_id->device, &devattr); | 664 | rc = ib_query_device(ia->ri_id->device, &devattr); |
| @@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 659 | ep->rep_attr.srq = NULL; | 678 | ep->rep_attr.srq = NULL; |
| 660 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; | 679 | ep->rep_attr.cap.max_send_wr = cdata->max_requests; |
| 661 | switch (ia->ri_memreg_strategy) { | 680 | switch (ia->ri_memreg_strategy) { |
| 662 | case RPCRDMA_FRMR: | 681 | case RPCRDMA_FRMR: { |
| 682 | int depth = 7; | ||
| 683 | |||
| 663 | /* Add room for frmr register and invalidate WRs. | 684 | /* Add room for frmr register and invalidate WRs. |
| 664 | * 1. FRMR reg WR for head | 685 | * 1. FRMR reg WR for head |
| 665 | * 2. FRMR invalidate WR for head | 686 | * 2. FRMR invalidate WR for head |
| 666 | * 3. FRMR reg WR for pagelist | 687 | * 3. N FRMR reg WRs for pagelist |
| 667 | * 4. FRMR invalidate WR for pagelist | 688 | * 4. N FRMR invalidate WRs for pagelist |
| 668 | * 5. FRMR reg WR for tail | 689 | * 5. FRMR reg WR for tail |
| 669 | * 6. FRMR invalidate WR for tail | 690 | * 6. FRMR invalidate WR for tail |
| 670 | * 7. The RDMA_SEND WR | 691 | * 7. The RDMA_SEND WR |
| 671 | */ | 692 | */ |
| 672 | ep->rep_attr.cap.max_send_wr *= 7; | 693 | |
| 694 | /* Calculate N if the device max FRMR depth is smaller than | ||
| 695 | * RPCRDMA_MAX_DATA_SEGS. | ||
| 696 | */ | ||
| 697 | if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { | ||
| 698 | int delta = RPCRDMA_MAX_DATA_SEGS - | ||
| 699 | ia->ri_max_frmr_depth; | ||
| 700 | |||
| 701 | do { | ||
| 702 | depth += 2; /* FRMR reg + invalidate */ | ||
| 703 | delta -= ia->ri_max_frmr_depth; | ||
| 704 | } while (delta > 0); | ||
| 705 | |||
| 706 | } | ||
| 707 | ep->rep_attr.cap.max_send_wr *= depth; | ||
| 673 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { | 708 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { |
| 674 | cdata->max_requests = devattr.max_qp_wr / 7; | 709 | cdata->max_requests = devattr.max_qp_wr / depth; |
| 675 | if (!cdata->max_requests) | 710 | if (!cdata->max_requests) |
| 676 | return -EINVAL; | 711 | return -EINVAL; |
| 677 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; | 712 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * |
| 713 | depth; | ||
| 678 | } | 714 | } |
| 679 | break; | 715 | break; |
| 680 | case RPCRDMA_MEMWINDOWS_ASYNC: | 716 | } |
| 681 | case RPCRDMA_MEMWINDOWS: | ||
| 682 | /* Add room for mw_binds+unbinds - overkill! */ | ||
| 683 | ep->rep_attr.cap.max_send_wr++; | ||
| 684 | ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); | ||
| 685 | if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) | ||
| 686 | return -EINVAL; | ||
| 687 | break; | ||
| 688 | default: | 717 | default: |
| 689 | break; | 718 | break; |
| 690 | } | 719 | } |
| @@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 705 | ep->rep_attr.cap.max_recv_sge); | 734 | ep->rep_attr.cap.max_recv_sge); |
| 706 | 735 | ||
| 707 | /* set trigger for requesting send completion */ | 736 | /* set trigger for requesting send completion */ |
| 708 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; | 737 | ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; |
| 709 | switch (ia->ri_memreg_strategy) { | ||
| 710 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 711 | case RPCRDMA_MEMWINDOWS: | ||
| 712 | ep->rep_cqinit -= RPCRDMA_MAX_SEGS; | ||
| 713 | break; | ||
| 714 | default: | ||
| 715 | break; | ||
| 716 | } | ||
| 717 | if (ep->rep_cqinit <= 2) | 738 | if (ep->rep_cqinit <= 2) |
| 718 | ep->rep_cqinit = 0; | 739 | ep->rep_cqinit = 0; |
| 719 | INIT_CQCOUNT(ep); | 740 | INIT_CQCOUNT(ep); |
| 720 | ep->rep_ia = ia; | 741 | ep->rep_ia = ia; |
| 721 | init_waitqueue_head(&ep->rep_connect_wait); | 742 | init_waitqueue_head(&ep->rep_connect_wait); |
| 743 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); | ||
| 722 | 744 | ||
| 723 | /* | 745 | sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, |
| 724 | * Create a single cq for receive dto and mw_bind (only ever | 746 | rpcrdma_cq_async_error_upcall, ep, |
| 725 | * care about unbind, really). Send completions are suppressed. | ||
| 726 | * Use single threaded tasklet upcalls to maintain ordering. | ||
| 727 | */ | ||
| 728 | ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, | ||
| 729 | rpcrdma_cq_async_error_upcall, NULL, | ||
| 730 | ep->rep_attr.cap.max_recv_wr + | ||
| 731 | ep->rep_attr.cap.max_send_wr + 1, 0); | 747 | ep->rep_attr.cap.max_send_wr + 1, 0); |
| 732 | if (IS_ERR(ep->rep_cq)) { | 748 | if (IS_ERR(sendcq)) { |
| 733 | rc = PTR_ERR(ep->rep_cq); | 749 | rc = PTR_ERR(sendcq); |
| 734 | dprintk("RPC: %s: ib_create_cq failed: %i\n", | 750 | dprintk("RPC: %s: failed to create send CQ: %i\n", |
| 735 | __func__, rc); | 751 | __func__, rc); |
| 736 | goto out1; | 752 | goto out1; |
| 737 | } | 753 | } |
| 738 | 754 | ||
| 739 | rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); | 755 | rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP); |
| 756 | if (rc) { | ||
| 757 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | ||
| 758 | __func__, rc); | ||
| 759 | goto out2; | ||
| 760 | } | ||
| 761 | |||
| 762 | recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, | ||
| 763 | rpcrdma_cq_async_error_upcall, ep, | ||
| 764 | ep->rep_attr.cap.max_recv_wr + 1, 0); | ||
| 765 | if (IS_ERR(recvcq)) { | ||
| 766 | rc = PTR_ERR(recvcq); | ||
| 767 | dprintk("RPC: %s: failed to create recv CQ: %i\n", | ||
| 768 | __func__, rc); | ||
| 769 | goto out2; | ||
| 770 | } | ||
| 771 | |||
| 772 | rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP); | ||
| 740 | if (rc) { | 773 | if (rc) { |
| 741 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", | 774 | dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", |
| 742 | __func__, rc); | 775 | __func__, rc); |
| 776 | ib_destroy_cq(recvcq); | ||
| 743 | goto out2; | 777 | goto out2; |
| 744 | } | 778 | } |
| 745 | 779 | ||
| 746 | ep->rep_attr.send_cq = ep->rep_cq; | 780 | ep->rep_attr.send_cq = sendcq; |
| 747 | ep->rep_attr.recv_cq = ep->rep_cq; | 781 | ep->rep_attr.recv_cq = recvcq; |
| 748 | 782 | ||
| 749 | /* Initialize cma parameters */ | 783 | /* Initialize cma parameters */ |
| 750 | 784 | ||
| @@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 754 | 788 | ||
| 755 | /* Client offers RDMA Read but does not initiate */ | 789 | /* Client offers RDMA Read but does not initiate */ |
| 756 | ep->rep_remote_cma.initiator_depth = 0; | 790 | ep->rep_remote_cma.initiator_depth = 0; |
| 757 | if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) | 791 | if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
| 758 | ep->rep_remote_cma.responder_resources = 0; | ||
| 759 | else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ | ||
| 760 | ep->rep_remote_cma.responder_resources = 32; | 792 | ep->rep_remote_cma.responder_resources = 32; |
| 761 | else | 793 | else |
| 762 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; | 794 | ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; |
| @@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
| 768 | return 0; | 800 | return 0; |
| 769 | 801 | ||
| 770 | out2: | 802 | out2: |
| 771 | err = ib_destroy_cq(ep->rep_cq); | 803 | err = ib_destroy_cq(sendcq); |
| 772 | if (err) | 804 | if (err) |
| 773 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 805 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
| 774 | __func__, err); | 806 | __func__, err); |
| @@ -782,11 +814,8 @@ out1: | |||
| 782 | * Disconnect and destroy endpoint. After this, the only | 814 | * Disconnect and destroy endpoint. After this, the only |
| 783 | * valid operations on the ep are to free it (if dynamically | 815 | * valid operations on the ep are to free it (if dynamically |
| 784 | * allocated) or re-create it. | 816 | * allocated) or re-create it. |
| 785 | * | ||
| 786 | * The caller's error handling must be sure to not leak the endpoint | ||
| 787 | * if this function fails. | ||
| 788 | */ | 817 | */ |
| 789 | int | 818 | void |
| 790 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | 819 | rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) |
| 791 | { | 820 | { |
| 792 | int rc; | 821 | int rc; |
| @@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 794 | dprintk("RPC: %s: entering, connected is %d\n", | 823 | dprintk("RPC: %s: entering, connected is %d\n", |
| 795 | __func__, ep->rep_connected); | 824 | __func__, ep->rep_connected); |
| 796 | 825 | ||
| 826 | cancel_delayed_work_sync(&ep->rep_connect_worker); | ||
| 827 | |||
| 797 | if (ia->ri_id->qp) { | 828 | if (ia->ri_id->qp) { |
| 798 | rc = rpcrdma_ep_disconnect(ep, ia); | 829 | rc = rpcrdma_ep_disconnect(ep, ia); |
| 799 | if (rc) | 830 | if (rc) |
| @@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 809 | ep->rep_pad_mr = NULL; | 840 | ep->rep_pad_mr = NULL; |
| 810 | } | 841 | } |
| 811 | 842 | ||
| 812 | rpcrdma_clean_cq(ep->rep_cq); | 843 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
| 813 | rc = ib_destroy_cq(ep->rep_cq); | 844 | rc = ib_destroy_cq(ep->rep_attr.recv_cq); |
| 814 | if (rc) | 845 | if (rc) |
| 815 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | 846 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", |
| 816 | __func__, rc); | 847 | __func__, rc); |
| 817 | 848 | ||
| 818 | return rc; | 849 | rpcrdma_clean_cq(ep->rep_attr.send_cq); |
| 850 | rc = ib_destroy_cq(ep->rep_attr.send_cq); | ||
| 851 | if (rc) | ||
| 852 | dprintk("RPC: %s: ib_destroy_cq returned %i\n", | ||
| 853 | __func__, rc); | ||
| 819 | } | 854 | } |
| 820 | 855 | ||
| 821 | /* | 856 | /* |
| @@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 831 | if (ep->rep_connected != 0) { | 866 | if (ep->rep_connected != 0) { |
| 832 | struct rpcrdma_xprt *xprt; | 867 | struct rpcrdma_xprt *xprt; |
| 833 | retry: | 868 | retry: |
| 869 | dprintk("RPC: %s: reconnecting...\n", __func__); | ||
| 834 | rc = rpcrdma_ep_disconnect(ep, ia); | 870 | rc = rpcrdma_ep_disconnect(ep, ia); |
| 835 | if (rc && rc != -ENOTCONN) | 871 | if (rc && rc != -ENOTCONN) |
| 836 | dprintk("RPC: %s: rpcrdma_ep_disconnect" | 872 | dprintk("RPC: %s: rpcrdma_ep_disconnect" |
| 837 | " status %i\n", __func__, rc); | 873 | " status %i\n", __func__, rc); |
| 838 | rpcrdma_clean_cq(ep->rep_cq); | 874 | |
| 875 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); | ||
| 876 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
| 839 | 877 | ||
| 840 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 878 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
| 841 | id = rpcrdma_create_id(xprt, ia, | 879 | id = rpcrdma_create_id(xprt, ia, |
| 842 | (struct sockaddr *)&xprt->rx_data.addr); | 880 | (struct sockaddr *)&xprt->rx_data.addr); |
| 843 | if (IS_ERR(id)) { | 881 | if (IS_ERR(id)) { |
| 844 | rc = PTR_ERR(id); | 882 | rc = -EHOSTUNREACH; |
| 845 | goto out; | 883 | goto out; |
| 846 | } | 884 | } |
| 847 | /* TEMP TEMP TEMP - fail if new device: | 885 | /* TEMP TEMP TEMP - fail if new device: |
| @@ -855,35 +893,32 @@ retry: | |||
| 855 | printk("RPC: %s: can't reconnect on " | 893 | printk("RPC: %s: can't reconnect on " |
| 856 | "different device!\n", __func__); | 894 | "different device!\n", __func__); |
| 857 | rdma_destroy_id(id); | 895 | rdma_destroy_id(id); |
| 858 | rc = -ENETDOWN; | 896 | rc = -ENETUNREACH; |
| 859 | goto out; | 897 | goto out; |
| 860 | } | 898 | } |
| 861 | /* END TEMP */ | 899 | /* END TEMP */ |
| 900 | rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); | ||
| 901 | if (rc) { | ||
| 902 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 903 | __func__, rc); | ||
| 904 | rdma_destroy_id(id); | ||
| 905 | rc = -ENETUNREACH; | ||
| 906 | goto out; | ||
| 907 | } | ||
| 862 | rdma_destroy_qp(ia->ri_id); | 908 | rdma_destroy_qp(ia->ri_id); |
| 863 | rdma_destroy_id(ia->ri_id); | 909 | rdma_destroy_id(ia->ri_id); |
| 864 | ia->ri_id = id; | 910 | ia->ri_id = id; |
| 911 | } else { | ||
| 912 | dprintk("RPC: %s: connecting...\n", __func__); | ||
| 913 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
| 914 | if (rc) { | ||
| 915 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 916 | __func__, rc); | ||
| 917 | /* do not update ep->rep_connected */ | ||
| 918 | return -ENETUNREACH; | ||
| 919 | } | ||
| 865 | } | 920 | } |
| 866 | 921 | ||
| 867 | rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); | ||
| 868 | if (rc) { | ||
| 869 | dprintk("RPC: %s: rdma_create_qp failed %i\n", | ||
| 870 | __func__, rc); | ||
| 871 | goto out; | ||
| 872 | } | ||
| 873 | |||
| 874 | /* XXX Tavor device performs badly with 2K MTU! */ | ||
| 875 | if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { | ||
| 876 | struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); | ||
| 877 | if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && | ||
| 878 | (pcid->vendor == PCI_VENDOR_ID_MELLANOX || | ||
| 879 | pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { | ||
| 880 | struct ib_qp_attr attr = { | ||
| 881 | .path_mtu = IB_MTU_1024 | ||
| 882 | }; | ||
| 883 | rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | ep->rep_connected = 0; | 922 | ep->rep_connected = 0; |
| 888 | 923 | ||
| 889 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); | 924 | rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); |
| @@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) | |||
| 944 | { | 979 | { |
| 945 | int rc; | 980 | int rc; |
| 946 | 981 | ||
| 947 | rpcrdma_clean_cq(ep->rep_cq); | 982 | rpcrdma_clean_cq(ep->rep_attr.recv_cq); |
| 983 | rpcrdma_clean_cq(ep->rep_attr.send_cq); | ||
| 948 | rc = rdma_disconnect(ia->ri_id); | 984 | rc = rdma_disconnect(ia->ri_id); |
| 949 | if (!rc) { | 985 | if (!rc) { |
| 950 | /* returns without wait if not connected */ | 986 | /* returns without wait if not connected */ |
| @@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 967 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) | 1003 | struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) |
| 968 | { | 1004 | { |
| 969 | char *p; | 1005 | char *p; |
| 970 | size_t len; | 1006 | size_t len, rlen, wlen; |
| 971 | int i, rc; | 1007 | int i, rc; |
| 972 | struct rpcrdma_mw *r; | 1008 | struct rpcrdma_mw *r; |
| 973 | 1009 | ||
| @@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 997 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | 1033 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * |
| 998 | sizeof(struct rpcrdma_mw); | 1034 | sizeof(struct rpcrdma_mw); |
| 999 | break; | 1035 | break; |
| 1000 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1001 | case RPCRDMA_MEMWINDOWS: | ||
| 1002 | len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * | ||
| 1003 | sizeof(struct rpcrdma_mw); | ||
| 1004 | break; | ||
| 1005 | default: | 1036 | default: |
| 1006 | break; | 1037 | break; |
| 1007 | } | 1038 | } |
| @@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1032 | } | 1063 | } |
| 1033 | p += cdata->padding; | 1064 | p += cdata->padding; |
| 1034 | 1065 | ||
| 1035 | /* | ||
| 1036 | * Allocate the fmr's, or mw's for mw_bind chunk registration. | ||
| 1037 | * We "cycle" the mw's in order to minimize rkey reuse, | ||
| 1038 | * and also reduce unbind-to-bind collision. | ||
| 1039 | */ | ||
| 1040 | INIT_LIST_HEAD(&buf->rb_mws); | 1066 | INIT_LIST_HEAD(&buf->rb_mws); |
| 1041 | r = (struct rpcrdma_mw *)p; | 1067 | r = (struct rpcrdma_mw *)p; |
| 1042 | switch (ia->ri_memreg_strategy) { | 1068 | switch (ia->ri_memreg_strategy) { |
| 1043 | case RPCRDMA_FRMR: | 1069 | case RPCRDMA_FRMR: |
| 1044 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { | 1070 | for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { |
| 1045 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, | 1071 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, |
| 1046 | RPCRDMA_MAX_SEGS); | 1072 | ia->ri_max_frmr_depth); |
| 1047 | if (IS_ERR(r->r.frmr.fr_mr)) { | 1073 | if (IS_ERR(r->r.frmr.fr_mr)) { |
| 1048 | rc = PTR_ERR(r->r.frmr.fr_mr); | 1074 | rc = PTR_ERR(r->r.frmr.fr_mr); |
| 1049 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" | 1075 | dprintk("RPC: %s: ib_alloc_fast_reg_mr" |
| 1050 | " failed %i\n", __func__, rc); | 1076 | " failed %i\n", __func__, rc); |
| 1051 | goto out; | 1077 | goto out; |
| 1052 | } | 1078 | } |
| 1053 | r->r.frmr.fr_pgl = | 1079 | r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list( |
| 1054 | ib_alloc_fast_reg_page_list(ia->ri_id->device, | 1080 | ia->ri_id->device, |
| 1055 | RPCRDMA_MAX_SEGS); | 1081 | ia->ri_max_frmr_depth); |
| 1056 | if (IS_ERR(r->r.frmr.fr_pgl)) { | 1082 | if (IS_ERR(r->r.frmr.fr_pgl)) { |
| 1057 | rc = PTR_ERR(r->r.frmr.fr_pgl); | 1083 | rc = PTR_ERR(r->r.frmr.fr_pgl); |
| 1058 | dprintk("RPC: %s: " | 1084 | dprintk("RPC: %s: " |
| 1059 | "ib_alloc_fast_reg_page_list " | 1085 | "ib_alloc_fast_reg_page_list " |
| 1060 | "failed %i\n", __func__, rc); | 1086 | "failed %i\n", __func__, rc); |
| 1087 | |||
| 1088 | ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1061 | goto out; | 1089 | goto out; |
| 1062 | } | 1090 | } |
| 1063 | list_add(&r->mw_list, &buf->rb_mws); | 1091 | list_add(&r->mw_list, &buf->rb_mws); |
| @@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1082 | ++r; | 1110 | ++r; |
| 1083 | } | 1111 | } |
| 1084 | break; | 1112 | break; |
| 1085 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1086 | case RPCRDMA_MEMWINDOWS: | ||
| 1087 | /* Allocate one extra request's worth, for full cycling */ | ||
| 1088 | for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { | ||
| 1089 | r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1); | ||
| 1090 | if (IS_ERR(r->r.mw)) { | ||
| 1091 | rc = PTR_ERR(r->r.mw); | ||
| 1092 | dprintk("RPC: %s: ib_alloc_mw" | ||
| 1093 | " failed %i\n", __func__, rc); | ||
| 1094 | goto out; | ||
| 1095 | } | ||
| 1096 | list_add(&r->mw_list, &buf->rb_mws); | ||
| 1097 | ++r; | ||
| 1098 | } | ||
| 1099 | break; | ||
| 1100 | default: | 1113 | default: |
| 1101 | break; | 1114 | break; |
| 1102 | } | 1115 | } |
| @@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1105 | * Allocate/init the request/reply buffers. Doing this | 1118 | * Allocate/init the request/reply buffers. Doing this |
| 1106 | * using kmalloc for now -- one for each buf. | 1119 | * using kmalloc for now -- one for each buf. |
| 1107 | */ | 1120 | */ |
| 1121 | wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); | ||
| 1122 | rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); | ||
| 1123 | dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", | ||
| 1124 | __func__, wlen, rlen); | ||
| 1125 | |||
| 1108 | for (i = 0; i < buf->rb_max_requests; i++) { | 1126 | for (i = 0; i < buf->rb_max_requests; i++) { |
| 1109 | struct rpcrdma_req *req; | 1127 | struct rpcrdma_req *req; |
| 1110 | struct rpcrdma_rep *rep; | 1128 | struct rpcrdma_rep *rep; |
| 1111 | 1129 | ||
| 1112 | len = cdata->inline_wsize + sizeof(struct rpcrdma_req); | 1130 | req = kmalloc(wlen, GFP_KERNEL); |
| 1113 | /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */ | ||
| 1114 | /* Typical ~2400b, so rounding up saves work later */ | ||
| 1115 | if (len < 4096) | ||
| 1116 | len = 4096; | ||
| 1117 | req = kmalloc(len, GFP_KERNEL); | ||
| 1118 | if (req == NULL) { | 1131 | if (req == NULL) { |
| 1119 | dprintk("RPC: %s: request buffer %d alloc" | 1132 | dprintk("RPC: %s: request buffer %d alloc" |
| 1120 | " failed\n", __func__, i); | 1133 | " failed\n", __func__, i); |
| @@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1126 | buf->rb_send_bufs[i]->rl_buffer = buf; | 1139 | buf->rb_send_bufs[i]->rl_buffer = buf; |
| 1127 | 1140 | ||
| 1128 | rc = rpcrdma_register_internal(ia, req->rl_base, | 1141 | rc = rpcrdma_register_internal(ia, req->rl_base, |
| 1129 | len - offsetof(struct rpcrdma_req, rl_base), | 1142 | wlen - offsetof(struct rpcrdma_req, rl_base), |
| 1130 | &buf->rb_send_bufs[i]->rl_handle, | 1143 | &buf->rb_send_bufs[i]->rl_handle, |
| 1131 | &buf->rb_send_bufs[i]->rl_iov); | 1144 | &buf->rb_send_bufs[i]->rl_iov); |
| 1132 | if (rc) | 1145 | if (rc) |
| 1133 | goto out; | 1146 | goto out; |
| 1134 | 1147 | ||
| 1135 | buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); | 1148 | buf->rb_send_bufs[i]->rl_size = wlen - |
| 1149 | sizeof(struct rpcrdma_req); | ||
| 1136 | 1150 | ||
| 1137 | len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); | 1151 | rep = kmalloc(rlen, GFP_KERNEL); |
| 1138 | rep = kmalloc(len, GFP_KERNEL); | ||
| 1139 | if (rep == NULL) { | 1152 | if (rep == NULL) { |
| 1140 | dprintk("RPC: %s: reply buffer %d alloc failed\n", | 1153 | dprintk("RPC: %s: reply buffer %d alloc failed\n", |
| 1141 | __func__, i); | 1154 | __func__, i); |
| @@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, | |||
| 1145 | memset(rep, 0, sizeof(struct rpcrdma_rep)); | 1158 | memset(rep, 0, sizeof(struct rpcrdma_rep)); |
| 1146 | buf->rb_recv_bufs[i] = rep; | 1159 | buf->rb_recv_bufs[i] = rep; |
| 1147 | buf->rb_recv_bufs[i]->rr_buffer = buf; | 1160 | buf->rb_recv_bufs[i]->rr_buffer = buf; |
| 1148 | init_waitqueue_head(&rep->rr_unbind); | ||
| 1149 | 1161 | ||
| 1150 | rc = rpcrdma_register_internal(ia, rep->rr_base, | 1162 | rc = rpcrdma_register_internal(ia, rep->rr_base, |
| 1151 | len - offsetof(struct rpcrdma_rep, rr_base), | 1163 | rlen - offsetof(struct rpcrdma_rep, rr_base), |
| 1152 | &buf->rb_recv_bufs[i]->rr_handle, | 1164 | &buf->rb_recv_bufs[i]->rr_handle, |
| 1153 | &buf->rb_recv_bufs[i]->rr_iov); | 1165 | &buf->rb_recv_bufs[i]->rr_iov); |
| 1154 | if (rc) | 1166 | if (rc) |
| @@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1179 | 1191 | ||
| 1180 | /* clean up in reverse order from create | 1192 | /* clean up in reverse order from create |
| 1181 | * 1. recv mr memory (mr free, then kfree) | 1193 | * 1. recv mr memory (mr free, then kfree) |
| 1182 | * 1a. bind mw memory | ||
| 1183 | * 2. send mr memory (mr free, then kfree) | 1194 | * 2. send mr memory (mr free, then kfree) |
| 1184 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] | 1195 | * 3. padding (if any) [moved to rpcrdma_ep_destroy] |
| 1185 | * 4. arrays | 1196 | * 4. arrays |
| @@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1194 | kfree(buf->rb_recv_bufs[i]); | 1205 | kfree(buf->rb_recv_bufs[i]); |
| 1195 | } | 1206 | } |
| 1196 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { | 1207 | if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { |
| 1197 | while (!list_empty(&buf->rb_mws)) { | ||
| 1198 | r = list_entry(buf->rb_mws.next, | ||
| 1199 | struct rpcrdma_mw, mw_list); | ||
| 1200 | list_del(&r->mw_list); | ||
| 1201 | switch (ia->ri_memreg_strategy) { | ||
| 1202 | case RPCRDMA_FRMR: | ||
| 1203 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1204 | if (rc) | ||
| 1205 | dprintk("RPC: %s:" | ||
| 1206 | " ib_dereg_mr" | ||
| 1207 | " failed %i\n", | ||
| 1208 | __func__, rc); | ||
| 1209 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1210 | break; | ||
| 1211 | case RPCRDMA_MTHCAFMR: | ||
| 1212 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 1213 | if (rc) | ||
| 1214 | dprintk("RPC: %s:" | ||
| 1215 | " ib_dealloc_fmr" | ||
| 1216 | " failed %i\n", | ||
| 1217 | __func__, rc); | ||
| 1218 | break; | ||
| 1219 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1220 | case RPCRDMA_MEMWINDOWS: | ||
| 1221 | rc = ib_dealloc_mw(r->r.mw); | ||
| 1222 | if (rc) | ||
| 1223 | dprintk("RPC: %s:" | ||
| 1224 | " ib_dealloc_mw" | ||
| 1225 | " failed %i\n", | ||
| 1226 | __func__, rc); | ||
| 1227 | break; | ||
| 1228 | default: | ||
| 1229 | break; | ||
| 1230 | } | ||
| 1231 | } | ||
| 1232 | rpcrdma_deregister_internal(ia, | 1208 | rpcrdma_deregister_internal(ia, |
| 1233 | buf->rb_send_bufs[i]->rl_handle, | 1209 | buf->rb_send_bufs[i]->rl_handle, |
| 1234 | &buf->rb_send_bufs[i]->rl_iov); | 1210 | &buf->rb_send_bufs[i]->rl_iov); |
| @@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
| 1236 | } | 1212 | } |
| 1237 | } | 1213 | } |
| 1238 | 1214 | ||
| 1215 | while (!list_empty(&buf->rb_mws)) { | ||
| 1216 | r = list_entry(buf->rb_mws.next, | ||
| 1217 | struct rpcrdma_mw, mw_list); | ||
| 1218 | list_del(&r->mw_list); | ||
| 1219 | switch (ia->ri_memreg_strategy) { | ||
| 1220 | case RPCRDMA_FRMR: | ||
| 1221 | rc = ib_dereg_mr(r->r.frmr.fr_mr); | ||
| 1222 | if (rc) | ||
| 1223 | dprintk("RPC: %s:" | ||
| 1224 | " ib_dereg_mr" | ||
| 1225 | " failed %i\n", | ||
| 1226 | __func__, rc); | ||
| 1227 | ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); | ||
| 1228 | break; | ||
| 1229 | case RPCRDMA_MTHCAFMR: | ||
| 1230 | rc = ib_dealloc_fmr(r->r.fmr); | ||
| 1231 | if (rc) | ||
| 1232 | dprintk("RPC: %s:" | ||
| 1233 | " ib_dealloc_fmr" | ||
| 1234 | " failed %i\n", | ||
| 1235 | __func__, rc); | ||
| 1236 | break; | ||
| 1237 | default: | ||
| 1238 | break; | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | |||
| 1239 | kfree(buf->rb_pool); | 1242 | kfree(buf->rb_pool); |
| 1240 | } | 1243 | } |
| 1241 | 1244 | ||
| @@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) | |||
| 1299 | int i; | 1302 | int i; |
| 1300 | unsigned long flags; | 1303 | unsigned long flags; |
| 1301 | 1304 | ||
| 1302 | BUG_ON(req->rl_nchunks != 0); | ||
| 1303 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1305 | spin_lock_irqsave(&buffers->rb_lock, flags); |
| 1304 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; | 1306 | buffers->rb_send_bufs[--buffers->rb_send_index] = req; |
| 1305 | req->rl_niovs = 0; | 1307 | req->rl_niovs = 0; |
| 1306 | if (req->rl_reply) { | 1308 | if (req->rl_reply) { |
| 1307 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; | 1309 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; |
| 1308 | init_waitqueue_head(&req->rl_reply->rr_unbind); | ||
| 1309 | req->rl_reply->rr_func = NULL; | 1310 | req->rl_reply->rr_func = NULL; |
| 1310 | req->rl_reply = NULL; | 1311 | req->rl_reply = NULL; |
| 1311 | } | 1312 | } |
| 1312 | switch (ia->ri_memreg_strategy) { | 1313 | switch (ia->ri_memreg_strategy) { |
| 1313 | case RPCRDMA_FRMR: | 1314 | case RPCRDMA_FRMR: |
| 1314 | case RPCRDMA_MTHCAFMR: | 1315 | case RPCRDMA_MTHCAFMR: |
| 1315 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1316 | case RPCRDMA_MEMWINDOWS: | ||
| 1317 | /* | 1316 | /* |
| 1318 | * Cycle mw's back in reverse order, and "spin" them. | 1317 | * Cycle mw's back in reverse order, and "spin" them. |
| 1319 | * This delays and scrambles reuse as much as possible. | 1318 | * This delays and scrambles reuse as much as possible. |
| @@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) | |||
| 1358 | 1357 | ||
| 1359 | /* | 1358 | /* |
| 1360 | * Put reply buffers back into pool when not attached to | 1359 | * Put reply buffers back into pool when not attached to |
| 1361 | * request. This happens in error conditions, and when | 1360 | * request. This happens in error conditions. |
| 1362 | * aborting unbinds. Pre-decrement counter/array index. | ||
| 1363 | */ | 1361 | */ |
| 1364 | void | 1362 | void |
| 1365 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | 1363 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
| @@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1498 | seg1->mr_offset -= pageoff; /* start of page */ | 1496 | seg1->mr_offset -= pageoff; /* start of page */ |
| 1499 | seg1->mr_len += pageoff; | 1497 | seg1->mr_len += pageoff; |
| 1500 | len = -pageoff; | 1498 | len = -pageoff; |
| 1501 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | 1499 | if (*nsegs > ia->ri_max_frmr_depth) |
| 1502 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | 1500 | *nsegs = ia->ri_max_frmr_depth; |
| 1503 | for (page_no = i = 0; i < *nsegs;) { | 1501 | for (page_no = i = 0; i < *nsegs;) { |
| 1504 | rpcrdma_map_one(ia, seg, writing); | 1502 | rpcrdma_map_one(ia, seg, writing); |
| 1505 | pa = seg->mr_dma; | 1503 | pa = seg->mr_dma; |
| @@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1536 | } else | 1534 | } else |
| 1537 | post_wr = &frmr_wr; | 1535 | post_wr = &frmr_wr; |
| 1538 | 1536 | ||
| 1539 | /* Bump the key */ | ||
| 1540 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
| 1541 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
| 1542 | |||
| 1543 | /* Prepare FRMR WR */ | 1537 | /* Prepare FRMR WR */ |
| 1544 | memset(&frmr_wr, 0, sizeof frmr_wr); | 1538 | memset(&frmr_wr, 0, sizeof frmr_wr); |
| 1545 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; | 1539 | frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; |
| @@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1550 | frmr_wr.wr.fast_reg.page_list_len = page_no; | 1544 | frmr_wr.wr.fast_reg.page_list_len = page_no; |
| 1551 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; | 1545 | frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; |
| 1552 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; | 1546 | frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT; |
| 1553 | BUG_ON(frmr_wr.wr.fast_reg.length < len); | 1547 | if (frmr_wr.wr.fast_reg.length < len) { |
| 1548 | while (seg1->mr_nsegs--) | ||
| 1549 | rpcrdma_unmap_one(ia, seg++); | ||
| 1550 | return -EIO; | ||
| 1551 | } | ||
| 1552 | |||
| 1553 | /* Bump the key */ | ||
| 1554 | key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); | ||
| 1555 | ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); | ||
| 1556 | |||
| 1554 | frmr_wr.wr.fast_reg.access_flags = (writing ? | 1557 | frmr_wr.wr.fast_reg.access_flags = (writing ? |
| 1555 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 1558 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
| 1556 | IB_ACCESS_REMOTE_READ); | 1559 | IB_ACCESS_REMOTE_READ); |
| @@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, | |||
| 1661 | return rc; | 1664 | return rc; |
| 1662 | } | 1665 | } |
| 1663 | 1666 | ||
| 1664 | static int | ||
| 1665 | rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1666 | int *nsegs, int writing, struct rpcrdma_ia *ia, | ||
| 1667 | struct rpcrdma_xprt *r_xprt) | ||
| 1668 | { | ||
| 1669 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1670 | IB_ACCESS_REMOTE_READ); | ||
| 1671 | struct ib_mw_bind param; | ||
| 1672 | int rc; | ||
| 1673 | |||
| 1674 | *nsegs = 1; | ||
| 1675 | rpcrdma_map_one(ia, seg, writing); | ||
| 1676 | param.bind_info.mr = ia->ri_bind_mem; | ||
| 1677 | param.wr_id = 0ULL; /* no send cookie */ | ||
| 1678 | param.bind_info.addr = seg->mr_dma; | ||
| 1679 | param.bind_info.length = seg->mr_len; | ||
| 1680 | param.send_flags = 0; | ||
| 1681 | param.bind_info.mw_access_flags = mem_priv; | ||
| 1682 | |||
| 1683 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1684 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
| 1685 | if (rc) { | ||
| 1686 | dprintk("RPC: %s: failed ib_bind_mw " | ||
| 1687 | "%u@0x%llx status %i\n", | ||
| 1688 | __func__, seg->mr_len, | ||
| 1689 | (unsigned long long)seg->mr_dma, rc); | ||
| 1690 | rpcrdma_unmap_one(ia, seg); | ||
| 1691 | } else { | ||
| 1692 | seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; | ||
| 1693 | seg->mr_base = param.bind_info.addr; | ||
| 1694 | seg->mr_nsegs = 1; | ||
| 1695 | } | ||
| 1696 | return rc; | ||
| 1697 | } | ||
| 1698 | |||
| 1699 | static int | ||
| 1700 | rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, | ||
| 1701 | struct rpcrdma_ia *ia, | ||
| 1702 | struct rpcrdma_xprt *r_xprt, void **r) | ||
| 1703 | { | ||
| 1704 | struct ib_mw_bind param; | ||
| 1705 | LIST_HEAD(l); | ||
| 1706 | int rc; | ||
| 1707 | |||
| 1708 | BUG_ON(seg->mr_nsegs != 1); | ||
| 1709 | param.bind_info.mr = ia->ri_bind_mem; | ||
| 1710 | param.bind_info.addr = 0ULL; /* unbind */ | ||
| 1711 | param.bind_info.length = 0; | ||
| 1712 | param.bind_info.mw_access_flags = 0; | ||
| 1713 | if (*r) { | ||
| 1714 | param.wr_id = (u64) (unsigned long) *r; | ||
| 1715 | param.send_flags = IB_SEND_SIGNALED; | ||
| 1716 | INIT_CQCOUNT(&r_xprt->rx_ep); | ||
| 1717 | } else { | ||
| 1718 | param.wr_id = 0ULL; | ||
| 1719 | param.send_flags = 0; | ||
| 1720 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
| 1721 | } | ||
| 1722 | rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param); | ||
| 1723 | rpcrdma_unmap_one(ia, seg); | ||
| 1724 | if (rc) | ||
| 1725 | dprintk("RPC: %s: failed ib_(un)bind_mw," | ||
| 1726 | " status %i\n", __func__, rc); | ||
| 1727 | else | ||
| 1728 | *r = NULL; /* will upcall on completion */ | ||
| 1729 | return rc; | ||
| 1730 | } | ||
| 1731 | |||
| 1732 | static int | ||
| 1733 | rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1734 | int *nsegs, int writing, struct rpcrdma_ia *ia) | ||
| 1735 | { | ||
| 1736 | int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : | ||
| 1737 | IB_ACCESS_REMOTE_READ); | ||
| 1738 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1739 | struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; | ||
| 1740 | int len, i, rc = 0; | ||
| 1741 | |||
| 1742 | if (*nsegs > RPCRDMA_MAX_DATA_SEGS) | ||
| 1743 | *nsegs = RPCRDMA_MAX_DATA_SEGS; | ||
| 1744 | for (len = 0, i = 0; i < *nsegs;) { | ||
| 1745 | rpcrdma_map_one(ia, seg, writing); | ||
| 1746 | ipb[i].addr = seg->mr_dma; | ||
| 1747 | ipb[i].size = seg->mr_len; | ||
| 1748 | len += seg->mr_len; | ||
| 1749 | ++seg; | ||
| 1750 | ++i; | ||
| 1751 | /* Check for holes */ | ||
| 1752 | if ((i < *nsegs && offset_in_page(seg->mr_offset)) || | ||
| 1753 | offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) | ||
| 1754 | break; | ||
| 1755 | } | ||
| 1756 | seg1->mr_base = seg1->mr_dma; | ||
| 1757 | seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, | ||
| 1758 | ipb, i, mem_priv, &seg1->mr_base); | ||
| 1759 | if (IS_ERR(seg1->mr_chunk.rl_mr)) { | ||
| 1760 | rc = PTR_ERR(seg1->mr_chunk.rl_mr); | ||
| 1761 | dprintk("RPC: %s: failed ib_reg_phys_mr " | ||
| 1762 | "%u@0x%llx (%d)... status %i\n", | ||
| 1763 | __func__, len, | ||
| 1764 | (unsigned long long)seg1->mr_dma, i, rc); | ||
| 1765 | while (i--) | ||
| 1766 | rpcrdma_unmap_one(ia, --seg); | ||
| 1767 | } else { | ||
| 1768 | seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; | ||
| 1769 | seg1->mr_nsegs = i; | ||
| 1770 | seg1->mr_len = len; | ||
| 1771 | } | ||
| 1772 | *nsegs = i; | ||
| 1773 | return rc; | ||
| 1774 | } | ||
| 1775 | |||
| 1776 | static int | ||
| 1777 | rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, | ||
| 1778 | struct rpcrdma_ia *ia) | ||
| 1779 | { | ||
| 1780 | struct rpcrdma_mr_seg *seg1 = seg; | ||
| 1781 | int rc; | ||
| 1782 | |||
| 1783 | rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); | ||
| 1784 | seg1->mr_chunk.rl_mr = NULL; | ||
| 1785 | while (seg1->mr_nsegs--) | ||
| 1786 | rpcrdma_unmap_one(ia, seg++); | ||
| 1787 | if (rc) | ||
| 1788 | dprintk("RPC: %s: failed ib_dereg_mr," | ||
| 1789 | " status %i\n", __func__, rc); | ||
| 1790 | return rc; | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | int | 1667 | int |
| 1794 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | 1668 | rpcrdma_register_external(struct rpcrdma_mr_seg *seg, |
| 1795 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) | 1669 | int nsegs, int writing, struct rpcrdma_xprt *r_xprt) |
| @@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
| 1819 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); | 1693 | rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); |
| 1820 | break; | 1694 | break; |
| 1821 | 1695 | ||
| 1822 | /* Registration using memory windows */ | ||
| 1823 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1824 | case RPCRDMA_MEMWINDOWS: | ||
| 1825 | rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); | ||
| 1826 | break; | ||
| 1827 | |||
| 1828 | /* Default registration each time */ | ||
| 1829 | default: | 1696 | default: |
| 1830 | rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); | 1697 | return -1; |
| 1831 | break; | ||
| 1832 | } | 1698 | } |
| 1833 | if (rc) | 1699 | if (rc) |
| 1834 | return -1; | 1700 | return -1; |
| @@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, | |||
| 1838 | 1704 | ||
| 1839 | int | 1705 | int |
| 1840 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | 1706 | rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, |
| 1841 | struct rpcrdma_xprt *r_xprt, void *r) | 1707 | struct rpcrdma_xprt *r_xprt) |
| 1842 | { | 1708 | { |
| 1843 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 1709 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
| 1844 | int nsegs = seg->mr_nsegs, rc; | 1710 | int nsegs = seg->mr_nsegs, rc; |
| @@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1847 | 1713 | ||
| 1848 | #if RPCRDMA_PERSISTENT_REGISTRATION | 1714 | #if RPCRDMA_PERSISTENT_REGISTRATION |
| 1849 | case RPCRDMA_ALLPHYSICAL: | 1715 | case RPCRDMA_ALLPHYSICAL: |
| 1850 | BUG_ON(nsegs != 1); | ||
| 1851 | rpcrdma_unmap_one(ia, seg); | 1716 | rpcrdma_unmap_one(ia, seg); |
| 1852 | rc = 0; | ||
| 1853 | break; | 1717 | break; |
| 1854 | #endif | 1718 | #endif |
| 1855 | 1719 | ||
| @@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, | |||
| 1861 | rc = rpcrdma_deregister_fmr_external(seg, ia); | 1725 | rc = rpcrdma_deregister_fmr_external(seg, ia); |
| 1862 | break; | 1726 | break; |
| 1863 | 1727 | ||
| 1864 | case RPCRDMA_MEMWINDOWS_ASYNC: | ||
| 1865 | case RPCRDMA_MEMWINDOWS: | ||
| 1866 | rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); | ||
| 1867 | break; | ||
| 1868 | |||
| 1869 | default: | 1728 | default: |
| 1870 | rc = rpcrdma_deregister_default_external(seg, ia); | ||
| 1871 | break; | 1729 | break; |
| 1872 | } | 1730 | } |
| 1873 | if (r) { | ||
| 1874 | struct rpcrdma_rep *rep = r; | ||
| 1875 | void (*func)(struct rpcrdma_rep *) = rep->rr_func; | ||
| 1876 | rep->rr_func = NULL; | ||
| 1877 | func(rep); /* dereg done, callback now */ | ||
| 1878 | } | ||
| 1879 | return nsegs; | 1731 | return nsegs; |
| 1880 | } | 1732 | } |
| 1881 | 1733 | ||
| @@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
| 1950 | ib_dma_sync_single_for_cpu(ia->ri_id->device, | 1802 | ib_dma_sync_single_for_cpu(ia->ri_id->device, |
| 1951 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); | 1803 | rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); |
| 1952 | 1804 | ||
| 1953 | DECR_CQCOUNT(ep); | ||
| 1954 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); | 1805 | rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); |
| 1955 | 1806 | ||
| 1956 | if (rc) | 1807 | if (rc) |
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index cc1445dc1d1a..89e7cd479705 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ | 43 | #include <linux/wait.h> /* wait_queue_head_t, etc */ |
| 44 | #include <linux/spinlock.h> /* spinlock_t, etc */ | 44 | #include <linux/spinlock.h> /* spinlock_t, etc */ |
| 45 | #include <linux/atomic.h> /* atomic_t, etc */ | 45 | #include <linux/atomic.h> /* atomic_t, etc */ |
| 46 | #include <linux/workqueue.h> /* struct work_struct */ | ||
| 46 | 47 | ||
| 47 | #include <rdma/rdma_cm.h> /* RDMA connection api */ | 48 | #include <rdma/rdma_cm.h> /* RDMA connection api */ |
| 48 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ | 49 | #include <rdma/ib_verbs.h> /* RDMA verbs api */ |
| @@ -66,18 +67,21 @@ struct rpcrdma_ia { | |||
| 66 | struct completion ri_done; | 67 | struct completion ri_done; |
| 67 | int ri_async_rc; | 68 | int ri_async_rc; |
| 68 | enum rpcrdma_memreg ri_memreg_strategy; | 69 | enum rpcrdma_memreg ri_memreg_strategy; |
| 70 | unsigned int ri_max_frmr_depth; | ||
| 69 | }; | 71 | }; |
| 70 | 72 | ||
| 71 | /* | 73 | /* |
| 72 | * RDMA Endpoint -- one per transport instance | 74 | * RDMA Endpoint -- one per transport instance |
| 73 | */ | 75 | */ |
| 74 | 76 | ||
| 77 | #define RPCRDMA_WC_BUDGET (128) | ||
| 78 | #define RPCRDMA_POLLSIZE (16) | ||
| 79 | |||
| 75 | struct rpcrdma_ep { | 80 | struct rpcrdma_ep { |
| 76 | atomic_t rep_cqcount; | 81 | atomic_t rep_cqcount; |
| 77 | int rep_cqinit; | 82 | int rep_cqinit; |
| 78 | int rep_connected; | 83 | int rep_connected; |
| 79 | struct rpcrdma_ia *rep_ia; | 84 | struct rpcrdma_ia *rep_ia; |
| 80 | struct ib_cq *rep_cq; | ||
| 81 | struct ib_qp_init_attr rep_attr; | 85 | struct ib_qp_init_attr rep_attr; |
| 82 | wait_queue_head_t rep_connect_wait; | 86 | wait_queue_head_t rep_connect_wait; |
| 83 | struct ib_sge rep_pad; /* holds zeroed pad */ | 87 | struct ib_sge rep_pad; /* holds zeroed pad */ |
| @@ -86,6 +90,9 @@ struct rpcrdma_ep { | |||
| 86 | struct rpc_xprt *rep_xprt; /* for rep_func */ | 90 | struct rpc_xprt *rep_xprt; /* for rep_func */ |
| 87 | struct rdma_conn_param rep_remote_cma; | 91 | struct rdma_conn_param rep_remote_cma; |
| 88 | struct sockaddr_storage rep_remote_addr; | 92 | struct sockaddr_storage rep_remote_addr; |
| 93 | struct delayed_work rep_connect_worker; | ||
| 94 | struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; | ||
| 95 | struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; | ||
| 89 | }; | 96 | }; |
| 90 | 97 | ||
| 91 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) | 98 | #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) |
| @@ -124,7 +131,6 @@ struct rpcrdma_rep { | |||
| 124 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ | 131 | struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ |
| 125 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ | 132 | void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ |
| 126 | struct list_head rr_list; /* tasklet list */ | 133 | struct list_head rr_list; /* tasklet list */ |
| 127 | wait_queue_head_t rr_unbind; /* optional unbind wait */ | ||
| 128 | struct ib_sge rr_iov; /* for posting */ | 134 | struct ib_sge rr_iov; /* for posting */ |
| 129 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ | 135 | struct ib_mr *rr_handle; /* handle for mem in rr_iov */ |
| 130 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ | 136 | char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ |
| @@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ | |||
| 159 | struct ib_mr *rl_mr; /* if registered directly */ | 165 | struct ib_mr *rl_mr; /* if registered directly */ |
| 160 | struct rpcrdma_mw { /* if registered from region */ | 166 | struct rpcrdma_mw { /* if registered from region */ |
| 161 | union { | 167 | union { |
| 162 | struct ib_mw *mw; | ||
| 163 | struct ib_fmr *fmr; | 168 | struct ib_fmr *fmr; |
| 164 | struct { | 169 | struct { |
| 165 | struct ib_fast_reg_page_list *fr_pgl; | 170 | struct ib_fast_reg_page_list *fr_pgl; |
| @@ -207,7 +212,6 @@ struct rpcrdma_req { | |||
| 207 | struct rpcrdma_buffer { | 212 | struct rpcrdma_buffer { |
| 208 | spinlock_t rb_lock; /* protects indexes */ | 213 | spinlock_t rb_lock; /* protects indexes */ |
| 209 | atomic_t rb_credits; /* most recent server credits */ | 214 | atomic_t rb_credits; /* most recent server credits */ |
| 210 | unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ | ||
| 211 | int rb_max_requests;/* client max requests */ | 215 | int rb_max_requests;/* client max requests */ |
| 212 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ | 216 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ |
| 213 | int rb_send_index; | 217 | int rb_send_index; |
| @@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); | |||
| 300 | */ | 304 | */ |
| 301 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, | 305 | int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, |
| 302 | struct rpcrdma_create_data_internal *); | 306 | struct rpcrdma_create_data_internal *); |
| 303 | int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); | 307 | void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 304 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 308 | int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 305 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); | 309 | int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); |
| 306 | 310 | ||
| @@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *, | |||
| 330 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, | 334 | int rpcrdma_register_external(struct rpcrdma_mr_seg *, |
| 331 | int, int, struct rpcrdma_xprt *); | 335 | int, int, struct rpcrdma_xprt *); |
| 332 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, | 336 | int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, |
| 333 | struct rpcrdma_xprt *, void *); | 337 | struct rpcrdma_xprt *); |
| 334 | 338 | ||
| 335 | /* | 339 | /* |
| 336 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c | 340 | * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c |
| 337 | */ | 341 | */ |
| 342 | void rpcrdma_connect_worker(struct work_struct *); | ||
| 338 | void rpcrdma_conn_func(struct rpcrdma_ep *); | 343 | void rpcrdma_conn_func(struct rpcrdma_ep *); |
| 339 | void rpcrdma_reply_handler(struct rpcrdma_rep *); | 344 | void rpcrdma_reply_handler(struct rpcrdma_rep *); |
| 340 | 345 | ||
