-rw-r--r--  fs/nfs/direct.c | 15
-rw-r--r--  fs/nfs/file.c | 2
-rw-r--r--  fs/nfs/filelayout/filelayout.c | 18
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c | 190
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.h | 1
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayoutdev.c | 16
-rw-r--r--  fs/nfs/inode.c | 6
-rw-r--r--  fs/nfs/internal.h | 14
-rw-r--r--  fs/nfs/nfs4proc.c | 29
-rw-r--r--  fs/nfs/nfs4sysctl.c | 2
-rw-r--r--  fs/nfs/pagelist.c | 69
-rw-r--r--  fs/nfs/pnfs.c | 62
-rw-r--r--  fs/nfs/pnfs.h | 21
-rw-r--r--  fs/nfs/read.c | 43
-rw-r--r--  fs/nfs/write.c | 47
-rw-r--r--  include/linux/nfs_fs.h | 14
-rw-r--r--  include/linux/nfs_fs_sb.h | 1
-rw-r--r--  include/linux/nfs_xdr.h | 2
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 26
-rw-r--r--  net/sunrpc/xprtrdma/fmr_ops.c | 64
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 174
-rw-r--r--  net/sunrpc/xprtrdma/physical_ops.c | 13
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 16
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 3
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 16
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 14
-rw-r--r--  net/sunrpc/xprtsock.c | 14
27 files changed, 652 insertions(+), 240 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index a9a93927fe3e..7ab7ec9f4eed 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -664,6 +664,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
664 664
665 req = nfs_list_entry(reqs.next); 665 req = nfs_list_entry(reqs.next);
666 nfs_direct_setup_mirroring(dreq, &desc, req); 666 nfs_direct_setup_mirroring(dreq, &desc, req);
667 if (desc.pg_error < 0) {
668 list_splice_init(&reqs, &failed);
669 goto out_failed;
670 }
667 671
668 list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 672 list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
669 if (!nfs_pageio_add_request(&desc, req)) { 673 if (!nfs_pageio_add_request(&desc, req)) {
@@ -671,13 +675,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
671 nfs_list_add_request(req, &failed); 675 nfs_list_add_request(req, &failed);
672 spin_lock(cinfo.lock); 676 spin_lock(cinfo.lock);
673 dreq->flags = 0; 677 dreq->flags = 0;
674 dreq->error = -EIO; 678 if (desc.pg_error < 0)
679 dreq->error = desc.pg_error;
680 else
681 dreq->error = -EIO;
675 spin_unlock(cinfo.lock); 682 spin_unlock(cinfo.lock);
676 } 683 }
677 nfs_release_request(req); 684 nfs_release_request(req);
678 } 685 }
679 nfs_pageio_complete(&desc); 686 nfs_pageio_complete(&desc);
680 687
688out_failed:
681 while (!list_empty(&failed)) { 689 while (!list_empty(&failed)) {
682 req = nfs_list_entry(failed.next); 690 req = nfs_list_entry(failed.next);
683 nfs_list_remove_request(req); 691 nfs_list_remove_request(req);
@@ -915,6 +923,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
915 } 923 }
916 924
917 nfs_direct_setup_mirroring(dreq, &desc, req); 925 nfs_direct_setup_mirroring(dreq, &desc, req);
926 if (desc.pg_error < 0) {
927 nfs_free_request(req);
928 result = desc.pg_error;
929 break;
930 }
918 931
919 nfs_lock_request(req); 932 nfs_lock_request(req);
920 req->wb_index = pos >> PAGE_SHIFT; 933 req->wb_index = pos >> PAGE_SHIFT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e6ef80ec699c..178ec8da028f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page)
545 inode->i_ino, (long long)page_offset(page)); 545 inode->i_ino, (long long)page_offset(page));
546 546
547 nfs_fscache_wait_on_page_write(nfsi, page); 547 nfs_fscache_wait_on_page_write(nfsi, page);
548 return nfs_wb_page(inode, page); 548 return nfs_wb_launder_page(inode, page);
549} 549}
550 550
551static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, 551static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 02ec07973bc4..bb1f4e7a3270 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
202 task->tk_status); 202 task->tk_status);
203 nfs4_mark_deviceid_unavailable(devid); 203 nfs4_mark_deviceid_unavailable(devid);
204 pnfs_error_mark_layout_for_return(inode, lseg); 204 pnfs_error_mark_layout_for_return(inode, lseg);
205 pnfs_set_lo_fail(lseg);
205 rpc_wake_up(&tbl->slot_tbl_waitq); 206 rpc_wake_up(&tbl->slot_tbl_waitq);
206 /* fall through */ 207 /* fall through */
207 default: 208 default:
@@ -883,13 +884,19 @@ static void
883filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, 884filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
884 struct nfs_page *req) 885 struct nfs_page *req)
885{ 886{
886 if (!pgio->pg_lseg) 887 if (!pgio->pg_lseg) {
887 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 888 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
888 req->wb_context, 889 req->wb_context,
889 0, 890 0,
890 NFS4_MAX_UINT64, 891 NFS4_MAX_UINT64,
891 IOMODE_READ, 892 IOMODE_READ,
892 GFP_KERNEL); 893 GFP_KERNEL);
894 if (IS_ERR(pgio->pg_lseg)) {
895 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
896 pgio->pg_lseg = NULL;
897 return;
898 }
899 }
893 /* If no lseg, fall back to read through mds */ 900 /* If no lseg, fall back to read through mds */
894 if (pgio->pg_lseg == NULL) 901 if (pgio->pg_lseg == NULL)
895 nfs_pageio_reset_read_mds(pgio); 902 nfs_pageio_reset_read_mds(pgio);
@@ -902,13 +909,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
902 struct nfs_commit_info cinfo; 909 struct nfs_commit_info cinfo;
903 int status; 910 int status;
904 911
905 if (!pgio->pg_lseg) 912 if (!pgio->pg_lseg) {
906 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 913 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
907 req->wb_context, 914 req->wb_context,
908 0, 915 0,
909 NFS4_MAX_UINT64, 916 NFS4_MAX_UINT64,
910 IOMODE_RW, 917 IOMODE_RW,
911 GFP_NOFS); 918 GFP_NOFS);
919 if (IS_ERR(pgio->pg_lseg)) {
920 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
921 pgio->pg_lseg = NULL;
922 return;
923 }
924 }
925
912 /* If no lseg, fall back to write through mds */ 926 /* If no lseg, fall back to write through mds */
913 if (pgio->pg_lseg == NULL) 927 if (pgio->pg_lseg == NULL)
914 goto out_mds; 928 goto out_mds;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df475d42df77..18c329b84ffb 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
505 } 505 }
506 506
507 p = xdr_inline_decode(&stream, 4); 507 p = xdr_inline_decode(&stream, 4);
508 if (p) 508 if (!p)
509 fls->flags = be32_to_cpup(p); 509 goto out_sort_mirrors;
510 fls->flags = be32_to_cpup(p);
511
512 p = xdr_inline_decode(&stream, 4);
513 if (!p)
514 goto out_sort_mirrors;
515 for (i=0; i < fls->mirror_array_cnt; i++)
516 fls->mirror_array[i]->report_interval = be32_to_cpup(p);
510 517
518out_sort_mirrors:
511 ff_layout_sort_mirrors(fls); 519 ff_layout_sort_mirrors(fls);
512 rc = ff_layout_check_layout(lgr); 520 rc = ff_layout_check_layout(lgr);
513 if (rc) 521 if (rc)
@@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
603 mirror->start_time = now; 611 mirror->start_time = now;
604 if (ktime_equal(mirror->last_report_time, notime)) 612 if (ktime_equal(mirror->last_report_time, notime))
605 mirror->last_report_time = now; 613 mirror->last_report_time = now;
606 if (layoutstats_timer != 0) 614 if (mirror->report_interval != 0)
615 report_interval = (s64)mirror->report_interval * 1000LL;
616 else if (layoutstats_timer != 0)
607 report_interval = (s64)layoutstats_timer * 1000LL; 617 report_interval = (s64)layoutstats_timer * 1000LL;
608 if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= 618 if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
609 report_interval) { 619 report_interval) {
@@ -785,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
785 int ds_idx; 795 int ds_idx;
786 796
787 /* Use full layout for now */ 797 /* Use full layout for now */
788 if (!pgio->pg_lseg) 798 if (!pgio->pg_lseg) {
789 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 799 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
790 req->wb_context, 800 req->wb_context,
791 0, 801 0,
792 NFS4_MAX_UINT64, 802 NFS4_MAX_UINT64,
793 IOMODE_READ, 803 IOMODE_READ,
794 GFP_KERNEL); 804 GFP_KERNEL);
805 if (IS_ERR(pgio->pg_lseg)) {
806 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
807 pgio->pg_lseg = NULL;
808 return;
809 }
810 }
795 /* If no lseg, fall back to read through mds */ 811 /* If no lseg, fall back to read through mds */
796 if (pgio->pg_lseg == NULL) 812 if (pgio->pg_lseg == NULL)
797 goto out_mds; 813 goto out_mds;
@@ -825,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
825 int i; 841 int i;
826 int status; 842 int status;
827 843
828 if (!pgio->pg_lseg) 844 if (!pgio->pg_lseg) {
829 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 845 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
830 req->wb_context, 846 req->wb_context,
831 0, 847 0,
832 NFS4_MAX_UINT64, 848 NFS4_MAX_UINT64,
833 IOMODE_RW, 849 IOMODE_RW,
834 GFP_NOFS); 850 GFP_NOFS);
851 if (IS_ERR(pgio->pg_lseg)) {
852 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
853 pgio->pg_lseg = NULL;
854 return;
855 }
856 }
835 /* If no lseg, fall back to write through mds */ 857 /* If no lseg, fall back to write through mds */
836 if (pgio->pg_lseg == NULL) 858 if (pgio->pg_lseg == NULL)
837 goto out_mds; 859 goto out_mds;
@@ -867,18 +889,25 @@ static unsigned int
867ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, 889ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
868 struct nfs_page *req) 890 struct nfs_page *req)
869{ 891{
870 if (!pgio->pg_lseg) 892 if (!pgio->pg_lseg) {
871 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 893 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
872 req->wb_context, 894 req->wb_context,
873 0, 895 0,
874 NFS4_MAX_UINT64, 896 NFS4_MAX_UINT64,
875 IOMODE_RW, 897 IOMODE_RW,
876 GFP_NOFS); 898 GFP_NOFS);
899 if (IS_ERR(pgio->pg_lseg)) {
900 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
901 pgio->pg_lseg = NULL;
902 goto out;
903 }
904 }
877 if (pgio->pg_lseg) 905 if (pgio->pg_lseg)
878 return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); 906 return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);
879 907
880 /* no lseg means that pnfs is not in use, so no mirroring here */ 908 /* no lseg means that pnfs is not in use, so no mirroring here */
881 nfs_pageio_reset_write_mds(pgio); 909 nfs_pageio_reset_write_mds(pgio);
910out:
882 return 1; 911 return 1;
883} 912}
884 913
@@ -1090,7 +1119,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
1090 return -NFS4ERR_RESET_TO_PNFS; 1119 return -NFS4ERR_RESET_TO_PNFS;
1091out_retry: 1120out_retry:
1092 task->tk_status = 0; 1121 task->tk_status = 0;
1093 rpc_restart_call(task); 1122 rpc_restart_call_prepare(task);
1094 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); 1123 rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
1095 return -EAGAIN; 1124 return -EAGAIN;
1096} 1125}
@@ -1148,6 +1177,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
1148 } 1177 }
1149 } 1178 }
1150 1179
1180 switch (status) {
1181 case NFS4ERR_DELAY:
1182 case NFS4ERR_GRACE:
1183 return;
1184 default:
1185 break;
1186 }
1187
1151 mirror = FF_LAYOUT_COMP(lseg, idx); 1188 mirror = FF_LAYOUT_COMP(lseg, idx);
1152 err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), 1189 err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
1153 mirror, offset, length, status, opnum, 1190 mirror, offset, length, status, opnum,
@@ -1231,14 +1268,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
1231 return ff_layout_test_devid_unavailable(node); 1268 return ff_layout_test_devid_unavailable(node);
1232} 1269}
1233 1270
1234static int ff_layout_read_prepare_common(struct rpc_task *task, 1271static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
1235 struct nfs_pgio_header *hdr) 1272 struct nfs_pgio_header *hdr)
1236{ 1273{
1274 if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
1275 return;
1237 nfs4_ff_layout_stat_io_start_read(hdr->inode, 1276 nfs4_ff_layout_stat_io_start_read(hdr->inode,
1238 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), 1277 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1239 hdr->args.count, 1278 hdr->args.count,
1240 task->tk_start); 1279 task->tk_start);
1280}
1241 1281
1282static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
1283 struct nfs_pgio_header *hdr)
1284{
1285 if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
1286 return;
1287 nfs4_ff_layout_stat_io_end_read(task,
1288 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1289 hdr->args.count,
1290 hdr->res.count);
1291}
1292
1293static int ff_layout_read_prepare_common(struct rpc_task *task,
1294 struct nfs_pgio_header *hdr)
1295{
1242 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { 1296 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
1243 rpc_exit(task, -EIO); 1297 rpc_exit(task, -EIO);
1244 return -EIO; 1298 return -EIO;
@@ -1254,6 +1308,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
1254 } 1308 }
1255 hdr->pgio_done_cb = ff_layout_read_done_cb; 1309 hdr->pgio_done_cb = ff_layout_read_done_cb;
1256 1310
1311 ff_layout_read_record_layoutstats_start(task, hdr);
1257 return 0; 1312 return 0;
1258} 1313}
1259 1314
@@ -1312,10 +1367,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
1312 1367
1313 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); 1368 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
1314 1369
1315 nfs4_ff_layout_stat_io_end_read(task,
1316 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1317 hdr->args.count, hdr->res.count);
1318
1319 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && 1370 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
1320 task->tk_status == 0) { 1371 task->tk_status == 0) {
1321 nfs4_sequence_done(task, &hdr->res.seq_res); 1372 nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1330,10 +1381,20 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
1330{ 1381{
1331 struct nfs_pgio_header *hdr = data; 1382 struct nfs_pgio_header *hdr = data;
1332 1383
1384 ff_layout_read_record_layoutstats_done(task, hdr);
1333 rpc_count_iostats_metrics(task, 1385 rpc_count_iostats_metrics(task,
1334 &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); 1386 &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
1335} 1387}
1336 1388
1389static void ff_layout_read_release(void *data)
1390{
1391 struct nfs_pgio_header *hdr = data;
1392
1393 ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
1394 pnfs_generic_rw_release(data);
1395}
1396
1397
1337static int ff_layout_write_done_cb(struct rpc_task *task, 1398static int ff_layout_write_done_cb(struct rpc_task *task,
1338 struct nfs_pgio_header *hdr) 1399 struct nfs_pgio_header *hdr)
1339{ 1400{
@@ -1351,15 +1412,12 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
1351 1412
1352 switch (err) { 1413 switch (err) {
1353 case -NFS4ERR_RESET_TO_PNFS: 1414 case -NFS4ERR_RESET_TO_PNFS:
1354 pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
1355 ff_layout_reset_write(hdr, true); 1415 ff_layout_reset_write(hdr, true);
1356 return task->tk_status; 1416 return task->tk_status;
1357 case -NFS4ERR_RESET_TO_MDS: 1417 case -NFS4ERR_RESET_TO_MDS:
1358 pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
1359 ff_layout_reset_write(hdr, false); 1418 ff_layout_reset_write(hdr, false);
1360 return task->tk_status; 1419 return task->tk_status;
1361 case -EAGAIN: 1420 case -EAGAIN:
1362 rpc_restart_call_prepare(task);
1363 return -EAGAIN; 1421 return -EAGAIN;
1364 } 1422 }
1365 1423
@@ -1391,11 +1449,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
1391 1449
1392 switch (err) { 1450 switch (err) {
1393 case -NFS4ERR_RESET_TO_PNFS: 1451 case -NFS4ERR_RESET_TO_PNFS:
1394 pnfs_set_retry_layoutget(data->lseg->pls_layout);
1395 pnfs_generic_prepare_to_resend_writes(data); 1452 pnfs_generic_prepare_to_resend_writes(data);
1396 return -EAGAIN; 1453 return -EAGAIN;
1397 case -NFS4ERR_RESET_TO_MDS: 1454 case -NFS4ERR_RESET_TO_MDS:
1398 pnfs_clear_retry_layoutget(data->lseg->pls_layout);
1399 pnfs_generic_prepare_to_resend_writes(data); 1455 pnfs_generic_prepare_to_resend_writes(data);
1400 return -EAGAIN; 1456 return -EAGAIN;
1401 case -EAGAIN: 1457 case -EAGAIN:
@@ -1410,14 +1466,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
1410 return 0; 1466 return 0;
1411} 1467}
1412 1468
1413static int ff_layout_write_prepare_common(struct rpc_task *task, 1469static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
1414 struct nfs_pgio_header *hdr) 1470 struct nfs_pgio_header *hdr)
1415{ 1471{
1472 if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
1473 return;
1416 nfs4_ff_layout_stat_io_start_write(hdr->inode, 1474 nfs4_ff_layout_stat_io_start_write(hdr->inode,
1417 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), 1475 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1418 hdr->args.count, 1476 hdr->args.count,
1419 task->tk_start); 1477 task->tk_start);
1478}
1479
1480static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
1481 struct nfs_pgio_header *hdr)
1482{
1483 if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
1484 return;
1485 nfs4_ff_layout_stat_io_end_write(task,
1486 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1487 hdr->args.count, hdr->res.count,
1488 hdr->res.verf->committed);
1489}
1420 1490
1491static int ff_layout_write_prepare_common(struct rpc_task *task,
1492 struct nfs_pgio_header *hdr)
1493{
1421 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { 1494 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
1422 rpc_exit(task, -EIO); 1495 rpc_exit(task, -EIO);
1423 return -EIO; 1496 return -EIO;
@@ -1434,6 +1507,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
1434 return -EAGAIN; 1507 return -EAGAIN;
1435 } 1508 }
1436 1509
1510 ff_layout_write_record_layoutstats_start(task, hdr);
1437 return 0; 1511 return 0;
1438} 1512}
1439 1513
@@ -1469,11 +1543,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
1469{ 1543{
1470 struct nfs_pgio_header *hdr = data; 1544 struct nfs_pgio_header *hdr = data;
1471 1545
1472 nfs4_ff_layout_stat_io_end_write(task,
1473 FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
1474 hdr->args.count, hdr->res.count,
1475 hdr->res.verf->committed);
1476
1477 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && 1546 if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
1478 task->tk_status == 0) { 1547 task->tk_status == 0) {
1479 nfs4_sequence_done(task, &hdr->res.seq_res); 1548 nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1488,18 +1557,53 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
1488{ 1557{
1489 struct nfs_pgio_header *hdr = data; 1558 struct nfs_pgio_header *hdr = data;
1490 1559
1560 ff_layout_write_record_layoutstats_done(task, hdr);
1491 rpc_count_iostats_metrics(task, 1561 rpc_count_iostats_metrics(task,
1492 &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); 1562 &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
1493} 1563}
1494 1564
1495static void ff_layout_commit_prepare_common(struct rpc_task *task, 1565static void ff_layout_write_release(void *data)
1566{
1567 struct nfs_pgio_header *hdr = data;
1568
1569 ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
1570 pnfs_generic_rw_release(data);
1571}
1572
1573static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
1496 struct nfs_commit_data *cdata) 1574 struct nfs_commit_data *cdata)
1497{ 1575{
1576 if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
1577 return;
1498 nfs4_ff_layout_stat_io_start_write(cdata->inode, 1578 nfs4_ff_layout_stat_io_start_write(cdata->inode,
1499 FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), 1579 FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
1500 0, task->tk_start); 1580 0, task->tk_start);
1501} 1581}
1502 1582
1583static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
1584 struct nfs_commit_data *cdata)
1585{
1586 struct nfs_page *req;
1587 __u64 count = 0;
1588
1589 if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
1590 return;
1591
1592 if (task->tk_status == 0) {
1593 list_for_each_entry(req, &cdata->pages, wb_list)
1594 count += req->wb_bytes;
1595 }
1596 nfs4_ff_layout_stat_io_end_write(task,
1597 FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
1598 count, count, NFS_FILE_SYNC);
1599}
1600
1601static void ff_layout_commit_prepare_common(struct rpc_task *task,
1602 struct nfs_commit_data *cdata)
1603{
1604 ff_layout_commit_record_layoutstats_start(task, cdata);
1605}
1606
1503static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) 1607static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
1504{ 1608{
1505 ff_layout_commit_prepare_common(task, data); 1609 ff_layout_commit_prepare_common(task, data);
@@ -1520,19 +1624,6 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
1520 1624
1521static void ff_layout_commit_done(struct rpc_task *task, void *data) 1625static void ff_layout_commit_done(struct rpc_task *task, void *data)
1522{ 1626{
1523 struct nfs_commit_data *cdata = data;
1524 struct nfs_page *req;
1525 __u64 count = 0;
1526
1527 if (task->tk_status == 0) {
1528 list_for_each_entry(req, &cdata->pages, wb_list)
1529 count += req->wb_bytes;
1530 }
1531
1532 nfs4_ff_layout_stat_io_end_write(task,
1533 FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
1534 count, count, NFS_FILE_SYNC);
1535
1536 pnfs_generic_write_commit_done(task, data); 1627 pnfs_generic_write_commit_done(task, data);
1537} 1628}
1538 1629
@@ -1540,50 +1631,59 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
1540{ 1631{
1541 struct nfs_commit_data *cdata = data; 1632 struct nfs_commit_data *cdata = data;
1542 1633
1634 ff_layout_commit_record_layoutstats_done(task, cdata);
1543 rpc_count_iostats_metrics(task, 1635 rpc_count_iostats_metrics(task,
1544 &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); 1636 &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
1545} 1637}
1546 1638
1639static void ff_layout_commit_release(void *data)
1640{
1641 struct nfs_commit_data *cdata = data;
1642
1643 ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
1644 pnfs_generic_commit_release(data);
1645}
1646
1547static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { 1647static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
1548 .rpc_call_prepare = ff_layout_read_prepare_v3, 1648 .rpc_call_prepare = ff_layout_read_prepare_v3,
1549 .rpc_call_done = ff_layout_read_call_done, 1649 .rpc_call_done = ff_layout_read_call_done,
1550 .rpc_count_stats = ff_layout_read_count_stats, 1650 .rpc_count_stats = ff_layout_read_count_stats,
1551 .rpc_release = pnfs_generic_rw_release, 1651 .rpc_release = ff_layout_read_release,
1552}; 1652};
1553 1653
1554static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { 1654static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
1555 .rpc_call_prepare = ff_layout_read_prepare_v4, 1655 .rpc_call_prepare = ff_layout_read_prepare_v4,
1556 .rpc_call_done = ff_layout_read_call_done, 1656 .rpc_call_done = ff_layout_read_call_done,
1557 .rpc_count_stats = ff_layout_read_count_stats, 1657 .rpc_count_stats = ff_layout_read_count_stats,
1558 .rpc_release = pnfs_generic_rw_release, 1658 .rpc_release = ff_layout_read_release,
1559}; 1659};
1560 1660
1561static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { 1661static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
1562 .rpc_call_prepare = ff_layout_write_prepare_v3, 1662 .rpc_call_prepare = ff_layout_write_prepare_v3,
1563 .rpc_call_done = ff_layout_write_call_done, 1663 .rpc_call_done = ff_layout_write_call_done,
1564 .rpc_count_stats = ff_layout_write_count_stats, 1664 .rpc_count_stats = ff_layout_write_count_stats,
1565 .rpc_release = pnfs_generic_rw_release, 1665 .rpc_release = ff_layout_write_release,
1566}; 1666};
1567 1667
1568static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { 1668static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
1569 .rpc_call_prepare = ff_layout_write_prepare_v4, 1669 .rpc_call_prepare = ff_layout_write_prepare_v4,
1570 .rpc_call_done = ff_layout_write_call_done, 1670 .rpc_call_done = ff_layout_write_call_done,
1571 .rpc_count_stats = ff_layout_write_count_stats, 1671 .rpc_count_stats = ff_layout_write_count_stats,
1572 .rpc_release = pnfs_generic_rw_release, 1672 .rpc_release = ff_layout_write_release,
1573}; 1673};
1574 1674
1575static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { 1675static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
1576 .rpc_call_prepare = ff_layout_commit_prepare_v3, 1676 .rpc_call_prepare = ff_layout_commit_prepare_v3,
1577 .rpc_call_done = ff_layout_commit_done, 1677 .rpc_call_done = ff_layout_commit_done,
1578 .rpc_count_stats = ff_layout_commit_count_stats, 1678 .rpc_count_stats = ff_layout_commit_count_stats,
1579 .rpc_release = pnfs_generic_commit_release, 1679 .rpc_release = ff_layout_commit_release,
1580}; 1680};
1581 1681
1582static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { 1682static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
1583 .rpc_call_prepare = ff_layout_commit_prepare_v4, 1683 .rpc_call_prepare = ff_layout_commit_prepare_v4,
1584 .rpc_call_done = ff_layout_commit_done, 1684 .rpc_call_done = ff_layout_commit_done,
1585 .rpc_count_stats = ff_layout_commit_count_stats, 1685 .rpc_count_stats = ff_layout_commit_count_stats,
1586 .rpc_release = pnfs_generic_commit_release, 1686 .rpc_release = ff_layout_commit_release,
1587}; 1687};
1588 1688
1589static enum pnfs_try_status 1689static enum pnfs_try_status
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 2bb08bc6aaf0..dd353bb7dc0a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror {
85 struct nfs4_ff_layoutstat write_stat; 85 struct nfs4_ff_layoutstat write_stat;
86 ktime_t start_time; 86 ktime_t start_time;
87 ktime_t last_report_time; 87 ktime_t last_report_time;
88 u32 report_interval;
88}; 89};
89 90
90struct nfs4_ff_layout_segment { 91struct nfs4_ff_layout_segment {
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de86d..bd0327541366 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
429 mirror, lseg->pls_range.offset, 429 mirror, lseg->pls_range.offset,
430 lseg->pls_range.length, NFS4ERR_NXIO, 430 lseg->pls_range.length, NFS4ERR_NXIO,
431 OP_ILLEGAL, GFP_NOIO); 431 OP_ILLEGAL, GFP_NOIO);
432 if (fail_return) { 432 if (!fail_return) {
433 pnfs_error_mark_layout_for_return(ino, lseg);
434 if (ff_layout_has_available_ds(lseg))
435 pnfs_set_retry_layoutget(lseg->pls_layout);
436 else
437 pnfs_clear_retry_layoutget(lseg->pls_layout);
438
439 } else {
440 if (ff_layout_has_available_ds(lseg)) 433 if (ff_layout_has_available_ds(lseg))
441 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 434 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
442 &lseg->pls_layout->plh_flags); 435 &lseg->pls_layout->plh_flags);
443 else { 436 else
444 pnfs_error_mark_layout_for_return(ino, lseg); 437 pnfs_error_mark_layout_for_return(ino, lseg);
445 pnfs_clear_retry_layoutget(lseg->pls_layout); 438 } else
446 } 439 pnfs_error_mark_layout_for_return(ino, lseg);
447 }
448 } 440 }
449out_update_creds: 441out_update_creds:
450 if (ff_layout_update_mirror_cred(mirror, ds)) 442 if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c7e8b87da5b2..74fb1223c2f5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -912,6 +912,12 @@ void nfs_file_clear_open_context(struct file *filp)
912 if (ctx) { 912 if (ctx) {
913 struct inode *inode = d_inode(ctx->dentry); 913 struct inode *inode = d_inode(ctx->dentry);
914 914
915 /*
 916	 * We hit a fatal error on a previous write. Try to write back
 917	 * every page again.
918 */
919 if (ctx->error < 0)
920 invalidate_inode_pages2(inode->i_mapping);
915 filp->private_data = NULL; 921 filp->private_data = NULL;
916 spin_lock(&inode->i_lock); 922 spin_lock(&inode->i_lock);
917 list_move_tail(&ctx->list, &NFS_I(inode)->open_files); 923 list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 870e2ba7ba49..ee81792d2886 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -716,3 +716,17 @@ static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
716 return 0; 716 return 0;
717} 717}
718#endif 718#endif
719
720static inline bool nfs_error_is_fatal(int err)
721{
722 switch (err) {
723 case -ERESTARTSYS:
724 case -EIO:
725 case -ENOSPC:
726 case -EROFS:
727 case -E2BIG:
728 return true;
729 default:
730 return false;
731 }
732}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 883da29b9ace..5e5062c9b92b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5383,6 +5383,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
5383 if (data == NULL) 5383 if (data == NULL)
5384 return -ENOMEM; 5384 return -ENOMEM;
5385 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); 5385 nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
5386
5387 nfs4_state_protect(server->nfs_client,
5388 NFS_SP4_MACH_CRED_CLEANUP,
5389 &task_setup_data.rpc_client, &msg);
5390
5386 data->args.fhandle = &data->fh; 5391 data->args.fhandle = &data->fh;
5387 data->args.stateid = &data->stateid; 5392 data->args.stateid = &data->stateid;
5388 data->args.bitmask = server->cache_consistency_bitmask; 5393 data->args.bitmask = server->cache_consistency_bitmask;
@@ -6859,10 +6864,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
6859 }, 6864 },
6860 .allow.u.words = { 6865 .allow.u.words = {
6861 [0] = 1 << (OP_CLOSE) | 6866 [0] = 1 << (OP_CLOSE) |
6867 1 << (OP_OPEN_DOWNGRADE) |
6862 1 << (OP_LOCKU) | 6868 1 << (OP_LOCKU) |
6869 1 << (OP_DELEGRETURN) |
6863 1 << (OP_COMMIT), 6870 1 << (OP_COMMIT),
6864 [1] = 1 << (OP_SECINFO - 32) | 6871 [1] = 1 << (OP_SECINFO - 32) |
6865 1 << (OP_SECINFO_NO_NAME - 32) | 6872 1 << (OP_SECINFO_NO_NAME - 32) |
6873 1 << (OP_LAYOUTRETURN - 32) |
6866 1 << (OP_TEST_STATEID - 32) | 6874 1 << (OP_TEST_STATEID - 32) |
6867 1 << (OP_FREE_STATEID - 32) | 6875 1 << (OP_FREE_STATEID - 32) |
6868 1 << (OP_WRITE - 32) 6876 1 << (OP_WRITE - 32)
@@ -6927,11 +6935,19 @@ static int nfs4_sp4_select_mode(struct nfs_client *clp,
6927 } 6935 }
6928 6936
6929 if (test_bit(OP_CLOSE, sp->allow.u.longs) && 6937 if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
6938 test_bit(OP_OPEN_DOWNGRADE, sp->allow.u.longs) &&
6939 test_bit(OP_DELEGRETURN, sp->allow.u.longs) &&
6930 test_bit(OP_LOCKU, sp->allow.u.longs)) { 6940 test_bit(OP_LOCKU, sp->allow.u.longs)) {
6931 dfprintk(MOUNT, " cleanup mode enabled\n"); 6941 dfprintk(MOUNT, " cleanup mode enabled\n");
6932 set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags); 6942 set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
6933 } 6943 }
6934 6944
6945 if (test_bit(OP_LAYOUTRETURN, sp->allow.u.longs)) {
6946 dfprintk(MOUNT, " pnfs cleanup mode enabled\n");
6947 set_bit(NFS_SP4_MACH_CRED_PNFS_CLEANUP,
6948 &clp->cl_sp4_flags);
6949 }
6950
6935 if (test_bit(OP_SECINFO, sp->allow.u.longs) && 6951 if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
6936 test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) { 6952 test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
6937 dfprintk(MOUNT, " secinfo mode enabled\n"); 6953 dfprintk(MOUNT, " secinfo mode enabled\n");
@@ -7796,6 +7812,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
7796 switch (task->tk_status) { 7812 switch (task->tk_status) {
7797 case 0: 7813 case 0:
7798 goto out; 7814 goto out;
7815
7816 /*
7817 * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs
7818	 * on the file. Set tk_status to -ENODATA to tell the upper layer
7819	 * to retry in-band through the MDS.
7820 */
7821 case -NFS4ERR_LAYOUTUNAVAILABLE:
7822 task->tk_status = -ENODATA;
7823 goto out;
7799 /* 7824 /*
7800 * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of 7825 * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
7801 * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). 7826 * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
@@ -8086,6 +8111,10 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
8086 }; 8111 };
8087 int status = 0; 8112 int status = 0;
8088 8113
8114 nfs4_state_protect(NFS_SERVER(lrp->args.inode)->nfs_client,
8115 NFS_SP4_MACH_CRED_PNFS_CLEANUP,
8116 &task_setup_data.rpc_client, &msg);
8117
8089 dprintk("--> %s\n", __func__); 8118 dprintk("--> %s\n", __func__);
8090 if (!sync) { 8119 if (!sync) {
8091 lrp->inode = nfs_igrab_and_active(lrp->args.inode); 8120 lrp->inode = nfs_igrab_and_active(lrp->args.inode);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 0fbd3ab1be22..8693d77c45ea 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -12,7 +12,7 @@
12#include "nfs4idmap.h" 12#include "nfs4idmap.h"
13#include "callback.h" 13#include "callback.h"
14 14
15static const int nfs_set_port_min = 0; 15static const int nfs_set_port_min;
16static const int nfs_set_port_max = 65535; 16static const int nfs_set_port_max = 65535;
17static struct ctl_table_header *nfs4_callback_sysctl_table; 17static struct ctl_table_header *nfs4_callback_sysctl_table;
18 18
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c3a78450a239..eeddbf0bf4c4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -664,22 +664,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
664 * @desc: IO descriptor 664 * @desc: IO descriptor
665 * @hdr: pageio header 665 * @hdr: pageio header
666 */ 666 */
667static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, 667static void nfs_pgio_error(struct nfs_pgio_header *hdr)
668 struct nfs_pgio_header *hdr)
669{ 668{
670 struct nfs_pgio_mirror *mirror;
671 u32 midx;
672
673 set_bit(NFS_IOHDR_REDO, &hdr->flags); 669 set_bit(NFS_IOHDR_REDO, &hdr->flags);
674 nfs_pgio_data_destroy(hdr); 670 nfs_pgio_data_destroy(hdr);
675 hdr->completion_ops->completion(hdr); 671 hdr->completion_ops->completion(hdr);
676 /* TODO: Make sure it's right to clean up all mirrors here
677 * and not just hdr->pgio_mirror_idx */
678 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
679 mirror = &desc->pg_mirrors[midx];
680 desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
681 }
682 return -ENOMEM;
683} 672}
684 673
685/** 674/**
@@ -800,8 +789,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
800 unsigned int pagecount, pageused; 789 unsigned int pagecount, pageused;
801 790
802 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); 791 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
803 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) 792 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
804 return nfs_pgio_error(desc, hdr); 793 nfs_pgio_error(hdr);
794 desc->pg_error = -ENOMEM;
795 return desc->pg_error;
796 }
805 797
806 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 798 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
807 pages = hdr->page_array.pagevec; 799 pages = hdr->page_array.pagevec;
@@ -819,8 +811,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
819 *pages++ = last_page = req->wb_page; 811 *pages++ = last_page = req->wb_page;
820 } 812 }
821 } 813 }
822 if (WARN_ON_ONCE(pageused != pagecount)) 814 if (WARN_ON_ONCE(pageused != pagecount)) {
823 return nfs_pgio_error(desc, hdr); 815 nfs_pgio_error(hdr);
816 desc->pg_error = -EINVAL;
817 return desc->pg_error;
818 }
824 819
825 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 820 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
826 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) 821 (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@ -843,10 +838,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
843 838
844 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 839 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
845 if (!hdr) { 840 if (!hdr) {
846 /* TODO: make sure this is right with mirroring - or 841 desc->pg_error = -ENOMEM;
847 * should it back out all mirrors? */ 842 return desc->pg_error;
848 desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
849 return -ENOMEM;
850 } 843 }
851 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); 844 nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
852 ret = nfs_generic_pgio(desc, hdr); 845 ret = nfs_generic_pgio(desc, hdr);
@@ -874,6 +867,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,
874 867
875 mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); 868 mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
876 869
870 if (pgio->pg_error < 0)
871 return pgio->pg_error;
872
877 if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) 873 if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
878 return -EINVAL; 874 return -EINVAL;
879 875
@@ -976,6 +972,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
976 } else { 972 } else {
977 if (desc->pg_ops->pg_init) 973 if (desc->pg_ops->pg_init)
978 desc->pg_ops->pg_init(desc, req); 974 desc->pg_ops->pg_init(desc, req);
975 if (desc->pg_error < 0)
976 return 0;
979 mirror->pg_base = req->wb_pgbase; 977 mirror->pg_base = req->wb_pgbase;
980 } 978 }
981 if (!nfs_can_coalesce_requests(prev, req, desc)) 979 if (!nfs_can_coalesce_requests(prev, req, desc))
@@ -1141,6 +1139,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1141 bytes = req->wb_bytes; 1139 bytes = req->wb_bytes;
1142 1140
1143 nfs_pageio_setup_mirroring(desc, req); 1141 nfs_pageio_setup_mirroring(desc, req);
1142 if (desc->pg_error < 0)
1143 goto out_failed;
1144 1144
1145 for (midx = 0; midx < desc->pg_mirror_count; midx++) { 1145 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
1146 if (midx) { 1146 if (midx) {
@@ -1157,7 +1157,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1157 1157
1158 if (IS_ERR(dupreq)) { 1158 if (IS_ERR(dupreq)) {
1159 nfs_page_group_unlock(req); 1159 nfs_page_group_unlock(req);
1160 return 0; 1160 desc->pg_error = PTR_ERR(dupreq);
1161 goto out_failed;
1161 } 1162 }
1162 1163
1163 nfs_lock_request(dupreq); 1164 nfs_lock_request(dupreq);
@@ -1170,10 +1171,32 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
1170 if (nfs_pgio_has_mirroring(desc)) 1171 if (nfs_pgio_has_mirroring(desc))
1171 desc->pg_mirror_idx = midx; 1172 desc->pg_mirror_idx = midx;
1172 if (!nfs_pageio_add_request_mirror(desc, dupreq)) 1173 if (!nfs_pageio_add_request_mirror(desc, dupreq))
1173 return 0; 1174 goto out_failed;
1174 } 1175 }
1175 1176
1176 return 1; 1177 return 1;
1178
1179out_failed:
1180 /*
1181	 * We might have failed before sending any reqs over the wire.
1182	 * Clean up the rest of the reqs in the mirror pg_list.
1183 */
1184 if (desc->pg_error) {
1185 struct nfs_pgio_mirror *mirror;
1186 void (*func)(struct list_head *);
1187
1188 /* remember fatal errors */
1189 if (nfs_error_is_fatal(desc->pg_error))
1190 mapping_set_error(desc->pg_inode->i_mapping,
1191 desc->pg_error);
1192
1193 func = desc->pg_completion_ops->error_cleanup;
1194 for (midx = 0; midx < desc->pg_mirror_count; midx++) {
1195 mirror = &desc->pg_mirrors[midx];
1196 func(&mirror->pg_list);
1197 }
1198 }
1199 return 0;
1177} 1200}
1178 1201
1179/* 1202/*
@@ -1226,7 +1249,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
1226 nfs_pageio_complete(desc); 1249 nfs_pageio_complete(desc);
1227 if (!list_empty(&failed)) { 1250 if (!list_empty(&failed)) {
1228 list_move(&failed, &hdr->pages); 1251 list_move(&failed, &hdr->pages);
1229 return -EIO; 1252 return desc->pg_error < 0 ? desc->pg_error : -EIO;
1230 } 1253 }
1231 return 0; 1254 return 0;
1232} 1255}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 04db6d951b99..a3592cc34a20 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -618,7 +618,6 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
618 pnfs_get_layout_hdr(lo); 618 pnfs_get_layout_hdr(lo);
619 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); 619 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
620 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 620 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
621 pnfs_clear_retry_layoutget(lo);
622 spin_unlock(&nfsi->vfs_inode.i_lock); 621 spin_unlock(&nfsi->vfs_inode.i_lock);
623 pnfs_free_lseg_list(&tmp_list); 622 pnfs_free_lseg_list(&tmp_list);
624 pnfs_put_layout_hdr(lo); 623 pnfs_put_layout_hdr(lo);
@@ -906,17 +905,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
906 lseg = nfs4_proc_layoutget(lgp, gfp_flags); 905 lseg = nfs4_proc_layoutget(lgp, gfp_flags);
907 } while (lseg == ERR_PTR(-EAGAIN)); 906 } while (lseg == ERR_PTR(-EAGAIN));
908 907
909 if (IS_ERR(lseg)) { 908 if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg)))
910 switch (PTR_ERR(lseg)) { 909 lseg = NULL;
911 case -ENOMEM: 910 else
912 case -ERESTARTSYS:
913 break;
914 default:
915 /* remember that LAYOUTGET failed and suspend trying */
916 pnfs_layout_io_set_failed(lo, range->iomode);
917 }
918 return NULL;
919 } else
920 pnfs_layout_clear_fail_bit(lo, 911 pnfs_layout_clear_fail_bit(lo,
921 pnfs_iomode_to_fail_bit(range->iomode)); 912 pnfs_iomode_to_fail_bit(range->iomode));
922 913
@@ -1104,7 +1095,6 @@ bool pnfs_roc(struct inode *ino)
1104 &lo->plh_flags)) 1095 &lo->plh_flags))
1105 layoutreturn = pnfs_prepare_layoutreturn(lo); 1096 layoutreturn = pnfs_prepare_layoutreturn(lo);
1106 1097
1107 pnfs_clear_retry_layoutget(lo);
1108 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) 1098 list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
1109 /* If we are sending layoutreturn, invalidate all valid lsegs */ 1099 /* If we are sending layoutreturn, invalidate all valid lsegs */
1110 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 1100 if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
@@ -1468,25 +1458,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
1468 return ret; 1458 return ret;
1469} 1459}
1470 1460
1471/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
1472static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
1473{
1474 if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
1475 return 1;
1476 return nfs_wait_bit_killable(key, mode);
1477}
1478
1479static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) 1461static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
1480{ 1462{
1481 if (!pnfs_should_retry_layoutget(lo))
1482 return false;
1483 /* 1463 /*
1484 * send layoutcommit as it can hold up layoutreturn due to lseg 1464 * send layoutcommit as it can hold up layoutreturn due to lseg
1485 * reference 1465 * reference
1486 */ 1466 */
1487 pnfs_layoutcommit_inode(lo->plh_inode, false); 1467 pnfs_layoutcommit_inode(lo->plh_inode, false);
1488 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, 1468 return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
1489 pnfs_layoutget_retry_bit_wait, 1469 nfs_wait_bit_killable,
1490 TASK_UNINTERRUPTIBLE); 1470 TASK_UNINTERRUPTIBLE);
1491} 1471}
1492 1472
@@ -1561,8 +1541,7 @@ lookup_again:
1561 } 1541 }
1562 1542
1563 /* if LAYOUTGET already failed once we don't try again */ 1543 /* if LAYOUTGET already failed once we don't try again */
1564 if (pnfs_layout_io_test_failed(lo, iomode) && 1544 if (pnfs_layout_io_test_failed(lo, iomode)) {
1565 !pnfs_should_retry_layoutget(lo)) {
1566 trace_pnfs_update_layout(ino, pos, count, iomode, lo, 1545 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1567 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); 1546 PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
1568 goto out_unlock; 1547 goto out_unlock;
@@ -1639,7 +1618,6 @@ lookup_again:
1639 arg.length = PAGE_CACHE_ALIGN(arg.length); 1618 arg.length = PAGE_CACHE_ALIGN(arg.length);
1640 1619
1641 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 1620 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
1642 pnfs_clear_retry_layoutget(lo);
1643 atomic_dec(&lo->plh_outstanding); 1621 atomic_dec(&lo->plh_outstanding);
1644 trace_pnfs_update_layout(ino, pos, count, iomode, lo, 1622 trace_pnfs_update_layout(ino, pos, count, iomode, lo,
1645 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); 1623 PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
@@ -1652,7 +1630,7 @@ out:
1652 "(%s, offset: %llu, length: %llu)\n", 1630 "(%s, offset: %llu, length: %llu)\n",
1653 __func__, ino->i_sb->s_id, 1631 __func__, ino->i_sb->s_id,
1654 (unsigned long long)NFS_FILEID(ino), 1632 (unsigned long long)NFS_FILEID(ino),
1655 lseg == NULL ? "not found" : "found", 1633 IS_ERR_OR_NULL(lseg) ? "not found" : "found",
1656 iomode==IOMODE_RW ? "read/write" : "read-only", 1634 iomode==IOMODE_RW ? "read/write" : "read-only",
1657 (unsigned long long)pos, 1635 (unsigned long long)pos,
1658 (unsigned long long)count); 1636 (unsigned long long)count);
@@ -1804,7 +1782,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
1804 struct pnfs_layout_segment *lseg) 1782 struct pnfs_layout_segment *lseg)
1805{ 1783{
1806 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; 1784 struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
1807 int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
1808 struct pnfs_layout_range range = { 1785 struct pnfs_layout_range range = {
1809 .iomode = lseg->pls_range.iomode, 1786 .iomode = lseg->pls_range.iomode,
1810 .offset = 0, 1787 .offset = 0,
@@ -1814,8 +1791,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
1814 bool return_now = false; 1791 bool return_now = false;
1815 1792
1816 spin_lock(&inode->i_lock); 1793 spin_lock(&inode->i_lock);
1817 /* set failure bit so that pnfs path will be retried later */
1818 pnfs_layout_set_fail_bit(lo, iomode);
1819 pnfs_set_plh_return_iomode(lo, range.iomode); 1794 pnfs_set_plh_return_iomode(lo, range.iomode);
1820 /* 1795 /*
1821 * mark all matching lsegs so that we are sure to have no live 1796 * mark all matching lsegs so that we are sure to have no live
@@ -1856,6 +1831,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1856 rd_size, 1831 rd_size,
1857 IOMODE_READ, 1832 IOMODE_READ,
1858 GFP_KERNEL); 1833 GFP_KERNEL);
1834 if (IS_ERR(pgio->pg_lseg)) {
1835 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
1836 pgio->pg_lseg = NULL;
1837 return;
1838 }
1859 } 1839 }
1860 /* If no lseg, fall back to read through mds */ 1840 /* If no lseg, fall back to read through mds */
1861 if (pgio->pg_lseg == NULL) 1841 if (pgio->pg_lseg == NULL)
@@ -1868,13 +1848,19 @@ void
1868pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 1848pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1869 struct nfs_page *req, u64 wb_size) 1849 struct nfs_page *req, u64 wb_size)
1870{ 1850{
1871 if (pgio->pg_lseg == NULL) 1851 if (pgio->pg_lseg == NULL) {
1872 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1852 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1873 req->wb_context, 1853 req->wb_context,
1874 req_offset(req), 1854 req_offset(req),
1875 wb_size, 1855 wb_size,
1876 IOMODE_RW, 1856 IOMODE_RW,
1877 GFP_NOFS); 1857 GFP_NOFS);
1858 if (IS_ERR(pgio->pg_lseg)) {
1859 pgio->pg_error = PTR_ERR(pgio->pg_lseg);
1860 pgio->pg_lseg = NULL;
1861 return;
1862 }
1863 }
1878 /* If no lseg, fall back to write through mds */ 1864 /* If no lseg, fall back to write through mds */
1879 if (pgio->pg_lseg == NULL) 1865 if (pgio->pg_lseg == NULL)
1880 nfs_pageio_reset_write_mds(pgio); 1866 nfs_pageio_reset_write_mds(pgio);
@@ -2042,15 +2028,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
2042int 2028int
2043pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) 2029pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
2044{ 2030{
2045 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2046
2047 struct nfs_pgio_header *hdr; 2031 struct nfs_pgio_header *hdr;
2048 int ret; 2032 int ret;
2049 2033
2050 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2034 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
2051 if (!hdr) { 2035 if (!hdr) {
2052 desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 2036 desc->pg_error = -ENOMEM;
2053 return -ENOMEM; 2037 return desc->pg_error;
2054 } 2038 }
2055 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 2039 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
2056 2040
@@ -2173,15 +2157,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
2173int 2157int
2174pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 2158pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
2175{ 2159{
2176 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2177
2178 struct nfs_pgio_header *hdr; 2160 struct nfs_pgio_header *hdr;
2179 int ret; 2161 int ret;
2180 2162
2181 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); 2163 hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
2182 if (!hdr) { 2164 if (!hdr) {
2183 desc->pg_completion_ops->error_cleanup(&mirror->pg_list); 2165 desc->pg_error = -ENOMEM;
2184 return -ENOMEM; 2166 return desc->pg_error;
2185 } 2167 }
2186 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 2168 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
2187 hdr->lseg = pnfs_get_lseg(desc->pg_lseg); 2169 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 78df618a1596..9f4e2a47f4aa 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -98,7 +98,6 @@ enum {
98 NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ 98 NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ 99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ 100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
101 NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */
102}; 101};
103 102
104enum layoutdriver_policy_flags { 103enum layoutdriver_policy_flags {
@@ -382,26 +381,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d)
382 return d; 381 return d;
383} 382}
384 383
385static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
386{
387 if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
388 atomic_inc(&lo->plh_refcount);
389}
390
391static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
392{
393 if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) {
394 atomic_dec(&lo->plh_refcount);
395 /* wake up waiters for LAYOUTRETURN as that is not needed */
396 wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
397 }
398}
399
400static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
401{
402 return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags);
403}
404
405static inline struct pnfs_layout_segment * 384static inline struct pnfs_layout_segment *
406pnfs_get_lseg(struct pnfs_layout_segment *lseg) 385pnfs_get_lseg(struct pnfs_layout_segment *lseg)
407{ 386{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a5e33f33b5c..eb31e23e7def 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
85} 85}
86EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 86EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
87 87
88static void nfs_readpage_release(struct nfs_page *req)
89{
90 struct inode *inode = d_inode(req->wb_context->dentry);
91
92 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
93 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
94 (long long)req_offset(req));
95
96 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
97 if (PageUptodate(req->wb_page))
98 nfs_readpage_to_fscache(inode, req->wb_page, 0);
99
100 unlock_page(req->wb_page);
101 }
102 nfs_release_request(req);
103}
104
88int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 105int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
89 struct page *page) 106 struct page *page)
90{ 107{
@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
106 123
107 nfs_pageio_init_read(&pgio, inode, false, 124 nfs_pageio_init_read(&pgio, inode, false,
108 &nfs_async_read_completion_ops); 125 &nfs_async_read_completion_ops);
109 nfs_pageio_add_request(&pgio, new); 126 if (!nfs_pageio_add_request(&pgio, new)) {
127 nfs_list_remove_request(new);
128 nfs_readpage_release(new);
129 }
110 nfs_pageio_complete(&pgio); 130 nfs_pageio_complete(&pgio);
111 131
112 /* It doesn't make sense to do mirrored reads! */ 132 /* It doesn't make sense to do mirrored reads! */
@@ -115,24 +135,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 pgm = &pgio.pg_mirrors[0]; 135 pgm = &pgio.pg_mirrors[0];
116 NFS_I(inode)->read_io += pgm->pg_bytes_written; 136 NFS_I(inode)->read_io += pgm->pg_bytes_written;
117 137
118 return 0; 138 return pgio.pg_error < 0 ? pgio.pg_error : 0;
119}
120
121static void nfs_readpage_release(struct nfs_page *req)
122{
123 struct inode *inode = d_inode(req->wb_context->dentry);
124
125 dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
126 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
127 (long long)req_offset(req));
128
129 if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
130 if (PageUptodate(req->wb_page))
131 nfs_readpage_to_fscache(inode, req->wb_page, 0);
132
133 unlock_page(req->wb_page);
134 }
135 nfs_release_request(req);
136} 139}
137 140
138static void nfs_page_group_set_uptodate(struct nfs_page *req) 141static void nfs_page_group_set_uptodate(struct nfs_page *req)
@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page)
361 if (len < PAGE_CACHE_SIZE) 364 if (len < PAGE_CACHE_SIZE)
362 zero_user_segment(page, len, PAGE_CACHE_SIZE); 365 zero_user_segment(page, len, PAGE_CACHE_SIZE);
363 if (!nfs_pageio_add_request(desc->pgio, new)) { 366 if (!nfs_pageio_add_request(desc->pgio, new)) {
367 nfs_list_remove_request(new);
368 nfs_readpage_release(new);
364 error = desc->pgio->pg_error; 369 error = desc->pgio->pg_error;
365 goto out_unlock; 370 goto out_unlock;
366 } 371 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2c26e04d9396..94828b3f8c95 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -547,12 +547,22 @@ try_again:
547 return head; 547 return head;
548} 548}
549 549
550static void nfs_write_error_remove_page(struct nfs_page *req)
551{
552 nfs_unlock_request(req);
553 nfs_end_page_writeback(req);
554 nfs_release_request(req);
555 generic_error_remove_page(page_file_mapping(req->wb_page),
556 req->wb_page);
557}
558
550/* 559/*
551 * Find an associated nfs write request, and prepare to flush it out 560 * Find an associated nfs write request, and prepare to flush it out
552 * May return an error if the user signalled nfs_wait_on_request(). 561 * May return an error if the user signalled nfs_wait_on_request().
553 */ 562 */
554static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 563static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
555 struct page *page, bool nonblock) 564 struct page *page, bool nonblock,
565 bool launder)
556{ 566{
557 struct nfs_page *req; 567 struct nfs_page *req;
558 int ret = 0; 568 int ret = 0;
@@ -569,8 +579,21 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
569 579
570 ret = 0; 580 ret = 0;
571 if (!nfs_pageio_add_request(pgio, req)) { 581 if (!nfs_pageio_add_request(pgio, req)) {
572 nfs_redirty_request(req);
573 ret = pgio->pg_error; 582 ret = pgio->pg_error;
583 /*
584 * Remove the problematic req upon fatal errors
585 * in launder case, while other dirty pages can
586 * still be around until they get flushed.
587 */
588 if (nfs_error_is_fatal(ret)) {
589 nfs_context_set_write_error(req->wb_context, ret);
590 if (launder) {
591 nfs_write_error_remove_page(req);
592 goto out;
593 }
594 }
595 nfs_redirty_request(req);
596 ret = -EAGAIN;
574 } else 597 } else
575 nfs_add_stats(page_file_mapping(page)->host, 598 nfs_add_stats(page_file_mapping(page)->host,
576 NFSIOS_WRITEPAGES, 1); 599 NFSIOS_WRITEPAGES, 1);
@@ -578,12 +601,14 @@ out:
578 return ret; 601 return ret;
579} 602}
580 603
581static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 604static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
605 struct nfs_pageio_descriptor *pgio, bool launder)
582{ 606{
583 int ret; 607 int ret;
584 608
585 nfs_pageio_cond_complete(pgio, page_file_index(page)); 609 nfs_pageio_cond_complete(pgio, page_file_index(page));
586 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); 610 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
611 launder);
587 if (ret == -EAGAIN) { 612 if (ret == -EAGAIN) {
588 redirty_page_for_writepage(wbc, page); 613 redirty_page_for_writepage(wbc, page);
589 ret = 0; 614 ret = 0;
@@ -594,7 +619,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
594/* 619/*
595 * Write an mmapped page to the server. 620 * Write an mmapped page to the server.
596 */ 621 */
597static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) 622static int nfs_writepage_locked(struct page *page,
623 struct writeback_control *wbc,
624 bool launder)
598{ 625{
599 struct nfs_pageio_descriptor pgio; 626 struct nfs_pageio_descriptor pgio;
600 struct inode *inode = page_file_mapping(page)->host; 627 struct inode *inode = page_file_mapping(page)->host;
@@ -603,7 +630,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
603 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 630 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
604 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), 631 nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
605 false, &nfs_async_write_completion_ops); 632 false, &nfs_async_write_completion_ops);
606 err = nfs_do_writepage(page, wbc, &pgio); 633 err = nfs_do_writepage(page, wbc, &pgio, launder);
607 nfs_pageio_complete(&pgio); 634 nfs_pageio_complete(&pgio);
608 if (err < 0) 635 if (err < 0)
609 return err; 636 return err;
@@ -616,7 +643,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
616{ 643{
617 int ret; 644 int ret;
618 645
619 ret = nfs_writepage_locked(page, wbc); 646 ret = nfs_writepage_locked(page, wbc, false);
620 unlock_page(page); 647 unlock_page(page);
621 return ret; 648 return ret;
622} 649}
@@ -625,7 +652,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
625{ 652{
626 int ret; 653 int ret;
627 654
628 ret = nfs_do_writepage(page, wbc, data); 655 ret = nfs_do_writepage(page, wbc, data, false);
629 unlock_page(page); 656 unlock_page(page);
630 return ret; 657 return ret;
631} 658}
@@ -1923,7 +1950,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1923/* 1950/*
1924 * Write back all requests on one page - we do this before reading it. 1951 * Write back all requests on one page - we do this before reading it.
1925 */ 1952 */
1926int nfs_wb_page(struct inode *inode, struct page *page) 1953int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
1927{ 1954{
1928 loff_t range_start = page_file_offset(page); 1955 loff_t range_start = page_file_offset(page);
1929 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); 1956 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
@@ -1940,7 +1967,7 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1940 for (;;) { 1967 for (;;) {
1941 wait_on_page_writeback(page); 1968 wait_on_page_writeback(page);
1942 if (clear_page_dirty_for_io(page)) { 1969 if (clear_page_dirty_for_io(page)) {
1943 ret = nfs_writepage_locked(page, &wbc); 1970 ret = nfs_writepage_locked(page, &wbc, launder);
1944 if (ret < 0) 1971 if (ret < 0)
1945 goto out_error; 1972 goto out_error;
1946 continue; 1973 continue;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ebf0bd72a42b..9eee972863a7 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -516,13 +516,25 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
516 */ 516 */
517extern int nfs_sync_inode(struct inode *inode); 517extern int nfs_sync_inode(struct inode *inode);
518extern int nfs_wb_all(struct inode *inode); 518extern int nfs_wb_all(struct inode *inode);
519extern int nfs_wb_page(struct inode *inode, struct page* page); 519extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
520extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); 520extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
521extern int nfs_commit_inode(struct inode *, int); 521extern int nfs_commit_inode(struct inode *, int);
522extern struct nfs_commit_data *nfs_commitdata_alloc(void); 522extern struct nfs_commit_data *nfs_commitdata_alloc(void);
523extern void nfs_commit_free(struct nfs_commit_data *data); 523extern void nfs_commit_free(struct nfs_commit_data *data);
524 524
525static inline int 525static inline int
526nfs_wb_launder_page(struct inode *inode, struct page *page)
527{
528 return nfs_wb_single_page(inode, page, true);
529}
530
531static inline int
532nfs_wb_page(struct inode *inode, struct page *page)
533{
534 return nfs_wb_single_page(inode, page, false);
535}
536
537static inline int
526nfs_have_writebacks(struct inode *inode) 538nfs_have_writebacks(struct inode *inode)
527{ 539{
528 return NFS_I(inode)->nrequests != 0; 540 return NFS_I(inode)->nrequests != 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2469ab0bb3a1..7fcc13c8cf1f 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -102,6 +102,7 @@ struct nfs_client {
102#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */ 102#define NFS_SP4_MACH_CRED_STATEID 4 /* TEST_STATEID and FREE_STATEID */
103#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ 103#define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */
104#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ 104#define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */
105#define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */
105#endif /* CONFIG_NFS_V4 */ 106#endif /* CONFIG_NFS_V4 */
106 107
107 /* Our own IP address, as a null-terminated string. 108 /* Our own IP address, as a null-terminated string.
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bee3e60a7006..791098a08a87 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1375,6 +1375,7 @@ enum {
1375 NFS_IOHDR_ERROR = 0, 1375 NFS_IOHDR_ERROR = 0,
1376 NFS_IOHDR_EOF, 1376 NFS_IOHDR_EOF,
1377 NFS_IOHDR_REDO, 1377 NFS_IOHDR_REDO,
1378 NFS_IOHDR_STAT,
1378}; 1379};
1379 1380
1380struct nfs_pgio_header { 1381struct nfs_pgio_header {
@@ -1455,6 +1456,7 @@ struct nfs_commit_data {
1455 const struct rpc_call_ops *mds_ops; 1456 const struct rpc_call_ops *mds_ops;
1456 const struct nfs_commit_completion_ops *completion_ops; 1457 const struct nfs_commit_completion_ops *completion_ops;
1457 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); 1458 int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
1459 unsigned long flags;
1458}; 1460};
1459 1461
1460struct nfs_pgio_completion_ops { 1462struct nfs_pgio_completion_ops {
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 2dcb44f69e53..cc1251d07297 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -15,7 +15,7 @@
15# define RPCDBG_FACILITY RPCDBG_TRANS 15# define RPCDBG_FACILITY RPCDBG_TRANS
16#endif 16#endif
17 17
18#define RPCRDMA_BACKCHANNEL_DEBUG 18#undef RPCRDMA_BACKCHANNEL_DEBUG
19 19
20static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, 20static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
21 struct rpc_rqst *rqst) 21 struct rpc_rqst *rqst)
@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
42 size_t size; 42 size_t size;
43 43
44 req = rpcrdma_create_req(r_xprt); 44 req = rpcrdma_create_req(r_xprt);
45 if (!req) 45 if (IS_ERR(req))
46 return -ENOMEM; 46 return PTR_ERR(req);
47 req->rl_backchannel = true; 47 req->rl_backchannel = true;
48 48
49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); 49 size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
@@ -84,9 +84,7 @@ out_fail:
84static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, 84static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
85 unsigned int count) 85 unsigned int count)
86{ 86{
87 struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
88 struct rpcrdma_rep *rep; 87 struct rpcrdma_rep *rep;
89 unsigned long flags;
90 int rc = 0; 88 int rc = 0;
91 89
92 while (count--) { 90 while (count--) {
@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
98 break; 96 break;
99 } 97 }
100 98
101 spin_lock_irqsave(&buffers->rb_lock, flags); 99 rpcrdma_recv_buffer_put(rep);
102 list_add(&rep->rr_list, &buffers->rb_recv_bufs);
103 spin_unlock_irqrestore(&buffers->rb_lock, flags);
104 } 100 }
105 101
106 return rc; 102 return rc;
@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
140 __func__); 136 __func__);
141 goto out_free; 137 goto out_free;
142 } 138 }
139 dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
143 140
144 rqst->rq_xprt = &r_xprt->rx_xprt; 141 rqst->rq_xprt = &r_xprt->rx_xprt;
145 INIT_LIST_HEAD(&rqst->rq_list); 142 INIT_LIST_HEAD(&rqst->rq_list);
@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
220 217
221 rpclen = rqst->rq_svec[0].iov_len; 218 rpclen = rqst->rq_svec[0].iov_len;
222 219
220#ifdef RPCRDMA_BACKCHANNEL_DEBUG
223 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", 221 pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
224 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); 222 __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
225 pr_info("RPC: %s: RPC/RDMA: %*ph\n", 223 pr_info("RPC: %s: RPC/RDMA: %*ph\n",
226 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); 224 __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
227 pr_info("RPC: %s: RPC: %*ph\n", 225 pr_info("RPC: %s: RPC: %*ph\n",
228 __func__, (int)rpclen, rqst->rq_svec[0].iov_base); 226 __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
227#endif
229 228
230 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); 229 req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
231 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; 230 req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
269{ 268{
270 struct rpc_xprt *xprt = rqst->rq_xprt; 269 struct rpc_xprt *xprt = rqst->rq_xprt;
271 270
271 dprintk("RPC: %s: freeing rqst %p (req %p)\n",
272 __func__, rqst, rpcr_to_rdmar(rqst));
273
272 smp_mb__before_atomic(); 274 smp_mb__before_atomic();
273 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); 275 WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
274 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); 276 clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
333 struct rpc_rqst, rq_bc_pa_list); 335 struct rpc_rqst, rq_bc_pa_list);
334 list_del(&rqst->rq_bc_pa_list); 336 list_del(&rqst->rq_bc_pa_list);
335 spin_unlock(&xprt->bc_pa_lock); 337 spin_unlock(&xprt->bc_pa_lock);
336#ifdef RPCRDMA_BACKCHANNEL_DEBUG 338 dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
337 pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
338#endif
339 339
340 /* Prepare rqst */ 340 /* Prepare rqst */
341 rqst->rq_reply_bytes_recvd = 0; 341 rqst->rq_reply_bytes_recvd = 0;
@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
355 * direction reply. 355 * direction reply.
356 */ 356 */
357 req = rpcr_to_rdmar(rqst); 357 req = rpcr_to_rdmar(rqst);
358#ifdef RPCRDMA_BACKCHANNEL_DEBUG 358 dprintk("RPC: %s: attaching rep %p to req %p\n",
359 pr_info("RPC: %s: attaching rep %p to req %p\n",
360 __func__, rep, req); 359 __func__, rep, req);
361#endif
362 req->rl_reply = rep; 360 req->rl_reply = rep;
363 361
364 /* Defeat the retransmit detection logic in send_request */ 362 /* Defeat the retransmit detection logic in send_request */
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index f1e8dafbd507..c14f3a4bff68 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -179,6 +179,69 @@ out_maperr:
179 return rc; 179 return rc;
180} 180}
181 181
182static void
183__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
184{
185 struct ib_device *device = r_xprt->rx_ia.ri_device;
186 struct rpcrdma_mw *mw = seg->rl_mw;
187 int nsegs = seg->mr_nsegs;
188
189 seg->rl_mw = NULL;
190
191 while (nsegs--)
192 rpcrdma_unmap_one(device, seg++);
193
194 rpcrdma_put_mw(r_xprt, mw);
195}
196
197/* Invalidate all memory regions that were registered for "req".
198 *
199 * Sleeps until it is safe for the host CPU to access the
200 * previously mapped memory regions.
201 */
202static void
203fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
204{
205 struct rpcrdma_mr_seg *seg;
206 unsigned int i, nchunks;
207 struct rpcrdma_mw *mw;
208 LIST_HEAD(unmap_list);
209 int rc;
210
211 dprintk("RPC: %s: req %p\n", __func__, req);
212
213 /* ORDER: Invalidate all of the req's MRs first
214 *
215 * ib_unmap_fmr() is slow, so use a single call instead
216 * of one call per mapped MR.
217 */
218 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
219 seg = &req->rl_segments[i];
220 mw = seg->rl_mw;
221
222 list_add(&mw->r.fmr.fmr->list, &unmap_list);
223
224 i += seg->mr_nsegs;
225 }
226 rc = ib_unmap_fmr(&unmap_list);
227 if (rc)
228 pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
229
230 /* ORDER: Now DMA unmap all of the req's MRs, and return
231 * them to the free MW list.
232 */
233 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
234 seg = &req->rl_segments[i];
235
236 __fmr_dma_unmap(r_xprt, seg);
237
238 i += seg->mr_nsegs;
239 seg->mr_nsegs = 0;
240 }
241
242 req->rl_nchunks = 0;
243}
244
182/* Use the ib_unmap_fmr() verb to prevent further remote 245/* Use the ib_unmap_fmr() verb to prevent further remote
183 * access via RDMA READ or RDMA WRITE. 246 * access via RDMA READ or RDMA WRITE.
184 */ 247 */
@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
231 294
232const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { 295const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
233 .ro_map = fmr_op_map, 296 .ro_map = fmr_op_map,
297 .ro_unmap_sync = fmr_op_unmap_sync,
234 .ro_unmap = fmr_op_unmap, 298 .ro_unmap = fmr_op_unmap,
235 .ro_open = fmr_op_open, 299 .ro_open = fmr_op_open,
236 .ro_maxpages = fmr_op_maxpages, 300 .ro_maxpages = fmr_op_maxpages,
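
Note on the two-pass structure in fmr_op_unmap_sync() above: it relies on ib_unmap_fmr() accepting a whole list of FMRs, so one slow verb call can invalidate everything a request mapped. The following is a minimal sketch of that batching idiom, illustrative only and not part of the patch ("mws" and "n" are hypothetical names for the request's MW pointers and their count):

	LIST_HEAD(unmap_list);
	unsigned int i;
	int rc;

	/* Pass 1: chain every FMR onto one list via its embedded list_head */
	for (i = 0; i < n; i++)
		list_add(&mws[i]->r.fmr.fmr->list, &unmap_list);

	/* A single ib_unmap_fmr() call invalidates the whole batch */
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		pr_warn("ib_unmap_fmr failed (%i)\n", rc);

	/* Pass 2 (not shown): DMA unmap each segment and return the MWs */
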
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 88cf9e7269c2..c6836844bd0e 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
245 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); 245 rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
246} 246}
247 247
248/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ 248/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
249 * to be reset.
250 *
251 * WARNING: Only wr_id and status are reliable at this point
252 */
249static void 253static void
250frwr_sendcompletion(struct ib_wc *wc) 254__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
251{ 255{
252 struct rpcrdma_mw *r;
253
254 if (likely(wc->status == IB_WC_SUCCESS)) 256 if (likely(wc->status == IB_WC_SUCCESS))
255 return; 257 return;
256 258
@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
261 else 263 else
262 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", 264 pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
263 __func__, r, ib_wc_status_msg(wc->status), wc->status); 265 __func__, r, ib_wc_status_msg(wc->status), wc->status);
266
264 r->r.frmr.fr_state = FRMR_IS_STALE; 267 r->r.frmr.fr_state = FRMR_IS_STALE;
265} 268}
266 269
270static void
271frwr_sendcompletion(struct ib_wc *wc)
272{
273 struct rpcrdma_mw *r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
274 struct rpcrdma_frmr *f = &r->r.frmr;
275
276 if (unlikely(wc->status != IB_WC_SUCCESS))
277 __frwr_sendcompletion_flush(wc, r);
278
279 if (f->fr_waiter)
280 complete(&f->fr_linv_done);
281}
282
267static int 283static int
268frwr_op_init(struct rpcrdma_xprt *r_xprt) 284frwr_op_init(struct rpcrdma_xprt *r_xprt)
269{ 285{
@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
319 struct rpcrdma_mw *mw; 335 struct rpcrdma_mw *mw;
320 struct rpcrdma_frmr *frmr; 336 struct rpcrdma_frmr *frmr;
321 struct ib_mr *mr; 337 struct ib_mr *mr;
322 struct ib_reg_wr reg_wr; 338 struct ib_reg_wr *reg_wr;
323 struct ib_send_wr *bad_wr; 339 struct ib_send_wr *bad_wr;
324 int rc, i, n, dma_nents; 340 int rc, i, n, dma_nents;
325 u8 key; 341 u8 key;
@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
335 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); 351 } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
336 frmr = &mw->r.frmr; 352 frmr = &mw->r.frmr;
337 frmr->fr_state = FRMR_IS_VALID; 353 frmr->fr_state = FRMR_IS_VALID;
354 frmr->fr_waiter = false;
338 mr = frmr->fr_mr; 355 mr = frmr->fr_mr;
356 reg_wr = &frmr->fr_regwr;
339 357
340 if (nsegs > ia->ri_max_frmr_depth) 358 if (nsegs > ia->ri_max_frmr_depth)
341 nsegs = ia->ri_max_frmr_depth; 359 nsegs = ia->ri_max_frmr_depth;
@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
381 key = (u8)(mr->rkey & 0x000000FF); 399 key = (u8)(mr->rkey & 0x000000FF);
382 ib_update_fast_reg_key(mr, ++key); 400 ib_update_fast_reg_key(mr, ++key);
383 401
384 reg_wr.wr.next = NULL; 402 reg_wr->wr.next = NULL;
385 reg_wr.wr.opcode = IB_WR_REG_MR; 403 reg_wr->wr.opcode = IB_WR_REG_MR;
386 reg_wr.wr.wr_id = (uintptr_t)mw; 404 reg_wr->wr.wr_id = (uintptr_t)mw;
387 reg_wr.wr.num_sge = 0; 405 reg_wr->wr.num_sge = 0;
388 reg_wr.wr.send_flags = 0; 406 reg_wr->wr.send_flags = 0;
389 reg_wr.mr = mr; 407 reg_wr->mr = mr;
390 reg_wr.key = mr->rkey; 408 reg_wr->key = mr->rkey;
391 reg_wr.access = writing ? 409 reg_wr->access = writing ?
392 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : 410 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
393 IB_ACCESS_REMOTE_READ; 411 IB_ACCESS_REMOTE_READ;
394 412
395 DECR_CQCOUNT(&r_xprt->rx_ep); 413 DECR_CQCOUNT(&r_xprt->rx_ep);
396 rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); 414 rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
397 if (rc) 415 if (rc)
398 goto out_senderr; 416 goto out_senderr;
399 417
@@ -413,6 +431,116 @@ out_senderr:
413 return rc; 431 return rc;
414} 432}
415 433
434static struct ib_send_wr *
435__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
436{
437 struct rpcrdma_mw *mw = seg->rl_mw;
438 struct rpcrdma_frmr *f = &mw->r.frmr;
439 struct ib_send_wr *invalidate_wr;
440
441 f->fr_waiter = false;
442 f->fr_state = FRMR_IS_INVALID;
443 invalidate_wr = &f->fr_invwr;
444
445 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
446 invalidate_wr->wr_id = (unsigned long)(void *)mw;
447 invalidate_wr->opcode = IB_WR_LOCAL_INV;
448 invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
449
450 return invalidate_wr;
451}
452
453static void
454__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
455 int rc)
456{
457 struct ib_device *device = r_xprt->rx_ia.ri_device;
458 struct rpcrdma_mw *mw = seg->rl_mw;
459 struct rpcrdma_frmr *f = &mw->r.frmr;
460
461 seg->rl_mw = NULL;
462
463 ib_dma_unmap_sg(device, f->sg, f->sg_nents, seg->mr_dir);
464
465 if (!rc)
466 rpcrdma_put_mw(r_xprt, mw);
467 else
468 __frwr_queue_recovery(mw);
469}
470
471/* Invalidate all memory regions that were registered for "req".
472 *
473 * Sleeps until it is safe for the host CPU to access the
474 * previously mapped memory regions.
475 */
476static void
477frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
478{
479 struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
480 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
481 struct rpcrdma_mr_seg *seg;
482 unsigned int i, nchunks;
483 struct rpcrdma_frmr *f;
484 int rc;
485
486 dprintk("RPC: %s: req %p\n", __func__, req);
487
488 /* ORDER: Invalidate all of the req's MRs first
489 *
490 * Chain the LOCAL_INV Work Requests and post them with
491 * a single ib_post_send() call.
492 */
493 invalidate_wrs = pos = prev = NULL;
494 seg = NULL;
495 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
496 seg = &req->rl_segments[i];
497
498 pos = __frwr_prepare_linv_wr(seg);
499
500 if (!invalidate_wrs)
501 invalidate_wrs = pos;
502 else
503 prev->next = pos;
504 prev = pos;
505
506 i += seg->mr_nsegs;
507 }
508 f = &seg->rl_mw->r.frmr;
509
510 /* Strong send queue ordering guarantees that when the
511 * last WR in the chain completes, all WRs in the chain
512 * are complete.
513 */
514 f->fr_invwr.send_flags = IB_SEND_SIGNALED;
515 f->fr_waiter = true;
516 init_completion(&f->fr_linv_done);
517 INIT_CQCOUNT(&r_xprt->rx_ep);
518
519 /* Transport disconnect drains the receive CQ before it
520 * replaces the QP. The RPC reply handler won't call us
521 * unless ri_id->qp is a valid pointer.
522 */
523 rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
524 if (rc)
525 pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
526
527 wait_for_completion(&f->fr_linv_done);
528
529 /* ORDER: Now DMA unmap all of the req's MRs, and return
530 * them to the free MW list.
531 */
532 for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
533 seg = &req->rl_segments[i];
534
535 __frwr_dma_unmap(r_xprt, seg, rc);
536
537 i += seg->mr_nsegs;
538 seg->mr_nsegs = 0;
539 }
540
541 req->rl_nchunks = 0;
542}
543
416/* Post a LOCAL_INV Work Request to prevent further remote access 544/* Post a LOCAL_INV Work Request to prevent further remote access
417 * via RDMA READ or RDMA WRITE. 545 * via RDMA READ or RDMA WRITE.
418 */ 546 */
@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
423 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 551 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
424 struct rpcrdma_mw *mw = seg1->rl_mw; 552 struct rpcrdma_mw *mw = seg1->rl_mw;
425 struct rpcrdma_frmr *frmr = &mw->r.frmr; 553 struct rpcrdma_frmr *frmr = &mw->r.frmr;
426 struct ib_send_wr invalidate_wr, *bad_wr; 554 struct ib_send_wr *invalidate_wr, *bad_wr;
427 int rc, nsegs = seg->mr_nsegs; 555 int rc, nsegs = seg->mr_nsegs;
428 556
429 dprintk("RPC: %s: FRMR %p\n", __func__, mw); 557 dprintk("RPC: %s: FRMR %p\n", __func__, mw);
430 558
431 seg1->rl_mw = NULL; 559 seg1->rl_mw = NULL;
432 frmr->fr_state = FRMR_IS_INVALID; 560 frmr->fr_state = FRMR_IS_INVALID;
561 invalidate_wr = &mw->r.frmr.fr_invwr;
433 562
434 memset(&invalidate_wr, 0, sizeof(invalidate_wr)); 563 memset(invalidate_wr, 0, sizeof(*invalidate_wr));
435 invalidate_wr.wr_id = (unsigned long)(void *)mw; 564 invalidate_wr->wr_id = (uintptr_t)mw;
436 invalidate_wr.opcode = IB_WR_LOCAL_INV; 565 invalidate_wr->opcode = IB_WR_LOCAL_INV;
437 invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; 566 invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
438 DECR_CQCOUNT(&r_xprt->rx_ep); 567 DECR_CQCOUNT(&r_xprt->rx_ep);
439 568
440 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); 569 ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
441 read_lock(&ia->ri_qplock); 570 read_lock(&ia->ri_qplock);
442 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); 571 rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
443 read_unlock(&ia->ri_qplock); 572 read_unlock(&ia->ri_qplock);
444 if (rc) 573 if (rc)
445 goto out_err; 574 goto out_err;
@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
471 600
472const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { 601const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
473 .ro_map = frwr_op_map, 602 .ro_map = frwr_op_map,
603 .ro_unmap_sync = frwr_op_unmap_sync,
474 .ro_unmap = frwr_op_unmap, 604 .ro_unmap = frwr_op_unmap,
475 .ro_open = frwr_op_open, 605 .ro_open = frwr_op_open,
476 .ro_maxpages = frwr_op_maxpages, 606 .ro_maxpages = frwr_op_maxpages,
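
frwr_op_unmap_sync() above works by chaining all of a request's LOCAL_INV Work Requests, marking only the last one IB_SEND_SIGNALED, and sleeping until frwr_sendcompletion() reports that final completion. A stripped-down sketch of the handshake follows; it is illustrative only, and qp, first_wr, last_wr, last_frmr and bad_wr are placeholder names rather than patch code:

	/* Sender side: one CQE covers the whole invalidation chain */
	last_frmr->fr_waiter = true;
	init_completion(&last_frmr->fr_linv_done);
	last_wr->send_flags = IB_SEND_SIGNALED;

	if (ib_post_send(qp, first_wr, &bad_wr) == 0)
		wait_for_completion(&last_frmr->fr_linv_done);

	/* Completion side (see frwr_sendcompletion above):
	 *	if (f->fr_waiter)
	 *		complete(&f->fr_linv_done);
	 */
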
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
index 617b76f22154..dbb302ecf590 100644
--- a/net/sunrpc/xprtrdma/physical_ops.c
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
83 return 1; 83 return 1;
84} 84}
85 85
86/* DMA unmap all memory regions that were mapped for "req".
87 */
88static void
89physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
90{
91 struct ib_device *device = r_xprt->rx_ia.ri_device;
92 unsigned int i;
93
94 for (i = 0; req->rl_nchunks; --req->rl_nchunks)
95 rpcrdma_unmap_one(device, &req->rl_segments[i++]);
96}
97
86static void 98static void
87physical_op_destroy(struct rpcrdma_buffer *buf) 99physical_op_destroy(struct rpcrdma_buffer *buf)
88{ 100{
@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
90 102
91const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { 103const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
92 .ro_map = physical_op_map, 104 .ro_map = physical_op_map,
105 .ro_unmap_sync = physical_op_unmap_sync,
93 .ro_unmap = physical_op_unmap, 106 .ro_unmap = physical_op_unmap,
94 .ro_open = physical_op_open, 107 .ro_open = physical_op_open,
95 .ro_maxpages = physical_op_maxpages, 108 .ro_maxpages = physical_op_maxpages,
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c10d9699441c..0f28f2d743ed 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
804 if (req->rl_reply) 804 if (req->rl_reply)
805 goto out_duplicate; 805 goto out_duplicate;
806 806
807 /* Sanity checking has passed. We are now committed
808 * to complete this transaction.
809 */
810 list_del_init(&rqst->rq_list);
811 spin_unlock_bh(&xprt->transport_lock);
807 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" 812 dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
808 " RPC request 0x%p xid 0x%08x\n", 813 " RPC request 0x%p xid 0x%08x\n",
809 __func__, rep, req, rqst, 814 __func__, rep, req, rqst,
@@ -888,12 +893,23 @@ badheader:
888 break; 893 break;
889 } 894 }
890 895
896 /* Invalidate and flush the data payloads before waking the
897 * waiting application. This guarantees the memory region is
898 * properly fenced from the server before the application
899 * accesses the data. It also ensures proper send flow
900 * control: waking the next RPC waits until this RPC has
901 * relinquished all its Send Queue entries.
902 */
903 if (req->rl_nchunks)
904 r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
905
891 credits = be32_to_cpu(headerp->rm_credit); 906 credits = be32_to_cpu(headerp->rm_credit);
892 if (credits == 0) 907 if (credits == 0)
893 credits = 1; /* don't deadlock */ 908 credits = 1; /* don't deadlock */
894 else if (credits > r_xprt->rx_buf.rb_max_requests) 909 else if (credits > r_xprt->rx_buf.rb_max_requests)
895 credits = r_xprt->rx_buf.rb_max_requests; 910 credits = r_xprt->rx_buf.rb_max_requests;
896 911
912 spin_lock_bh(&xprt->transport_lock);
897 cwnd = xprt->cwnd; 913 cwnd = xprt->cwnd;
898 xprt->cwnd = credits << RPC_CWNDSHIFT; 914 xprt->cwnd = credits << RPC_CWNDSHIFT;
899 if (xprt->cwnd > cwnd) 915 if (xprt->cwnd > cwnd)
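
Note on the locking change above: ro_unmap_sync() sleeps (it waits for the LOCAL_INV completions), so the reply handler must not call it while holding xprt->transport_lock, which is taken with spin_lock_bh(). The patch therefore detaches the rqst and drops the lock before invalidating, then retakes it for the credit/cwnd update. In outline (a simplification of the hunk above, not a literal copy):

	spin_lock_bh(&xprt->transport_lock);
	/* ... match the reply to rqst/req, list_del_init(&rqst->rq_list) ... */
	spin_unlock_bh(&xprt->transport_lock);

	if (req->rl_nchunks)
		r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);	/* may sleep */

	spin_lock_bh(&xprt->transport_lock);
	/* ... update credits and xprt->cwnd, complete the RPC ... */
	spin_unlock_bh(&xprt->transport_lock);
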
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 8c545f7d7525..740bddcf3488 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
576 576
577 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); 577 rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
578 req = rb->rg_owner; 578 req = rb->rg_owner;
579 if (req->rl_backchannel)
580 return;
581
579 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); 582 r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
580 583
581 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); 584 dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index eadd1655145a..732c71ce5dca 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
616 616
617 /* set trigger for requesting send completion */ 617 /* set trigger for requesting send completion */
618 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; 618 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
619 if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) 619 if (ep->rep_cqinit <= 2)
620 ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; 620 ep->rep_cqinit = 0; /* always signal? */
621 else if (ep->rep_cqinit <= 2)
622 ep->rep_cqinit = 0;
623 INIT_CQCOUNT(ep); 621 INIT_CQCOUNT(ep);
624 init_waitqueue_head(&ep->rep_connect_wait); 622 init_waitqueue_head(&ep->rep_connect_wait);
625 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); 623 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
@@ -852,10 +850,11 @@ retry:
852 850
853 if (extras) { 851 if (extras) {
854 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); 852 rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
855 if (rc) 853 if (rc) {
856 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", 854 pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
857 __func__, rc); 855 __func__, rc);
858 rc = 0; 856 rc = 0;
857 }
859 } 858 }
860 } 859 }
861 860
@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1337 struct rpcrdma_ia *ia = &r_xprt->rx_ia; 1336 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
1338 struct rpcrdma_ep *ep = &r_xprt->rx_ep; 1337 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
1339 struct rpcrdma_rep *rep; 1338 struct rpcrdma_rep *rep;
1340 unsigned long flags;
1341 int rc; 1339 int rc;
1342 1340
1343 while (count--) { 1341 while (count--) {
1344 spin_lock_irqsave(&buffers->rb_lock, flags); 1342 spin_lock(&buffers->rb_lock);
1345 if (list_empty(&buffers->rb_recv_bufs)) 1343 if (list_empty(&buffers->rb_recv_bufs))
1346 goto out_reqbuf; 1344 goto out_reqbuf;
1347 rep = rpcrdma_buffer_get_rep_locked(buffers); 1345 rep = rpcrdma_buffer_get_rep_locked(buffers);
1348 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1346 spin_unlock(&buffers->rb_lock);
1349 1347
1350 rc = rpcrdma_ep_post_recv(ia, ep, rep); 1348 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1351 if (rc) 1349 if (rc)
@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
1355 return 0; 1353 return 0;
1356 1354
1357out_reqbuf: 1355out_reqbuf:
1358 spin_unlock_irqrestore(&buffers->rb_lock, flags); 1356 spin_unlock(&buffers->rb_lock);
1359 pr_warn("%s: no extra receive buffers\n", __func__); 1357 pr_warn("%s: no extra receive buffers\n", __func__);
1360 return -ENOMEM; 1358 return -ENOMEM;
1361 1359
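
On the rep_cqinit change earlier in this file: with RPCRDMA_MAX_UNSIGNALED_SENDS removed (see the xprt_rdma.h hunk below), a signaled SEND is now requested roughly every max_send_wr/2 posts rather than at most every 32. The counter is consumed by the SEND posting path elsewhere in verbs.c, roughly as follows (a paraphrase for orientation only, not part of this diff):

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;			/* unsignaled SEND */
	else {
		INIT_CQCOUNT(ep);			/* restart the countdown */
		send_wr.send_flags = IB_SEND_SIGNALED;	/* let the provider clean up */
	}
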
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7f8d4f632a..728101ddc44b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -88,12 +88,6 @@ struct rpcrdma_ep {
88 struct delayed_work rep_connect_worker; 88 struct delayed_work rep_connect_worker;
89}; 89};
90 90
91/*
92 * Force a signaled SEND Work Request every so often,
93 * in case the provider needs to do some housekeeping.
94 */
95#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
96
97#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) 91#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
98#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) 92#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
99 93
@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
207 enum rpcrdma_frmr_state fr_state; 201 enum rpcrdma_frmr_state fr_state;
208 struct work_struct fr_work; 202 struct work_struct fr_work;
209 struct rpcrdma_xprt *fr_xprt; 203 struct rpcrdma_xprt *fr_xprt;
204 bool fr_waiter;
 205 struct completion fr_linv_done;
206 union {
207 struct ib_reg_wr fr_regwr;
208 struct ib_send_wr fr_invwr;
209 };
210}; 210};
211 211
212struct rpcrdma_fmr { 212struct rpcrdma_fmr {
@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
364struct rpcrdma_memreg_ops { 364struct rpcrdma_memreg_ops {
365 int (*ro_map)(struct rpcrdma_xprt *, 365 int (*ro_map)(struct rpcrdma_xprt *,
366 struct rpcrdma_mr_seg *, int, bool); 366 struct rpcrdma_mr_seg *, int, bool);
367 void (*ro_unmap_sync)(struct rpcrdma_xprt *,
368 struct rpcrdma_req *);
367 int (*ro_unmap)(struct rpcrdma_xprt *, 369 int (*ro_unmap)(struct rpcrdma_xprt *,
368 struct rpcrdma_mr_seg *); 370 struct rpcrdma_mr_seg *);
369 int (*ro_open)(struct rpcrdma_ia *, 371 int (*ro_open)(struct rpcrdma_ia *,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2ffaf6a79499..70c13d675dc1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1907,18 +1907,6 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
1907 } 1907 }
1908} 1908}
1909#else 1909#else
1910static inline void xs_reclassify_socketu(struct socket *sock)
1911{
1912}
1913
1914static inline void xs_reclassify_socket4(struct socket *sock)
1915{
1916}
1917
1918static inline void xs_reclassify_socket6(struct socket *sock)
1919{
1920}
1921
1922static inline void xs_reclassify_socket(int family, struct socket *sock) 1910static inline void xs_reclassify_socket(int family, struct socket *sock)
1923{ 1911{
1924} 1912}
@@ -2008,7 +1996,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
2008 "transport socket (%d).\n", -status); 1996 "transport socket (%d).\n", -status);
2009 goto out; 1997 goto out;
2010 } 1998 }
2011 xs_reclassify_socketu(sock); 1999 xs_reclassify_socket(AF_LOCAL, sock);
2012 2000
2013 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", 2001 dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
2014 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); 2002 xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);