author     Linus Torvalds <torvalds@linux-foundation.org>  2015-07-02 14:32:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-07-02 14:32:23 -0400
commit     8688d9540cc6e17df4cba71615e27f04e0378fe6
tree       45ab333822188966217f6a3ec7e8289ca7eced72
parent     320cd413faefe2d30f4ee9651efddec5141bc95b
parent     b4839ebe21fc5d543b933d83644981ea73e9ba36
Merge tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable patches:
  - Fix a crash in the NFSv4 file locking code
  - Fix an fsync() regression, where we were failing to retry I/O in
    some circumstances
  - Fix an infinite loop in NFSv4.0 OPEN stateid recovery
  - Fix a memory leak when an attempted pNFS operation fails
  - Fix a memory leak in the backchannel code
  - Large hostnames were not supported correctly in NFSv4.1
  - Fix a pNFS/flexfiles bug that was impeding error reporting on I/O
  - Fix a couple of credential issues in pNFS/flexfiles

  Bugfixes + cleanups:
  - Open flag sanity checks in the NFSv4 atomic open codepath
  - More NFSv4 delegation-related bugfixes
  - Various NFSv4.1 backchannel bugfixes and cleanups
  - Fix the NFS swap socket code
  - Various cleanups of the NFSv4 SETCLIENTID and EXCHANGE_ID code
  - Fix a UDP transport deadlock issue

  Features:
  - More RDMA client transport improvements
  - NFSv4.2 LAYOUTSTATS functionality for pNFS flexfiles"
* tag 'nfs-for-4.2-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (87 commits)
nfs: Remove invalid tk_pid from debug message
nfs: Remove invalid NFS_ATTR_FATTR_V4_REFERRAL checking in nfs4_get_rootfh
nfs: Drop bad comment in nfs41_walk_client_list()
nfs: Remove unneeded micro checking of CONFIG_PROC_FS
nfs: Don't setting FILE_CREATED flags always
nfs: Use remove_proc_subtree() instead remove_proc_entry()
nfs: Remove unused argument in nfs_server_set_fsinfo()
nfs: Fix a memory leak when meeting an unsupported state protect
nfs: take extra reference to fl->fl_file when running a LOCKU operation
NFSv4: When returning a delegation, don't reclaim an incompatible open mode.
NFSv4.2: LAYOUTSTATS is optional to implement
NFSv4.2: Fix up a decoding error in layoutstats
pNFS/flexfiles: Fix the reset of struct pgio_header when resending
pNFS/flexfiles: Turn off layoutcommit for servers that don't need it
pnfs/flexfiles: protect ktime manipulation with mirror lock
nfs: provide pnfs_report_layoutstat when NFS42 is disabled
nfs: verify open flags before allowing open
nfs: always update creds in mirror, even when we have an already connected ds
nfs: fix potential credential leak in ff_layout_update_mirror_cred
pnfs/flexfiles: report layoutstat regularly
...
51 files changed, 1858 insertions(+), 739 deletions(-)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 8d129bb7355a..682529c00996 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -458,7 +458,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
  * pg_authenticate method for nfsv4 callback threads.
  *
  * The authflavor has been negotiated, so an incorrect flavor is a server
- * bug. Drop packets with incorrect authflavor.
+ * bug. Deny packets with incorrect authflavor.
  *
  * All other checking done after NFS decoding where the nfs_client can be
  * found in nfs4_callback_compound
@@ -468,12 +468,12 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 	switch (rqstp->rq_authop->flavour) {
 	case RPC_AUTH_NULL:
 		if (rqstp->rq_proc != CB_NULL)
-			return SVC_DROP;
+			return SVC_DENIED;
 		break;
 	case RPC_AUTH_GSS:
 		/* No RPC_AUTH_GSS support yet in NFSv4.1 */
 		if (svc_is_backchannel(rqstp))
-			return SVC_DROP;
+			return SVC_DENIED;
 	}
 	return SVC_OK;
 }
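
The hunk above changes the callback authentication verdict from SVC_DROP to SVC_DENIED. Since the authflavor has already been negotiated, a mismatch is a server bug, and denying lets the client see an RPC-level rejection immediately instead of waiting out a retransmit timeout. A rough userspace model of that distinction (hypothetical names; this is a sketch, not the kernel's svc code):

    #include <stdio.h>

    /* Stand-ins for the kernel's svc authentication verdicts. */
    enum svc_verdict { SVC_OK, SVC_DROP, SVC_DENIED };

    /* Model of how a dispatcher reacts to each verdict. */
    static void dispatch(enum svc_verdict v)
    {
        switch (v) {
        case SVC_OK:
            printf("process call, send NFS-level reply\n");
            break;
        case SVC_DROP:
            /* No reply at all: the peer must wait for an RPC timeout. */
            printf("silently discard request\n");
            break;
        case SVC_DENIED:
            /* Reject at the RPC layer: the peer learns immediately. */
            printf("send RPC AUTH_ERROR reply\n");
            break;
        }
    }

    int main(void)
    {
        dispatch(SVC_DROP);   /* old behaviour for a bad authflavor */
        dispatch(SVC_DENIED); /* new behaviour: fail fast */
        return 0;
    }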
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 197806fb87ff..29e3c1b011b7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -327,10 +327,8 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
 	dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);

 	/* Normal */
-	if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
-		slot->seq_nr++;
+	if (likely(args->csa_sequenceid == slot->seq_nr + 1))
 		goto out_ok;
-	}

 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
@@ -418,6 +416,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 				struct cb_process_state *cps)
 {
 	struct nfs4_slot_table *tbl;
+	struct nfs4_slot *slot;
 	struct nfs_client *clp;
 	int i;
 	__be32 status = htonl(NFS4ERR_BADSESSION);
@@ -429,25 +428,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,

 	if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
 		goto out;
+
 	tbl = &clp->cl_session->bc_slot_table;
+	slot = tbl->slots + args->csa_slotid;

 	spin_lock(&tbl->slot_tbl_lock);
 	/* state manager is resetting the session */
 	if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
-		spin_unlock(&tbl->slot_tbl_lock);
 		status = htonl(NFS4ERR_DELAY);
 		/* Return NFS4ERR_BADSESSION if we're draining the session
 		 * in order to reset it.
 		 */
 		if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
 			status = htonl(NFS4ERR_BADSESSION);
-		goto out;
+		goto out_unlock;
 	}

-	status = validate_seqid(&clp->cl_session->bc_slot_table, args);
-	spin_unlock(&tbl->slot_tbl_lock);
+	memcpy(&res->csr_sessionid, &args->csa_sessionid,
+	       sizeof(res->csr_sessionid));
+	res->csr_sequenceid = args->csa_sequenceid;
+	res->csr_slotid = args->csa_slotid;
+	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+
+	status = validate_seqid(tbl, args);
 	if (status)
-		goto out;
+		goto out_unlock;

 	cps->slotid = args->csa_slotid;

@@ -458,15 +464,17 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
 	 */
 	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists)) {
 		status = htonl(NFS4ERR_DELAY);
-		goto out;
+		goto out_unlock;
 	}

-	memcpy(&res->csr_sessionid, &args->csa_sessionid,
-	       sizeof(res->csr_sessionid));
-	res->csr_sequenceid = args->csa_sequenceid;
-	res->csr_slotid = args->csa_slotid;
-	res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
-	res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+	/*
+	 * RFC5661 20.9.3
+	 * If CB_SEQUENCE returns an error, then the state of the slot
+	 * (sequence ID, cached reply) MUST NOT change.
+	 */
+	slot->seq_nr++;
+out_unlock:
+	spin_unlock(&tbl->slot_tbl_lock);

 out:
 	cps->clp = clp;	/* put in nfs4_callback_compound */
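
The callback_proc.c changes above move the slot->seq_nr increment out of validate_seqid() and defer it until the CB_SEQUENCE reply has been fully set up, so a failed call leaves the slot untouched, as RFC 5661 section 20.9.3 requires. A minimal standalone sketch of the sequence-ID rules being enforced (simplified; a real slot also caches the last reply):

    #include <stdint.h>
    #include <stdio.h>

    struct slot { uint32_t seq_nr; };

    enum seq_result { SEQ_OK, SEQ_REPLAY, SEQ_MISORDERED };

    /* Validate an incoming sequence ID against the slot without
     * mutating it: the caller bumps seq_nr only on success. */
    static enum seq_result validate_seqid(const struct slot *s, uint32_t seqid)
    {
        if (seqid == s->seq_nr + 1)
            return SEQ_OK;        /* the expected next request */
        if (seqid == s->seq_nr)
            return SEQ_REPLAY;    /* retransmission: replay cached reply */
        return SEQ_MISORDERED;
    }

    int main(void)
    {
        struct slot s = { .seq_nr = 7 };

        if (validate_seqid(&s, 8) == SEQ_OK)
            s.seq_nr++;           /* commit only after the reply is built */
        printf("slot seq_nr is now %u\n", (unsigned)s.seq_nr);
        return 0;
    }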
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 19ca95cdfd9b..6b1697a01dde 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -909,7 +909,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
 	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);

 	status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
-	if (status == __constant_htonl(NFS4ERR_RESOURCE))
+	if (status == htonl(NFS4ERR_RESOURCE))
 		return rpc_garbage_args;

 	if (hdr_arg.minorversion == 0) {
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 892aefff3630..ecebb406cc1a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -825,7 +825,6 @@ error:
  * Load up the server record from information gained in an fsinfo record
  */
 static void nfs_server_set_fsinfo(struct nfs_server *server,
-				  struct nfs_fh *mntfh,
 				  struct nfs_fsinfo *fsinfo)
 {
 	unsigned long max_rpc_payload;
@@ -901,7 +900,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
 	if (error < 0)
 		goto out_error;

-	nfs_server_set_fsinfo(server, mntfh, &fsinfo);
+	nfs_server_set_fsinfo(server, &fsinfo);

 	/* Get some general file system info */
 	if (server->namelen == 0) {
@@ -1193,8 +1192,6 @@ void nfs_clients_init(struct net *net)
 }

 #ifdef CONFIG_PROC_FS
-static struct proc_dir_entry *proc_fs_nfs;
-
 static int nfs_server_list_open(struct inode *inode, struct file *file);
 static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
 static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
@@ -1364,27 +1361,29 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 {
 	struct nfs_server *server;
 	struct nfs_client *clp;
-	char dev[8], fsid[17];
+	char dev[13];	// 8 for 2^24, 1 for ':', 3 for 2^8, 1 for '\0'
+	char fsid[34];	// 2 * 16 for %llx, 1 for ':', 1 for '\0'
 	struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);

 	/* display header on line 1 */
 	if (v == &nn->nfs_volume_list) {
-		seq_puts(m, "NV SERVER PORT DEV FSID FSC\n");
+		seq_puts(m, "NV SERVER PORT DEV FSID"
+			    " FSC\n");
 		return 0;
 	}
 	/* display one transport per line on subsequent lines */
 	server = list_entry(v, struct nfs_server, master_link);
 	clp = server->nfs_client;

-	snprintf(dev, 8, "%u:%u",
+	snprintf(dev, sizeof(dev), "%u:%u",
 		 MAJOR(server->s_dev), MINOR(server->s_dev));

-	snprintf(fsid, 17, "%llx:%llx",
+	snprintf(fsid, sizeof(fsid), "%llx:%llx",
 		 (unsigned long long) server->fsid.major,
 		 (unsigned long long) server->fsid.minor);

 	rcu_read_lock();
-	seq_printf(m, "v%u %s %s %-7s %-17s %s\n",
+	seq_printf(m, "v%u %s %s %-12s %-33s %s\n",
 		   clp->rpc_ops->version,
 		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
 		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
@@ -1434,27 +1433,20 @@ void nfs_fs_proc_net_exit(struct net *net)
  */
 int __init nfs_fs_proc_init(void)
 {
-	struct proc_dir_entry *p;
-
-	proc_fs_nfs = proc_mkdir("fs/nfsfs", NULL);
-	if (!proc_fs_nfs)
+	if (!proc_mkdir("fs/nfsfs", NULL))
 		goto error_0;

 	/* a file of servers with which we're dealing */
-	p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
-	if (!p)
+	if (!proc_symlink("fs/nfsfs/servers", NULL, "../../net/nfsfs/servers"))
 		goto error_1;

 	/* a file of volumes that we have mounted */
-	p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
-	if (!p)
-		goto error_2;
-	return 0;
+	if (!proc_symlink("fs/nfsfs/volumes", NULL, "../../net/nfsfs/volumes"))
+		goto error_1;

-error_2:
-	remove_proc_entry("servers", proc_fs_nfs);
+	return 0;
 error_1:
-	remove_proc_entry("fs/nfsfs", NULL);
+	remove_proc_subtree("fs/nfsfs", NULL);
 error_0:
 	return -ENOMEM;
 }
@@ -1464,9 +1456,7 @@ error_0:
  */
 void nfs_fs_proc_exit(void)
 {
-	remove_proc_entry("volumes", proc_fs_nfs);
-	remove_proc_entry("servers", proc_fs_nfs);
-	remove_proc_entry("fs/nfsfs", NULL);
+	remove_proc_subtree("fs/nfsfs", NULL);
 }

 #endif /* CONFIG_PROC_FS */
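
In the client.c hunks above, the /proc volume-list buffers grow from dev[8]/fsid[17] to dev[13]/fsid[34], and the literal snprintf bounds become sizeof(buf). The sizing arithmetic can be checked with a small standalone program (ordinary C, outside the kernel):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        /* Worst case "major:minor": major fits in 24 bits (8 decimal
         * digits), minor in 8 bits (3 digits), plus ':' and '\0'. */
        char dev[13];
        /* Worst case "%llx:%llx": two 16-hex-digit values, ':', '\0'. */
        char fsid[34];

        snprintf(dev, sizeof(dev), "%u:%u", (1u << 24) - 1, 255u);
        snprintf(fsid, sizeof(fsid), "%llx:%llx",
                 0xffffffffffffffffULL, 0xffffffffffffffffULL);

        /* Both strings fit with no truncation. */
        printf("%s (%zu chars)\n", dev, strlen(dev));
        printf("%s (%zu chars)\n", fsid, strlen(fsid));
        return 0;
    }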
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b2c8b31b2be7..21457bb0edd6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1470,9 +1470,6 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 {
 	int err;

-	if ((open_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
-		*opened |= FILE_CREATED;
-
 	err = finish_open(file, dentry, do_open, opened);
 	if (err)
 		goto out;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 8b8d83a526ce..cc4fa1ed61fc 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page)
 	return nfs_wb_page(inode, page);
 }

-#ifdef CONFIG_NFS_SWAP
 static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 						sector_t *span)
 {
-	int ret;
 	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);

 	*span = sis->pages;

-	rcu_read_lock();
-	ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1);
-	rcu_read_unlock();
-
-	return ret;
+	return rpc_clnt_swap_activate(clnt);
 }

 static void nfs_swap_deactivate(struct file *file)
 {
 	struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host);

-	rcu_read_lock();
-	xs_swapper(rcu_dereference(clnt->cl_xprt), 0);
-	rcu_read_unlock();
+	rpc_clnt_swap_deactivate(clnt);
 }
-#endif

 const struct address_space_operations nfs_file_aops = {
 	.readpage = nfs_readpage,
@@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = {
 	.launder_page = nfs_launder_page,
 	.is_dirty_writeback = nfs_check_dirty_writeback,
 	.error_remove_page = generic_error_remove_page,
-#ifdef CONFIG_NFS_SWAP
 	.swap_activate = nfs_swap_activate,
 	.swap_deactivate = nfs_swap_deactivate,
-#endif
 };

 /*
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d05089e52d6..c12951b9551e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -20,6 +20,7 @@
 #include "../nfs4trace.h"
 #include "../iostat.h"
 #include "../nfs.h"
+#include "../nfs42.h"

 #define NFSDBG_FACILITY NFSDBG_PNFS_LD

@@ -182,17 +183,14 @@ static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)

 static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
 {
-	struct nfs4_ff_layout_mirror *tmp;
 	int i, j;

 	for (i = 0; i < fls->mirror_array_cnt - 1; i++) {
 		for (j = i + 1; j < fls->mirror_array_cnt; j++)
 			if (fls->mirror_array[i]->efficiency <
-			    fls->mirror_array[j]->efficiency) {
-				tmp = fls->mirror_array[i];
-				fls->mirror_array[i] = fls->mirror_array[j];
-				fls->mirror_array[j] = tmp;
-			}
+			    fls->mirror_array[j]->efficiency)
+				swap(fls->mirror_array[i],
+				     fls->mirror_array[j]);
 	}
 }

@@ -274,6 +272,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,

 		spin_lock_init(&fls->mirror_array[i]->lock);
 		fls->mirror_array[i]->ds_count = ds_count;
+		fls->mirror_array[i]->lseg = &fls->generic_hdr;

 		/* deviceid */
 		rc = decode_deviceid(&stream, &devid);
@@ -344,6 +343,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 			fls->mirror_array[i]->gid);
 	}

+	p = xdr_inline_decode(&stream, 4);
+	if (p)
+		fls->flags = be32_to_cpup(p);
+
 	ff_layout_sort_mirrors(fls);
 	rc = ff_layout_check_layout(lgr);
 	if (rc)
@@ -415,6 +418,146 @@ ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
 	return 1;
 }

+static void
+nfs4_ff_start_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+	/* first IO request? */
+	if (atomic_inc_return(&timer->n_ops) == 1) {
+		timer->start_time = ktime_get();
+	}
+}
+
+static ktime_t
+nfs4_ff_end_busy_timer(struct nfs4_ff_busy_timer *timer)
+{
+	ktime_t start, now;
+
+	if (atomic_dec_return(&timer->n_ops) < 0)
+		WARN_ON_ONCE(1);
+
+	now = ktime_get();
+	start = timer->start_time;
+	timer->start_time = now;
+	return ktime_sub(now, start);
+}
+
+static ktime_t
+nfs4_ff_layout_calc_completion_time(struct rpc_task *task)
+{
+	return ktime_sub(ktime_get(), task->tk_start);
+}
+
+static bool
+nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
+			    struct nfs4_ff_layoutstat *layoutstat)
+{
+	static const ktime_t notime = {0};
+	ktime_t now = ktime_get();
+
+	nfs4_ff_start_busy_timer(&layoutstat->busy_timer);
+	if (ktime_equal(mirror->start_time, notime))
+		mirror->start_time = now;
+	if (ktime_equal(mirror->last_report_time, notime))
+		mirror->last_report_time = now;
+	if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+			FF_LAYOUTSTATS_REPORT_INTERVAL) {
+		mirror->last_report_time = now;
+		return true;
+	}
+
+	return false;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_requested(struct nfs4_ff_layoutstat *layoutstat,
+		__u64 requested)
+{
+	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+
+	iostat->ops_requested++;
+	iostat->bytes_requested += requested;
+}
+
+static void
+nfs4_ff_layout_stat_io_update_completed(struct nfs4_ff_layoutstat *layoutstat,
+		__u64 requested,
+		__u64 completed,
+		ktime_t time_completed)
+{
+	struct nfs4_ff_io_stat *iostat = &layoutstat->io_stat;
+	ktime_t timer;
+
+	iostat->ops_completed++;
+	iostat->bytes_completed += completed;
+	iostat->bytes_not_delivered += requested - completed;
+
+	timer = nfs4_ff_end_busy_timer(&layoutstat->busy_timer);
+	iostat->total_busy_time =
+			ktime_add(iostat->total_busy_time, timer);
+	iostat->aggregate_completion_time =
+			ktime_add(iostat->aggregate_completion_time, time_completed);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_read(struct nfs4_ff_layout_mirror *mirror,
+		__u64 requested)
+{
+	bool report;
+
+	spin_lock(&mirror->lock);
+	report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat);
+	nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested);
+	spin_unlock(&mirror->lock);
+
+	if (report)
+		pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_read(struct rpc_task *task,
+		struct nfs4_ff_layout_mirror *mirror,
+		__u64 requested,
+		__u64 completed)
+{
+	spin_lock(&mirror->lock);
+	nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat,
+			requested, completed,
+			nfs4_ff_layout_calc_completion_time(task));
+	spin_unlock(&mirror->lock);
+}
+
+static void
+nfs4_ff_layout_stat_io_start_write(struct nfs4_ff_layout_mirror *mirror,
+		__u64 requested)
+{
+	bool report;
+
+	spin_lock(&mirror->lock);
+	report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat);
+	nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested);
+	spin_unlock(&mirror->lock);
+
+	if (report)
+		pnfs_report_layoutstat(mirror->lseg->pls_layout->plh_inode);
+}
+
+static void
+nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
+		struct nfs4_ff_layout_mirror *mirror,
+		__u64 requested,
+		__u64 completed,
+		enum nfs3_stable_how committed)
+{
+	if (committed == NFS_UNSTABLE)
+		requested = completed = 0;
+
+	spin_lock(&mirror->lock);
+	nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat,
+			requested, completed,
+			nfs4_ff_layout_calc_completion_time(task));
+	spin_unlock(&mirror->lock);
+}
+
 static int
 ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 			    struct nfs_commit_info *cinfo,
@@ -631,7 +774,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
 		nfs_direct_set_resched_writes(hdr->dreq);
 		/* fake unstable write to let common nfs resend pages */
 		hdr->verf.committed = NFS_UNSTABLE;
-		hdr->good_bytes = 0;
+		hdr->good_bytes = hdr->args.count;
 	}
 	return;
 }
@@ -879,6 +1022,12 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 	return 0;
 }

+static bool
+ff_layout_need_layoutcommit(struct pnfs_layout_segment *lseg)
+{
+	return !(FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_LAYOUTCOMMIT);
+}
+
 /*
  * We reference the rpc_cred of the first WRITE that triggers the need for
  * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
@@ -891,6 +1040,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 static void
 ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
+	if (!ff_layout_need_layoutcommit(hdr->lseg))
+		return;
+
 	pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
 			hdr->mds_offset + hdr->res.count);
 	dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
@@ -909,6 +1061,10 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
 static int ff_layout_read_prepare_common(struct rpc_task *task,
 					 struct nfs_pgio_header *hdr)
 {
+	nfs4_ff_layout_stat_io_start_read(
+			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+			hdr->args.count);
+
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
 		rpc_exit(task, -EIO);
 		return -EIO;
@@ -962,15 +1118,15 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;

-	if (ff_layout_read_prepare_common(task, hdr))
-		return;
-
 	if (ff_layout_setup_sequence(hdr->ds_clp,
 			&hdr->args.seq_args,
 			&hdr->res.seq_res,
 			task))
 		return;

+	if (ff_layout_read_prepare_common(task, hdr))
+		return;
+
 	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
 			hdr->args.lock_context, FMODE_READ) == -EIO)
 		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -982,6 +1138,10 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)

 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

+	nfs4_ff_layout_stat_io_end_read(task,
+			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+			hdr->args.count, hdr->res.count);
+
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
 		nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1074,7 +1234,8 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 		return -EAGAIN;
 	}

-	if (data->verf.committed == NFS_UNSTABLE)
+	if (data->verf.committed == NFS_UNSTABLE
+	    && ff_layout_need_layoutcommit(data->lseg))
 		pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);

 	return 0;
@@ -1083,6 +1244,10 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
 static int ff_layout_write_prepare_common(struct rpc_task *task,
 					  struct nfs_pgio_header *hdr)
 {
+	nfs4_ff_layout_stat_io_start_write(
+			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+			hdr->args.count);
+
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
 		rpc_exit(task, -EIO);
 		return -EIO;
@@ -1116,15 +1281,15 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;

-	if (ff_layout_write_prepare_common(task, hdr))
-		return;
-
 	if (ff_layout_setup_sequence(hdr->ds_clp,
 			&hdr->args.seq_args,
 			&hdr->res.seq_res,
 			task))
 		return;

+	if (ff_layout_write_prepare_common(task, hdr))
+		return;
+
 	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
 			hdr->args.lock_context, FMODE_WRITE) == -EIO)
 		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
@@ -1134,6 +1299,11 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;

+	nfs4_ff_layout_stat_io_end_write(task,
+			FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+			hdr->args.count, hdr->res.count,
+			hdr->res.verf->committed);
+
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
 	    task->tk_status == 0) {
 		nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1152,8 +1322,17 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
 		&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
 }

+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+		struct nfs_commit_data *cdata)
+{
+	nfs4_ff_layout_stat_io_start_write(
+			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+			0);
+}
+
 static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
 {
+	ff_layout_commit_prepare_common(task, data);
 	rpc_call_start(task);
 }

@@ -1161,10 +1340,30 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)
 {
 	struct nfs_commit_data *wdata = data;

-	ff_layout_setup_sequence(wdata->ds_clp,
+	if (ff_layout_setup_sequence(wdata->ds_clp,
 			&wdata->args.seq_args,
 			&wdata->res.seq_res,
-			task);
+			task))
+		return;
+	ff_layout_commit_prepare_common(task, data);
+}
+
+static void ff_layout_commit_done(struct rpc_task *task, void *data)
+{
+	struct nfs_commit_data *cdata = data;
+	struct nfs_page *req;
+	__u64 count = 0;
+
+	if (task->tk_status == 0) {
+		list_for_each_entry(req, &cdata->pages, wb_list)
+			count += req->wb_bytes;
+	}
+
+	nfs4_ff_layout_stat_io_end_write(task,
+			FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+			count, count, NFS_FILE_SYNC);
+
+	pnfs_generic_write_commit_done(task, data);
 }

 static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
@@ -1205,14 +1404,14 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {

 static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
 	.rpc_call_prepare = ff_layout_commit_prepare_v3,
-	.rpc_call_done = pnfs_generic_write_commit_done,
+	.rpc_call_done = ff_layout_commit_done,
 	.rpc_count_stats = ff_layout_commit_count_stats,
 	.rpc_release = pnfs_generic_commit_release,
 };

 static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
 	.rpc_call_prepare = ff_layout_commit_prepare_v4,
-	.rpc_call_done = pnfs_generic_write_commit_done,
+	.rpc_call_done = ff_layout_commit_done,
 	.rpc_count_stats = ff_layout_commit_count_stats,
 	.rpc_release = pnfs_generic_commit_release,
 };
@@ -1256,7 +1455,6 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
 	if (fh)
 		hdr->args.fh = fh;
-
 	/*
 	 * Note that if we ever decide to split across DSes,
 	 * then we may need to handle dense-like offsets.
@@ -1385,6 +1583,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
 		data->args.fh = fh;
+
 	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,
 				   vers == 3 ? &ff_layout_commit_call_ops_v3 :
 					       &ff_layout_commit_call_ops_v4,
@@ -1488,6 +1687,247 @@ out:
 	dprintk("%s: Return\n", __func__);
 }

+static int
+ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen)
+{
+	const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+
+	return snprintf(buf, buflen, "%pI4", &sin->sin_addr);
+}
+
+static size_t
+ff_layout_ntop6_noscopeid(const struct sockaddr *sap, char *buf,
+			  const int buflen)
+{
+	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+	const struct in6_addr *addr = &sin6->sin6_addr;
+
+	/*
+	 * RFC 4291, Section 2.2.2
+	 *
+	 * Shorthanded ANY address
+	 */
+	if (ipv6_addr_any(addr))
+		return snprintf(buf, buflen, "::");
+
+	/*
+	 * RFC 4291, Section 2.2.2
+	 *
+	 * Shorthanded loopback address
+	 */
+	if (ipv6_addr_loopback(addr))
+		return snprintf(buf, buflen, "::1");
+
+	/*
+	 * RFC 4291, Section 2.2.3
+	 *
+	 * Special presentation address format for mapped v4
+	 * addresses.
+	 */
+	if (ipv6_addr_v4mapped(addr))
+		return snprintf(buf, buflen, "::ffff:%pI4",
+				&addr->s6_addr32[3]);
+
+	/*
+	 * RFC 4291, Section 2.2.1
+	 */
+	return snprintf(buf, buflen, "%pI6c", addr);
+}
+
+/* Derived from rpc_sockaddr2uaddr */
+static void
+ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
+{
+	struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
+	char portbuf[RPCBIND_MAXUADDRPLEN];
+	char addrbuf[RPCBIND_MAXUADDRLEN];
+	char *netid;
+	unsigned short port;
+	int len, netid_len;
+	__be32 *p;
+
+	switch (sap->sa_family) {
+	case AF_INET:
+		if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
+			return;
+		port = ntohs(((struct sockaddr_in *)sap)->sin_port);
+		netid = "tcp";
+		netid_len = 3;
+		break;
+	case AF_INET6:
+		if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
+			return;
+		port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
+		netid = "tcp6";
+		netid_len = 4;
+		break;
+	default:
+		/* we only support tcp and tcp6 */
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
+	len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
+
+	p = xdr_reserve_space(xdr, 4 + netid_len);
+	xdr_encode_opaque(p, netid, netid_len);
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	xdr_encode_opaque(p, addrbuf, len);
+}
+
+static void
+ff_layout_encode_nfstime(struct xdr_stream *xdr,
+			 ktime_t t)
+{
+	struct timespec64 ts;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 12);
+	ts = ktime_to_timespec64(t);
+	p = xdr_encode_hyper(p, ts.tv_sec);
+	*p++ = cpu_to_be32(ts.tv_nsec);
+}
+
+static void
+ff_layout_encode_io_latency(struct xdr_stream *xdr,
+			    struct nfs4_ff_io_stat *stat)
+{
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 5 * 8);
+	p = xdr_encode_hyper(p, stat->ops_requested);
+	p = xdr_encode_hyper(p, stat->bytes_requested);
+	p = xdr_encode_hyper(p, stat->ops_completed);
+	p = xdr_encode_hyper(p, stat->bytes_completed);
+	p = xdr_encode_hyper(p, stat->bytes_not_delivered);
+	ff_layout_encode_nfstime(xdr, stat->total_busy_time);
+	ff_layout_encode_nfstime(xdr, stat->aggregate_completion_time);
+}
+
+static void
+ff_layout_encode_layoutstats(struct xdr_stream *xdr,
+			     struct nfs42_layoutstat_args *args,
+			     struct nfs42_layoutstat_devinfo *devinfo)
+{
+	struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private;
+	struct nfs4_pnfs_ds_addr *da;
+	struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds;
+	struct nfs_fh *fh = &mirror->fh_versions[0];
+	__be32 *p, *start;
+
+	da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node);
+	dprintk("%s: DS %s: encoding address %s\n",
+		__func__, ds->ds_remotestr, da->da_remotestr);
+	/* layoutupdate length */
+	start = xdr_reserve_space(xdr, 4);
+	/* netaddr4 */
+	ff_layout_encode_netaddr(xdr, da);
+	/* nfs_fh4 */
+	p = xdr_reserve_space(xdr, 4 + fh->size);
+	xdr_encode_opaque(p, fh->data, fh->size);
+	/* ff_io_latency4 read */
+	spin_lock(&mirror->lock);
+	ff_layout_encode_io_latency(xdr, &mirror->read_stat.io_stat);
+	/* ff_io_latency4 write */
+	ff_layout_encode_io_latency(xdr, &mirror->write_stat.io_stat);
+	spin_unlock(&mirror->lock);
+	/* nfstime4 */
+	ff_layout_encode_nfstime(xdr, ktime_sub(ktime_get(), mirror->start_time));
+	/* bool */
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(false);
+
+	*start = cpu_to_be32((xdr->p - start - 1) * 4);
+}
+
+static bool
+ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args,
+			       struct pnfs_layout_segment *pls,
+			       int *dev_count, int dev_limit)
+{
+	struct nfs4_ff_layout_mirror *mirror;
+	struct nfs4_deviceid_node *dev;
+	struct nfs42_layoutstat_devinfo *devinfo;
+	int i;
+
+	for (i = 0; i <= FF_LAYOUT_MIRROR_COUNT(pls); i++) {
+		if (*dev_count >= dev_limit)
+			break;
+		mirror = FF_LAYOUT_COMP(pls, i);
+		if (!mirror || !mirror->mirror_ds)
+			continue;
+		dev = FF_LAYOUT_DEVID_NODE(pls, i);
+		devinfo = &args->devinfo[*dev_count];
+		memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE);
+		devinfo->offset = pls->pls_range.offset;
+		devinfo->length = pls->pls_range.length;
+		/* well, we don't really know if IO is continuous or not! */
+		devinfo->read_count = mirror->read_stat.io_stat.bytes_completed;
+		devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed;
+		devinfo->write_count = mirror->write_stat.io_stat.bytes_completed;
+		devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed;
+		devinfo->layout_type = LAYOUT_FLEX_FILES;
+		devinfo->layoutstats_encode = ff_layout_encode_layoutstats;
+		devinfo->layout_private = mirror;
+		/* lseg refcount put in cleanup_layoutstats */
+		pnfs_get_lseg(pls);
+
+		++(*dev_count);
+	}
+
+	return *dev_count < dev_limit;
+}
+
+static int
+ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
+{
+	struct pnfs_layout_segment *pls;
+	int dev_count = 0;
+
+	spin_lock(&args->inode->i_lock);
+	list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+		dev_count += FF_LAYOUT_MIRROR_COUNT(pls);
+	}
+	spin_unlock(&args->inode->i_lock);
+	/* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */
+	if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) {
+		dprintk("%s: truncating devinfo to limit (%d:%d)\n",
+			__func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV);
+		dev_count = PNFS_LAYOUTSTATS_MAXDEV;
+	}
+	args->devinfo = kmalloc(dev_count * sizeof(*args->devinfo), GFP_KERNEL);
+	if (!args->devinfo)
+		return -ENOMEM;
+
+	dev_count = 0;
+	spin_lock(&args->inode->i_lock);
+	list_for_each_entry(pls, &NFS_I(args->inode)->layout->plh_segs, pls_list) {
+		if (!ff_layout_mirror_prepare_stats(args, pls, &dev_count,
+						    PNFS_LAYOUTSTATS_MAXDEV)) {
+			break;
+		}
+	}
+	spin_unlock(&args->inode->i_lock);
+	args->num_dev = dev_count;
+
+	return 0;
+}
+
+static void
+ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data)
+{
+	struct nfs4_ff_layout_mirror *mirror;
+	int i;
+
+	for (i = 0; i < data->args.num_dev; i++) {
+		mirror = data->args.devinfo[i].layout_private;
+		data->args.devinfo[i].layout_private = NULL;
+		pnfs_put_lseg(mirror->lseg);
+	}
+}
+
 static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.id = LAYOUT_FLEX_FILES,
 	.name = "LAYOUT_FLEX_FILES",
@@ -1510,6 +1950,8 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
 	.alloc_deviceid_node = ff_layout_alloc_deviceid_node,
 	.encode_layoutreturn = ff_layout_encode_layoutreturn,
 	.sync = pnfs_nfs_generic_sync,
+	.prepare_layoutstats = ff_layout_prepare_layoutstats,
+	.cleanup_layoutstats = ff_layout_cleanup_layoutstats,
 };

 static int __init nfs4flexfilelayout_init(void)
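
Among the additions above, ff_layout_encode_netaddr() writes the data server's address in the RPC "universal address" form, where the TCP port is appended as two dotted decimal octets (port >> 8 and port & 0xff). A userspace sketch of that formatting, using POSIX inet_ntop() in place of the kernel's %pI4 printf extension:

    #include <arpa/inet.h>
    #include <stdio.h>

    /* Format an IPv4 address and port as an RFC 5665 universal address:
     * "h1.h2.h3.h4.p1.p2", where p1/p2 are the port's high and low bytes. */
    static void print_uaddr4(const char *ip, unsigned short port)
    {
        struct in_addr a;
        char buf[INET_ADDRSTRLEN];

        if (inet_pton(AF_INET, ip, &a) != 1)
            return;
        inet_ntop(AF_INET, &a, buf, sizeof(buf));
        printf("%s.%u.%u\n", buf, port >> 8, port & 0xff);
    }

    int main(void)
    {
        print_uaddr4("192.0.2.7", 2049);  /* prints 192.0.2.7.8.1 */
        return 0;
    }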
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 070f20445b2d..f92f9a0a856b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -9,12 +9,17 @@
 #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H
 #define FS_NFS_NFS4FLEXFILELAYOUT_H

+#define FF_FLAGS_NO_LAYOUTCOMMIT 1
+
 #include "../pnfs.h"

 /* XXX: Let's filter out insanely large mirror count for now to avoid oom
  * due to network error etc. */
 #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096

+/* LAYOUTSTATS report interval in ms */
+#define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L)
+
 struct nfs4_ff_ds_version {
 	u32 version;
 	u32 minor_version;
@@ -41,24 +46,48 @@ struct nfs4_ff_layout_ds_err {
 	struct nfs4_deviceid deviceid;
 };

+struct nfs4_ff_io_stat {
+	__u64 ops_requested;
+	__u64 bytes_requested;
+	__u64 ops_completed;
+	__u64 bytes_completed;
+	__u64 bytes_not_delivered;
+	ktime_t total_busy_time;
+	ktime_t aggregate_completion_time;
+};
+
+struct nfs4_ff_busy_timer {
+	ktime_t start_time;
+	atomic_t n_ops;
+};
+
+struct nfs4_ff_layoutstat {
+	struct nfs4_ff_io_stat io_stat;
+	struct nfs4_ff_busy_timer busy_timer;
+};
+
 struct nfs4_ff_layout_mirror {
+	struct pnfs_layout_segment *lseg; /* back pointer */
 	u32 ds_count;
 	u32 efficiency;
 	struct nfs4_ff_layout_ds *mirror_ds;
 	u32 fh_versions_cnt;
 	struct nfs_fh *fh_versions;
 	nfs4_stateid stateid;
-	struct nfs4_string user_name;
-	struct nfs4_string group_name;
 	u32 uid;
 	u32 gid;
 	struct rpc_cred *cred;
 	spinlock_t lock;
+	struct nfs4_ff_layoutstat read_stat;
+	struct nfs4_ff_layoutstat write_stat;
+	ktime_t start_time;
+	ktime_t last_report_time;
 };

 struct nfs4_ff_layout_segment {
 	struct pnfs_layout_segment generic_hdr;
 	u64 stripe_unit;
+	u32 flags;
 	u32 mirror_array_cnt;
 	struct nfs4_ff_layout_mirror **mirror_array;
 };
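
The new nfs4_ff_busy_timer above pairs an atomic in-flight counter with a start timestamp: the busy clock starts when n_ops goes from 0 to 1, and each completion harvests the elapsed time and restarts the clock. A C11 model of the same idea (a monotonic clock stands in for ktime_t, and the mirror lock that serializes updates in the kernel is omitted, so this is a sketch, not the exact semantics):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <time.h>

    struct busy_timer {
        atomic_int n_ops;
        struct timespec start;
    };

    static void busy_start(struct busy_timer *t)
    {
        /* First outstanding op starts the busy clock. */
        if (atomic_fetch_add(&t->n_ops, 1) == 0)
            clock_gettime(CLOCK_MONOTONIC, &t->start);
    }

    static double busy_end(struct busy_timer *t)
    {
        struct timespec now;

        atomic_fetch_sub(&t->n_ops, 1);
        clock_gettime(CLOCK_MONOTONIC, &now);
        double elapsed = (now.tv_sec - t->start.tv_sec) +
                         (now.tv_nsec - t->start.tv_nsec) / 1e9;
        t->start = now;  /* next completion accounts from here */
        return elapsed;
    }

    int main(void)
    {
        struct busy_timer t = { 0 };

        busy_start(&t);
        /* ... I/O would happen here ... */
        printf("busy for %.9f s\n", busy_end(&t));
        return 0;
    }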
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 77a2d026aa12..f13e1969eedd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -324,7 +324,8 @@ static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror,
 				__func__, PTR_ERR(cred));
 			return PTR_ERR(cred);
 		} else {
-			mirror->cred = cred;
+			if (cmpxchg(&mirror->cred, NULL, cred))
+				put_rpccred(cred);
 		}
 	}
 	return 0;
@@ -386,7 +387,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
 	smp_rmb();
 	if (ds->ds_clp)
-		goto out;
+		goto out_update_creds;

 	flavor = nfs4_ff_layout_choose_authflavor(mirror);

@@ -430,7 +431,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 		}
 	}

-
+out_update_creds:
 	if (ff_layout_update_mirror_cred(mirror, ds))
 		ds = NULL;
 out:
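
The flexfilelayoutdev.c fix above closes a race where two tasks could both assign mirror->cred and leak one reference: cmpxchg() publishes the credential exactly once, and the loser drops its own reference. The same publish-once pattern in portable C11, with atomic_compare_exchange_strong() standing in for the kernel's cmpxchg() and free() for put_rpccred():

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct cred { int id; };

    static _Atomic(struct cred *) mirror_cred;

    /* Install cred exactly once; losers drop their own reference. */
    static void update_mirror_cred(struct cred *cred)
    {
        struct cred *expected = NULL;

        if (!atomic_compare_exchange_strong(&mirror_cred, &expected, cred))
            free(cred);  /* someone else won the race: no leak */
    }

    int main(void)
    {
        struct cred *a = malloc(sizeof(*a));
        struct cred *b = malloc(sizeof(*b));

        update_mirror_cred(a);  /* wins: published */
        update_mirror_cred(b);  /* loses: freed */
        printf("installed cred %p\n", (void *)atomic_load(&mirror_cred));
        return 0;
    }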
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f734562c6d24..b77b328a06d7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -678,6 +678,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	if (!err) {
 		generic_fillattr(inode, stat);
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
+		if (S_ISDIR(inode->i_mode))
+			stat->blksize = NFS_SERVER(inode)->dtsize;
 	}
 out:
 	trace_nfs_getattr_exit(inode, err);
@@ -2008,17 +2010,15 @@ static int __init init_nfs_fs(void)
 	if (err)
 		goto out1;

-#ifdef CONFIG_PROC_FS
 	rpc_proc_register(&init_net, &nfs_rpcstat);
-#endif
-	if ((err = register_nfs_fs()) != 0)
+
+	err = register_nfs_fs();
+	if (err)
 		goto out0;

 	return 0;
 out0:
-#ifdef CONFIG_PROC_FS
 	rpc_proc_unregister(&init_net, "nfs");
-#endif
 	nfs_destroy_directcache();
 out1:
 	nfs_destroy_writepagecache();
@@ -2049,9 +2049,7 @@ static void __exit exit_nfs_fs(void)
 	nfs_destroy_nfspagecache();
 	nfs_fscache_unregister();
 	unregister_pernet_subsys(&nfs_net_ops);
-#ifdef CONFIG_PROC_FS
 	rpc_proc_unregister(&init_net, "nfs");
-#endif
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
 	nfsiod_stop();
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 53852a4bd88b..9b04c2e6fffc 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -1342,7 +1342,7 @@ static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, | |||
1342 | if (args->npages != 0) | 1342 | if (args->npages != 0) |
1343 | xdr_write_pages(xdr, args->pages, 0, args->len); | 1343 | xdr_write_pages(xdr, args->pages, 0, args->len); |
1344 | else | 1344 | else |
1345 | xdr_reserve_space(xdr, NFS_ACL_INLINE_BUFSIZE); | 1345 | xdr_reserve_space(xdr, args->len); |
1346 | 1346 | ||
1347 | error = nfsacl_encode(xdr->buf, base, args->inode, | 1347 | error = nfsacl_encode(xdr->buf, base, args->inode, |
1348 | (args->mask & NFS_ACL) ? | 1348 | (args->mask & NFS_ACL) ? |
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 7afb8947dfdf..ff66ae700b89 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h | |||
@@ -5,11 +5,18 @@ | |||
5 | #ifndef __LINUX_FS_NFS_NFS4_2_H | 5 | #ifndef __LINUX_FS_NFS_NFS4_2_H |
6 | #define __LINUX_FS_NFS_NFS4_2_H | 6 | #define __LINUX_FS_NFS_NFS4_2_H |
7 | 7 | ||
8 | /* | ||
9 | * FIXME: four LAYOUTSTATS calls per compound at most! Do we need to support | ||
10 | * more? Take care not to pre-allocate too much for a compound. | ||
11 | */ | ||
12 | #define PNFS_LAYOUTSTATS_MAXDEV (4) | ||
13 | |||
8 | /* nfs4.2proc.c */ | 14 | /* nfs4.2proc.c */ |
9 | int nfs42_proc_allocate(struct file *, loff_t, loff_t); | 15 | int nfs42_proc_allocate(struct file *, loff_t, loff_t); |
10 | int nfs42_proc_deallocate(struct file *, loff_t, loff_t); | 16 | int nfs42_proc_deallocate(struct file *, loff_t, loff_t); |
11 | loff_t nfs42_proc_llseek(struct file *, loff_t, int); | 17 | loff_t nfs42_proc_llseek(struct file *, loff_t, int); |
12 | 18 | int nfs42_proc_layoutstats_generic(struct nfs_server *, | |
19 | struct nfs42_layoutstat_data *); | ||
13 | /* nfs4.2xdr.h */ | 20 | /* nfs4.2xdr.h */ |
14 | extern struct rpc_procinfo nfs4_2_procedures[]; | 21 | extern struct rpc_procinfo nfs4_2_procedures[]; |
15 | 22 | ||
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3a9e75235f30..f486b80f927a 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c | |||
@@ -10,6 +10,11 @@ | |||
10 | #include <linux/nfs_fs.h> | 10 | #include <linux/nfs_fs.h> |
11 | #include "nfs4_fs.h" | 11 | #include "nfs4_fs.h" |
12 | #include "nfs42.h" | 12 | #include "nfs42.h" |
13 | #include "iostat.h" | ||
14 | #include "pnfs.h" | ||
15 | #include "internal.h" | ||
16 | |||
17 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
13 | 18 | ||
14 | static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, | 19 | static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, |
15 | fmode_t fmode) | 20 | fmode_t fmode) |
@@ -165,3 +170,85 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence) | |||
165 | 170 | ||
166 | return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); | 171 | return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes); |
167 | } | 172 | } |
173 | |||
174 | static void | ||
175 | nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata) | ||
176 | { | ||
177 | struct nfs42_layoutstat_data *data = calldata; | ||
178 | struct nfs_server *server = NFS_SERVER(data->args.inode); | ||
179 | |||
180 | nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args, | ||
181 | &data->res.seq_res, task); | ||
182 | } | ||
183 | |||
184 | static void | ||
185 | nfs42_layoutstat_done(struct rpc_task *task, void *calldata) | ||
186 | { | ||
187 | struct nfs42_layoutstat_data *data = calldata; | ||
188 | |||
189 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
190 | return; | ||
191 | |||
192 | switch (task->tk_status) { | ||
193 | case 0: | ||
194 | break; | ||
195 | case -ENOTSUPP: | ||
196 | case -EOPNOTSUPP: | ||
197 | NFS_SERVER(data->inode)->caps &= ~NFS_CAP_LAYOUTSTATS; | ||
198 | default: | ||
199 | dprintk("%s server returns %d\n", __func__, task->tk_status); | ||
200 | } | ||
201 | } | ||
202 | |||
203 | static void | ||
204 | nfs42_layoutstat_release(void *calldata) | ||
205 | { | ||
206 | struct nfs42_layoutstat_data *data = calldata; | ||
207 | struct nfs_server *nfss = NFS_SERVER(data->args.inode); | ||
208 | |||
209 | if (nfss->pnfs_curr_ld->cleanup_layoutstats) | ||
210 | nfss->pnfs_curr_ld->cleanup_layoutstats(data); | ||
211 | |||
212 | pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); | ||
213 | smp_mb__before_atomic(); | ||
214 | clear_bit(NFS_INO_LAYOUTSTATS, &NFS_I(data->args.inode)->flags); | ||
215 | smp_mb__after_atomic(); | ||
216 | nfs_iput_and_deactive(data->inode); | ||
217 | kfree(data->args.devinfo); | ||
218 | kfree(data); | ||
219 | } | ||
220 | |||
221 | static const struct rpc_call_ops nfs42_layoutstat_ops = { | ||
222 | .rpc_call_prepare = nfs42_layoutstat_prepare, | ||
223 | .rpc_call_done = nfs42_layoutstat_done, | ||
224 | .rpc_release = nfs42_layoutstat_release, | ||
225 | }; | ||
226 | |||
227 | int nfs42_proc_layoutstats_generic(struct nfs_server *server, | ||
228 | struct nfs42_layoutstat_data *data) | ||
229 | { | ||
230 | struct rpc_message msg = { | ||
231 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTSTATS], | ||
232 | .rpc_argp = &data->args, | ||
233 | .rpc_resp = &data->res, | ||
234 | }; | ||
235 | struct rpc_task_setup task_setup = { | ||
236 | .rpc_client = server->client, | ||
237 | .rpc_message = &msg, | ||
238 | .callback_ops = &nfs42_layoutstat_ops, | ||
239 | .callback_data = data, | ||
240 | .flags = RPC_TASK_ASYNC, | ||
241 | }; | ||
242 | struct rpc_task *task; | ||
243 | |||
244 | data->inode = nfs_igrab_and_active(data->args.inode); | ||
245 | if (!data->inode) { | ||
246 | nfs42_layoutstat_release(data); | ||
247 | return -EAGAIN; | ||
248 | } | ||
249 | nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); | ||
250 | task = rpc_run_task(&task_setup); | ||
251 | if (IS_ERR(task)) | ||
252 | return PTR_ERR(task); | ||
253 | return 0; | ||
254 | } | ||
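nfs42_proc_layoutstats_generic() runs LAYOUTSTATS as an asynchronous RPC: .rpc_call_prepare claims a session slot, .rpc_call_done interprets the status (clearing NFS_CAP_LAYOUTSTATS when the server answers ENOTSUPP/EOPNOTSUPP), and .rpc_release always runs last to free the payload, including on the early -EAGAIN return. A stripped-down sketch of that prepare/done/release control flow, with illustrative names and no real transport:

struct lstask;

struct call_ops {
        void (*prepare)(struct lstask *t);  /* claim sequencing slot  */
        void (*done)(struct lstask *t);     /* interpret reply status */
        void (*release)(struct lstask *t);  /* always last: free data */
};

struct lstask {
        int status;
        const struct call_ops *ops;
        void *data;
};

static void run_async(struct lstask *t)
{
        t->ops->prepare(t);
        /* ... the transport would transmit and fill t->status here ... */
        t->ops->done(t);
        t->ops->release(t);     /* runs even when done() saw an error */
}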
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1a25b27248f2..a6bd27da6286 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #ifndef __LINUX_FS_NFS_NFS4_2XDR_H | 4 | #ifndef __LINUX_FS_NFS_NFS4_2XDR_H |
5 | #define __LINUX_FS_NFS_NFS4_2XDR_H | 5 | #define __LINUX_FS_NFS_NFS4_2XDR_H |
6 | 6 | ||
7 | #include "nfs42.h" | ||
8 | |||
7 | #define encode_fallocate_maxsz (encode_stateid_maxsz + \ | 9 | #define encode_fallocate_maxsz (encode_stateid_maxsz + \ |
8 | 2 /* offset */ + \ | 10 | 2 /* offset */ + \ |
9 | 2 /* length */) | 11 | 2 /* length */) |
@@ -22,6 +24,16 @@ | |||
22 | 1 /* whence */ + \ | 24 | 1 /* whence */ + \ |
23 | 2 /* offset */ + \ | 25 | 2 /* offset */ + \ |
24 | 2 /* length */) | 26 | 2 /* length */) |
27 | #define encode_io_info_maxsz 4 | ||
28 | #define encode_layoutstats_maxsz (op_decode_hdr_maxsz + \ | ||
29 | 2 /* offset */ + \ | ||
30 | 2 /* length */ + \ | ||
31 | encode_stateid_maxsz + \ | ||
32 | encode_io_info_maxsz + \ | ||
33 | encode_io_info_maxsz + \ | ||
34 | 1 /* opaque devaddr4 length */ + \ | ||
35 | XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) | ||
36 | #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) | ||
25 | 37 | ||
26 | #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ | 38 | #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ |
27 | encode_putfh_maxsz + \ | 39 | encode_putfh_maxsz + \ |
@@ -45,6 +57,14 @@ | |||
45 | #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ | 57 | #define NFS4_dec_seek_sz (compound_decode_hdr_maxsz + \ |
46 | decode_putfh_maxsz + \ | 58 | decode_putfh_maxsz + \ |
47 | decode_seek_maxsz) | 59 | decode_seek_maxsz) |
60 | #define NFS4_enc_layoutstats_sz (compound_encode_hdr_maxsz + \ | ||
61 | encode_sequence_maxsz + \ | ||
62 | encode_putfh_maxsz + \ | ||
63 | PNFS_LAYOUTSTATS_MAXDEV * encode_layoutstats_maxsz) | ||
64 | #define NFS4_dec_layoutstats_sz (compound_decode_hdr_maxsz + \ | ||
65 | decode_sequence_maxsz + \ | ||
66 | decode_putfh_maxsz + \ | ||
67 | PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) | ||
48 | 68 | ||
49 | 69 | ||
50 | static void encode_fallocate(struct xdr_stream *xdr, | 70 | static void encode_fallocate(struct xdr_stream *xdr, |
@@ -81,6 +101,33 @@ static void encode_seek(struct xdr_stream *xdr, | |||
81 | encode_uint32(xdr, args->sa_what); | 101 | encode_uint32(xdr, args->sa_what); |
82 | } | 102 | } |
83 | 103 | ||
104 | static void encode_layoutstats(struct xdr_stream *xdr, | ||
105 | struct nfs42_layoutstat_args *args, | ||
106 | struct nfs42_layoutstat_devinfo *devinfo, | ||
107 | struct compound_hdr *hdr) | ||
108 | { | ||
109 | __be32 *p; | ||
110 | |||
111 | encode_op_hdr(xdr, OP_LAYOUTSTATS, decode_layoutstats_maxsz, hdr); | ||
112 | p = reserve_space(xdr, 8 + 8); | ||
113 | p = xdr_encode_hyper(p, devinfo->offset); | ||
114 | p = xdr_encode_hyper(p, devinfo->length); | ||
115 | encode_nfs4_stateid(xdr, &args->stateid); | ||
116 | p = reserve_space(xdr, 4*8 + NFS4_DEVICEID4_SIZE + 4); | ||
117 | p = xdr_encode_hyper(p, devinfo->read_count); | ||
118 | p = xdr_encode_hyper(p, devinfo->read_bytes); | ||
119 | p = xdr_encode_hyper(p, devinfo->write_count); | ||
120 | p = xdr_encode_hyper(p, devinfo->write_bytes); | ||
121 | p = xdr_encode_opaque_fixed(p, devinfo->dev_id.data, | ||
122 | NFS4_DEVICEID4_SIZE); | ||
123 | /* Encode layoutupdate4 */ | ||
124 | *p++ = cpu_to_be32(devinfo->layout_type); | ||
125 | if (devinfo->layoutstats_encode != NULL) | ||
126 | devinfo->layoutstats_encode(xdr, args, devinfo); | ||
127 | else | ||
128 | encode_uint32(xdr, 0); | ||
129 | } | ||
130 | |||
84 | /* | 131 | /* |
85 | * Encode ALLOCATE request | 132 | * Encode ALLOCATE request |
86 | */ | 133 | */ |
@@ -137,6 +184,28 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req, | |||
137 | encode_nops(&hdr); | 184 | encode_nops(&hdr); |
138 | } | 185 | } |
139 | 186 | ||
187 | /* | ||
188 | * Encode LAYOUTSTATS request | ||
189 | */ | ||
190 | static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, | ||
191 | struct xdr_stream *xdr, | ||
192 | struct nfs42_layoutstat_args *args) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | struct compound_hdr hdr = { | ||
197 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
198 | }; | ||
199 | |||
200 | encode_compound_hdr(xdr, req, &hdr); | ||
201 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
202 | encode_putfh(xdr, args->fh, &hdr); | ||
203 | WARN_ON(args->num_dev > PNFS_LAYOUTSTATS_MAXDEV); | ||
204 | for (i = 0; i < args->num_dev; i++) | ||
205 | encode_layoutstats(xdr, args, &args->devinfo[i], &hdr); | ||
206 | encode_nops(&hdr); | ||
207 | } | ||
208 | |||
140 | static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) | 209 | static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) |
141 | { | 210 | { |
142 | return decode_op_hdr(xdr, OP_ALLOCATE); | 211 | return decode_op_hdr(xdr, OP_ALLOCATE); |
@@ -169,6 +238,12 @@ out_overflow: | |||
169 | return -EIO; | 238 | return -EIO; |
170 | } | 239 | } |
171 | 240 | ||
241 | static int decode_layoutstats(struct xdr_stream *xdr, | ||
242 | struct nfs42_layoutstat_res *res) | ||
243 | { | ||
244 | return decode_op_hdr(xdr, OP_LAYOUTSTATS); | ||
245 | } | ||
246 | |||
172 | /* | 247 | /* |
173 | * Decode ALLOCATE request | 248 | * Decode ALLOCATE request |
174 | */ | 249 | */ |
@@ -246,4 +321,35 @@ static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp, | |||
246 | out: | 321 | out: |
247 | return status; | 322 | return status; |
248 | } | 323 | } |
324 | |||
325 | /* | ||
326 | * Decode LAYOUTSTATS request | ||
327 | */ | ||
328 | static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp, | ||
329 | struct xdr_stream *xdr, | ||
330 | struct nfs42_layoutstat_res *res) | ||
331 | { | ||
332 | struct compound_hdr hdr; | ||
333 | int status, i; | ||
334 | |||
335 | status = decode_compound_hdr(xdr, &hdr); | ||
336 | if (status) | ||
337 | goto out; | ||
338 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
339 | if (status) | ||
340 | goto out; | ||
341 | status = decode_putfh(xdr); | ||
342 | if (status) | ||
343 | goto out; | ||
344 | WARN_ON(res->num_dev > PNFS_LAYOUTSTATS_MAXDEV); | ||
345 | for (i = 0; i < res->num_dev; i++) { | ||
346 | status = decode_layoutstats(xdr, res); | ||
347 | if (status) | ||
348 | goto out; | ||
349 | } | ||
350 | out: | ||
351 | res->rpc_status = status; | ||
352 | return status; | ||
353 | } | ||
354 | |||
249 | #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ | 355 | #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ |
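encode_layoutstats() follows the usual XDR pattern: reserve_space() grabs a fixed-width region and xdr_encode_hyper() advances a cursor through it, writing each 64-bit counter as eight big-endian bytes. A self-contained sketch of that hyper step, assuming a plain byte buffer in place of the kernel's struct xdr_stream:

#include <stdint.h>

/* Write v as an XDR hyper: eight bytes, most significant first. */
static unsigned char *encode_hyper(unsigned char *p, uint64_t v)
{
        for (int i = 7; i >= 0; i--)
                *p++ = (unsigned char)(v >> (8 * i));
        return p;
}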
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdef424b0cd3..ea3bee919a76 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -233,6 +233,7 @@ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception | |||
233 | extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, | 233 | extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *, |
234 | struct rpc_message *, struct nfs4_sequence_args *, | 234 | struct rpc_message *, struct nfs4_sequence_args *, |
235 | struct nfs4_sequence_res *, int); | 235 | struct nfs4_sequence_res *, int); |
236 | extern void nfs4_init_sequence(struct nfs4_sequence_args *, struct nfs4_sequence_res *, int); | ||
236 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); | 237 | extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); |
237 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); | 238 | extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); |
238 | extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); | 239 | extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool); |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e42be52a8c18..3aa6a9ba5113 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -676,7 +676,6 @@ found: | |||
676 | break; | 676 | break; |
677 | } | 677 | } |
678 | 678 | ||
679 | /* No matching nfs_client found. */ | ||
680 | spin_unlock(&nn->nfs_client_lock); | 679 | spin_unlock(&nn->nfs_client_lock); |
681 | dprintk("NFS: <-- %s status = %d\n", __func__, status); | 680 | dprintk("NFS: <-- %s status = %d\n", __func__, status); |
682 | nfs_put_client(prev); | 681 | nfs_put_client(prev); |
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index f58c17b3b480..dcd39d4e2efe 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c | |||
@@ -41,6 +41,10 @@ nfs4_file_open(struct inode *inode, struct file *filp) | |||
41 | 41 | ||
42 | dprintk("NFS: open file(%pd2)\n", dentry); | 42 | dprintk("NFS: open file(%pd2)\n", dentry); |
43 | 43 | ||
44 | err = nfs_check_flags(openflags); | ||
45 | if (err) | ||
46 | return err; | ||
47 | |||
44 | if ((openflags & O_ACCMODE) == 3) | 48 | if ((openflags & O_ACCMODE) == 3) |
45 | openflags--; | 49 | openflags--; |
46 | 50 | ||
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c index c0b3a16b4a00..039b3eb6d834 100644 --- a/fs/nfs/nfs4getroot.c +++ b/fs/nfs/nfs4getroot.c | |||
@@ -35,13 +35,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p | |||
35 | goto out; | 35 | goto out; |
36 | } | 36 | } |
37 | 37 | ||
38 | if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { | ||
39 | printk(KERN_ERR "nfs4_get_rootfh:" | ||
40 | " getroot obtained referral\n"); | ||
41 | ret = -EREMOTE; | ||
42 | goto out; | ||
43 | } | ||
44 | |||
45 | memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); | 38 | memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); |
46 | out: | 39 | out: |
47 | nfs_free_fattr(fsinfo.fattr); | 40 | nfs_free_fattr(fsinfo.fattr); |
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 2e1737c40a29..535dfc69c628 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c | |||
@@ -494,12 +494,7 @@ nfs_idmap_delete(struct nfs_client *clp) | |||
494 | 494 | ||
495 | int nfs_idmap_init(void) | 495 | int nfs_idmap_init(void) |
496 | { | 496 | { |
497 | int ret; | 497 | return nfs_idmap_init_keyring(); |
498 | ret = nfs_idmap_init_keyring(); | ||
499 | if (ret != 0) | ||
500 | goto out; | ||
501 | out: | ||
502 | return ret; | ||
503 | } | 498 | } |
504 | 499 | ||
505 | void nfs_idmap_quit(void) | 500 | void nfs_idmap_quit(void) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55e1e3af23a3..6f228b5af819 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -356,6 +356,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ | |||
356 | case 0: | 356 | case 0: |
357 | return 0; | 357 | return 0; |
358 | case -NFS4ERR_OPENMODE: | 358 | case -NFS4ERR_OPENMODE: |
359 | case -NFS4ERR_DELEG_REVOKED: | ||
360 | case -NFS4ERR_ADMIN_REVOKED: | ||
361 | case -NFS4ERR_BAD_STATEID: | ||
359 | if (inode && nfs4_have_delegation(inode, FMODE_READ)) { | 362 | if (inode && nfs4_have_delegation(inode, FMODE_READ)) { |
360 | nfs4_inode_return_delegation(inode); | 363 | nfs4_inode_return_delegation(inode); |
361 | exception->retry = 1; | 364 | exception->retry = 1; |
@@ -367,15 +370,6 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_ | |||
367 | if (ret < 0) | 370 | if (ret < 0) |
368 | break; | 371 | break; |
369 | goto wait_on_recovery; | 372 | goto wait_on_recovery; |
370 | case -NFS4ERR_DELEG_REVOKED: | ||
371 | case -NFS4ERR_ADMIN_REVOKED: | ||
372 | case -NFS4ERR_BAD_STATEID: | ||
373 | if (state == NULL) | ||
374 | break; | ||
375 | ret = nfs4_schedule_stateid_recovery(server, state); | ||
376 | if (ret < 0) | ||
377 | break; | ||
378 | goto wait_on_recovery; | ||
379 | case -NFS4ERR_EXPIRED: | 373 | case -NFS4ERR_EXPIRED: |
380 | if (state != NULL) { | 374 | if (state != NULL) { |
381 | ret = nfs4_schedule_stateid_recovery(server, state); | 375 | ret = nfs4_schedule_stateid_recovery(server, state); |
@@ -482,8 +476,8 @@ struct nfs4_call_sync_data { | |||
482 | struct nfs4_sequence_res *seq_res; | 476 | struct nfs4_sequence_res *seq_res; |
483 | }; | 477 | }; |
484 | 478 | ||
485 | static void nfs4_init_sequence(struct nfs4_sequence_args *args, | 479 | void nfs4_init_sequence(struct nfs4_sequence_args *args, |
486 | struct nfs4_sequence_res *res, int cache_reply) | 480 | struct nfs4_sequence_res *res, int cache_reply) |
487 | { | 481 | { |
488 | args->sa_slot = NULL; | 482 | args->sa_slot = NULL; |
489 | args->sa_cache_this = cache_reply; | 483 | args->sa_cache_this = cache_reply; |
@@ -1553,6 +1547,13 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod | |||
1553 | struct nfs4_state *newstate; | 1547 | struct nfs4_state *newstate; |
1554 | int ret; | 1548 | int ret; |
1555 | 1549 | ||
1550 | if ((opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR || | ||
1551 | opendata->o_arg.claim == NFS4_OPEN_CLAIM_DELEG_CUR_FH) && | ||
1552 | (opendata->o_arg.u.delegation_type & fmode) != fmode) | ||
1553 | /* This mode can't have been delegated, so we must have | ||
1554 | * a valid open_stateid to cover it - no need to reclaim. | ||
1555 | */ | ||
1556 | return 0; | ||
1556 | opendata->o_arg.open_flags = 0; | 1557 | opendata->o_arg.open_flags = 0; |
1557 | opendata->o_arg.fmode = fmode; | 1558 | opendata->o_arg.fmode = fmode; |
1558 | opendata->o_arg.share_access = nfs4_map_atomic_open_share( | 1559 | opendata->o_arg.share_access = nfs4_map_atomic_open_share( |
@@ -1684,6 +1685,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct | |||
1684 | "%d.\n", __func__, err); | 1685 | "%d.\n", __func__, err); |
1685 | case 0: | 1686 | case 0: |
1686 | case -ENOENT: | 1687 | case -ENOENT: |
1688 | case -EAGAIN: | ||
1687 | case -ESTALE: | 1689 | case -ESTALE: |
1688 | break; | 1690 | break; |
1689 | case -NFS4ERR_BADSESSION: | 1691 | case -NFS4ERR_BADSESSION: |
@@ -3355,6 +3357,8 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, | |||
3355 | goto out; | 3357 | goto out; |
3356 | case -NFS4ERR_MOVED: | 3358 | case -NFS4ERR_MOVED: |
3357 | err = nfs4_get_referral(client, dir, name, fattr, fhandle); | 3359 | err = nfs4_get_referral(client, dir, name, fattr, fhandle); |
3360 | if (err == -NFS4ERR_MOVED) | ||
3361 | err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); | ||
3358 | goto out; | 3362 | goto out; |
3359 | case -NFS4ERR_WRONGSEC: | 3363 | case -NFS4ERR_WRONGSEC: |
3360 | err = -EPERM; | 3364 | err = -EPERM; |
@@ -4955,49 +4959,128 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp, | |||
4955 | memcpy(bootverf->data, verf, sizeof(bootverf->data)); | 4959 | memcpy(bootverf->data, verf, sizeof(bootverf->data)); |
4956 | } | 4960 | } |
4957 | 4961 | ||
4958 | static unsigned int | 4962 | static int |
4959 | nfs4_init_nonuniform_client_string(struct nfs_client *clp, | 4963 | nfs4_init_nonuniform_client_string(struct nfs_client *clp) |
4960 | char *buf, size_t len) | ||
4961 | { | 4964 | { |
4962 | unsigned int result; | 4965 | int result; |
4966 | size_t len; | ||
4967 | char *str; | ||
4968 | bool retried = false; | ||
4963 | 4969 | ||
4964 | if (clp->cl_owner_id != NULL) | 4970 | if (clp->cl_owner_id != NULL) |
4965 | return strlcpy(buf, clp->cl_owner_id, len); | 4971 | return 0; |
4972 | retry: | ||
4973 | rcu_read_lock(); | ||
4974 | len = 10 + strlen(clp->cl_ipaddr) + 1 + | ||
4975 | strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)) + | ||
4976 | 1 + | ||
4977 | strlen(rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)) + | ||
4978 | 1; | ||
4979 | rcu_read_unlock(); | ||
4980 | |||
4981 | if (len > NFS4_OPAQUE_LIMIT + 1) | ||
4982 | return -EINVAL; | ||
4983 | |||
4984 | /* | ||
4985 | * Since this string is allocated at mount time, and held until the | ||
4986 | * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying | ||
4987 | * about a memory-reclaim deadlock. | ||
4988 | */ | ||
4989 | str = kmalloc(len, GFP_KERNEL); | ||
4990 | if (!str) | ||
4991 | return -ENOMEM; | ||
4966 | 4992 | ||
4967 | rcu_read_lock(); | 4993 | rcu_read_lock(); |
4968 | result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s", | 4994 | result = scnprintf(str, len, "Linux NFSv4.0 %s/%s %s", |
4969 | clp->cl_ipaddr, | 4995 | clp->cl_ipaddr, |
4970 | rpc_peeraddr2str(clp->cl_rpcclient, | 4996 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), |
4971 | RPC_DISPLAY_ADDR), | 4997 | rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO)); |
4972 | rpc_peeraddr2str(clp->cl_rpcclient, | ||
4973 | RPC_DISPLAY_PROTO)); | ||
4974 | rcu_read_unlock(); | 4998 | rcu_read_unlock(); |
4975 | clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); | 4999 | |
4976 | return result; | 5000 | /* Did something change? */ |
5001 | if (result >= len) { | ||
5002 | kfree(str); | ||
5003 | if (retried) | ||
5004 | return -EINVAL; | ||
5005 | retried = true; | ||
5006 | goto retry; | ||
5007 | } | ||
5008 | clp->cl_owner_id = str; | ||
5009 | return 0; | ||
4977 | } | 5010 | } |
4978 | 5011 | ||
4979 | static unsigned int | 5012 | static int |
4980 | nfs4_init_uniform_client_string(struct nfs_client *clp, | 5013 | nfs4_init_uniquifier_client_string(struct nfs_client *clp) |
4981 | char *buf, size_t len) | 5014 | { |
5015 | int result; | ||
5016 | size_t len; | ||
5017 | char *str; | ||
5018 | |||
5019 | len = 10 + 10 + 1 + 10 + 1 + | ||
5020 | strlen(nfs4_client_id_uniquifier) + 1 + | ||
5021 | strlen(clp->cl_rpcclient->cl_nodename) + 1; | ||
5022 | |||
5023 | if (len > NFS4_OPAQUE_LIMIT + 1) | ||
5024 | return -EINVAL; | ||
5025 | |||
5026 | /* | ||
5027 | * Since this string is allocated at mount time, and held until the | ||
5028 | * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying | ||
5029 | * about a memory-reclaim deadlock. | ||
5030 | */ | ||
5031 | str = kmalloc(len, GFP_KERNEL); | ||
5032 | if (!str) | ||
5033 | return -ENOMEM; | ||
5034 | |||
5035 | result = scnprintf(str, len, "Linux NFSv%u.%u %s/%s", | ||
5036 | clp->rpc_ops->version, clp->cl_minorversion, | ||
5037 | nfs4_client_id_uniquifier, | ||
5038 | clp->cl_rpcclient->cl_nodename); | ||
5039 | if (result >= len) { | ||
5040 | kfree(str); | ||
5041 | return -EINVAL; | ||
5042 | } | ||
5043 | clp->cl_owner_id = str; | ||
5044 | return 0; | ||
5045 | } | ||
5046 | |||
5047 | static int | ||
5048 | nfs4_init_uniform_client_string(struct nfs_client *clp) | ||
4982 | { | 5049 | { |
4983 | const char *nodename = clp->cl_rpcclient->cl_nodename; | 5050 | int result; |
4984 | unsigned int result; | 5051 | size_t len; |
5052 | char *str; | ||
4985 | 5053 | ||
4986 | if (clp->cl_owner_id != NULL) | 5054 | if (clp->cl_owner_id != NULL) |
4987 | return strlcpy(buf, clp->cl_owner_id, len); | 5055 | return 0; |
4988 | 5056 | ||
4989 | if (nfs4_client_id_uniquifier[0] != '\0') | 5057 | if (nfs4_client_id_uniquifier[0] != '\0') |
4990 | result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s", | 5058 | return nfs4_init_uniquifier_client_string(clp); |
4991 | clp->rpc_ops->version, | 5059 | |
4992 | clp->cl_minorversion, | 5060 | len = 10 + 10 + 1 + 10 + 1 + |
4993 | nfs4_client_id_uniquifier, | 5061 | strlen(clp->cl_rpcclient->cl_nodename) + 1; |
4994 | nodename); | 5062 | |
4995 | else | 5063 | if (len > NFS4_OPAQUE_LIMIT + 1) |
4996 | result = scnprintf(buf, len, "Linux NFSv%u.%u %s", | 5064 | return -EINVAL; |
4997 | clp->rpc_ops->version, clp->cl_minorversion, | 5065 | |
4998 | nodename); | 5066 | /* |
4999 | clp->cl_owner_id = kstrdup(buf, GFP_KERNEL); | 5067 | * Since this string is allocated at mount time, and held until the |
5000 | return result; | 5068 | * nfs_client is destroyed, we can use GFP_KERNEL here w/o worrying |
5069 | * about a memory-reclaim deadlock. | ||
5070 | */ | ||
5071 | str = kmalloc(len, GFP_KERNEL); | ||
5072 | if (!str) | ||
5073 | return -ENOMEM; | ||
5074 | |||
5075 | result = scnprintf(str, len, "Linux NFSv%u.%u %s", | ||
5076 | clp->rpc_ops->version, clp->cl_minorversion, | ||
5077 | clp->cl_rpcclient->cl_nodename); | ||
5078 | if (result >= len) { | ||
5079 | kfree(str); | ||
5080 | return -EINVAL; | ||
5081 | } | ||
5082 | clp->cl_owner_id = str; | ||
5083 | return 0; | ||
5001 | } | 5084 | } |
5002 | 5085 | ||
5003 | /* | 5086 | /* |
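All three rewritten client-string helpers share a measure/allocate/format/verify shape, and the nonuniform variant retries once because the rpc_peeraddr2str() inputs can change between the length calculation and the scnprintf(). A userspace sketch of that shape under the same assumptions, with hypothetical parameters standing in for the rpc_clnt fields:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *format_owner_id(const char *ipaddr, const char *addr,
                             const char *proto)
{
        for (int tries = 0; tries < 2; tries++) {
                /* Measure, allocate, format, then verify the result
                 * still fits -- the inputs may have changed meanwhile. */
                size_t len = strlen("Linux NFSv4.0 ") + strlen(ipaddr) +
                             1 + strlen(addr) + 1 + strlen(proto) + 1;
                char *str = malloc(len);
                int n;

                if (!str)
                        return NULL;
                n = snprintf(str, len, "Linux NFSv4.0 %s/%s %s",
                             ipaddr, addr, proto);
                if (n >= 0 && (size_t)n < len)
                        return str;
                free(str);      /* truncated: retry with fresh lengths */
        }
        return NULL;
}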
@@ -5044,7 +5127,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
5044 | struct nfs4_setclientid setclientid = { | 5127 | struct nfs4_setclientid setclientid = { |
5045 | .sc_verifier = &sc_verifier, | 5128 | .sc_verifier = &sc_verifier, |
5046 | .sc_prog = program, | 5129 | .sc_prog = program, |
5047 | .sc_cb_ident = clp->cl_cb_ident, | 5130 | .sc_clnt = clp, |
5048 | }; | 5131 | }; |
5049 | struct rpc_message msg = { | 5132 | struct rpc_message msg = { |
5050 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], | 5133 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], |
@@ -5064,16 +5147,15 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
5064 | 5147 | ||
5065 | /* nfs_client_id4 */ | 5148 | /* nfs_client_id4 */ |
5066 | nfs4_init_boot_verifier(clp, &sc_verifier); | 5149 | nfs4_init_boot_verifier(clp, &sc_verifier); |
5150 | |||
5067 | if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) | 5151 | if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags)) |
5068 | setclientid.sc_name_len = | 5152 | status = nfs4_init_uniform_client_string(clp); |
5069 | nfs4_init_uniform_client_string(clp, | ||
5070 | setclientid.sc_name, | ||
5071 | sizeof(setclientid.sc_name)); | ||
5072 | else | 5153 | else |
5073 | setclientid.sc_name_len = | 5154 | status = nfs4_init_nonuniform_client_string(clp); |
5074 | nfs4_init_nonuniform_client_string(clp, | 5155 | |
5075 | setclientid.sc_name, | 5156 | if (status) |
5076 | sizeof(setclientid.sc_name)); | 5157 | goto out; |
5158 | |||
5077 | /* cb_client4 */ | 5159 | /* cb_client4 */ |
5078 | setclientid.sc_netid_len = | 5160 | setclientid.sc_netid_len = |
5079 | nfs4_init_callback_netid(clp, | 5161 | nfs4_init_callback_netid(clp, |
@@ -5083,9 +5165,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
5083 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", | 5165 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", |
5084 | clp->cl_ipaddr, port >> 8, port & 255); | 5166 | clp->cl_ipaddr, port >> 8, port & 255); |
5085 | 5167 | ||
5086 | dprintk("NFS call setclientid auth=%s, '%.*s'\n", | 5168 | dprintk("NFS call setclientid auth=%s, '%s'\n", |
5087 | clp->cl_rpcclient->cl_auth->au_ops->au_name, | 5169 | clp->cl_rpcclient->cl_auth->au_ops->au_name, |
5088 | setclientid.sc_name_len, setclientid.sc_name); | 5170 | clp->cl_owner_id); |
5089 | task = rpc_run_task(&task_setup_data); | 5171 | task = rpc_run_task(&task_setup_data); |
5090 | if (IS_ERR(task)) { | 5172 | if (IS_ERR(task)) { |
5091 | status = PTR_ERR(task); | 5173 | status = PTR_ERR(task); |
@@ -5402,6 +5484,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, | |||
5402 | atomic_inc(&lsp->ls_count); | 5484 | atomic_inc(&lsp->ls_count); |
5403 | /* Ensure we don't close file until we're done freeing locks! */ | 5485 | /* Ensure we don't close file until we're done freeing locks! */ |
5404 | p->ctx = get_nfs_open_context(ctx); | 5486 | p->ctx = get_nfs_open_context(ctx); |
5487 | get_file(fl->fl_file); | ||
5405 | memcpy(&p->fl, fl, sizeof(p->fl)); | 5488 | memcpy(&p->fl, fl, sizeof(p->fl)); |
5406 | p->server = NFS_SERVER(inode); | 5489 | p->server = NFS_SERVER(inode); |
5407 | return p; | 5490 | return p; |
@@ -5413,6 +5496,7 @@ static void nfs4_locku_release_calldata(void *data) | |||
5413 | nfs_free_seqid(calldata->arg.seqid); | 5496 | nfs_free_seqid(calldata->arg.seqid); |
5414 | nfs4_put_lock_state(calldata->lsp); | 5497 | nfs4_put_lock_state(calldata->lsp); |
5415 | put_nfs_open_context(calldata->ctx); | 5498 | put_nfs_open_context(calldata->ctx); |
5499 | fput(calldata->fl.fl_file); | ||
5416 | kfree(calldata); | 5500 | kfree(calldata); |
5417 | } | 5501 | } |
5418 | 5502 | ||
@@ -6846,11 +6930,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, | |||
6846 | }; | 6930 | }; |
6847 | 6931 | ||
6848 | nfs4_init_boot_verifier(clp, &verifier); | 6932 | nfs4_init_boot_verifier(clp, &verifier); |
6849 | args.id_len = nfs4_init_uniform_client_string(clp, args.id, | 6933 | |
6850 | sizeof(args.id)); | 6934 | status = nfs4_init_uniform_client_string(clp); |
6851 | dprintk("NFS call exchange_id auth=%s, '%.*s'\n", | 6935 | if (status) |
6936 | goto out; | ||
6937 | |||
6938 | dprintk("NFS call exchange_id auth=%s, '%s'\n", | ||
6852 | clp->cl_rpcclient->cl_auth->au_ops->au_name, | 6939 | clp->cl_rpcclient->cl_auth->au_ops->au_name, |
6853 | args.id_len, args.id); | 6940 | clp->cl_owner_id); |
6854 | 6941 | ||
6855 | res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), | 6942 | res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), |
6856 | GFP_NOFS); | 6943 | GFP_NOFS); |
@@ -6885,7 +6972,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, | |||
6885 | /* unsupported! */ | 6972 | /* unsupported! */ |
6886 | WARN_ON_ONCE(1); | 6973 | WARN_ON_ONCE(1); |
6887 | status = -EINVAL; | 6974 | status = -EINVAL; |
6888 | goto out_server_scope; | 6975 | goto out_impl_id; |
6889 | } | 6976 | } |
6890 | 6977 | ||
6891 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); | 6978 | status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); |
@@ -6913,6 +7000,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, | |||
6913 | /* use the most recent implementation id */ | 7000 | /* use the most recent implementation id */ |
6914 | kfree(clp->cl_implid); | 7001 | kfree(clp->cl_implid); |
6915 | clp->cl_implid = res.impl_id; | 7002 | clp->cl_implid = res.impl_id; |
7003 | res.impl_id = NULL; | ||
6916 | 7004 | ||
6917 | if (clp->cl_serverscope != NULL && | 7005 | if (clp->cl_serverscope != NULL && |
6918 | !nfs41_same_server_scope(clp->cl_serverscope, | 7006 | !nfs41_same_server_scope(clp->cl_serverscope, |
@@ -6926,15 +7014,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, | |||
6926 | 7014 | ||
6927 | if (clp->cl_serverscope == NULL) { | 7015 | if (clp->cl_serverscope == NULL) { |
6928 | clp->cl_serverscope = res.server_scope; | 7016 | clp->cl_serverscope = res.server_scope; |
6929 | goto out; | 7017 | res.server_scope = NULL; |
6930 | } | 7018 | } |
6931 | } else | 7019 | } |
6932 | kfree(res.impl_id); | ||
6933 | 7020 | ||
6934 | out_server_owner: | 7021 | out_impl_id: |
6935 | kfree(res.server_owner); | 7022 | kfree(res.impl_id); |
6936 | out_server_scope: | 7023 | out_server_scope: |
6937 | kfree(res.server_scope); | 7024 | kfree(res.server_scope); |
7025 | out_server_owner: | ||
7026 | kfree(res.server_owner); | ||
6938 | out: | 7027 | out: |
6939 | if (clp->cl_implid != NULL) | 7028 | if (clp->cl_implid != NULL) |
6940 | dprintk("NFS reply exchange_id: Server Implementation ID: " | 7029 | dprintk("NFS reply exchange_id: Server Implementation ID: " |
@@ -8061,9 +8150,8 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | |||
8061 | struct rpc_task *task; | 8150 | struct rpc_task *task; |
8062 | int status = 0; | 8151 | int status = 0; |
8063 | 8152 | ||
8064 | dprintk("NFS: %4d initiating layoutcommit call. sync %d " | 8153 | dprintk("NFS: initiating layoutcommit call. sync %d " |
8065 | "lbw: %llu inode %lu\n", | 8154 | "lbw: %llu inode %lu\n", sync, |
8066 | data->task.tk_pid, sync, | ||
8067 | data->args.lastbytewritten, | 8155 | data->args.lastbytewritten, |
8068 | data->args.inode->i_ino); | 8156 | data->args.inode->i_ino); |
8069 | 8157 | ||
@@ -8557,7 +8645,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | |||
8557 | | NFS_CAP_ATOMIC_OPEN_V1 | 8645 | | NFS_CAP_ATOMIC_OPEN_V1 |
8558 | | NFS_CAP_ALLOCATE | 8646 | | NFS_CAP_ALLOCATE |
8559 | | NFS_CAP_DEALLOCATE | 8647 | | NFS_CAP_DEALLOCATE |
8560 | | NFS_CAP_SEEK, | 8648 | | NFS_CAP_SEEK |
8649 | | NFS_CAP_LAYOUTSTATS, | ||
8561 | .init_client = nfs41_init_client, | 8650 | .init_client = nfs41_init_client, |
8562 | .shutdown_client = nfs41_shutdown_client, | 8651 | .shutdown_client = nfs41_shutdown_client, |
8563 | .match_stateid = nfs41_match_stateid, | 8652 | .match_stateid = nfs41_match_stateid, |
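In the exchange_id error-path rework above, res.impl_id and res.server_scope are set to NULL once their pointers have been adopted by the nfs_client, so the reordered out_impl_id/out_server_scope/out_server_owner labels can kfree() unconditionally without double-freeing. The idiom in isolation, with a hypothetical payload type:

#include <stdlib.h>

struct payload { char data[64]; };

/* Hand the result's object to its long-lived owner and clear the
 * source, so the shared unwind path's free() is a harmless no-op. */
static void adopt(struct payload **owner, struct payload **res)
{
        free(*owner);           /* drop any previous value */
        *owner = *res;
        *res = NULL;            /* unwind path: free(NULL) does nothing */
}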
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2782cfca2265..605840dc89cf 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -309,7 +309,6 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) | |||
309 | 309 | ||
310 | if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) | 310 | if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) |
311 | goto do_confirm; | 311 | goto do_confirm; |
312 | nfs4_begin_drain_session(clp); | ||
313 | status = nfs4_proc_exchange_id(clp, cred); | 312 | status = nfs4_proc_exchange_id(clp, cred); |
314 | if (status != 0) | 313 | if (status != 0) |
315 | goto out; | 314 | goto out; |
@@ -1482,6 +1481,8 @@ restart: | |||
1482 | spin_unlock(&state->state_lock); | 1481 | spin_unlock(&state->state_lock); |
1483 | } | 1482 | } |
1484 | nfs4_put_open_state(state); | 1483 | nfs4_put_open_state(state); |
1484 | clear_bit(NFS4CLNT_RECLAIM_NOGRACE, | ||
1485 | &state->flags); | ||
1485 | spin_lock(&sp->so_lock); | 1486 | spin_lock(&sp->so_lock); |
1486 | goto restart; | 1487 | goto restart; |
1487 | } | 1488 | } |
@@ -1830,6 +1831,7 @@ static int nfs4_establish_lease(struct nfs_client *clp) | |||
1830 | clp->cl_mvops->reboot_recovery_ops; | 1831 | clp->cl_mvops->reboot_recovery_ops; |
1831 | int status; | 1832 | int status; |
1832 | 1833 | ||
1834 | nfs4_begin_drain_session(clp); | ||
1833 | cred = nfs4_get_clid_cred(clp); | 1835 | cred = nfs4_get_clid_cred(clp); |
1834 | if (cred == NULL) | 1836 | if (cred == NULL) |
1835 | return -ENOENT; | 1837 | return -ENOENT; |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0aea97841d30..558cd65dbdb7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -139,7 +139,8 @@ static int nfs4_stat_to_errno(int); | |||
139 | #define encode_setclientid_maxsz \ | 139 | #define encode_setclientid_maxsz \ |
140 | (op_encode_hdr_maxsz + \ | 140 | (op_encode_hdr_maxsz + \ |
141 | XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ | 141 | XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \ |
142 | XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \ | 142 | /* client name */ \ |
143 | 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
143 | 1 /* sc_prog */ + \ | 144 | 1 /* sc_prog */ + \ |
144 | 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ | 145 | 1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \ |
145 | 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ | 146 | 1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \ |
@@ -288,7 +289,8 @@ static int nfs4_stat_to_errno(int); | |||
288 | #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ | 289 | #define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ |
289 | encode_verifier_maxsz + \ | 290 | encode_verifier_maxsz + \ |
290 | 1 /* co_ownerid.len */ + \ | 291 | 1 /* co_ownerid.len */ + \ |
291 | XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ | 292 | /* eia_clientowner */ \ |
293 | 1 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ | ||
292 | 1 /* flags */ + \ | 294 | 1 /* flags */ + \ |
293 | 1 /* spa_how */ + \ | 295 | 1 /* spa_how */ + \ |
294 | /* max is SP4_MACH_CRED (for now) */ + \ | 296 | /* max is SP4_MACH_CRED (for now) */ + \ |
@@ -1667,13 +1669,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie | |||
1667 | encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); | 1669 | encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); |
1668 | encode_nfs4_verifier(xdr, setclientid->sc_verifier); | 1670 | encode_nfs4_verifier(xdr, setclientid->sc_verifier); |
1669 | 1671 | ||
1670 | encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); | 1672 | encode_string(xdr, strlen(setclientid->sc_clnt->cl_owner_id), |
1673 | setclientid->sc_clnt->cl_owner_id); | ||
1671 | p = reserve_space(xdr, 4); | 1674 | p = reserve_space(xdr, 4); |
1672 | *p = cpu_to_be32(setclientid->sc_prog); | 1675 | *p = cpu_to_be32(setclientid->sc_prog); |
1673 | encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); | 1676 | encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); |
1674 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); | 1677 | encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); |
1675 | p = reserve_space(xdr, 4); | 1678 | p = reserve_space(xdr, 4); |
1676 | *p = cpu_to_be32(setclientid->sc_cb_ident); | 1679 | *p = cpu_to_be32(setclientid->sc_clnt->cl_cb_ident); |
1677 | } | 1680 | } |
1678 | 1681 | ||
1679 | static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) | 1682 | static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) |
@@ -1747,7 +1750,8 @@ static void encode_exchange_id(struct xdr_stream *xdr, | |||
1747 | encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); | 1750 | encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); |
1748 | encode_nfs4_verifier(xdr, args->verifier); | 1751 | encode_nfs4_verifier(xdr, args->verifier); |
1749 | 1752 | ||
1750 | encode_string(xdr, args->id_len, args->id); | 1753 | encode_string(xdr, strlen(args->client->cl_owner_id), |
1754 | args->client->cl_owner_id); | ||
1751 | 1755 | ||
1752 | encode_uint32(xdr, args->flags); | 1756 | encode_uint32(xdr, args->flags); |
1753 | encode_uint32(xdr, args->state_protect.how); | 1757 | encode_uint32(xdr, args->state_protect.how); |
@@ -7427,6 +7431,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
7427 | PROC(SEEK, enc_seek, dec_seek), | 7431 | PROC(SEEK, enc_seek, dec_seek), |
7428 | PROC(ALLOCATE, enc_allocate, dec_allocate), | 7432 | PROC(ALLOCATE, enc_allocate, dec_allocate), |
7429 | PROC(DEALLOCATE, enc_deallocate, dec_deallocate), | 7433 | PROC(DEALLOCATE, enc_deallocate, dec_deallocate), |
7434 | PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), | ||
7430 | #endif /* CONFIG_NFS_V4_2 */ | 7435 | #endif /* CONFIG_NFS_V4_2 */ |
7431 | }; | 7436 | }; |
7432 | 7437 | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 282b39369510..1da68d3b1eda 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -636,9 +636,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, | |||
636 | 636 | ||
637 | hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); | 637 | hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); |
638 | 638 | ||
639 | dprintk("NFS: %5u initiated pgio call " | 639 | dprintk("NFS: initiated pgio call " |
640 | "(req %s/%llu, %u bytes @ offset %llu)\n", | 640 | "(req %s/%llu, %u bytes @ offset %llu)\n", |
641 | hdr->task.tk_pid, | ||
642 | hdr->inode->i_sb->s_id, | 641 | hdr->inode->i_sb->s_id, |
643 | (unsigned long long)NFS_FILEID(hdr->inode), | 642 | (unsigned long long)NFS_FILEID(hdr->inode), |
644 | hdr->args.count, | 643 | hdr->args.count, |
@@ -690,8 +689,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, | |||
690 | static void nfs_pgio_release(void *calldata) | 689 | static void nfs_pgio_release(void *calldata) |
691 | { | 690 | { |
692 | struct nfs_pgio_header *hdr = calldata; | 691 | struct nfs_pgio_header *hdr = calldata; |
693 | if (hdr->rw_ops->rw_release) | ||
694 | hdr->rw_ops->rw_release(hdr); | ||
695 | nfs_pgio_data_destroy(hdr); | 692 | nfs_pgio_data_destroy(hdr); |
696 | hdr->completion_ops->completion(hdr); | 693 | hdr->completion_ops->completion(hdr); |
697 | } | 694 | } |
@@ -711,7 +708,9 @@ static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, | |||
711 | * nfs_pageio_init - initialise a page io descriptor | 708 | * nfs_pageio_init - initialise a page io descriptor |
712 | * @desc: pointer to descriptor | 709 | * @desc: pointer to descriptor |
713 | * @inode: pointer to inode | 710 | * @inode: pointer to inode |
714 | * @doio: pointer to io function | 711 | * @pg_ops: pointer to pageio operations |
712 | * @compl_ops: pointer to pageio completion operations | ||
713 | * @rw_ops: pointer to nfs read/write operations | ||
715 | * @bsize: io block size | 714 | * @bsize: io block size |
716 | * @io_flags: extra parameters for the io function | 715 | * @io_flags: extra parameters for the io function |
717 | */ | 716 | */ |
@@ -1186,6 +1185,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
1186 | * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an | 1185 | * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an |
1187 | * nfs_pageio_descriptor | 1186 | * nfs_pageio_descriptor |
1188 | * @desc: pointer to io descriptor | 1187 | * @desc: pointer to io descriptor |
1188 | * @mirror_idx: mirror index | ||
1189 | */ | 1189 | */ |
1190 | static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, | 1190 | static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, |
1191 | u32 mirror_idx) | 1191 | u32 mirror_idx) |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 230606243be6..0ba9a02c9566 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "iostat.h" | 35 | #include "iostat.h" |
36 | #include "nfs4trace.h" | 36 | #include "nfs4trace.h" |
37 | #include "delegation.h" | 37 | #include "delegation.h" |
38 | #include "nfs42.h" | ||
38 | 39 | ||
39 | #define NFSDBG_FACILITY NFSDBG_PNFS | 40 | #define NFSDBG_FACILITY NFSDBG_PNFS |
40 | #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) | 41 | #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) |
@@ -1821,6 +1822,7 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr) | |||
1821 | /* Resend all requests through the MDS */ | 1822 | /* Resend all requests through the MDS */ |
1822 | nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, | 1823 | nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true, |
1823 | hdr->completion_ops); | 1824 | hdr->completion_ops); |
1825 | set_bit(NFS_CONTEXT_RESEND_WRITES, &hdr->args.context->flags); | ||
1824 | return nfs_pageio_resend(&pgio, hdr); | 1826 | return nfs_pageio_resend(&pgio, hdr); |
1825 | } | 1827 | } |
1826 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); | 1828 | EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); |
@@ -1865,6 +1867,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, | |||
1865 | mirror->pg_recoalesce = 1; | 1867 | mirror->pg_recoalesce = 1; |
1866 | } | 1868 | } |
1867 | nfs_pgio_data_destroy(hdr); | 1869 | nfs_pgio_data_destroy(hdr); |
1870 | hdr->release(hdr); | ||
1868 | } | 1871 | } |
1869 | 1872 | ||
1870 | static enum pnfs_try_status | 1873 | static enum pnfs_try_status |
@@ -1979,6 +1982,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, | |||
1979 | mirror->pg_recoalesce = 1; | 1982 | mirror->pg_recoalesce = 1; |
1980 | } | 1983 | } |
1981 | nfs_pgio_data_destroy(hdr); | 1984 | nfs_pgio_data_destroy(hdr); |
1985 | hdr->release(hdr); | ||
1982 | } | 1986 | } |
1983 | 1987 | ||
1984 | /* | 1988 | /* |
@@ -2247,3 +2251,63 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) | |||
2247 | } | 2251 | } |
2248 | return thp; | 2252 | return thp; |
2249 | } | 2253 | } |
2254 | |||
2255 | #if IS_ENABLED(CONFIG_NFS_V4_2) | ||
2256 | int | ||
2257 | pnfs_report_layoutstat(struct inode *inode) | ||
2258 | { | ||
2259 | struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; | ||
2260 | struct nfs_server *server = NFS_SERVER(inode); | ||
2261 | struct nfs_inode *nfsi = NFS_I(inode); | ||
2262 | struct nfs42_layoutstat_data *data; | ||
2263 | struct pnfs_layout_hdr *hdr; | ||
2264 | int status = 0; | ||
2265 | |||
2266 | if (!pnfs_enabled_sb(server) || !ld->prepare_layoutstats) | ||
2267 | goto out; | ||
2268 | |||
2269 | if (!nfs_server_capable(inode, NFS_CAP_LAYOUTSTATS)) | ||
2270 | goto out; | ||
2271 | |||
2272 | if (test_and_set_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags)) | ||
2273 | goto out; | ||
2274 | |||
2275 | spin_lock(&inode->i_lock); | ||
2276 | if (!NFS_I(inode)->layout) { | ||
2277 | spin_unlock(&inode->i_lock); | ||
2278 | goto out; | ||
2279 | } | ||
2280 | hdr = NFS_I(inode)->layout; | ||
2281 | pnfs_get_layout_hdr(hdr); | ||
2282 | spin_unlock(&inode->i_lock); | ||
2283 | |||
2284 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
2285 | if (!data) { | ||
2286 | status = -ENOMEM; | ||
2287 | goto out_put; | ||
2288 | } | ||
2289 | |||
2290 | data->args.fh = NFS_FH(inode); | ||
2291 | data->args.inode = inode; | ||
2292 | nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid); | ||
2293 | status = ld->prepare_layoutstats(&data->args); | ||
2294 | if (status) | ||
2295 | goto out_free; | ||
2296 | |||
2297 | status = nfs42_proc_layoutstats_generic(NFS_SERVER(inode), data); | ||
2298 | |||
2299 | out: | ||
2300 | dprintk("%s returns %d\n", __func__, status); | ||
2301 | return status; | ||
2302 | |||
2303 | out_free: | ||
2304 | kfree(data); | ||
2305 | out_put: | ||
2306 | pnfs_put_layout_hdr(hdr); | ||
2307 | smp_mb__before_atomic(); | ||
2308 | clear_bit(NFS_INO_LAYOUTSTATS, &nfsi->flags); | ||
2309 | smp_mb__after_atomic(); | ||
2310 | goto out; | ||
2311 | } | ||
2312 | EXPORT_SYMBOL_GPL(pnfs_report_layoutstat); | ||
2313 | #endif | ||
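pnfs_report_layoutstat() gates itself with test_and_set_bit(NFS_INO_LAYOUTSTATS, ...), so at most one LAYOUTSTATS call per inode is in flight; both the release callback and the error unwind clear the bit (with memory barriers) before another report may start. A userspace analogue of that single-flight guard, assuming C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_flag layoutstats_inflight = ATOMIC_FLAG_INIT;

/* Only the caller that wins the flag sends a report; everyone else
 * bails out, exactly like the test_and_set_bit() check above. */
static bool layoutstats_begin(void)
{
        return !atomic_flag_test_and_set(&layoutstats_inflight);
}

static void layoutstats_end(void)
{
        atomic_flag_clear(&layoutstats_inflight);
}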
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1e6308f82fc3..3e6ab7bfbabd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -178,6 +178,8 @@ struct pnfs_layoutdriver_type { | |||
178 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, | 178 | void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, |
179 | struct xdr_stream *xdr, | 179 | struct xdr_stream *xdr, |
180 | const struct nfs4_layoutcommit_args *args); | 180 | const struct nfs4_layoutcommit_args *args); |
181 | int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); | ||
182 | void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); | ||
181 | }; | 183 | }; |
182 | 184 | ||
183 | struct pnfs_layout_hdr { | 185 | struct pnfs_layout_hdr { |
@@ -290,7 +292,6 @@ int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); | |||
290 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); | 292 | struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); |
291 | void pnfs_error_mark_layout_for_return(struct inode *inode, | 293 | void pnfs_error_mark_layout_for_return(struct inode *inode, |
292 | struct pnfs_layout_segment *lseg); | 294 | struct pnfs_layout_segment *lseg); |
293 | |||
294 | /* nfs4_deviceid_flags */ | 295 | /* nfs4_deviceid_flags */ |
295 | enum { | 296 | enum { |
296 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ | 297 | NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ |
@@ -689,4 +690,14 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) | |||
689 | 690 | ||
690 | #endif /* CONFIG_NFS_V4_1 */ | 691 | #endif /* CONFIG_NFS_V4_1 */ |
691 | 692 | ||
693 | #if IS_ENABLED(CONFIG_NFS_V4_2) | ||
694 | int pnfs_report_layoutstat(struct inode *inode); | ||
695 | #else | ||
696 | static inline int | ||
697 | pnfs_report_layoutstat(struct inode *inode) | ||
698 | { | ||
699 | return 0; | ||
700 | } | ||
701 | #endif | ||
702 | |||
692 | #endif /* FS_NFS_PNFS_H */ | 703 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6c262555e08..65869ca9c851 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -1290,6 +1290,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, | |||
1290 | static void nfs_redirty_request(struct nfs_page *req) | 1290 | static void nfs_redirty_request(struct nfs_page *req) |
1291 | { | 1291 | { |
1292 | nfs_mark_request_dirty(req); | 1292 | nfs_mark_request_dirty(req); |
1293 | set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); | ||
1293 | nfs_unlock_request(req); | 1294 | nfs_unlock_request(req); |
1294 | nfs_end_page_writeback(req); | 1295 | nfs_end_page_writeback(req); |
1295 | nfs_release_request(req); | 1296 | nfs_release_request(req); |
@@ -1348,11 +1349,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) | |||
1348 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); | 1349 | NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); |
1349 | } | 1350 | } |
1350 | 1351 | ||
1351 | static void nfs_writeback_release_common(struct nfs_pgio_header *hdr) | ||
1352 | { | ||
1353 | /* do nothing! */ | ||
1354 | } | ||
1355 | |||
1356 | /* | 1352 | /* |
1357 | * Special version of should_remove_suid() that ignores capabilities. | 1353 | * Special version of should_remove_suid() that ignores capabilities. |
1358 | */ | 1354 | */ |
@@ -1556,7 +1552,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, | |||
1556 | /* Set up the initial task struct. */ | 1552 | /* Set up the initial task struct. */ |
1557 | nfs_ops->commit_setup(data, &msg); | 1553 | nfs_ops->commit_setup(data, &msg); |
1558 | 1554 | ||
1559 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 1555 | dprintk("NFS: initiated commit call\n"); |
1560 | 1556 | ||
1561 | nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, | 1557 | nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client, |
1562 | NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); | 1558 | NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg); |
@@ -2013,7 +2009,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { | |||
2013 | .rw_mode = FMODE_WRITE, | 2009 | .rw_mode = FMODE_WRITE, |
2014 | .rw_alloc_header = nfs_writehdr_alloc, | 2010 | .rw_alloc_header = nfs_writehdr_alloc, |
2015 | .rw_free_header = nfs_writehdr_free, | 2011 | .rw_free_header = nfs_writehdr_free, |
2016 | .rw_release = nfs_writeback_release_common, | ||
2017 | .rw_done = nfs_writeback_done, | 2012 | .rw_done = nfs_writeback_done, |
2018 | .rw_result = nfs_writeback_result, | 2013 | .rw_result = nfs_writeback_result, |
2019 | .rw_initiate = nfs_initiate_write, | 2014 | .rw_initiate = nfs_initiate_write, |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 32201c269890..b8e72aad919c 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -500,6 +500,7 @@ enum { | |||
500 | NFSPROC4_CLNT_SEEK, | 500 | NFSPROC4_CLNT_SEEK, |
501 | NFSPROC4_CLNT_ALLOCATE, | 501 | NFSPROC4_CLNT_ALLOCATE, |
502 | NFSPROC4_CLNT_DEALLOCATE, | 502 | NFSPROC4_CLNT_DEALLOCATE, |
503 | NFSPROC4_CLNT_LAYOUTSTATS, | ||
503 | }; | 504 | }; |
504 | 505 | ||
505 | /* nfs41 types */ | 506 | /* nfs41 types */ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b95f914ce083..f91b5ade30c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -219,6 +219,7 @@ struct nfs_inode { | |||
219 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ | 219 | #define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ |
220 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ | 220 | #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ |
221 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ | 221 | #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ |
222 | #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ | ||
222 | 223 | ||
223 | static inline struct nfs_inode *NFS_I(const struct inode *inode) | 224 | static inline struct nfs_inode *NFS_I(const struct inode *inode) |
224 | { | 225 | { |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5e1273d4de14..a2ea1491d3df 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h | |||
@@ -237,5 +237,6 @@ struct nfs_server { | |||
237 | #define NFS_CAP_SEEK (1U << 19) | 237 | #define NFS_CAP_SEEK (1U << 19) |
238 | #define NFS_CAP_ALLOCATE (1U << 20) | 238 | #define NFS_CAP_ALLOCATE (1U << 20) |
239 | #define NFS_CAP_DEALLOCATE (1U << 21) | 239 | #define NFS_CAP_DEALLOCATE (1U << 21) |
240 | #define NFS_CAP_LAYOUTSTATS (1U << 22) | ||
240 | 241 | ||
241 | #endif | 242 | #endif |
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3eb072dbce83..f2f650f136ee 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h | |||
@@ -67,7 +67,6 @@ struct nfs_rw_ops { | |||
67 | const fmode_t rw_mode; | 67 | const fmode_t rw_mode; |
68 | struct nfs_pgio_header *(*rw_alloc_header)(void); | 68 | struct nfs_pgio_header *(*rw_alloc_header)(void); |
69 | void (*rw_free_header)(struct nfs_pgio_header *); | 69 | void (*rw_free_header)(struct nfs_pgio_header *); |
70 | void (*rw_release)(struct nfs_pgio_header *); | ||
71 | int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, | 70 | int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, |
72 | struct inode *); | 71 | struct inode *); |
73 | void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); | 72 | void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); |
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 93ab6071bbe9..7bbe50504211 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h | |||
@@ -316,6 +316,49 @@ struct nfs4_layoutreturn { | |||
316 | int rpc_status; | 316 | int rpc_status; |
317 | }; | 317 | }; |
318 | 318 | ||
319 | #define PNFS_LAYOUTSTATS_MAXSIZE 256 | ||
320 | |||
321 | struct nfs42_layoutstat_args; | ||
322 | struct nfs42_layoutstat_devinfo; | ||
323 | typedef void (*layoutstats_encode_t)(struct xdr_stream *, | ||
324 | struct nfs42_layoutstat_args *, | ||
325 | struct nfs42_layoutstat_devinfo *); | ||
326 | |||
327 | /* Per file per deviceid layoutstats */ | ||
328 | struct nfs42_layoutstat_devinfo { | ||
329 | struct nfs4_deviceid dev_id; | ||
330 | __u64 offset; | ||
331 | __u64 length; | ||
332 | __u64 read_count; | ||
333 | __u64 read_bytes; | ||
334 | __u64 write_count; | ||
335 | __u64 write_bytes; | ||
336 | __u32 layout_type; | ||
337 | layoutstats_encode_t layoutstats_encode; | ||
338 | void *layout_private; | ||
339 | }; | ||
340 | |||
341 | struct nfs42_layoutstat_args { | ||
342 | struct nfs4_sequence_args seq_args; | ||
343 | struct nfs_fh *fh; | ||
344 | struct inode *inode; | ||
345 | nfs4_stateid stateid; | ||
346 | int num_dev; | ||
347 | struct nfs42_layoutstat_devinfo *devinfo; | ||
348 | }; | ||
349 | |||
350 | struct nfs42_layoutstat_res { | ||
351 | struct nfs4_sequence_res seq_res; | ||
352 | int num_dev; | ||
353 | int rpc_status; | ||
354 | }; | ||
355 | |||
356 | struct nfs42_layoutstat_data { | ||
357 | struct inode *inode; | ||
358 | struct nfs42_layoutstat_args args; | ||
359 | struct nfs42_layoutstat_res res; | ||
360 | }; | ||
361 | |||
319 | struct stateowner_id { | 362 | struct stateowner_id { |
320 | __u64 create_time; | 363 | __u64 create_time; |
321 | __u32 uniquifier; | 364 | __u32 uniquifier; |
@@ -984,17 +1027,14 @@ struct nfs4_readlink_res { | |||
984 | struct nfs4_sequence_res seq_res; | 1027 | struct nfs4_sequence_res seq_res; |
985 | }; | 1028 | }; |
986 | 1029 | ||
987 | #define NFS4_SETCLIENTID_NAMELEN (127) | ||
988 | struct nfs4_setclientid { | 1030 | struct nfs4_setclientid { |
989 | const nfs4_verifier * sc_verifier; | 1031 | const nfs4_verifier * sc_verifier; |
990 | unsigned int sc_name_len; | ||
991 | char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; | ||
992 | u32 sc_prog; | 1032 | u32 sc_prog; |
993 | unsigned int sc_netid_len; | 1033 | unsigned int sc_netid_len; |
994 | char sc_netid[RPCBIND_MAXNETIDLEN + 1]; | 1034 | char sc_netid[RPCBIND_MAXNETIDLEN + 1]; |
995 | unsigned int sc_uaddr_len; | 1035 | unsigned int sc_uaddr_len; |
996 | char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; | 1036 | char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; |
997 | u32 sc_cb_ident; | 1037 | struct nfs_client *sc_clnt; |
998 | struct rpc_cred *sc_cred; | 1038 | struct rpc_cred *sc_cred; |
999 | }; | 1039 | }; |
1000 | 1040 | ||
@@ -1142,12 +1182,9 @@ struct nfs41_state_protection { | |||
1142 | struct nfs4_op_map allow; | 1182 | struct nfs4_op_map allow; |
1143 | }; | 1183 | }; |
1144 | 1184 | ||
1145 | #define NFS4_EXCHANGE_ID_LEN (48) | ||
1146 | struct nfs41_exchange_id_args { | 1185 | struct nfs41_exchange_id_args { |
1147 | struct nfs_client *client; | 1186 | struct nfs_client *client; |
1148 | nfs4_verifier *verifier; | 1187 | nfs4_verifier *verifier; |
1149 | unsigned int id_len; | ||
1150 | char id[NFS4_EXCHANGE_ID_LEN]; | ||
1151 | u32 flags; | 1188 | u32 flags; |
1152 | struct nfs41_state_protection state_protect; | 1189 | struct nfs41_state_protection state_protect; |
1153 | }; | 1190 | }; |
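Returning to the layoutstats types added at the top of this header: a layout driver fills one nfs42_layoutstat_devinfo per deviceid and supplies a driver-private XDR encoder through layoutstats_encode. A sketch of populating one entry; the helper is hypothetical, the field names come from the struct above, and LAYOUT_FLEX_FILES stands in for whatever layout type the driver reports:

	static void example_fill_devinfo(struct nfs42_layoutstat_devinfo *d,
					 const struct nfs4_deviceid *dev_id,
					 __u64 offset, __u64 length)
	{
		d->dev_id = *dev_id;
		d->offset = offset;		/* byte range this entry covers */
		d->length = length;
		d->read_count = 0;		/* cumulative I/O counters */
		d->read_bytes = 0;
		d->write_count = 0;
		d->write_bytes = 0;
		d->layout_type = LAYOUT_FLEX_FILES;	/* assumed layout type */
		d->layoutstats_encode = NULL;	/* driver-private XDR encoder */
		d->layout_private = NULL;
	}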
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 2ca67b55e0fe..8df43c9f11dc 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h | |||
@@ -37,7 +37,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); | |||
37 | void xprt_free_bc_request(struct rpc_rqst *req); | 37 | void xprt_free_bc_request(struct rpc_rqst *req); |
38 | int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); | 38 | int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); |
39 | void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); | 39 | void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); |
40 | int bc_send(struct rpc_rqst *req); | ||
41 | 40 | ||
42 | /* | 41 | /* |
43 | * Determine if a shared backchannel is in use | 42 | * Determine if a shared backchannel is in use |
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h | |||
@@ -56,6 +56,7 @@ struct rpc_clnt { | |||
56 | struct rpc_rtt * cl_rtt; /* RTO estimator data */ | 56 | struct rpc_rtt * cl_rtt; /* RTO estimator data */ |
57 | const struct rpc_timeout *cl_timeout; /* Timeout strategy */ | 57 | const struct rpc_timeout *cl_timeout; /* Timeout strategy */ |
58 | 58 | ||
59 | atomic_t cl_swapper; /* swapfile count */ | ||
59 | int cl_nodelen; /* nodename length */ | 60 | int cl_nodelen; /* nodename length */ |
60 | char cl_nodename[UNX_MAXNODENAME+1]; | 61 | char cl_nodename[UNX_MAXNODENAME+1]; |
61 | struct rpc_pipe_dir_head cl_pipedir_objects; | 62 | struct rpc_pipe_dir_head cl_pipedir_objects; |
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 5f1e6bd4c316..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h | |||
@@ -205,8 +205,7 @@ struct rpc_wait_queue { | |||
205 | */ | 205 | */ |
206 | struct rpc_task *rpc_new_task(const struct rpc_task_setup *); | 206 | struct rpc_task *rpc_new_task(const struct rpc_task_setup *); |
207 | struct rpc_task *rpc_run_task(const struct rpc_task_setup *); | 207 | struct rpc_task *rpc_run_task(const struct rpc_task_setup *); |
208 | struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, | 208 | struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req); |
209 | const struct rpc_call_ops *ops); | ||
210 | void rpc_put_task(struct rpc_task *); | 209 | void rpc_put_task(struct rpc_task *); |
211 | void rpc_put_task_async(struct rpc_task *); | 210 | void rpc_put_task_async(struct rpc_task *); |
212 | void rpc_exit_task(struct rpc_task *); | 211 | void rpc_exit_task(struct rpc_task *); |
@@ -269,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, | |||
269 | } | 268 | } |
270 | #endif | 269 | #endif |
271 | 270 | ||
271 | #if IS_ENABLED(CONFIG_SUNRPC_SWAP) | ||
272 | int rpc_clnt_swap_activate(struct rpc_clnt *clnt); | ||
273 | void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); | ||
274 | #else | ||
275 | static inline int | ||
276 | rpc_clnt_swap_activate(struct rpc_clnt *clnt) | ||
277 | { | ||
278 | return -EINVAL; | ||
279 | } | ||
280 | |||
281 | static inline void | ||
282 | rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) | ||
283 | { | ||
284 | } | ||
285 | #endif /* CONFIG_SUNRPC_SWAP */ | ||
286 | |||
272 | #endif /* _LINUX_SUNRPC_SCHED_H_ */ | 287 | #endif /* _LINUX_SUNRPC_SCHED_H_ */ |
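The rpc_clnt_swap_activate()/rpc_clnt_swap_deactivate() pair above is the new entry point for swap-over-NFS; note that the !CONFIG_SUNRPC_SWAP stub fails with -EINVAL rather than silently succeeding. A sketch of the intended caller shape, loosely modeled on an NFS swapfile activation hook (the function name is illustrative):

	static int example_swap_activate(struct file *file)
	{
		struct rpc_clnt *clnt = NFS_CLIENT(file_inode(file));

		/* Fails with -EINVAL when CONFIG_SUNRPC_SWAP is off */
		return rpc_clnt_swap_activate(clnt);
	}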
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8b93ef53df3c..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h | |||
@@ -133,6 +133,9 @@ struct rpc_xprt_ops { | |||
133 | void (*close)(struct rpc_xprt *xprt); | 133 | void (*close)(struct rpc_xprt *xprt); |
134 | void (*destroy)(struct rpc_xprt *xprt); | 134 | void (*destroy)(struct rpc_xprt *xprt); |
135 | void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); | 135 | void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); |
136 | int (*enable_swap)(struct rpc_xprt *xprt); | ||
137 | void (*disable_swap)(struct rpc_xprt *xprt); | ||
138 | void (*inject_disconnect)(struct rpc_xprt *xprt); | ||
136 | }; | 139 | }; |
137 | 140 | ||
138 | /* | 141 | /* |
@@ -180,7 +183,7 @@ struct rpc_xprt { | |||
180 | atomic_t num_reqs; /* total slots */ | 183 | atomic_t num_reqs; /* total slots */ |
181 | unsigned long state; /* transport state */ | 184 | unsigned long state; /* transport state */ |
182 | unsigned char resvport : 1; /* use a reserved port */ | 185 | unsigned char resvport : 1; /* use a reserved port */ |
183 | unsigned int swapper; /* we're swapping over this | 186 | atomic_t swapper; /* we're swapping over this |
184 | transport */ | 187 | transport */ |
185 | unsigned int bind_index; /* bind function index */ | 188 | unsigned int bind_index; /* bind function index */ |
186 | 189 | ||
@@ -212,7 +215,8 @@ struct rpc_xprt { | |||
212 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 215 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
213 | struct svc_serv *bc_serv; /* The RPC service which will */ | 216 | struct svc_serv *bc_serv; /* The RPC service which will */ |
214 | /* process the callback */ | 217 | /* process the callback */ |
215 | unsigned int bc_alloc_count; /* Total number of preallocs */ | 218 | int bc_alloc_count; /* Total number of preallocs */ |
219 | atomic_t bc_free_slots; | ||
216 | spinlock_t bc_pa_lock; /* Protects the preallocated | 220 | spinlock_t bc_pa_lock; /* Protects the preallocated |
217 | * items */ | 221 | * items */ |
218 | struct list_head bc_pa_list; /* List of preallocated | 222 | struct list_head bc_pa_list; /* List of preallocated |
@@ -241,6 +245,7 @@ struct rpc_xprt { | |||
241 | const char *address_strings[RPC_DISPLAY_MAX]; | 245 | const char *address_strings[RPC_DISPLAY_MAX]; |
242 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 246 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
243 | struct dentry *debugfs; /* debugfs directory */ | 247 | struct dentry *debugfs; /* debugfs directory */ |
248 | atomic_t inject_disconnect; | ||
244 | #endif | 249 | #endif |
245 | }; | 250 | }; |
246 | 251 | ||
@@ -327,6 +332,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * | |||
327 | return p + xprt->tsh_size; | 332 | return p + xprt->tsh_size; |
328 | } | 333 | } |
329 | 334 | ||
335 | static inline int | ||
336 | xprt_enable_swap(struct rpc_xprt *xprt) | ||
337 | { | ||
338 | return xprt->ops->enable_swap(xprt); | ||
339 | } | ||
340 | |||
341 | static inline void | ||
342 | xprt_disable_swap(struct rpc_xprt *xprt) | ||
343 | { | ||
344 | xprt->ops->disable_swap(xprt); | ||
345 | } | ||
346 | |||
330 | /* | 347 | /* |
331 | * Transport switch helper functions | 348 | * Transport switch helper functions |
332 | */ | 349 | */ |
@@ -345,7 +362,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); | |||
345 | void xprt_disconnect_done(struct rpc_xprt *xprt); | 362 | void xprt_disconnect_done(struct rpc_xprt *xprt); |
346 | void xprt_force_disconnect(struct rpc_xprt *xprt); | 363 | void xprt_force_disconnect(struct rpc_xprt *xprt); |
347 | void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); | 364 | void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); |
348 | int xs_swapper(struct rpc_xprt *xprt, int enable); | ||
349 | 365 | ||
350 | bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); | 366 | bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); |
351 | void xprt_unlock_connect(struct rpc_xprt *, void *); | 367 | void xprt_unlock_connect(struct rpc_xprt *, void *); |
@@ -431,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) | |||
431 | return test_and_set_bit(XPRT_BINDING, &xprt->state); | 447 | return test_and_set_bit(XPRT_BINDING, &xprt->state); |
432 | } | 448 | } |
433 | 449 | ||
450 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | ||
451 | extern unsigned int rpc_inject_disconnect; | ||
452 | static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) | ||
453 | { | ||
454 | if (!rpc_inject_disconnect) | ||
455 | return; | ||
456 | if (atomic_dec_return(&xprt->inject_disconnect)) | ||
457 | return; | ||
458 | atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); | ||
459 | xprt->ops->inject_disconnect(xprt); | ||
460 | } | ||
461 | #else | ||
462 | static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) | ||
463 | { | ||
464 | } | ||
465 | #endif | ||
466 | |||
434 | #endif /* __KERNEL__*/ | 467 | #endif /* __KERNEL__*/ |
435 | 468 | ||
436 | #endif /* _LINUX_SUNRPC_XPRT_H */ | 469 | #endif /* _LINUX_SUNRPC_XPRT_H */ |
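xprt_inject_disconnect() above is a countdown: with rpc_inject_disconnect set to N, every Nth instrumented transport operation fires one ->inject_disconnect() and re-arms the counter. A transport implementation can be as small as this sketch, paraphrased from the socket transport rather than quoted:

	static void example_inject_disconnect(struct rpc_xprt *xprt)
	{
		dprintk("RPC:       injecting disconnect on xprt=%p\n", xprt);
		xprt_disconnect_done(xprt);
	}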
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index c984c85981ea..b17613052cc3 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h | |||
@@ -56,7 +56,8 @@ | |||
56 | 56 | ||
57 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ | 57 | #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ |
58 | 58 | ||
59 | /* memory registration strategies */ | 59 | /* Memory registration strategies, by number. |
60 | * This is part of a kernel / user space API. Do not remove. */ | ||
60 | enum rpcrdma_memreg { | 61 | enum rpcrdma_memreg { |
61 | RPCRDMA_BOUNCEBUFFERS = 0, | 62 | RPCRDMA_BOUNCEBUFFERS = 0, |
62 | RPCRDMA_REGISTER, | 63 | RPCRDMA_REGISTER, |
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 936ad0a15371..b512fbd9d79a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -14,6 +14,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | |||
14 | sunrpc_syms.o cache.o rpc_pipe.o \ | 14 | sunrpc_syms.o cache.o rpc_pipe.o \ |
15 | svc_xprt.o | 15 | svc_xprt.o |
16 | sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o | 16 | sunrpc-$(CONFIG_SUNRPC_DEBUG) += debugfs.o |
17 | sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o | 17 | sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o |
18 | sunrpc-$(CONFIG_PROC_FS) += stats.o | 18 | sunrpc-$(CONFIG_PROC_FS) += stats.o |
19 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o | 19 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o |
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9dd0ea8db463..9825ff0f91d6 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c | |||
@@ -37,16 +37,18 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
37 | */ | 37 | */ |
38 | static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) | 38 | static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) |
39 | { | 39 | { |
40 | return xprt->bc_alloc_count > 0; | 40 | return xprt->bc_alloc_count < atomic_read(&xprt->bc_free_slots); |
41 | } | 41 | } |
42 | 42 | ||
43 | static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) | 43 | static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) |
44 | { | 44 | { |
45 | atomic_add(n, &xprt->bc_free_slots); | ||
45 | xprt->bc_alloc_count += n; | 46 | xprt->bc_alloc_count += n; |
46 | } | 47 | } |
47 | 48 | ||
48 | static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) | 49 | static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) |
49 | { | 50 | { |
51 | atomic_sub(n, &xprt->bc_free_slots); | ||
50 | return xprt->bc_alloc_count -= n; | 52 | return xprt->bc_alloc_count -= n; |
51 | } | 53 | } |
52 | 54 | ||
@@ -60,13 +62,62 @@ static void xprt_free_allocation(struct rpc_rqst *req) | |||
60 | 62 | ||
61 | dprintk("RPC: free allocations for req= %p\n", req); | 63 | dprintk("RPC: free allocations for req= %p\n", req); |
62 | WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); | 64 | WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); |
63 | xbufp = &req->rq_private_buf; | 65 | xbufp = &req->rq_rcv_buf; |
64 | free_page((unsigned long)xbufp->head[0].iov_base); | 66 | free_page((unsigned long)xbufp->head[0].iov_base); |
65 | xbufp = &req->rq_snd_buf; | 67 | xbufp = &req->rq_snd_buf; |
66 | free_page((unsigned long)xbufp->head[0].iov_base); | 68 | free_page((unsigned long)xbufp->head[0].iov_base); |
67 | kfree(req); | 69 | kfree(req); |
68 | } | 70 | } |
69 | 71 | ||
72 | static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) | ||
73 | { | ||
74 | struct page *page; | ||
75 | /* Preallocate one page to back this XDR buffer */ | ||
76 | page = alloc_page(gfp_flags); | ||
77 | if (page == NULL) | ||
78 | return -ENOMEM; | ||
79 | buf->head[0].iov_base = page_address(page); | ||
80 | buf->head[0].iov_len = PAGE_SIZE; | ||
81 | buf->tail[0].iov_base = NULL; | ||
82 | buf->tail[0].iov_len = 0; | ||
83 | buf->page_len = 0; | ||
84 | buf->len = 0; | ||
85 | buf->buflen = PAGE_SIZE; | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | static | ||
90 | struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) | ||
91 | { | ||
92 | struct rpc_rqst *req; | ||
93 | |||
94 | /* Pre-allocate one backchannel rpc_rqst */ | ||
95 | req = kzalloc(sizeof(*req), gfp_flags); | ||
96 | if (req == NULL) | ||
97 | return NULL; | ||
98 | |||
99 | req->rq_xprt = xprt; | ||
100 | INIT_LIST_HEAD(&req->rq_list); | ||
101 | INIT_LIST_HEAD(&req->rq_bc_list); | ||
102 | |||
103 | /* Preallocate one XDR receive buffer */ | ||
104 | if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) { | ||
105 | printk(KERN_ERR "Failed to create bc receive xbuf\n"); | ||
106 | goto out_free; | ||
107 | } | ||
108 | req->rq_rcv_buf.len = PAGE_SIZE; | ||
109 | |||
110 | /* Preallocate one XDR send buffer */ | ||
111 | if (xprt_alloc_xdr_buf(&req->rq_snd_buf, gfp_flags) < 0) { | ||
112 | printk(KERN_ERR "Failed to create bc snd xbuf\n"); | ||
113 | goto out_free; | ||
114 | } | ||
115 | return req; | ||
116 | out_free: | ||
117 | xprt_free_allocation(req); | ||
118 | return NULL; | ||
119 | } | ||
120 | |||
70 | /* | 121 | /* |
71 | * Preallocate up to min_reqs structures and related buffers for use | 122 | * Preallocate up to min_reqs structures and related buffers for use |
72 | * by the backchannel. This function can be called multiple times | 123 | * by the backchannel. This function can be called multiple times |
@@ -87,9 +138,7 @@ static void xprt_free_allocation(struct rpc_rqst *req) | |||
87 | */ | 138 | */ |
88 | int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) | 139 | int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) |
89 | { | 140 | { |
90 | struct page *page_rcv = NULL, *page_snd = NULL; | 141 | struct rpc_rqst *req; |
91 | struct xdr_buf *xbufp = NULL; | ||
92 | struct rpc_rqst *req, *tmp; | ||
93 | struct list_head tmp_list; | 142 | struct list_head tmp_list; |
94 | int i; | 143 | int i; |
95 | 144 | ||
@@ -106,7 +155,7 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) | |||
106 | INIT_LIST_HEAD(&tmp_list); | 155 | INIT_LIST_HEAD(&tmp_list); |
107 | for (i = 0; i < min_reqs; i++) { | 156 | for (i = 0; i < min_reqs; i++) { |
108 | /* Pre-allocate one backchannel rpc_rqst */ | 157 | /* Pre-allocate one backchannel rpc_rqst */ |
109 | req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); | 158 | req = xprt_alloc_bc_req(xprt, GFP_KERNEL); |
110 | if (req == NULL) { | 159 | if (req == NULL) { |
111 | printk(KERN_ERR "Failed to create bc rpc_rqst\n"); | 160 | printk(KERN_ERR "Failed to create bc rpc_rqst\n"); |
112 | goto out_free; | 161 | goto out_free; |
@@ -115,41 +164,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) | |||
115 | /* Add the allocated buffer to the tmp list */ | 164 | /* Add the allocated buffer to the tmp list */ |
116 | dprintk("RPC: adding req= %p\n", req); | 165 | dprintk("RPC: adding req= %p\n", req); |
117 | list_add(&req->rq_bc_pa_list, &tmp_list); | 166 | list_add(&req->rq_bc_pa_list, &tmp_list); |
118 | |||
119 | req->rq_xprt = xprt; | ||
120 | INIT_LIST_HEAD(&req->rq_list); | ||
121 | INIT_LIST_HEAD(&req->rq_bc_list); | ||
122 | |||
123 | /* Preallocate one XDR receive buffer */ | ||
124 | page_rcv = alloc_page(GFP_KERNEL); | ||
125 | if (page_rcv == NULL) { | ||
126 | printk(KERN_ERR "Failed to create bc receive xbuf\n"); | ||
127 | goto out_free; | ||
128 | } | ||
129 | xbufp = &req->rq_rcv_buf; | ||
130 | xbufp->head[0].iov_base = page_address(page_rcv); | ||
131 | xbufp->head[0].iov_len = PAGE_SIZE; | ||
132 | xbufp->tail[0].iov_base = NULL; | ||
133 | xbufp->tail[0].iov_len = 0; | ||
134 | xbufp->page_len = 0; | ||
135 | xbufp->len = PAGE_SIZE; | ||
136 | xbufp->buflen = PAGE_SIZE; | ||
137 | |||
138 | /* Preallocate one XDR send buffer */ | ||
139 | page_snd = alloc_page(GFP_KERNEL); | ||
140 | if (page_snd == NULL) { | ||
141 | printk(KERN_ERR "Failed to create bc snd xbuf\n"); | ||
142 | goto out_free; | ||
143 | } | ||
144 | |||
145 | xbufp = &req->rq_snd_buf; | ||
146 | xbufp->head[0].iov_base = page_address(page_snd); | ||
147 | xbufp->head[0].iov_len = 0; | ||
148 | xbufp->tail[0].iov_base = NULL; | ||
149 | xbufp->tail[0].iov_len = 0; | ||
150 | xbufp->page_len = 0; | ||
151 | xbufp->len = 0; | ||
152 | xbufp->buflen = PAGE_SIZE; | ||
153 | } | 167 | } |
154 | 168 | ||
155 | /* | 169 | /* |
@@ -167,7 +181,10 @@ out_free: | |||
167 | /* | 181 | /* |
168 | * Memory allocation failed, free the temporary list | 182 | * Memory allocation failed, free the temporary list |
169 | */ | 183 | */ |
170 | list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { | 184 | while (!list_empty(&tmp_list)) { |
185 | req = list_first_entry(&tmp_list, | ||
186 | struct rpc_rqst, | ||
187 | rq_bc_pa_list); | ||
171 | list_del(&req->rq_bc_pa_list); | 188 | list_del(&req->rq_bc_pa_list); |
172 | xprt_free_allocation(req); | 189 | xprt_free_allocation(req); |
173 | } | 190 | } |
@@ -217,9 +234,15 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) | |||
217 | struct rpc_rqst *req = NULL; | 234 | struct rpc_rqst *req = NULL; |
218 | 235 | ||
219 | dprintk("RPC: allocate a backchannel request\n"); | 236 | dprintk("RPC: allocate a backchannel request\n"); |
220 | if (list_empty(&xprt->bc_pa_list)) | 237 | if (atomic_read(&xprt->bc_free_slots) <= 0) |
221 | goto not_found; | 238 | goto not_found; |
222 | 239 | if (list_empty(&xprt->bc_pa_list)) { | |
240 | req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); | ||
241 | if (!req) | ||
242 | goto not_found; | ||
243 | /* Note: this 'free' request adds it to xprt->bc_pa_list */ | ||
244 | xprt_free_bc_request(req); | ||
245 | } | ||
223 | req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, | 246 | req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, |
224 | rq_bc_pa_list); | 247 | rq_bc_pa_list); |
225 | req->rq_reply_bytes_recvd = 0; | 248 | req->rq_reply_bytes_recvd = 0; |
@@ -245,11 +268,21 @@ void xprt_free_bc_request(struct rpc_rqst *req) | |||
245 | 268 | ||
246 | req->rq_connect_cookie = xprt->connect_cookie - 1; | 269 | req->rq_connect_cookie = xprt->connect_cookie - 1; |
247 | smp_mb__before_atomic(); | 270 | smp_mb__before_atomic(); |
248 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); | ||
249 | clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); | 271 | clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); |
250 | smp_mb__after_atomic(); | 272 | smp_mb__after_atomic(); |
251 | 273 | ||
252 | if (!xprt_need_to_requeue(xprt)) { | 274 | /* |
275 | * Return it to the list of preallocations so that it | ||
276 | * may be reused by a new callback request. | ||
277 | */ | ||
278 | spin_lock_bh(&xprt->bc_pa_lock); | ||
279 | if (xprt_need_to_requeue(xprt)) { | ||
280 | list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); | ||
281 | xprt->bc_alloc_count++; | ||
282 | req = NULL; | ||
283 | } | ||
284 | spin_unlock_bh(&xprt->bc_pa_lock); | ||
285 | if (req != NULL) { | ||
253 | /* | 286 | /* |
254 | * The last remaining session was destroyed while this | 287 | * The last remaining session was destroyed while this |
255 | * entry was in use. Free the entry and don't attempt | 288 | * entry was in use. Free the entry and don't attempt |
@@ -260,14 +293,6 @@ void xprt_free_bc_request(struct rpc_rqst *req) | |||
260 | xprt_free_allocation(req); | 293 | xprt_free_allocation(req); |
261 | return; | 294 | return; |
262 | } | 295 | } |
263 | |||
264 | /* | ||
265 | * Return it to the list of preallocations so that it | ||
266 | * may be reused by a new callback request. | ||
267 | */ | ||
268 | spin_lock_bh(&xprt->bc_pa_lock); | ||
269 | list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); | ||
270 | spin_unlock_bh(&xprt->bc_pa_lock); | ||
271 | } | 296 | } |
272 | 297 | ||
273 | /* | 298 | /* |
@@ -311,6 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) | |||
311 | 336 | ||
312 | spin_lock(&xprt->bc_pa_lock); | 337 | spin_lock(&xprt->bc_pa_lock); |
313 | list_del(&req->rq_bc_pa_list); | 338 | list_del(&req->rq_bc_pa_list); |
339 | xprt->bc_alloc_count--; | ||
314 | spin_unlock(&xprt->bc_pa_lock); | 340 | spin_unlock(&xprt->bc_pa_lock); |
315 | 341 | ||
316 | req->rq_private_buf.len = copied; | 342 | req->rq_private_buf.len = copied; |
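With bc_free_slots tracking the session's slot budget and bc_alloc_count tracking entries parked on bc_pa_list, an empty preallocation pool is no longer fatal: xprt_alloc_bc_request() now falls back to a GFP_ATOMIC allocation while free slots remain. An illustrative flow, not a real call site:

	static void example_bc_roundtrip(struct rpc_xprt *xprt, __be32 xid)
	{
		struct rpc_rqst *req = xprt_lookup_bc_request(xprt, xid);

		if (!req)
			return;		/* no free backchannel slots */

		/* ... copy the incoming callback data into req ... */

		/* On error paths the rqst returns to the pool, or is
		 * freed outright if the session went away: */
		xprt_free_bc_request(req);
	}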
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c deleted file mode 100644 index 15c7a8a1c24f..000000000000 --- a/net/sunrpc/bc_svc.c +++ /dev/null | |||
@@ -1,63 +0,0 @@ | |||
1 | /****************************************************************************** | ||
2 | |||
3 | (c) 2007 Network Appliance, Inc. All Rights Reserved. | ||
4 | (c) 2009 NetApp. All Rights Reserved. | ||
5 | |||
6 | NetApp provides this source code under the GPL v2 License. | ||
7 | The GPL v2 license is available at | ||
8 | http://opensource.org/licenses/gpl-license.php. | ||
9 | |||
10 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
11 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
12 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
13 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
14 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
15 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
16 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
17 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
18 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
19 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
20 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
21 | |||
22 | ******************************************************************************/ | ||
23 | |||
24 | /* | ||
25 | * The NFSv4.1 callback service helper routines. | ||
26 | * They implement the transport level processing required to send the | ||
27 | * reply over an existing open connection previously established by the client. | ||
28 | */ | ||
29 | |||
30 | #include <linux/module.h> | ||
31 | |||
32 | #include <linux/sunrpc/xprt.h> | ||
33 | #include <linux/sunrpc/sched.h> | ||
34 | #include <linux/sunrpc/bc_xprt.h> | ||
35 | |||
36 | #define RPCDBG_FACILITY RPCDBG_SVCDSP | ||
37 | |||
38 | /* Empty callback ops */ | ||
39 | static const struct rpc_call_ops nfs41_callback_ops = { | ||
40 | }; | ||
41 | |||
42 | |||
43 | /* | ||
44 | * Send the callback reply | ||
45 | */ | ||
46 | int bc_send(struct rpc_rqst *req) | ||
47 | { | ||
48 | struct rpc_task *task; | ||
49 | int ret; | ||
50 | |||
51 | dprintk("RPC: bc_send req= %p\n", req); | ||
52 | task = rpc_run_bc_task(req, &nfs41_callback_ops); | ||
53 | if (IS_ERR(task)) | ||
54 | ret = PTR_ERR(task); | ||
55 | else { | ||
56 | WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); | ||
57 | ret = task->tk_status; | ||
58 | rpc_put_task(task); | ||
59 | } | ||
60 | dprintk("RPC: bc_send ret= %d\n", ret); | ||
61 | return ret; | ||
62 | } | ||
63 | |||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e6ce1517367f..cbc6af923dd1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) | |||
891 | task->tk_flags |= RPC_TASK_SOFT; | 891 | task->tk_flags |= RPC_TASK_SOFT; |
892 | if (clnt->cl_noretranstimeo) | 892 | if (clnt->cl_noretranstimeo) |
893 | task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; | 893 | task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; |
894 | if (sk_memalloc_socks()) { | 894 | if (atomic_read(&clnt->cl_swapper)) |
895 | struct rpc_xprt *xprt; | 895 | task->tk_flags |= RPC_TASK_SWAPPER; |
896 | |||
897 | rcu_read_lock(); | ||
898 | xprt = rcu_dereference(clnt->cl_xprt); | ||
899 | if (xprt->swapper) | ||
900 | task->tk_flags |= RPC_TASK_SWAPPER; | ||
901 | rcu_read_unlock(); | ||
902 | } | ||
903 | /* Add to the client's list of all tasks */ | 896 | /* Add to the client's list of all tasks */ |
904 | spin_lock(&clnt->cl_lock); | 897 | spin_lock(&clnt->cl_lock); |
905 | list_add_tail(&task->tk_task, &clnt->cl_tasks); | 898 | list_add_tail(&task->tk_task, &clnt->cl_tasks); |
@@ -1031,15 +1024,14 @@ EXPORT_SYMBOL_GPL(rpc_call_async); | |||
1031 | * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run | 1024 | * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run |
1032 | * rpc_execute against it | 1025 | * rpc_execute against it |
1033 | * @req: RPC request | 1026 | * @req: RPC request |
1034 | * @tk_ops: RPC call ops | ||
1035 | */ | 1027 | */ |
1036 | struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, | 1028 | struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req) |
1037 | const struct rpc_call_ops *tk_ops) | ||
1038 | { | 1029 | { |
1039 | struct rpc_task *task; | 1030 | struct rpc_task *task; |
1040 | struct xdr_buf *xbufp = &req->rq_snd_buf; | 1031 | struct xdr_buf *xbufp = &req->rq_snd_buf; |
1041 | struct rpc_task_setup task_setup_data = { | 1032 | struct rpc_task_setup task_setup_data = { |
1042 | .callback_ops = tk_ops, | 1033 | .callback_ops = &rpc_default_ops, |
1034 | .flags = RPC_TASK_SOFTCONN, | ||
1043 | }; | 1035 | }; |
1044 | 1036 | ||
1045 | dprintk("RPC: rpc_run_bc_task req= %p\n", req); | 1037 | dprintk("RPC: rpc_run_bc_task req= %p\n", req); |
@@ -1614,6 +1606,7 @@ call_allocate(struct rpc_task *task) | |||
1614 | req->rq_callsize + req->rq_rcvsize); | 1606 | req->rq_callsize + req->rq_rcvsize); |
1615 | if (req->rq_buffer != NULL) | 1607 | if (req->rq_buffer != NULL) |
1616 | return; | 1608 | return; |
1609 | xprt_inject_disconnect(xprt); | ||
1617 | 1610 | ||
1618 | dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); | 1611 | dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); |
1619 | 1612 | ||
@@ -1951,33 +1944,36 @@ call_bc_transmit(struct rpc_task *task) | |||
1951 | { | 1944 | { |
1952 | struct rpc_rqst *req = task->tk_rqstp; | 1945 | struct rpc_rqst *req = task->tk_rqstp; |
1953 | 1946 | ||
1954 | if (!xprt_prepare_transmit(task)) { | 1947 | if (!xprt_prepare_transmit(task)) |
1955 | /* | 1948 | goto out_retry; |
1956 | * Could not reserve the transport. Try again after the | ||
1957 | * transport is released. | ||
1958 | */ | ||
1959 | task->tk_status = 0; | ||
1960 | task->tk_action = call_bc_transmit; | ||
1961 | return; | ||
1962 | } | ||
1963 | 1949 | ||
1964 | task->tk_action = rpc_exit_task; | ||
1965 | if (task->tk_status < 0) { | 1950 | if (task->tk_status < 0) { |
1966 | printk(KERN_NOTICE "RPC: Could not send backchannel reply " | 1951 | printk(KERN_NOTICE "RPC: Could not send backchannel reply " |
1967 | "error: %d\n", task->tk_status); | 1952 | "error: %d\n", task->tk_status); |
1968 | return; | 1953 | goto out_done; |
1969 | } | 1954 | } |
1955 | if (req->rq_connect_cookie != req->rq_xprt->connect_cookie) | ||
1956 | req->rq_bytes_sent = 0; | ||
1970 | 1957 | ||
1971 | xprt_transmit(task); | 1958 | xprt_transmit(task); |
1959 | |||
1960 | if (task->tk_status == -EAGAIN) | ||
1961 | goto out_nospace; | ||
1962 | |||
1972 | xprt_end_transmit(task); | 1963 | xprt_end_transmit(task); |
1973 | dprint_status(task); | 1964 | dprint_status(task); |
1974 | switch (task->tk_status) { | 1965 | switch (task->tk_status) { |
1975 | case 0: | 1966 | case 0: |
1976 | /* Success */ | 1967 | /* Success */ |
1977 | break; | ||
1978 | case -EHOSTDOWN: | 1968 | case -EHOSTDOWN: |
1979 | case -EHOSTUNREACH: | 1969 | case -EHOSTUNREACH: |
1980 | case -ENETUNREACH: | 1970 | case -ENETUNREACH: |
1971 | case -ECONNRESET: | ||
1972 | case -ECONNREFUSED: | ||
1973 | case -EADDRINUSE: | ||
1974 | case -ENOTCONN: | ||
1975 | case -EPIPE: | ||
1976 | break; | ||
1981 | case -ETIMEDOUT: | 1977 | case -ETIMEDOUT: |
1982 | /* | 1978 | /* |
1983 | * Problem reaching the server. Disconnect and let the | 1979 | * Problem reaching the server. Disconnect and let the |
@@ -2002,6 +1998,13 @@ call_bc_transmit(struct rpc_task *task) | |||
2002 | break; | 1998 | break; |
2003 | } | 1999 | } |
2004 | rpc_wake_up_queued_task(&req->rq_xprt->pending, task); | 2000 | rpc_wake_up_queued_task(&req->rq_xprt->pending, task); |
2001 | out_done: | ||
2002 | task->tk_action = rpc_exit_task; | ||
2003 | return; | ||
2004 | out_nospace: | ||
2005 | req->rq_connect_cookie = req->rq_xprt->connect_cookie; | ||
2006 | out_retry: | ||
2007 | task->tk_status = 0; | ||
2005 | } | 2008 | } |
2006 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 2009 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
2007 | 2010 | ||
@@ -2476,3 +2479,59 @@ void rpc_show_tasks(struct net *net) | |||
2476 | spin_unlock(&sn->rpc_client_lock); | 2479 | spin_unlock(&sn->rpc_client_lock); |
2477 | } | 2480 | } |
2478 | #endif | 2481 | #endif |
2482 | |||
2483 | #if IS_ENABLED(CONFIG_SUNRPC_SWAP) | ||
2484 | int | ||
2485 | rpc_clnt_swap_activate(struct rpc_clnt *clnt) | ||
2486 | { | ||
2487 | int ret = 0; | ||
2488 | struct rpc_xprt *xprt; | ||
2489 | |||
2490 | if (atomic_inc_return(&clnt->cl_swapper) == 1) { | ||
2491 | retry: | ||
2492 | rcu_read_lock(); | ||
2493 | xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); | ||
2494 | rcu_read_unlock(); | ||
2495 | if (!xprt) { | ||
2496 | /* | ||
2497 | * If we didn't get a reference, then we likely are | ||
2498 | * racing with a migration event. Wait for a grace | ||
2499 | * period and try again. | ||
2500 | */ | ||
2501 | synchronize_rcu(); | ||
2502 | goto retry; | ||
2503 | } | ||
2504 | |||
2505 | ret = xprt_enable_swap(xprt); | ||
2506 | xprt_put(xprt); | ||
2507 | } | ||
2508 | return ret; | ||
2509 | } | ||
2510 | EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); | ||
2511 | |||
2512 | void | ||
2513 | rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) | ||
2514 | { | ||
2515 | struct rpc_xprt *xprt; | ||
2516 | |||
2517 | if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { | ||
2518 | retry: | ||
2519 | rcu_read_lock(); | ||
2520 | xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); | ||
2521 | rcu_read_unlock(); | ||
2522 | if (!xprt) { | ||
2523 | /* | ||
2524 | * If we didn't get a reference, then we likely are | ||
2525 | * racing with a migration event. Wait for a grace | ||
2526 | * period and try again. | ||
2527 | */ | ||
2528 | synchronize_rcu(); | ||
2529 | goto retry; | ||
2530 | } | ||
2531 | |||
2532 | xprt_disable_swap(xprt); | ||
2533 | xprt_put(xprt); | ||
2534 | } | ||
2535 | } | ||
2536 | EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); | ||
2537 | #endif /* CONFIG_SUNRPC_SWAP */ | ||
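Because cl_swapper is a per-client reference count, activation nests safely: only the 0 -> 1 transition in rpc_clnt_swap_activate() and the 1 -> 0 transition in rpc_clnt_swap_deactivate() reach the transport's enable/disable ops. A sketch of the pairing:

	static void example_swap_refcounting(struct rpc_clnt *clnt)
	{
		rpc_clnt_swap_activate(clnt);	/* 0 -> 1: enables swap */
		rpc_clnt_swap_activate(clnt);	/* 1 -> 2: no-op */
		rpc_clnt_swap_deactivate(clnt);	/* 2 -> 1: no-op */
		rpc_clnt_swap_deactivate(clnt);	/* 1 -> 0: disables swap */
	}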
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..e7b4d93566df 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c | |||
@@ -10,9 +10,12 @@ | |||
10 | #include "netns.h" | 10 | #include "netns.h" |
11 | 11 | ||
12 | static struct dentry *topdir; | 12 | static struct dentry *topdir; |
13 | static struct dentry *rpc_fault_dir; | ||
13 | static struct dentry *rpc_clnt_dir; | 14 | static struct dentry *rpc_clnt_dir; |
14 | static struct dentry *rpc_xprt_dir; | 15 | static struct dentry *rpc_xprt_dir; |
15 | 16 | ||
17 | unsigned int rpc_inject_disconnect; | ||
18 | |||
16 | struct rpc_clnt_iter { | 19 | struct rpc_clnt_iter { |
17 | struct rpc_clnt *clnt; | 20 | struct rpc_clnt *clnt; |
18 | loff_t pos; | 21 | loff_t pos; |
@@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) | |||
257 | debugfs_remove_recursive(xprt->debugfs); | 260 | debugfs_remove_recursive(xprt->debugfs); |
258 | xprt->debugfs = NULL; | 261 | xprt->debugfs = NULL; |
259 | } | 262 | } |
263 | |||
264 | atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); | ||
260 | } | 265 | } |
261 | 266 | ||
262 | void | 267 | void |
@@ -266,11 +271,79 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) | |||
266 | xprt->debugfs = NULL; | 271 | xprt->debugfs = NULL; |
267 | } | 272 | } |
268 | 273 | ||
274 | static int | ||
275 | fault_open(struct inode *inode, struct file *filp) | ||
276 | { | ||
277 | filp->private_data = kmalloc(128, GFP_KERNEL); | ||
278 | if (!filp->private_data) | ||
279 | return -ENOMEM; | ||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static int | ||
284 | fault_release(struct inode *inode, struct file *filp) | ||
285 | { | ||
286 | kfree(filp->private_data); | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | static ssize_t | ||
291 | fault_disconnect_read(struct file *filp, char __user *user_buf, | ||
292 | size_t len, loff_t *offset) | ||
293 | { | ||
294 | char *buffer = (char *)filp->private_data; | ||
295 | size_t size; | ||
296 | |||
297 | size = sprintf(buffer, "%u\n", rpc_inject_disconnect); | ||
298 | return simple_read_from_buffer(user_buf, len, offset, buffer, size); | ||
299 | } | ||
300 | |||
301 | static ssize_t | ||
302 | fault_disconnect_write(struct file *filp, const char __user *user_buf, | ||
303 | size_t len, loff_t *offset) | ||
304 | { | ||
305 | char buffer[16]; | ||
306 | |||
307 | if (len >= sizeof(buffer)) | ||
308 | len = sizeof(buffer) - 1; | ||
309 | if (copy_from_user(buffer, user_buf, len)) | ||
310 | return -EFAULT; | ||
311 | buffer[len] = '\0'; | ||
312 | if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) | ||
313 | return -EINVAL; | ||
314 | return len; | ||
315 | } | ||
316 | |||
317 | static const struct file_operations fault_disconnect_fops = { | ||
318 | .owner = THIS_MODULE, | ||
319 | .open = fault_open, | ||
320 | .read = fault_disconnect_read, | ||
321 | .write = fault_disconnect_write, | ||
322 | .release = fault_release, | ||
323 | }; | ||
324 | |||
325 | static struct dentry * | ||
326 | inject_fault_dir(struct dentry *topdir) | ||
327 | { | ||
328 | struct dentry *faultdir; | ||
329 | |||
330 | faultdir = debugfs_create_dir("inject_fault", topdir); | ||
331 | if (!faultdir) | ||
332 | return NULL; | ||
333 | |||
334 | if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, | ||
335 | NULL, &fault_disconnect_fops)) | ||
336 | return NULL; | ||
337 | |||
338 | return faultdir; | ||
339 | } | ||
340 | |||
269 | void __exit | 341 | void __exit |
270 | sunrpc_debugfs_exit(void) | 342 | sunrpc_debugfs_exit(void) |
271 | { | 343 | { |
272 | debugfs_remove_recursive(topdir); | 344 | debugfs_remove_recursive(topdir); |
273 | topdir = NULL; | 345 | topdir = NULL; |
346 | rpc_fault_dir = NULL; | ||
274 | rpc_clnt_dir = NULL; | 347 | rpc_clnt_dir = NULL; |
275 | rpc_xprt_dir = NULL; | 348 | rpc_xprt_dir = NULL; |
276 | } | 349 | } |
@@ -282,6 +355,10 @@ sunrpc_debugfs_init(void) | |||
282 | if (!topdir) | 355 | if (!topdir) |
283 | return; | 356 | return; |
284 | 357 | ||
358 | rpc_fault_dir = inject_fault_dir(topdir); | ||
359 | if (!rpc_fault_dir) | ||
360 | goto out_remove; | ||
361 | |||
285 | rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); | 362 | rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); |
286 | if (!rpc_clnt_dir) | 363 | if (!rpc_clnt_dir) |
287 | goto out_remove; | 364 | goto out_remove; |
@@ -294,5 +371,6 @@ sunrpc_debugfs_init(void) | |||
294 | out_remove: | 371 | out_remove: |
295 | debugfs_remove_recursive(topdir); | 372 | debugfs_remove_recursive(topdir); |
296 | topdir = NULL; | 373 | topdir = NULL; |
374 | rpc_fault_dir = NULL; | ||
297 | rpc_clnt_dir = NULL; | 375 | rpc_clnt_dir = NULL; |
298 | } | 376 | } |
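Assuming debugfs is mounted at the conventional /sys/kernel/debug and the SUNRPC top-level directory is named "sunrpc", the new knob appears at /sys/kernel/debug/sunrpc/inject_fault/disconnect. A userspace test-harness sketch (run as root, since the file is created read-only for the owner) that arms the injector to disconnect on every 5th instrumented operation:

	#include <fcntl.h>
	#include <unistd.h>

	int arm_rpc_fault_injection(void)
	{
		int fd = open("/sys/kernel/debug/sunrpc/inject_fault/disconnect",
			      O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, "5", 1) != 1) {
			close(fd);
			return -1;
		}
		return close(fd);
	}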
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 852ae606b02a..5a16d8d8c831 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -1350,6 +1350,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, | |||
1350 | { | 1350 | { |
1351 | struct kvec *argv = &rqstp->rq_arg.head[0]; | 1351 | struct kvec *argv = &rqstp->rq_arg.head[0]; |
1352 | struct kvec *resv = &rqstp->rq_res.head[0]; | 1352 | struct kvec *resv = &rqstp->rq_res.head[0]; |
1353 | struct rpc_task *task; | ||
1354 | int proc_error; | ||
1355 | int error; | ||
1356 | |||
1357 | dprintk("svc: %s(%p)\n", __func__, req); | ||
1353 | 1358 | ||
1354 | /* Build the svc_rqst used by the common processing routine */ | 1359 | /* Build the svc_rqst used by the common processing routine */ |
1355 | rqstp->rq_xprt = serv->sv_bc_xprt; | 1360 | rqstp->rq_xprt = serv->sv_bc_xprt; |
@@ -1372,21 +1377,36 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, | |||
1372 | 1377 | ||
1373 | /* | 1378 | /* |
1374 | * Skip the next two words because they've already been | 1379 | * Skip the next two words because they've already been |
1375 | * processed in the trasport | 1380 | * processed in the transport |
1376 | */ | 1381 | */ |
1377 | svc_getu32(argv); /* XID */ | 1382 | svc_getu32(argv); /* XID */ |
1378 | svc_getnl(argv); /* CALLDIR */ | 1383 | svc_getnl(argv); /* CALLDIR */ |
1379 | 1384 | ||
1380 | /* Returns 1 for send, 0 for drop */ | 1385 | /* Parse and execute the bc call */ |
1381 | if (svc_process_common(rqstp, argv, resv)) { | 1386 | proc_error = svc_process_common(rqstp, argv, resv); |
1382 | memcpy(&req->rq_snd_buf, &rqstp->rq_res, | 1387 | |
1383 | sizeof(req->rq_snd_buf)); | 1388 | atomic_inc(&req->rq_xprt->bc_free_slots); |
1384 | return bc_send(req); | 1389 | if (!proc_error) { |
1385 | } else { | 1390 | /* Processing error: drop the request */ |
1386 | /* drop request */ | ||
1387 | xprt_free_bc_request(req); | 1391 | xprt_free_bc_request(req); |
1388 | return 0; | 1392 | return 0; |
1389 | } | 1393 | } |
1394 | |||
1395 | /* Finally, send the reply synchronously */ | ||
1396 | memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); | ||
1397 | task = rpc_run_bc_task(req); | ||
1398 | if (IS_ERR(task)) { | ||
1399 | error = PTR_ERR(task); | ||
1400 | goto out; | ||
1401 | } | ||
1402 | |||
1403 | WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); | ||
1404 | error = task->tk_status; | ||
1405 | rpc_put_task(task); | ||
1406 | |||
1407 | out: | ||
1408 | dprintk("svc: %s(), error=%d\n", __func__, error); | ||
1409 | return error; | ||
1390 | } | 1410 | } |
1391 | EXPORT_SYMBOL_GPL(bc_svc_process); | 1411 | EXPORT_SYMBOL_GPL(bc_svc_process); |
1392 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 1412 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..ab5dd621ae0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -68,6 +68,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net); | |||
68 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); | 68 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); |
69 | static void xprt_connect_status(struct rpc_task *task); | 69 | static void xprt_connect_status(struct rpc_task *task); |
70 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | 70 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
71 | static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *); | ||
71 | static void xprt_destroy(struct rpc_xprt *xprt); | 72 | static void xprt_destroy(struct rpc_xprt *xprt); |
72 | 73 | ||
73 | static DEFINE_SPINLOCK(xprt_list_lock); | 74 | static DEFINE_SPINLOCK(xprt_list_lock); |
@@ -250,6 +251,8 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) | |||
250 | } | 251 | } |
251 | xprt_clear_locked(xprt); | 252 | xprt_clear_locked(xprt); |
252 | out_sleep: | 253 | out_sleep: |
254 | if (req) | ||
255 | __xprt_put_cong(xprt, req); | ||
253 | dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); | 256 | dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); |
254 | task->tk_timeout = 0; | 257 | task->tk_timeout = 0; |
255 | task->tk_status = -EAGAIN; | 258 | task->tk_status = -EAGAIN; |
@@ -608,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work) | |||
608 | struct rpc_xprt *xprt = | 611 | struct rpc_xprt *xprt = |
609 | container_of(work, struct rpc_xprt, task_cleanup); | 612 | container_of(work, struct rpc_xprt, task_cleanup); |
610 | 613 | ||
611 | xprt->ops->close(xprt); | ||
612 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); | 614 | clear_bit(XPRT_CLOSE_WAIT, &xprt->state); |
615 | xprt->ops->close(xprt); | ||
613 | xprt_release_write(xprt, NULL); | 616 | xprt_release_write(xprt, NULL); |
614 | } | 617 | } |
615 | 618 | ||
@@ -967,6 +970,7 @@ void xprt_transmit(struct rpc_task *task) | |||
967 | task->tk_status = status; | 970 | task->tk_status = status; |
968 | return; | 971 | return; |
969 | } | 972 | } |
973 | xprt_inject_disconnect(xprt); | ||
970 | 974 | ||
971 | dprintk("RPC: %5u xmit complete\n", task->tk_pid); | 975 | dprintk("RPC: %5u xmit complete\n", task->tk_pid); |
972 | task->tk_flags |= RPC_TASK_SENT; | 976 | task->tk_flags |= RPC_TASK_SENT; |
@@ -1285,6 +1289,7 @@ void xprt_release(struct rpc_task *task) | |||
1285 | spin_unlock_bh(&xprt->transport_lock); | 1289 | spin_unlock_bh(&xprt->transport_lock); |
1286 | if (req->rq_buffer) | 1290 | if (req->rq_buffer) |
1287 | xprt->ops->buf_free(req->rq_buffer); | 1291 | xprt->ops->buf_free(req->rq_buffer); |
1292 | xprt_inject_disconnect(xprt); | ||
1288 | if (req->rq_cred != NULL) | 1293 | if (req->rq_cred != NULL) |
1289 | put_rpccred(req->rq_cred); | 1294 | put_rpccred(req->rq_cred); |
1290 | task->tk_rqstp = NULL; | 1295 | task->tk_rqstp = NULL; |
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 302d4ebf6fbf..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c | |||
@@ -11,6 +11,21 @@ | |||
11 | * can take tens of usecs to complete. | 11 | * can take tens of usecs to complete. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* Normal operation | ||
15 | * | ||
16 | * A Memory Region is prepared for RDMA READ or WRITE using the | ||
17 | * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is | ||
18 | * finished, the Memory Region is unmapped using the ib_unmap_fmr | ||
19 | * verb (fmr_op_unmap). | ||
20 | */ | ||
21 | |||
22 | /* Transport recovery | ||
23 | * | ||
24 | * After a transport reconnect, fmr_op_map re-uses the MR already | ||
25 | * allocated for the RPC, but generates a fresh rkey and then | ||
26 | * maps the MR again. This process is synchronous. | ||
27 | */ | ||
28 | |||
14 | #include "xprt_rdma.h" | 29 | #include "xprt_rdma.h" |
15 | 30 | ||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 31 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
@@ -50,19 +65,28 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) | |||
50 | struct rpcrdma_mw *r; | 65 | struct rpcrdma_mw *r; |
51 | int i, rc; | 66 | int i, rc; |
52 | 67 | ||
68 | spin_lock_init(&buf->rb_mwlock); | ||
53 | INIT_LIST_HEAD(&buf->rb_mws); | 69 | INIT_LIST_HEAD(&buf->rb_mws); |
54 | INIT_LIST_HEAD(&buf->rb_all); | 70 | INIT_LIST_HEAD(&buf->rb_all); |
55 | 71 | ||
56 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | 72 | i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1); |
57 | dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); | 73 | i += 2; /* head + tail */ |
74 | i *= buf->rb_max_requests; /* one set for each RPC slot */ | ||
75 | dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i); | ||
58 | 76 | ||
77 | rc = -ENOMEM; | ||
59 | while (i--) { | 78 | while (i--) { |
60 | r = kzalloc(sizeof(*r), GFP_KERNEL); | 79 | r = kzalloc(sizeof(*r), GFP_KERNEL); |
61 | if (!r) | 80 | if (!r) |
62 | return -ENOMEM; | 81 | goto out; |
63 | 82 | ||
64 | r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | 83 | r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES * |
65 | if (IS_ERR(r->r.fmr)) | 84 | sizeof(u64), GFP_KERNEL); |
85 | if (!r->r.fmr.physaddrs) | ||
86 | goto out_free; | ||
87 | |||
88 | r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr); | ||
89 | if (IS_ERR(r->r.fmr.fmr)) | ||
66 | goto out_fmr_err; | 90 | goto out_fmr_err; |
67 | 91 | ||
68 | list_add(&r->mw_list, &buf->rb_mws); | 92 | list_add(&r->mw_list, &buf->rb_mws); |
@@ -71,12 +95,24 @@ fmr_op_init(struct rpcrdma_xprt *r_xprt) | |||
71 | return 0; | 95 | return 0; |
72 | 96 | ||
73 | out_fmr_err: | 97 | out_fmr_err: |
74 | rc = PTR_ERR(r->r.fmr); | 98 | rc = PTR_ERR(r->r.fmr.fmr); |
75 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); | 99 | dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc); |
100 | kfree(r->r.fmr.physaddrs); | ||
101 | out_free: | ||
76 | kfree(r); | 102 | kfree(r); |
103 | out: | ||
77 | return rc; | 104 | return rc; |
78 | } | 105 | } |
79 | 106 | ||
107 | static int | ||
108 | __fmr_unmap(struct rpcrdma_mw *r) | ||
109 | { | ||
110 | LIST_HEAD(l); | ||
111 | |||
112 | list_add(&r->r.fmr.fmr->list, &l); | ||
113 | return ib_unmap_fmr(&l); | ||
114 | } | ||
115 | |||
80 | /* Use the ib_map_phys_fmr() verb to register a memory region | 116 | /* Use the ib_map_phys_fmr() verb to register a memory region |
81 | * for remote access via RDMA READ or RDMA WRITE. | 117 | * for remote access via RDMA READ or RDMA WRITE. |
82 | */ | 118 | */ |
@@ -85,12 +121,24 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
85 | int nsegs, bool writing) | 121 | int nsegs, bool writing) |
86 | { | 122 | { |
87 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 123 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
88 | struct ib_device *device = ia->ri_id->device; | 124 | struct ib_device *device = ia->ri_device; |
89 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | 125 | enum dma_data_direction direction = rpcrdma_data_dir(writing); |
90 | struct rpcrdma_mr_seg *seg1 = seg; | 126 | struct rpcrdma_mr_seg *seg1 = seg; |
91 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
92 | u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; | ||
93 | int len, pageoff, i, rc; | 127 | int len, pageoff, i, rc; |
128 | struct rpcrdma_mw *mw; | ||
129 | |||
130 | mw = seg1->rl_mw; | ||
131 | seg1->rl_mw = NULL; | ||
132 | if (!mw) { | ||
133 | mw = rpcrdma_get_mw(r_xprt); | ||
134 | if (!mw) | ||
135 | return -ENOMEM; | ||
136 | } else { | ||
137 | /* this is a retransmit; generate a fresh rkey */ | ||
138 | rc = __fmr_unmap(mw); | ||
139 | if (rc) | ||
140 | return rc; | ||
141 | } | ||
94 | 142 | ||
95 | pageoff = offset_in_page(seg1->mr_offset); | 143 | pageoff = offset_in_page(seg1->mr_offset); |
96 | seg1->mr_offset -= pageoff; /* start of page */ | 144 | seg1->mr_offset -= pageoff; /* start of page */ |
@@ -100,7 +148,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
100 | nsegs = RPCRDMA_MAX_FMR_SGES; | 148 | nsegs = RPCRDMA_MAX_FMR_SGES; |
101 | for (i = 0; i < nsegs;) { | 149 | for (i = 0; i < nsegs;) { |
102 | rpcrdma_map_one(device, seg, direction); | 150 | rpcrdma_map_one(device, seg, direction); |
103 | physaddrs[i] = seg->mr_dma; | 151 | mw->r.fmr.physaddrs[i] = seg->mr_dma; |
104 | len += seg->mr_len; | 152 | len += seg->mr_len; |
105 | ++seg; | 153 | ++seg; |
106 | ++i; | 154 | ++i; |
@@ -110,11 +158,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
110 | break; | 158 | break; |
111 | } | 159 | } |
112 | 160 | ||
113 | rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma); | 161 | rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs, |
162 | i, seg1->mr_dma); | ||
114 | if (rc) | 163 | if (rc) |
115 | goto out_maperr; | 164 | goto out_maperr; |
116 | 165 | ||
117 | seg1->mr_rkey = mw->r.fmr->rkey; | 166 | seg1->rl_mw = mw; |
167 | seg1->mr_rkey = mw->r.fmr.fmr->rkey; | ||
118 | seg1->mr_base = seg1->mr_dma + pageoff; | 168 | seg1->mr_base = seg1->mr_dma + pageoff; |
119 | seg1->mr_nsegs = i; | 169 | seg1->mr_nsegs = i; |
120 | seg1->mr_len = len; | 170 | seg1->mr_len = len; |
@@ -137,48 +187,28 @@ fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | |||
137 | { | 187 | { |
138 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 188 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
139 | struct rpcrdma_mr_seg *seg1 = seg; | 189 | struct rpcrdma_mr_seg *seg1 = seg; |
140 | struct ib_device *device; | 190 | struct rpcrdma_mw *mw = seg1->rl_mw; |
141 | int rc, nsegs = seg->mr_nsegs; | 191 | int rc, nsegs = seg->mr_nsegs; |
142 | LIST_HEAD(l); | ||
143 | 192 | ||
144 | list_add(&seg1->rl_mw->r.fmr->list, &l); | 193 | dprintk("RPC: %s: FMR %p\n", __func__, mw); |
145 | rc = ib_unmap_fmr(&l); | 194 | |
146 | read_lock(&ia->ri_qplock); | 195 | seg1->rl_mw = NULL; |
147 | device = ia->ri_id->device; | ||
148 | while (seg1->mr_nsegs--) | 196 | while (seg1->mr_nsegs--) |
149 | rpcrdma_unmap_one(device, seg++); | 197 | rpcrdma_unmap_one(ia->ri_device, seg++); |
150 | read_unlock(&ia->ri_qplock); | 198 | rc = __fmr_unmap(mw); |
151 | if (rc) | 199 | if (rc) |
152 | goto out_err; | 200 | goto out_err; |
201 | rpcrdma_put_mw(r_xprt, mw); | ||
153 | return nsegs; | 202 | return nsegs; |
154 | 203 | ||
155 | out_err: | 204 | out_err: |
205 | /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy | ||
206 | * will attempt to release it when the transport is destroyed. | ||
207 | */ | ||
156 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); | 208 | dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc); |
157 | return nsegs; | 209 | return nsegs; |
158 | } | 210 | } |
159 | 211 | ||
160 | /* After a disconnect, unmap all FMRs. | ||
161 | * | ||
162 | * This is invoked only in the transport connect worker in order | ||
163 | * to serialize with rpcrdma_register_fmr_external(). | ||
164 | */ | ||
165 | static void | ||
166 | fmr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
167 | { | ||
168 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
169 | struct rpcrdma_mw *r; | ||
170 | LIST_HEAD(list); | ||
171 | int rc; | ||
172 | |||
173 | list_for_each_entry(r, &buf->rb_all, mw_all) | ||
174 | list_add(&r->r.fmr->list, &list); | ||
175 | |||
176 | rc = ib_unmap_fmr(&list); | ||
177 | if (rc) | ||
178 | dprintk("RPC: %s: ib_unmap_fmr failed %i\n", | ||
179 | __func__, rc); | ||
180 | } | ||
181 | |||
182 | static void | 212 | static void |
183 | fmr_op_destroy(struct rpcrdma_buffer *buf) | 213 | fmr_op_destroy(struct rpcrdma_buffer *buf) |
184 | { | 214 | { |
@@ -188,10 +218,13 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) | |||
188 | while (!list_empty(&buf->rb_all)) { | 218 | while (!list_empty(&buf->rb_all)) { |
189 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | 219 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); |
190 | list_del(&r->mw_all); | 220 | list_del(&r->mw_all); |
191 | rc = ib_dealloc_fmr(r->r.fmr); | 221 | kfree(r->r.fmr.physaddrs); |
222 | |||
223 | rc = ib_dealloc_fmr(r->r.fmr.fmr); | ||
192 | if (rc) | 224 | if (rc) |
193 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", | 225 | dprintk("RPC: %s: ib_dealloc_fmr failed %i\n", |
194 | __func__, rc); | 226 | __func__, rc); |
227 | |||
195 | kfree(r); | 228 | kfree(r); |
196 | } | 229 | } |
197 | } | 230 | } |
@@ -202,7 +235,6 @@ const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { | |||
202 | .ro_open = fmr_op_open, | 235 | .ro_open = fmr_op_open, |
203 | .ro_maxpages = fmr_op_maxpages, | 236 | .ro_maxpages = fmr_op_maxpages, |
204 | .ro_init = fmr_op_init, | 237 | .ro_init = fmr_op_init, |
205 | .ro_reset = fmr_op_reset, | ||
206 | .ro_destroy = fmr_op_destroy, | 238 | .ro_destroy = fmr_op_destroy, |
207 | .ro_displayname = "fmr", | 239 | .ro_displayname = "fmr", |
208 | }; | 240 | }; |
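The new fmr_op_init() sizing budgets MRs per RPC slot instead of the old (rb_max_requests + 1) * RPCRDMA_MAX_SEGS. A worked example under the assumed constants RPCRDMA_MAX_DATA_SEGS = 64 and RPCRDMA_MAX_FMR_SGES = 64 (verify against xprt_rdma.h):

	static unsigned int example_fmr_budget(unsigned int rb_max_requests)
	{
		unsigned int per_rpc = 64 / 64;	/* payload MRs, at least one */

		per_rpc += 2;			/* plus head and tail iovecs */
		return per_rpc * rb_max_requests; /* 32 slots -> 96 FMRs */
	}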
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index d234521320a4..04ea914201b2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -11,12 +11,136 @@ | |||
11 | * but most complex memory registration mode. | 11 | * but most complex memory registration mode. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* Normal operation | ||
15 | * | ||
16 | * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG | ||
17 | * Work Request (frwr_op_map). When the RDMA operation is finished, this | ||
18 | * Memory Region is invalidated using a LOCAL_INV Work Request | ||
19 | * (frwr_op_unmap). | ||
20 | * | ||
21 | * Typically these Work Requests are not signaled, and neither are RDMA | ||
22 | * SEND Work Requests (with the exception of signaling occasionally to | ||
23 | * prevent provider work queue overflows). This greatly reduces HCA | ||
24 | * interrupt workload. | ||
25 | * | ||
26 | * As an optimization, frwr_op_unmap marks MRs INVALID before the | ||
27 | * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on | ||
28 | * rb_mws immediately so that no work (like managing a linked list | ||
29 | * under a spinlock) is needed in the completion upcall. | ||
30 | * | ||
31 | * But this means that frwr_op_map() can occasionally encounter an MR | ||
32 | * that is INVALID but the LOCAL_INV WR has not completed. Work Queue | ||
33 | * ordering prevents a subsequent FAST_REG WR from executing against | ||
34 | * that MR while it is still being invalidated. | ||
35 | */ | ||
36 | |||
37 | /* Transport recovery | ||
38 | * | ||
39 | * ->op_map and the transport connect worker cannot run at the same | ||
40 | * time, but ->op_unmap can fire while the transport connect worker | ||
41 | * is running. Thus MR recovery is handled in ->op_map, to guarantee | ||
42 | * that recovered MRs are owned by a sending RPC, and not one where | ||
43 | * ->op_unmap could fire at the same time transport reconnect is | ||
44 | * being done. | ||
45 | * | ||
46 | * When the underlying transport disconnects, MRs are left in one of | ||
47 | * three states: | ||
48 | * | ||
49 | * INVALID: The MR was not in use before the QP entered ERROR state. | ||
50 | * (Or, the LOCAL_INV WR has not completed or flushed yet). | ||
51 | * | ||
52 | * STALE: The MR was being registered or unregistered when the QP | ||
53 | * entered ERROR state, and the pending WR was flushed. | ||
54 | * | ||
55 | * VALID: The MR was registered before the QP entered ERROR state. | ||
56 | * | ||
57 | * When frwr_op_map encounters STALE and VALID MRs, they are recovered | ||
58 | * with ib_dereg_mr and then are re-initialized. Because MR recovery | ||
59 | * allocates fresh resources, it is deferred to a workqueue, and the | ||
60 | * recovered MRs are placed back on the rb_mws list when recovery is | ||
61 | * complete. frwr_op_map allocates another MR for the current RPC while | ||
62 | * the broken MR is reset. | ||
63 | * | ||
64 | * To ensure that frwr_op_map doesn't encounter an MR that is marked | ||
65 | * INVALID but that is about to be flushed due to a previous transport | ||
66 | * disconnect, the transport connect worker attempts to drain all | ||
67 | * pending send queue WRs before the transport is reconnected. | ||
68 | */ | ||
69 | |||
14 | #include "xprt_rdma.h" | 70 | #include "xprt_rdma.h" |
15 | 71 | ||
16 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) | 72 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
17 | # define RPCDBG_FACILITY RPCDBG_TRANS | 73 | # define RPCDBG_FACILITY RPCDBG_TRANS |
18 | #endif | 74 | #endif |
19 | 75 | ||
76 | static struct workqueue_struct *frwr_recovery_wq; | ||
77 | |||
78 | #define FRWR_RECOVERY_WQ_FLAGS (WQ_UNBOUND | WQ_MEM_RECLAIM) | ||
79 | |||
80 | int | ||
81 | frwr_alloc_recovery_wq(void) | ||
82 | { | ||
83 | frwr_recovery_wq = alloc_workqueue("frwr_recovery", | ||
84 | FRWR_RECOVERY_WQ_FLAGS, 0); | ||
85 | return !frwr_recovery_wq ? -ENOMEM : 0; | ||
86 | } | ||
87 | |||
88 | void | ||
89 | frwr_destroy_recovery_wq(void) | ||
90 | { | ||
91 | struct workqueue_struct *wq; | ||
92 | |||
93 | if (!frwr_recovery_wq) | ||
94 | return; | ||
95 | |||
96 | wq = frwr_recovery_wq; | ||
97 | frwr_recovery_wq = NULL; | ||
98 | destroy_workqueue(wq); | ||
99 | } | ||
100 | |||
101 | /* Deferred reset of a single FRMR. Generate a fresh rkey by | ||
102 | * replacing the MR. | ||
103 | * | ||
104 | * There's no recovery if this fails. The FRMR is abandoned, but | ||
105 | * remains in rb_all. It will be cleaned up when the transport is | ||
106 | * destroyed. | ||
107 | */ | ||
108 | static void | ||
109 | __frwr_recovery_worker(struct work_struct *work) | ||
110 | { | ||
111 | struct rpcrdma_mw *r = container_of(work, struct rpcrdma_mw, | ||
112 | r.frmr.fr_work); | ||
113 | struct rpcrdma_xprt *r_xprt = r->r.frmr.fr_xprt; | ||
114 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
115 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
116 | |||
117 | if (ib_dereg_mr(r->r.frmr.fr_mr)) | ||
118 | goto out_fail; | ||
119 | |||
120 | r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(pd, depth); | ||
121 | if (IS_ERR(r->r.frmr.fr_mr)) | ||
122 | goto out_fail; | ||
123 | |||
124 | dprintk("RPC: %s: recovered FRMR %p\n", __func__, r); | ||
125 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
126 | rpcrdma_put_mw(r_xprt, r); | ||
127 | return; | ||
128 | |||
129 | out_fail: | ||
130 | pr_warn("RPC: %s: FRMR %p unrecovered\n", | ||
131 | __func__, r); | ||
132 | } | ||
133 | |||
134 | /* A broken MR was discovered in a context that can't sleep. | ||
135 | * Defer recovery to the recovery worker. | ||
136 | */ | ||
137 | static void | ||
138 | __frwr_queue_recovery(struct rpcrdma_mw *r) | ||
139 | { | ||
140 | INIT_WORK(&r->r.frmr.fr_work, __frwr_recovery_worker); | ||
141 | queue_work(frwr_recovery_wq, &r->r.frmr.fr_work); | ||
142 | } | ||
143 | |||
20 | static int | 144 | static int |
21 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, | 145 | __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, |
22 | unsigned int depth) | 146 | unsigned int depth) |
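
The hunk above introduces the FRWR recovery workqueue and its deferred worker. The lifecycle is the standard kernel workqueue pattern: allocate once at init, queue work items from contexts that cannot sleep, flush before tearing down the objects the work touches, destroy at exit. A minimal self-contained sketch of that pattern (generic demo names, not the xprtrdma symbols):

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_wq;

	struct demo_item {
		struct work_struct work;
	};

	static struct demo_item item;

	static void demo_worker(struct work_struct *work)
	{
		struct demo_item *it = container_of(work, struct demo_item, work);

		/* sleepable context: safe to allocate or deregister resources */
		pr_info("recovering item %p\n", it);
	}

	/* callable from atomic context, like __frwr_queue_recovery() */
	static void demo_defer(struct demo_item *it)
	{
		INIT_WORK(&it->work, demo_worker);
		queue_work(demo_wq, &it->work);
	}

	static int __init demo_init(void)
	{
		demo_wq = alloc_workqueue("demo", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
		if (!demo_wq)
			return -ENOMEM;
		demo_defer(&item);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		flush_workqueue(demo_wq);	/* nothing in flight past here */
		destroy_workqueue(demo_wq);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
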
@@ -128,7 +252,7 @@ frwr_sendcompletion(struct ib_wc *wc) | |||
128 | 252 | ||
129 | /* WARNING: Only wr_id and status are reliable at this point */ | 253 | /* WARNING: Only wr_id and status are reliable at this point */ |
130 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; | 254 | r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; |
131 | dprintk("RPC: %s: frmr %p (stale), status %s (%d)\n", | 255 | pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", |
132 | __func__, r, ib_wc_status_msg(wc->status), wc->status); | 256 | __func__, r, ib_wc_status_msg(wc->status), wc->status); |
133 | r->r.frmr.fr_state = FRMR_IS_STALE; | 257 | r->r.frmr.fr_state = FRMR_IS_STALE; |
134 | } | 258 | } |
@@ -137,16 +261,19 @@ static int | |||
137 | frwr_op_init(struct rpcrdma_xprt *r_xprt) | 261 | frwr_op_init(struct rpcrdma_xprt *r_xprt) |
138 | { | 262 | { |
139 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | 263 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
140 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | 264 | struct ib_device *device = r_xprt->rx_ia.ri_device; |
141 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | 265 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; |
142 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | 266 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; |
143 | int i; | 267 | int i; |
144 | 268 | ||
269 | spin_lock_init(&buf->rb_mwlock); | ||
145 | INIT_LIST_HEAD(&buf->rb_mws); | 270 | INIT_LIST_HEAD(&buf->rb_mws); |
146 | INIT_LIST_HEAD(&buf->rb_all); | 271 | INIT_LIST_HEAD(&buf->rb_all); |
147 | 272 | ||
148 | i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; | 273 | i = max_t(int, RPCRDMA_MAX_DATA_SEGS / depth, 1); |
149 | dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); | 274 | i += 2; /* head + tail */ |
275 | i *= buf->rb_max_requests; /* one set for each RPC slot */ | ||
276 | dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); | ||
150 | 277 | ||
151 | while (i--) { | 278 | while (i--) { |
152 | struct rpcrdma_mw *r; | 279 | struct rpcrdma_mw *r; |
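
The new sizing formula above replaces the old (rb_max_requests + 1) * RPCRDMA_MAX_SEGS overcommit with a per-RPC-slot count derived from the FRMR depth. A runnable model of the math, with illustrative inputs (the constants here are assumptions, not the driver's actual values):

	#include <stdio.h>

	/* Models the frwr_op_init sizing computation. */
	static int frmr_pool_size(int max_data_segs, int depth, int max_requests)
	{
		int per_slot = max_data_segs / depth;

		if (per_slot < 1)
			per_slot = 1;
		per_slot += 2;			/* head + tail */
		return per_slot * max_requests;	/* one set per RPC slot */
	}

	int main(void)
	{
		/* e.g. 64 data segments, depth 32, 32 slots -> (2 + 2) * 32 */
		printf("%d FRMRs\n", frmr_pool_size(64, 32, 32)); /* 128 */
		return 0;
	}
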
@@ -165,6 +292,7 @@ frwr_op_init(struct rpcrdma_xprt *r_xprt) | |||
165 | list_add(&r->mw_list, &buf->rb_mws); | 292 | list_add(&r->mw_list, &buf->rb_mws); |
166 | list_add(&r->mw_all, &buf->rb_all); | 293 | list_add(&r->mw_all, &buf->rb_all); |
167 | r->mw_sendcompletion = frwr_sendcompletion; | 294 | r->mw_sendcompletion = frwr_sendcompletion; |
295 | r->r.frmr.fr_xprt = r_xprt; | ||
168 | } | 296 | } |
169 | 297 | ||
170 | return 0; | 298 | return 0; |
@@ -178,12 +306,12 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
178 | int nsegs, bool writing) | 306 | int nsegs, bool writing) |
179 | { | 307 | { |
180 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 308 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
181 | struct ib_device *device = ia->ri_id->device; | 309 | struct ib_device *device = ia->ri_device; |
182 | enum dma_data_direction direction = rpcrdma_data_dir(writing); | 310 | enum dma_data_direction direction = rpcrdma_data_dir(writing); |
183 | struct rpcrdma_mr_seg *seg1 = seg; | 311 | struct rpcrdma_mr_seg *seg1 = seg; |
184 | struct rpcrdma_mw *mw = seg1->rl_mw; | 312 | struct rpcrdma_mw *mw; |
185 | struct rpcrdma_frmr *frmr = &mw->r.frmr; | 313 | struct rpcrdma_frmr *frmr; |
186 | struct ib_mr *mr = frmr->fr_mr; | 314 | struct ib_mr *mr; |
187 | struct ib_send_wr fastreg_wr, *bad_wr; | 315 | struct ib_send_wr fastreg_wr, *bad_wr; |
188 | u8 key; | 316 | u8 key; |
189 | int len, pageoff; | 317 | int len, pageoff; |
@@ -192,12 +320,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
192 | u64 pa; | 320 | u64 pa; |
193 | int page_no; | 321 | int page_no; |
194 | 322 | ||
323 | mw = seg1->rl_mw; | ||
324 | seg1->rl_mw = NULL; | ||
325 | do { | ||
326 | if (mw) | ||
327 | __frwr_queue_recovery(mw); | ||
328 | mw = rpcrdma_get_mw(r_xprt); | ||
329 | if (!mw) | ||
330 | return -ENOMEM; | ||
331 | } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); | ||
332 | frmr = &mw->r.frmr; | ||
333 | frmr->fr_state = FRMR_IS_VALID; | ||
334 | |||
195 | pageoff = offset_in_page(seg1->mr_offset); | 335 | pageoff = offset_in_page(seg1->mr_offset); |
196 | seg1->mr_offset -= pageoff; /* start of page */ | 336 | seg1->mr_offset -= pageoff; /* start of page */ |
197 | seg1->mr_len += pageoff; | 337 | seg1->mr_len += pageoff; |
198 | len = -pageoff; | 338 | len = -pageoff; |
199 | if (nsegs > ia->ri_max_frmr_depth) | 339 | if (nsegs > ia->ri_max_frmr_depth) |
200 | nsegs = ia->ri_max_frmr_depth; | 340 | nsegs = ia->ri_max_frmr_depth; |
341 | |||
201 | for (page_no = i = 0; i < nsegs;) { | 342 | for (page_no = i = 0; i < nsegs;) { |
202 | rpcrdma_map_one(device, seg, direction); | 343 | rpcrdma_map_one(device, seg, direction); |
203 | pa = seg->mr_dma; | 344 | pa = seg->mr_dma; |
@@ -216,8 +357,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
216 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", | 357 | dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", |
217 | __func__, mw, i, len); | 358 | __func__, mw, i, len); |
218 | 359 | ||
219 | frmr->fr_state = FRMR_IS_VALID; | ||
220 | |||
221 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); | 360 | memset(&fastreg_wr, 0, sizeof(fastreg_wr)); |
222 | fastreg_wr.wr_id = (unsigned long)(void *)mw; | 361 | fastreg_wr.wr_id = (unsigned long)(void *)mw; |
223 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; | 362 | fastreg_wr.opcode = IB_WR_FAST_REG_MR; |
@@ -229,6 +368,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
229 | fastreg_wr.wr.fast_reg.access_flags = writing ? | 368 | fastreg_wr.wr.fast_reg.access_flags = writing ? |
230 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : | 369 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
231 | IB_ACCESS_REMOTE_READ; | 370 | IB_ACCESS_REMOTE_READ; |
371 | mr = frmr->fr_mr; | ||
232 | key = (u8)(mr->rkey & 0x000000FF); | 372 | key = (u8)(mr->rkey & 0x000000FF); |
233 | ib_update_fast_reg_key(mr, ++key); | 373 | ib_update_fast_reg_key(mr, ++key); |
234 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; | 374 | fastreg_wr.wr.fast_reg.rkey = mr->rkey; |
@@ -238,6 +378,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
238 | if (rc) | 378 | if (rc) |
239 | goto out_senderr; | 379 | goto out_senderr; |
240 | 380 | ||
381 | seg1->rl_mw = mw; | ||
241 | seg1->mr_rkey = mr->rkey; | 382 | seg1->mr_rkey = mr->rkey; |
242 | seg1->mr_base = seg1->mr_dma + pageoff; | 383 | seg1->mr_base = seg1->mr_dma + pageoff; |
243 | seg1->mr_nsegs = i; | 384 | seg1->mr_nsegs = i; |
@@ -246,10 +387,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
246 | 387 | ||
247 | out_senderr: | 388 | out_senderr: |
248 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | 389 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); |
249 | ib_update_fast_reg_key(mr, --key); | ||
250 | frmr->fr_state = FRMR_IS_INVALID; | ||
251 | while (i--) | 390 | while (i--) |
252 | rpcrdma_unmap_one(device, --seg); | 391 | rpcrdma_unmap_one(device, --seg); |
392 | __frwr_queue_recovery(mw); | ||
253 | return rc; | 393 | return rc; |
254 | } | 394 | } |
255 | 395 | ||
@@ -261,78 +401,46 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | |||
261 | { | 401 | { |
262 | struct rpcrdma_mr_seg *seg1 = seg; | 402 | struct rpcrdma_mr_seg *seg1 = seg; |
263 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 403 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
404 | struct rpcrdma_mw *mw = seg1->rl_mw; | ||
264 | struct ib_send_wr invalidate_wr, *bad_wr; | 405 | struct ib_send_wr invalidate_wr, *bad_wr; |
265 | int rc, nsegs = seg->mr_nsegs; | 406 | int rc, nsegs = seg->mr_nsegs; |
266 | struct ib_device *device; | ||
267 | 407 | ||
268 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; | 408 | dprintk("RPC: %s: FRMR %p\n", __func__, mw); |
409 | |||
410 | seg1->rl_mw = NULL; | ||
411 | mw->r.frmr.fr_state = FRMR_IS_INVALID; | ||
269 | 412 | ||
270 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | 413 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); |
271 | invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; | 414 | invalidate_wr.wr_id = (unsigned long)(void *)mw; |
272 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | 415 | invalidate_wr.opcode = IB_WR_LOCAL_INV; |
273 | invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; | 416 | invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; |
274 | DECR_CQCOUNT(&r_xprt->rx_ep); | 417 | DECR_CQCOUNT(&r_xprt->rx_ep); |
275 | 418 | ||
276 | read_lock(&ia->ri_qplock); | ||
277 | device = ia->ri_id->device; | ||
278 | while (seg1->mr_nsegs--) | 419 | while (seg1->mr_nsegs--) |
279 | rpcrdma_unmap_one(device, seg++); | 420 | rpcrdma_unmap_one(ia->ri_device, seg++); |
421 | read_lock(&ia->ri_qplock); | ||
280 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | 422 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); |
281 | read_unlock(&ia->ri_qplock); | 423 | read_unlock(&ia->ri_qplock); |
282 | if (rc) | 424 | if (rc) |
283 | goto out_err; | 425 | goto out_err; |
426 | |||
427 | rpcrdma_put_mw(r_xprt, mw); | ||
284 | return nsegs; | 428 | return nsegs; |
285 | 429 | ||
286 | out_err: | 430 | out_err: |
287 | /* Force rpcrdma_buffer_get() to retry */ | ||
288 | seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; | ||
289 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); | 431 | dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); |
432 | __frwr_queue_recovery(mw); | ||
290 | return nsegs; | 433 | return nsegs; |
291 | } | 434 | } |
292 | 435 | ||
293 | /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in | ||
294 | * an unusable state. Find FRMRs in this state and dereg / reg | ||
295 | * each. FRMRs that are VALID and attached to an rpcrdma_req are | ||
296 | * also torn down. | ||
297 | * | ||
298 | * This gives all in-use FRMRs a fresh rkey and leaves them INVALID. | ||
299 | * | ||
300 | * This is invoked only in the transport connect worker in order | ||
301 | * to serialize with rpcrdma_register_frmr_external(). | ||
302 | */ | ||
303 | static void | ||
304 | frwr_op_reset(struct rpcrdma_xprt *r_xprt) | ||
305 | { | ||
306 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; | ||
307 | struct ib_device *device = r_xprt->rx_ia.ri_id->device; | ||
308 | unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth; | ||
309 | struct ib_pd *pd = r_xprt->rx_ia.ri_pd; | ||
310 | struct rpcrdma_mw *r; | ||
311 | int rc; | ||
312 | |||
313 | list_for_each_entry(r, &buf->rb_all, mw_all) { | ||
314 | if (r->r.frmr.fr_state == FRMR_IS_INVALID) | ||
315 | continue; | ||
316 | |||
317 | __frwr_release(r); | ||
318 | rc = __frwr_init(r, pd, device, depth); | ||
319 | if (rc) { | ||
320 | dprintk("RPC: %s: mw %p left %s\n", | ||
321 | __func__, r, | ||
322 | (r->r.frmr.fr_state == FRMR_IS_STALE ? | ||
323 | "stale" : "valid")); | ||
324 | continue; | ||
325 | } | ||
326 | |||
327 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | static void | 436 | static void |
332 | frwr_op_destroy(struct rpcrdma_buffer *buf) | 437 | frwr_op_destroy(struct rpcrdma_buffer *buf) |
333 | { | 438 | { |
334 | struct rpcrdma_mw *r; | 439 | struct rpcrdma_mw *r; |
335 | 440 | ||
441 | /* Ensure stale MWs for "buf" are no longer in flight */ | ||
442 | flush_workqueue(frwr_recovery_wq); | ||
443 | |||
336 | while (!list_empty(&buf->rb_all)) { | 444 | while (!list_empty(&buf->rb_all)) { |
337 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); | 445 | r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); |
338 | list_del(&r->mw_all); | 446 | list_del(&r->mw_all); |
@@ -347,7 +455,6 @@ const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { | |||
347 | .ro_open = frwr_op_open, | 455 | .ro_open = frwr_op_open, |
348 | .ro_maxpages = frwr_op_maxpages, | 456 | .ro_maxpages = frwr_op_maxpages, |
349 | .ro_init = frwr_op_init, | 457 | .ro_init = frwr_op_init, |
350 | .ro_reset = frwr_op_reset, | ||
351 | .ro_destroy = frwr_op_destroy, | 458 | .ro_destroy = frwr_op_destroy, |
352 | .ro_displayname = "frwr", | 459 | .ro_displayname = "frwr", |
353 | }; | 460 | }; |
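
With connect-time reset gone, a registration strategy's method table shrinks by one entry. A hypothetical strategy would now populate the table like this (the example_* names are invented for illustration; ro_map and ro_unmap are assumed from the callers visible elsewhere in this diff):

	static const struct rpcrdma_memreg_ops example_memreg_ops = {
		.ro_map		= example_op_map,	/* register a chunk */
		.ro_unmap	= example_op_unmap,	/* invalidate a chunk */
		.ro_open	= example_op_open,
		.ro_maxpages	= example_op_maxpages,
		.ro_init	= example_op_init,
		.ro_destroy	= example_op_destroy,	/* no .ro_reset anymore */
		.ro_displayname	= "example",
	};
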
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index ba518af16787..41985d07fdb7 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c | |||
@@ -50,8 +50,7 @@ physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, | |||
50 | { | 50 | { |
51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 51 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
52 | 52 | ||
53 | rpcrdma_map_one(ia->ri_id->device, seg, | 53 | rpcrdma_map_one(ia->ri_device, seg, rpcrdma_data_dir(writing)); |
54 | rpcrdma_data_dir(writing)); | ||
55 | seg->mr_rkey = ia->ri_bind_mem->rkey; | 54 | seg->mr_rkey = ia->ri_bind_mem->rkey; |
56 | seg->mr_base = seg->mr_dma; | 55 | seg->mr_base = seg->mr_dma; |
57 | seg->mr_nsegs = 1; | 56 | seg->mr_nsegs = 1; |
@@ -65,19 +64,11 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) | |||
65 | { | 64 | { |
66 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; | 65 | struct rpcrdma_ia *ia = &r_xprt->rx_ia; |
67 | 66 | ||
68 | read_lock(&ia->ri_qplock); | 67 | rpcrdma_unmap_one(ia->ri_device, seg); |
69 | rpcrdma_unmap_one(ia->ri_id->device, seg); | ||
70 | read_unlock(&ia->ri_qplock); | ||
71 | |||
72 | return 1; | 68 | return 1; |
73 | } | 69 | } |
74 | 70 | ||
75 | static void | 71 | static void |
76 | physical_op_reset(struct rpcrdma_xprt *r_xprt) | ||
77 | { | ||
78 | } | ||
79 | |||
80 | static void | ||
81 | physical_op_destroy(struct rpcrdma_buffer *buf) | 72 | physical_op_destroy(struct rpcrdma_buffer *buf) |
82 | { | 73 | { |
83 | } | 74 | } |
@@ -88,7 +79,6 @@ const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { | |||
88 | .ro_open = physical_op_open, | 79 | .ro_open = physical_op_open, |
89 | .ro_maxpages = physical_op_maxpages, | 80 | .ro_maxpages = physical_op_maxpages, |
90 | .ro_init = physical_op_init, | 81 | .ro_init = physical_op_init, |
91 | .ro_reset = physical_op_reset, | ||
92 | .ro_destroy = physical_op_destroy, | 82 | .ro_destroy = physical_op_destroy, |
93 | .ro_displayname = "physical", | 83 | .ro_displayname = "physical", |
94 | }; | 84 | }; |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2c53ea9e1b83..84ea37daef36 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
@@ -284,9 +284,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, | |||
284 | return (unsigned char *)iptr - (unsigned char *)headerp; | 284 | return (unsigned char *)iptr - (unsigned char *)headerp; |
285 | 285 | ||
286 | out: | 286 | out: |
287 | if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) | ||
288 | return n; | ||
289 | |||
290 | for (pos = 0; nchunks--;) | 287 | for (pos = 0; nchunks--;) |
291 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, | 288 | pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt, |
292 | &req->rl_segments[pos]); | 289 | &req->rl_segments[pos]); |
@@ -732,8 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
732 | struct rpcrdma_msg *headerp; | 729 | struct rpcrdma_msg *headerp; |
733 | struct rpcrdma_req *req; | 730 | struct rpcrdma_req *req; |
734 | struct rpc_rqst *rqst; | 731 | struct rpc_rqst *rqst; |
735 | struct rpc_xprt *xprt = rep->rr_xprt; | 732 | struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; |
736 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 733 | struct rpc_xprt *xprt = &r_xprt->rx_xprt; |
737 | __be32 *iptr; | 734 | __be32 *iptr; |
738 | int rdmalen, status; | 735 | int rdmalen, status; |
739 | unsigned long cwnd; | 736 | unsigned long cwnd; |
@@ -770,7 +767,6 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) | |||
770 | rep->rr_len); | 767 | rep->rr_len); |
771 | repost: | 768 | repost: |
772 | r_xprt->rx_stats.bad_reply_count++; | 769 | r_xprt->rx_stats.bad_reply_count++; |
773 | rep->rr_func = rpcrdma_reply_handler; | ||
774 | if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) | 770 | if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) |
775 | rpcrdma_recv_buffer_put(rep); | 771 | rpcrdma_recv_buffer_put(rep); |
776 | 772 | ||
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 436da2caec95..680f888a9ddd 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
@@ -240,6 +240,16 @@ xprt_rdma_connect_worker(struct work_struct *work) | |||
240 | xprt_clear_connecting(xprt); | 240 | xprt_clear_connecting(xprt); |
241 | } | 241 | } |
242 | 242 | ||
243 | static void | ||
244 | xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) | ||
245 | { | ||
246 | struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, | ||
247 | rx_xprt); | ||
248 | |||
249 | pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); | ||
250 | rdma_disconnect(r_xprt->rx_ia.ri_id); | ||
251 | } | ||
252 | |||
243 | /* | 253 | /* |
244 | * xprt_rdma_destroy | 254 | * xprt_rdma_destroy |
245 | * | 255 | * |
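
->inject_disconnect gives the generic RPC layer a transport-specific hook for severing a live connection during fault-injection testing; for RDMA it simply asks the CM to disconnect, which exercises the QP-flush and MR-recovery paths added earlier in this series. How the generic layer decides to fire the hook is not shown in this diff; a hypothetical dispatch helper might look like:

	/* Hypothetical sketch: the trigger policy (when to inject) lives
	 * in the generic layer; only the dispatch shape is shown here.
	 */
	static void xprt_maybe_inject_disconnect(struct rpc_xprt *xprt)
	{
		if (xprt->ops->inject_disconnect)
			xprt->ops->inject_disconnect(xprt);
	}
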
@@ -612,12 +622,6 @@ xprt_rdma_send_request(struct rpc_task *task) | |||
612 | if (req->rl_reply == NULL) /* e.g. reconnection */ | 622 | if (req->rl_reply == NULL) /* e.g. reconnection */ |
613 | rpcrdma_recv_buffer_get(req); | 623 | rpcrdma_recv_buffer_get(req); |
614 | 624 | ||
615 | if (req->rl_reply) { | ||
616 | req->rl_reply->rr_func = rpcrdma_reply_handler; | ||
617 | /* this need only be done once, but... */ | ||
618 | req->rl_reply->rr_xprt = xprt; | ||
619 | } | ||
620 | |||
621 | /* Must suppress retransmit to maintain credits */ | 625 | /* Must suppress retransmit to maintain credits */ |
622 | if (req->rl_connect_cookie == xprt->connect_cookie) | 626 | if (req->rl_connect_cookie == xprt->connect_cookie) |
623 | goto drop_connection; | 627 | goto drop_connection; |
@@ -676,6 +680,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | |||
676 | r_xprt->rx_stats.bad_reply_count); | 680 | r_xprt->rx_stats.bad_reply_count); |
677 | } | 681 | } |
678 | 682 | ||
683 | static int | ||
684 | xprt_rdma_enable_swap(struct rpc_xprt *xprt) | ||
685 | { | ||
686 | return -EINVAL; | ||
687 | } | ||
688 | |||
689 | static void | ||
690 | xprt_rdma_disable_swap(struct rpc_xprt *xprt) | ||
691 | { | ||
692 | } | ||
693 | |||
679 | /* | 694 | /* |
680 | * Plumbing for rpc transport switch and kernel module | 695 | * Plumbing for rpc transport switch and kernel module |
681 | */ | 696 | */ |
@@ -694,7 +709,10 @@ static struct rpc_xprt_ops xprt_rdma_procs = { | |||
694 | .send_request = xprt_rdma_send_request, | 709 | .send_request = xprt_rdma_send_request, |
695 | .close = xprt_rdma_close, | 710 | .close = xprt_rdma_close, |
696 | .destroy = xprt_rdma_destroy, | 711 | .destroy = xprt_rdma_destroy, |
697 | .print_stats = xprt_rdma_print_stats | 712 | .print_stats = xprt_rdma_print_stats, |
713 | .enable_swap = xprt_rdma_enable_swap, | ||
714 | .disable_swap = xprt_rdma_disable_swap, | ||
715 | .inject_disconnect = xprt_rdma_inject_disconnect | ||
698 | }; | 716 | }; |
699 | 717 | ||
700 | static struct xprt_class xprt_rdma = { | 718 | static struct xprt_class xprt_rdma = { |
@@ -720,17 +738,24 @@ void xprt_rdma_cleanup(void) | |||
720 | if (rc) | 738 | if (rc) |
721 | dprintk("RPC: %s: xprt_unregister returned %i\n", | 739 | dprintk("RPC: %s: xprt_unregister returned %i\n", |
722 | __func__, rc); | 740 | __func__, rc); |
741 | |||
742 | frwr_destroy_recovery_wq(); | ||
723 | } | 743 | } |
724 | 744 | ||
725 | int xprt_rdma_init(void) | 745 | int xprt_rdma_init(void) |
726 | { | 746 | { |
727 | int rc; | 747 | int rc; |
728 | 748 | ||
729 | rc = xprt_register_transport(&xprt_rdma); | 749 | rc = frwr_alloc_recovery_wq(); |
730 | |||
731 | if (rc) | 750 | if (rc) |
732 | return rc; | 751 | return rc; |
733 | 752 | ||
753 | rc = xprt_register_transport(&xprt_rdma); | ||
754 | if (rc) { | ||
755 | frwr_destroy_recovery_wq(); | ||
756 | return rc; | ||
757 | } | ||
758 | |||
734 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); | 759 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); |
735 | 760 | ||
736 | dprintk("Defaults:\n"); | 761 | dprintk("Defaults:\n"); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 52df265b472a..891c4ede2c20 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c | |||
@@ -80,7 +80,6 @@ static void | |||
80 | rpcrdma_run_tasklet(unsigned long data) | 80 | rpcrdma_run_tasklet(unsigned long data) |
81 | { | 81 | { |
82 | struct rpcrdma_rep *rep; | 82 | struct rpcrdma_rep *rep; |
83 | void (*func)(struct rpcrdma_rep *); | ||
84 | unsigned long flags; | 83 | unsigned long flags; |
85 | 84 | ||
86 | data = data; | 85 | data = data; |
@@ -89,14 +88,9 @@ rpcrdma_run_tasklet(unsigned long data) | |||
89 | rep = list_entry(rpcrdma_tasklets_g.next, | 88 | rep = list_entry(rpcrdma_tasklets_g.next, |
90 | struct rpcrdma_rep, rr_list); | 89 | struct rpcrdma_rep, rr_list); |
91 | list_del(&rep->rr_list); | 90 | list_del(&rep->rr_list); |
92 | func = rep->rr_func; | ||
93 | rep->rr_func = NULL; | ||
94 | spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); | 91 | spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); |
95 | 92 | ||
96 | if (func) | 93 | rpcrdma_reply_handler(rep); |
97 | func(rep); | ||
98 | else | ||
99 | rpcrdma_recv_buffer_put(rep); | ||
100 | 94 | ||
101 | spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); | 95 | spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); |
102 | } | 96 | } |
@@ -236,7 +230,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) | |||
236 | __func__, rep, wc->byte_len); | 230 | __func__, rep, wc->byte_len); |
237 | 231 | ||
238 | rep->rr_len = wc->byte_len; | 232 | rep->rr_len = wc->byte_len; |
239 | ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, | 233 | ib_dma_sync_single_for_cpu(rep->rr_device, |
240 | rdmab_addr(rep->rr_rdmabuf), | 234 | rdmab_addr(rep->rr_rdmabuf), |
241 | rep->rr_len, DMA_FROM_DEVICE); | 235 | rep->rr_len, DMA_FROM_DEVICE); |
242 | prefetch(rdmab_to_msg(rep->rr_rdmabuf)); | 236 | prefetch(rdmab_to_msg(rep->rr_rdmabuf)); |
@@ -407,7 +401,7 @@ connected: | |||
407 | 401 | ||
408 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", | 402 | pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n", |
409 | sap, rpc_get_port(sap), | 403 | sap, rpc_get_port(sap), |
410 | ia->ri_id->device->name, | 404 | ia->ri_device->name, |
411 | ia->ri_ops->ro_displayname, | 405 | ia->ri_ops->ro_displayname, |
412 | xprt->rx_buf.rb_max_requests, | 406 | xprt->rx_buf.rb_max_requests, |
413 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); | 407 | ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); |
@@ -508,8 +502,9 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
508 | rc = PTR_ERR(ia->ri_id); | 502 | rc = PTR_ERR(ia->ri_id); |
509 | goto out1; | 503 | goto out1; |
510 | } | 504 | } |
505 | ia->ri_device = ia->ri_id->device; | ||
511 | 506 | ||
512 | ia->ri_pd = ib_alloc_pd(ia->ri_id->device); | 507 | ia->ri_pd = ib_alloc_pd(ia->ri_device); |
513 | if (IS_ERR(ia->ri_pd)) { | 508 | if (IS_ERR(ia->ri_pd)) { |
514 | rc = PTR_ERR(ia->ri_pd); | 509 | rc = PTR_ERR(ia->ri_pd); |
515 | dprintk("RPC: %s: ib_alloc_pd() failed %i\n", | 510 | dprintk("RPC: %s: ib_alloc_pd() failed %i\n", |
@@ -517,7 +512,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
517 | goto out2; | 512 | goto out2; |
518 | } | 513 | } |
519 | 514 | ||
520 | rc = ib_query_device(ia->ri_id->device, devattr); | 515 | rc = ib_query_device(ia->ri_device, devattr); |
521 | if (rc) { | 516 | if (rc) { |
522 | dprintk("RPC: %s: ib_query_device failed %d\n", | 517 | dprintk("RPC: %s: ib_query_device failed %d\n", |
523 | __func__, rc); | 518 | __func__, rc); |
@@ -526,7 +521,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
526 | 521 | ||
527 | if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { | 522 | if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { |
528 | ia->ri_have_dma_lkey = 1; | 523 | ia->ri_have_dma_lkey = 1; |
529 | ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; | 524 | ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; |
530 | } | 525 | } |
531 | 526 | ||
532 | if (memreg == RPCRDMA_FRMR) { | 527 | if (memreg == RPCRDMA_FRMR) { |
@@ -541,7 +536,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
541 | } | 536 | } |
542 | } | 537 | } |
543 | if (memreg == RPCRDMA_MTHCAFMR) { | 538 | if (memreg == RPCRDMA_MTHCAFMR) { |
544 | if (!ia->ri_id->device->alloc_fmr) { | 539 | if (!ia->ri_device->alloc_fmr) { |
545 | dprintk("RPC: %s: MTHCAFMR registration " | 540 | dprintk("RPC: %s: MTHCAFMR registration " |
546 | "not supported by HCA\n", __func__); | 541 | "not supported by HCA\n", __func__); |
547 | memreg = RPCRDMA_ALLPHYSICAL; | 542 | memreg = RPCRDMA_ALLPHYSICAL; |
@@ -590,9 +585,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
590 | dprintk("RPC: %s: memory registration strategy is '%s'\n", | 585 | dprintk("RPC: %s: memory registration strategy is '%s'\n", |
591 | __func__, ia->ri_ops->ro_displayname); | 586 | __func__, ia->ri_ops->ro_displayname); |
592 | 587 | ||
593 | /* Else will do memory reg/dereg for each chunk */ | ||
594 | ia->ri_memreg_strategy = memreg; | ||
595 | |||
596 | rwlock_init(&ia->ri_qplock); | 588 | rwlock_init(&ia->ri_qplock); |
597 | return 0; | 589 | return 0; |
598 | 590 | ||
@@ -622,17 +614,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) | |||
622 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", | 614 | dprintk("RPC: %s: ib_dereg_mr returned %i\n", |
623 | __func__, rc); | 615 | __func__, rc); |
624 | } | 616 | } |
617 | |||
625 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { | 618 | if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { |
626 | if (ia->ri_id->qp) | 619 | if (ia->ri_id->qp) |
627 | rdma_destroy_qp(ia->ri_id); | 620 | rdma_destroy_qp(ia->ri_id); |
628 | rdma_destroy_id(ia->ri_id); | 621 | rdma_destroy_id(ia->ri_id); |
629 | ia->ri_id = NULL; | 622 | ia->ri_id = NULL; |
630 | } | 623 | } |
631 | if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { | 624 | |
632 | rc = ib_dealloc_pd(ia->ri_pd); | 625 | /* If the pd is still busy, xprtrdma missed freeing a resource */ |
633 | dprintk("RPC: %s: ib_dealloc_pd returned %i\n", | 626 | if (ia->ri_pd && !IS_ERR(ia->ri_pd)) |
634 | __func__, rc); | 627 | WARN_ON(ib_dealloc_pd(ia->ri_pd)); |
635 | } | ||
636 | } | 628 | } |
637 | 629 | ||
638 | /* | 630 | /* |
@@ -693,8 +685,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
693 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); | 685 | INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); |
694 | 686 | ||
695 | cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; | 687 | cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; |
696 | sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall, | 688 | sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, |
697 | rpcrdma_cq_async_error_upcall, ep, &cq_attr); | 689 | rpcrdma_cq_async_error_upcall, ep, &cq_attr); |
698 | if (IS_ERR(sendcq)) { | 690 | if (IS_ERR(sendcq)) { |
699 | rc = PTR_ERR(sendcq); | 691 | rc = PTR_ERR(sendcq); |
700 | dprintk("RPC: %s: failed to create send CQ: %i\n", | 692 | dprintk("RPC: %s: failed to create send CQ: %i\n", |
@@ -710,8 +702,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
710 | } | 702 | } |
711 | 703 | ||
712 | cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; | 704 | cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; |
713 | recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall, | 705 | recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, |
714 | rpcrdma_cq_async_error_upcall, ep, &cq_attr); | 706 | rpcrdma_cq_async_error_upcall, ep, &cq_attr); |
715 | if (IS_ERR(recvcq)) { | 707 | if (IS_ERR(recvcq)) { |
716 | rc = PTR_ERR(recvcq); | 708 | rc = PTR_ERR(recvcq); |
717 | dprintk("RPC: %s: failed to create recv CQ: %i\n", | 709 | dprintk("RPC: %s: failed to create recv CQ: %i\n", |
@@ -817,8 +809,6 @@ retry: | |||
817 | rpcrdma_flush_cqs(ep); | 809 | rpcrdma_flush_cqs(ep); |
818 | 810 | ||
819 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); | 811 | xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); |
820 | ia->ri_ops->ro_reset(xprt); | ||
821 | |||
822 | id = rpcrdma_create_id(xprt, ia, | 812 | id = rpcrdma_create_id(xprt, ia, |
823 | (struct sockaddr *)&xprt->rx_data.addr); | 813 | (struct sockaddr *)&xprt->rx_data.addr); |
824 | if (IS_ERR(id)) { | 814 | if (IS_ERR(id)) { |
@@ -832,7 +822,7 @@ retry: | |||
832 | * More stuff I haven't thought of! | 822 | * More stuff I haven't thought of! |
833 | * Rrrgh! | 823 | * Rrrgh! |
834 | */ | 824 | */ |
835 | if (ia->ri_id->device != id->device) { | 825 | if (ia->ri_device != id->device) { |
836 | printk("RPC: %s: can't reconnect on " | 826 | printk("RPC: %s: can't reconnect on " |
837 | "different device!\n", __func__); | 827 | "different device!\n", __func__); |
838 | rdma_destroy_id(id); | 828 | rdma_destroy_id(id); |
@@ -974,7 +964,8 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) | |||
974 | goto out_free; | 964 | goto out_free; |
975 | } | 965 | } |
976 | 966 | ||
977 | rep->rr_buffer = &r_xprt->rx_buf; | 967 | rep->rr_device = ia->ri_device; |
968 | rep->rr_rxprt = r_xprt; | ||
978 | return rep; | 969 | return rep; |
979 | 970 | ||
980 | out_free: | 971 | out_free: |
@@ -1098,31 +1089,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) | |||
1098 | kfree(buf->rb_pool); | 1089 | kfree(buf->rb_pool); |
1099 | } | 1090 | } |
1100 | 1091 | ||
1101 | /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving | 1092 | struct rpcrdma_mw * |
1102 | * some req segments uninitialized. | 1093 | rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) |
1103 | */ | ||
1104 | static void | ||
1105 | rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf) | ||
1106 | { | 1094 | { |
1107 | if (*mw) { | 1095 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1108 | list_add_tail(&(*mw)->mw_list, &buf->rb_mws); | 1096 | struct rpcrdma_mw *mw = NULL; |
1109 | *mw = NULL; | 1097 | |
1098 | spin_lock(&buf->rb_mwlock); | ||
1099 | if (!list_empty(&buf->rb_mws)) { | ||
1100 | mw = list_first_entry(&buf->rb_mws, | ||
1101 | struct rpcrdma_mw, mw_list); | ||
1102 | list_del_init(&mw->mw_list); | ||
1110 | } | 1103 | } |
1104 | spin_unlock(&buf->rb_mwlock); | ||
1105 | |||
1106 | if (!mw) | ||
1107 | pr_err("RPC: %s: no MWs available\n", __func__); | ||
1108 | return mw; | ||
1111 | } | 1109 | } |
1112 | 1110 | ||
1113 | /* Cycle mw's back in reverse order, and "spin" them. | 1111 | void |
1114 | * This delays and scrambles reuse as much as possible. | 1112 | rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) |
1115 | */ | ||
1116 | static void | ||
1117 | rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | ||
1118 | { | 1113 | { |
1119 | struct rpcrdma_mr_seg *seg = req->rl_segments; | 1114 | struct rpcrdma_buffer *buf = &r_xprt->rx_buf; |
1120 | struct rpcrdma_mr_seg *seg1 = seg; | ||
1121 | int i; | ||
1122 | 1115 | ||
1123 | for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) | 1116 | spin_lock(&buf->rb_mwlock); |
1124 | rpcrdma_buffer_put_mr(&seg->rl_mw, buf); | 1117 | list_add_tail(&mw->mw_list, &buf->rb_mws); |
1125 | rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); | 1118 | spin_unlock(&buf->rb_mwlock); |
1126 | } | 1119 | } |
1127 | 1120 | ||
1128 | static void | 1121 | static void |
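
rpcrdma_get_mw/rpcrdma_put_mw replace the per-request MR stashing with a single spinlock-protected free list. The shape is a plain pool under a spinlock; here is a runnable userspace model of the same pattern (illustrative names, a pthread spinlock standing in for the kernel's spin_lock; the kernel's list_add_tail gives FIFO reuse, a LIFO is used here for brevity):

	#include <pthread.h>
	#include <stdio.h>

	struct mw {
		struct mw *next;
		int id;
	};

	static struct mw *pool_head;
	static pthread_spinlock_t pool_lock;

	static struct mw *get_mw(void)
	{
		struct mw *mw;

		pthread_spin_lock(&pool_lock);
		mw = pool_head;
		if (mw)
			pool_head = mw->next;
		pthread_spin_unlock(&pool_lock);
		return mw;	/* NULL means the pool is exhausted */
	}

	static void put_mw(struct mw *mw)
	{
		pthread_spin_lock(&pool_lock);
		mw->next = pool_head;
		pool_head = mw;
		pthread_spin_unlock(&pool_lock);
	}

	int main(void)
	{
		struct mw mws[4], *mw;
		int i;

		pthread_spin_init(&pool_lock, PTHREAD_PROCESS_PRIVATE);
		for (i = 0; i < 4; i++) {
			mws[i].id = i;
			put_mw(&mws[i]);
		}
		while ((mw = get_mw()) != NULL)
			printf("got mw %d\n", mw->id);
		return 0;
	}
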
@@ -1132,115 +1125,10 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1132 | req->rl_niovs = 0; | 1125 | req->rl_niovs = 0; |
1133 | if (req->rl_reply) { | 1126 | if (req->rl_reply) { |
1134 | buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; | 1127 | buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; |
1135 | req->rl_reply->rr_func = NULL; | ||
1136 | req->rl_reply = NULL; | 1128 | req->rl_reply = NULL; |
1137 | } | 1129 | } |
1138 | } | 1130 | } |
1139 | 1131 | ||
1140 | /* rpcrdma_unmap_one() was already done during deregistration. | ||
1141 | * Redo only the ib_post_send(). | ||
1142 | */ | ||
1143 | static void | ||
1144 | rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia) | ||
1145 | { | ||
1146 | struct rpcrdma_xprt *r_xprt = | ||
1147 | container_of(ia, struct rpcrdma_xprt, rx_ia); | ||
1148 | struct ib_send_wr invalidate_wr, *bad_wr; | ||
1149 | int rc; | ||
1150 | |||
1151 | dprintk("RPC: %s: FRMR %p is stale\n", __func__, r); | ||
1152 | |||
1153 | /* When this FRMR is re-inserted into rb_mws, it is no longer stale */ | ||
1154 | r->r.frmr.fr_state = FRMR_IS_INVALID; | ||
1155 | |||
1156 | memset(&invalidate_wr, 0, sizeof(invalidate_wr)); | ||
1157 | invalidate_wr.wr_id = (unsigned long)(void *)r; | ||
1158 | invalidate_wr.opcode = IB_WR_LOCAL_INV; | ||
1159 | invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey; | ||
1160 | DECR_CQCOUNT(&r_xprt->rx_ep); | ||
1161 | |||
1162 | dprintk("RPC: %s: frmr %p invalidating rkey %08x\n", | ||
1163 | __func__, r, r->r.frmr.fr_mr->rkey); | ||
1164 | |||
1165 | read_lock(&ia->ri_qplock); | ||
1166 | rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); | ||
1167 | read_unlock(&ia->ri_qplock); | ||
1168 | if (rc) { | ||
1169 | /* Force rpcrdma_buffer_get() to retry */ | ||
1170 | r->r.frmr.fr_state = FRMR_IS_STALE; | ||
1171 | dprintk("RPC: %s: ib_post_send failed, %i\n", | ||
1172 | __func__, rc); | ||
1173 | } | ||
1174 | } | ||
1175 | |||
1176 | static void | ||
1177 | rpcrdma_retry_flushed_linv(struct list_head *stale, | ||
1178 | struct rpcrdma_buffer *buf) | ||
1179 | { | ||
1180 | struct rpcrdma_ia *ia = rdmab_to_ia(buf); | ||
1181 | struct list_head *pos; | ||
1182 | struct rpcrdma_mw *r; | ||
1183 | unsigned long flags; | ||
1184 | |||
1185 | list_for_each(pos, stale) { | ||
1186 | r = list_entry(pos, struct rpcrdma_mw, mw_list); | ||
1187 | rpcrdma_retry_local_inv(r, ia); | ||
1188 | } | ||
1189 | |||
1190 | spin_lock_irqsave(&buf->rb_lock, flags); | ||
1191 | list_splice_tail(stale, &buf->rb_mws); | ||
1192 | spin_unlock_irqrestore(&buf->rb_lock, flags); | ||
1193 | } | ||
1194 | |||
1195 | static struct rpcrdma_req * | ||
1196 | rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, | ||
1197 | struct list_head *stale) | ||
1198 | { | ||
1199 | struct rpcrdma_mw *r; | ||
1200 | int i; | ||
1201 | |||
1202 | i = RPCRDMA_MAX_SEGS - 1; | ||
1203 | while (!list_empty(&buf->rb_mws)) { | ||
1204 | r = list_entry(buf->rb_mws.next, | ||
1205 | struct rpcrdma_mw, mw_list); | ||
1206 | list_del(&r->mw_list); | ||
1207 | if (r->r.frmr.fr_state == FRMR_IS_STALE) { | ||
1208 | list_add(&r->mw_list, stale); | ||
1209 | continue; | ||
1210 | } | ||
1211 | req->rl_segments[i].rl_mw = r; | ||
1212 | if (unlikely(i-- == 0)) | ||
1213 | return req; /* Success */ | ||
1214 | } | ||
1215 | |||
1216 | /* Not enough entries on rb_mws for this req */ | ||
1217 | rpcrdma_buffer_put_sendbuf(req, buf); | ||
1218 | rpcrdma_buffer_put_mrs(req, buf); | ||
1219 | return NULL; | ||
1220 | } | ||
1221 | |||
1222 | static struct rpcrdma_req * | ||
1223 | rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | ||
1224 | { | ||
1225 | struct rpcrdma_mw *r; | ||
1226 | int i; | ||
1227 | |||
1228 | i = RPCRDMA_MAX_SEGS - 1; | ||
1229 | while (!list_empty(&buf->rb_mws)) { | ||
1230 | r = list_entry(buf->rb_mws.next, | ||
1231 | struct rpcrdma_mw, mw_list); | ||
1232 | list_del(&r->mw_list); | ||
1233 | req->rl_segments[i].rl_mw = r; | ||
1234 | if (unlikely(i-- == 0)) | ||
1235 | return req; /* Success */ | ||
1236 | } | ||
1237 | |||
1238 | /* Not enough entries on rb_mws for this req */ | ||
1239 | rpcrdma_buffer_put_sendbuf(req, buf); | ||
1240 | rpcrdma_buffer_put_mrs(req, buf); | ||
1241 | return NULL; | ||
1242 | } | ||
1243 | |||
1244 | /* | 1132 | /* |
1245 | * Get a set of request/reply buffers. | 1133 | * Get a set of request/reply buffers. |
1246 | * | 1134 | * |
@@ -1253,12 +1141,11 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) | |||
1253 | struct rpcrdma_req * | 1141 | struct rpcrdma_req * |
1254 | rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | 1142 | rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) |
1255 | { | 1143 | { |
1256 | struct rpcrdma_ia *ia = rdmab_to_ia(buffers); | ||
1257 | struct list_head stale; | ||
1258 | struct rpcrdma_req *req; | 1144 | struct rpcrdma_req *req; |
1259 | unsigned long flags; | 1145 | unsigned long flags; |
1260 | 1146 | ||
1261 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1147 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1148 | |||
1262 | if (buffers->rb_send_index == buffers->rb_max_requests) { | 1149 | if (buffers->rb_send_index == buffers->rb_max_requests) { |
1263 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1150 | spin_unlock_irqrestore(&buffers->rb_lock, flags); |
1264 | dprintk("RPC: %s: out of request buffers\n", __func__); | 1151 | dprintk("RPC: %s: out of request buffers\n", __func__); |
@@ -1277,20 +1164,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) | |||
1277 | } | 1164 | } |
1278 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; | 1165 | buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; |
1279 | 1166 | ||
1280 | INIT_LIST_HEAD(&stale); | ||
1281 | switch (ia->ri_memreg_strategy) { | ||
1282 | case RPCRDMA_FRMR: | ||
1283 | req = rpcrdma_buffer_get_frmrs(req, buffers, &stale); | ||
1284 | break; | ||
1285 | case RPCRDMA_MTHCAFMR: | ||
1286 | req = rpcrdma_buffer_get_fmrs(req, buffers); | ||
1287 | break; | ||
1288 | default: | ||
1289 | break; | ||
1290 | } | ||
1291 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1167 | spin_unlock_irqrestore(&buffers->rb_lock, flags); |
1292 | if (!list_empty(&stale)) | ||
1293 | rpcrdma_retry_flushed_linv(&stale, buffers); | ||
1294 | return req; | 1168 | return req; |
1295 | } | 1169 | } |
1296 | 1170 | ||
@@ -1302,19 +1176,10 @@ void | |||
1302 | rpcrdma_buffer_put(struct rpcrdma_req *req) | 1176 | rpcrdma_buffer_put(struct rpcrdma_req *req) |
1303 | { | 1177 | { |
1304 | struct rpcrdma_buffer *buffers = req->rl_buffer; | 1178 | struct rpcrdma_buffer *buffers = req->rl_buffer; |
1305 | struct rpcrdma_ia *ia = rdmab_to_ia(buffers); | ||
1306 | unsigned long flags; | 1179 | unsigned long flags; |
1307 | 1180 | ||
1308 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1181 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1309 | rpcrdma_buffer_put_sendbuf(req, buffers); | 1182 | rpcrdma_buffer_put_sendbuf(req, buffers); |
1310 | switch (ia->ri_memreg_strategy) { | ||
1311 | case RPCRDMA_FRMR: | ||
1312 | case RPCRDMA_MTHCAFMR: | ||
1313 | rpcrdma_buffer_put_mrs(req, buffers); | ||
1314 | break; | ||
1315 | default: | ||
1316 | break; | ||
1317 | } | ||
1318 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1183 | spin_unlock_irqrestore(&buffers->rb_lock, flags); |
1319 | } | 1184 | } |
1320 | 1185 | ||
@@ -1344,10 +1209,9 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) | |||
1344 | void | 1209 | void |
1345 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) | 1210 | rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) |
1346 | { | 1211 | { |
1347 | struct rpcrdma_buffer *buffers = rep->rr_buffer; | 1212 | struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; |
1348 | unsigned long flags; | 1213 | unsigned long flags; |
1349 | 1214 | ||
1350 | rep->rr_func = NULL; | ||
1351 | spin_lock_irqsave(&buffers->rb_lock, flags); | 1215 | spin_lock_irqsave(&buffers->rb_lock, flags); |
1352 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; | 1216 | buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; |
1353 | spin_unlock_irqrestore(&buffers->rb_lock, flags); | 1217 | spin_unlock_irqrestore(&buffers->rb_lock, flags); |
@@ -1376,9 +1240,9 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, | |||
1376 | /* | 1240 | /* |
1377 | * All memory passed here was kmalloc'ed, therefore phys-contiguous. | 1241 | * All memory passed here was kmalloc'ed, therefore phys-contiguous. |
1378 | */ | 1242 | */ |
1379 | iov->addr = ib_dma_map_single(ia->ri_id->device, | 1243 | iov->addr = ib_dma_map_single(ia->ri_device, |
1380 | va, len, DMA_BIDIRECTIONAL); | 1244 | va, len, DMA_BIDIRECTIONAL); |
1381 | if (ib_dma_mapping_error(ia->ri_id->device, iov->addr)) | 1245 | if (ib_dma_mapping_error(ia->ri_device, iov->addr)) |
1382 | return -ENOMEM; | 1246 | return -ENOMEM; |
1383 | 1247 | ||
1384 | iov->length = len; | 1248 | iov->length = len; |
@@ -1422,8 +1286,8 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, | |||
1422 | { | 1286 | { |
1423 | int rc; | 1287 | int rc; |
1424 | 1288 | ||
1425 | ib_dma_unmap_single(ia->ri_id->device, | 1289 | ib_dma_unmap_single(ia->ri_device, |
1426 | iov->addr, iov->length, DMA_BIDIRECTIONAL); | 1290 | iov->addr, iov->length, DMA_BIDIRECTIONAL); |
1427 | 1291 | ||
1428 | if (NULL == mr) | 1292 | if (NULL == mr) |
1429 | return 0; | 1293 | return 0; |
@@ -1516,15 +1380,18 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, | |||
1516 | send_wr.num_sge = req->rl_niovs; | 1380 | send_wr.num_sge = req->rl_niovs; |
1517 | send_wr.opcode = IB_WR_SEND; | 1381 | send_wr.opcode = IB_WR_SEND; |
1518 | if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ | 1382 | if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ |
1519 | ib_dma_sync_single_for_device(ia->ri_id->device, | 1383 | ib_dma_sync_single_for_device(ia->ri_device, |
1520 | req->rl_send_iov[3].addr, req->rl_send_iov[3].length, | 1384 | req->rl_send_iov[3].addr, |
1521 | DMA_TO_DEVICE); | 1385 | req->rl_send_iov[3].length, |
1522 | ib_dma_sync_single_for_device(ia->ri_id->device, | 1386 | DMA_TO_DEVICE); |
1523 | req->rl_send_iov[1].addr, req->rl_send_iov[1].length, | 1387 | ib_dma_sync_single_for_device(ia->ri_device, |
1524 | DMA_TO_DEVICE); | 1388 | req->rl_send_iov[1].addr, |
1525 | ib_dma_sync_single_for_device(ia->ri_id->device, | 1389 | req->rl_send_iov[1].length, |
1526 | req->rl_send_iov[0].addr, req->rl_send_iov[0].length, | 1390 | DMA_TO_DEVICE); |
1527 | DMA_TO_DEVICE); | 1391 | ib_dma_sync_single_for_device(ia->ri_device, |
1392 | req->rl_send_iov[0].addr, | ||
1393 | req->rl_send_iov[0].length, | ||
1394 | DMA_TO_DEVICE); | ||
1528 | 1395 | ||
1529 | if (DECR_CQCOUNT(ep) > 0) | 1396 | if (DECR_CQCOUNT(ep) > 0) |
1530 | send_wr.send_flags = 0; | 1397 | send_wr.send_flags = 0; |
@@ -1557,7 +1424,7 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, | |||
1557 | recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; | 1424 | recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; |
1558 | recv_wr.num_sge = 1; | 1425 | recv_wr.num_sge = 1; |
1559 | 1426 | ||
1560 | ib_dma_sync_single_for_cpu(ia->ri_id->device, | 1427 | ib_dma_sync_single_for_cpu(ia->ri_device, |
1561 | rdmab_addr(rep->rr_rdmabuf), | 1428 | rdmab_addr(rep->rr_rdmabuf), |
1562 | rdmab_length(rep->rr_rdmabuf), | 1429 | rdmab_length(rep->rr_rdmabuf), |
1563 | DMA_BIDIRECTIONAL); | 1430 | DMA_BIDIRECTIONAL); |
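
The bulk of this file's churn replaces ia->ri_id->device with the new cached ia->ri_device. The cm_id in ri_id can be torn down and replaced during reconnect under ri_qplock, so DMA-mapping paths previously had to take that lock just to reach the ib_device (see the read_lock pairs removed from frwr_op_unmap and physical_op_unmap above). Caching the pointer at IA open, together with the reconnect check at @@ -832 that refuses a different device, makes lock-free reads safe. The invariant, as a sketch rather than literal patch code:

	ia->ri_device = ia->ri_id->device;	/* set once at rpcrdma_ia_open */

	/* reconnect path: a replacement cm_id must resolve to the same
	 * device, so readers of ia->ri_device never see a stale pointer
	 */
	if (ia->ri_device != id->device)
		/* refuse to reconnect */;
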
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 58163b88738c..f49dd8b38122 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h | |||
@@ -62,6 +62,7 @@ | |||
62 | struct rpcrdma_ia { | 62 | struct rpcrdma_ia { |
63 | const struct rpcrdma_memreg_ops *ri_ops; | 63 | const struct rpcrdma_memreg_ops *ri_ops; |
64 | rwlock_t ri_qplock; | 64 | rwlock_t ri_qplock; |
65 | struct ib_device *ri_device; | ||
65 | struct rdma_cm_id *ri_id; | 66 | struct rdma_cm_id *ri_id; |
66 | struct ib_pd *ri_pd; | 67 | struct ib_pd *ri_pd; |
67 | struct ib_mr *ri_bind_mem; | 68 | struct ib_mr *ri_bind_mem; |
@@ -69,7 +70,6 @@ struct rpcrdma_ia { | |||
69 | int ri_have_dma_lkey; | 70 | int ri_have_dma_lkey; |
70 | struct completion ri_done; | 71 | struct completion ri_done; |
71 | int ri_async_rc; | 72 | int ri_async_rc; |
72 | enum rpcrdma_memreg ri_memreg_strategy; | ||
73 | unsigned int ri_max_frmr_depth; | 73 | unsigned int ri_max_frmr_depth; |
74 | struct ib_device_attr ri_devattr; | 74 | struct ib_device_attr ri_devattr; |
75 | struct ib_qp_attr ri_qp_attr; | 75 | struct ib_qp_attr ri_qp_attr; |
@@ -173,9 +173,8 @@ struct rpcrdma_buffer; | |||
173 | 173 | ||
174 | struct rpcrdma_rep { | 174 | struct rpcrdma_rep { |
175 | unsigned int rr_len; | 175 | unsigned int rr_len; |
176 | struct rpcrdma_buffer *rr_buffer; | 176 | struct ib_device *rr_device; |
177 | struct rpc_xprt *rr_xprt; | 177 | struct rpcrdma_xprt *rr_rxprt; |
178 | void (*rr_func)(struct rpcrdma_rep *); | ||
179 | struct list_head rr_list; | 178 | struct list_head rr_list; |
180 | struct rpcrdma_regbuf *rr_rdmabuf; | 179 | struct rpcrdma_regbuf *rr_rdmabuf; |
181 | }; | 180 | }; |
@@ -203,11 +202,18 @@ struct rpcrdma_frmr { | |||
203 | struct ib_fast_reg_page_list *fr_pgl; | 202 | struct ib_fast_reg_page_list *fr_pgl; |
204 | struct ib_mr *fr_mr; | 203 | struct ib_mr *fr_mr; |
205 | enum rpcrdma_frmr_state fr_state; | 204 | enum rpcrdma_frmr_state fr_state; |
205 | struct work_struct fr_work; | ||
206 | struct rpcrdma_xprt *fr_xprt; | ||
207 | }; | ||
208 | |||
209 | struct rpcrdma_fmr { | ||
210 | struct ib_fmr *fmr; | ||
211 | u64 *physaddrs; | ||
206 | }; | 212 | }; |
207 | 213 | ||
208 | struct rpcrdma_mw { | 214 | struct rpcrdma_mw { |
209 | union { | 215 | union { |
210 | struct ib_fmr *fmr; | 216 | struct rpcrdma_fmr fmr; |
211 | struct rpcrdma_frmr frmr; | 217 | struct rpcrdma_frmr frmr; |
212 | } r; | 218 | } r; |
213 | void (*mw_sendcompletion)(struct ib_wc *); | 219 | void (*mw_sendcompletion)(struct ib_wc *); |
@@ -281,15 +287,17 @@ rpcr_to_rdmar(struct rpc_rqst *rqst) | |||
281 | * One of these is associated with a transport instance | 287 | * One of these is associated with a transport instance |
282 | */ | 288 | */ |
283 | struct rpcrdma_buffer { | 289 | struct rpcrdma_buffer { |
284 | spinlock_t rb_lock; /* protects indexes */ | 290 | spinlock_t rb_mwlock; /* protect rb_mws list */ |
285 | u32 rb_max_requests;/* client max requests */ | 291 | struct list_head rb_mws; |
286 | struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ | 292 | struct list_head rb_all; |
287 | struct list_head rb_all; | 293 | char *rb_pool; |
288 | int rb_send_index; | 294 | |
295 | spinlock_t rb_lock; /* protect buf arrays */ | ||
296 | u32 rb_max_requests; | ||
297 | int rb_send_index; | ||
298 | int rb_recv_index; | ||
289 | struct rpcrdma_req **rb_send_bufs; | 299 | struct rpcrdma_req **rb_send_bufs; |
290 | int rb_recv_index; | ||
291 | struct rpcrdma_rep **rb_recv_bufs; | 300 | struct rpcrdma_rep **rb_recv_bufs; |
292 | char *rb_pool; | ||
293 | }; | 301 | }; |
294 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | 302 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) |
295 | 303 | ||
@@ -350,7 +358,6 @@ struct rpcrdma_memreg_ops { | |||
350 | struct rpcrdma_create_data_internal *); | 358 | struct rpcrdma_create_data_internal *); |
351 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); | 359 | size_t (*ro_maxpages)(struct rpcrdma_xprt *); |
352 | int (*ro_init)(struct rpcrdma_xprt *); | 360 | int (*ro_init)(struct rpcrdma_xprt *); |
353 | void (*ro_reset)(struct rpcrdma_xprt *); | ||
354 | void (*ro_destroy)(struct rpcrdma_buffer *); | 361 | void (*ro_destroy)(struct rpcrdma_buffer *); |
355 | const char *ro_displayname; | 362 | const char *ro_displayname; |
356 | }; | 363 | }; |
@@ -413,6 +420,8 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, | |||
413 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); | 420 | int rpcrdma_buffer_create(struct rpcrdma_xprt *); |
414 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); | 421 | void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); |
415 | 422 | ||
423 | struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); | ||
424 | void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); | ||
416 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); | 425 | struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); |
417 | void rpcrdma_buffer_put(struct rpcrdma_req *); | 426 | void rpcrdma_buffer_put(struct rpcrdma_req *); |
418 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); | 427 | void rpcrdma_recv_buffer_get(struct rpcrdma_req *); |
@@ -425,6 +434,9 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, | |||
425 | 434 | ||
426 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); | 435 | unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); |
427 | 436 | ||
437 | int frwr_alloc_recovery_wq(void); | ||
438 | void frwr_destroy_recovery_wq(void); | ||
439 | |||
428 | /* | 440 | /* |
429 | * Wrappers for chunk registration, shared by read/write chunk code. | 441 | * Wrappers for chunk registration, shared by read/write chunk code. |
430 | */ | 442 | */ |
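
rpcrdma_buffer now splits its locking: rb_mwlock covers only the MW free list, so rpcrdma_get_mw/rpcrdma_put_mw and the recovery worker never contend with the request/reply buffer paths, while rb_lock keeps protecting the send/recv index arrays. A sketch of the resulting discipline (fragments, assuming no path manipulates one list under the other's lock):

	/* MW pool: short, non-IRQ critical sections */
	spin_lock(&buf->rb_mwlock);
	/* ... rb_mws list manipulation only ... */
	spin_unlock(&buf->rb_mwlock);

	/* request/reply buffer arrays: still IRQ-safe, as before */
	spin_lock_irqsave(&buf->rb_lock, flags);
	/* ... rb_send_bufs / rb_recv_bufs indexes only ... */
	spin_unlock_irqrestore(&buf->rb_lock, flags);
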
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b0517287075b..e193c2b5476b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
@@ -623,24 +623,6 @@ process_status: | |||
623 | } | 623 | } |
624 | 624 | ||
625 | /** | 625 | /** |
626 | * xs_tcp_shutdown - gracefully shut down a TCP socket | ||
627 | * @xprt: transport | ||
628 | * | ||
629 | * Initiates a graceful shutdown of the TCP socket by calling the | ||
630 | * equivalent of shutdown(SHUT_RDWR); | ||
631 | */ | ||
632 | static void xs_tcp_shutdown(struct rpc_xprt *xprt) | ||
633 | { | ||
634 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | ||
635 | struct socket *sock = transport->sock; | ||
636 | |||
637 | if (sock != NULL) { | ||
638 | kernel_sock_shutdown(sock, SHUT_RDWR); | ||
639 | trace_rpc_socket_shutdown(xprt, sock); | ||
640 | } | ||
641 | } | ||
642 | |||
643 | /** | ||
644 | * xs_tcp_send_request - write an RPC request to a TCP socket | 626 | * xs_tcp_send_request - write an RPC request to a TCP socket |
645 | * @task: address of RPC task that manages the state of an RPC request | 627 | * @task: address of RPC task that manages the state of an RPC request |
646 | * | 628 | * |
@@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) | |||
786 | xs_sock_reset_connection_flags(xprt); | 768 | xs_sock_reset_connection_flags(xprt); |
787 | /* Mark transport as closed and wake up all pending tasks */ | 769 | /* Mark transport as closed and wake up all pending tasks */ |
788 | xprt_disconnect_done(xprt); | 770 | xprt_disconnect_done(xprt); |
771 | xprt_force_disconnect(xprt); | ||
789 | } | 772 | } |
790 | 773 | ||
791 | /** | 774 | /** |
@@ -827,6 +810,9 @@ static void xs_reset_transport(struct sock_xprt *transport) | |||
827 | if (sk == NULL) | 810 | if (sk == NULL) |
828 | return; | 811 | return; |
829 | 812 | ||
813 | if (atomic_read(&transport->xprt.swapper)) | ||
814 | sk_clear_memalloc(sk); | ||
815 | |||
830 | write_lock_bh(&sk->sk_callback_lock); | 816 | write_lock_bh(&sk->sk_callback_lock); |
831 | transport->inet = NULL; | 817 | transport->inet = NULL; |
832 | transport->sock = NULL; | 818 | transport->sock = NULL; |
@@ -863,6 +849,13 @@ static void xs_close(struct rpc_xprt *xprt) | |||
863 | xprt_disconnect_done(xprt); | 849 | xprt_disconnect_done(xprt); |
864 | } | 850 | } |
865 | 851 | ||
852 | static void xs_inject_disconnect(struct rpc_xprt *xprt) | ||
853 | { | ||
854 | dprintk("RPC: injecting transport disconnect on xprt=%p\n", | ||
855 | xprt); | ||
856 | xprt_disconnect_done(xprt); | ||
857 | } | ||
858 | |||
866 | static void xs_xprt_free(struct rpc_xprt *xprt) | 859 | static void xs_xprt_free(struct rpc_xprt *xprt) |
867 | { | 860 | { |
868 | xs_free_peer_addresses(xprt); | 861 | xs_free_peer_addresses(xprt); |
@@ -901,7 +894,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | |||
901 | /** | 894 | /** |
902 | * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets | 895 | * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets |
903 | * @sk: socket with data to read | 896 | * @sk: socket with data to read |
904 | * @len: how much data to read | ||
905 | * | 897 | * |
906 | * Currently this assumes we can read the whole reply in a single gulp. | 898 | * Currently this assumes we can read the whole reply in a single gulp. |
907 | */ | 899 | */ |
@@ -965,7 +957,6 @@ static void xs_local_data_ready(struct sock *sk) | |||
965 | /** | 957 | /** |
966 | * xs_udp_data_ready - "data ready" callback for UDP sockets | 958 | * xs_udp_data_ready - "data ready" callback for UDP sockets |
967 | * @sk: socket with data to read | 959 | * @sk: socket with data to read |
968 | * @len: how much data to read | ||
969 | * | 960 | * |
970 | */ | 961 | */ |
971 | static void xs_udp_data_ready(struct sock *sk) | 962 | static void xs_udp_data_ready(struct sock *sk) |
@@ -1389,7 +1380,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns | |||
1389 | /** | 1380 | /** |
1390 | * xs_tcp_data_ready - "data ready" callback for TCP sockets | 1381 | * xs_tcp_data_ready - "data ready" callback for TCP sockets |
1391 | * @sk: socket with data to read | 1382 | * @sk: socket with data to read |
1392 | * @bytes: how much data to read | ||
1393 | * | 1383 | * |
1394 | */ | 1384 | */ |
1395 | static void xs_tcp_data_ready(struct sock *sk) | 1385 | static void xs_tcp_data_ready(struct sock *sk) |
@@ -1886,9 +1876,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, | |||
1886 | 1876 | ||
1887 | /** | 1877 | /** |
1888 | * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint | 1878 | * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint |
1889 | * @xprt: RPC transport to connect | ||
1890 | * @transport: socket transport to connect | 1879 | * @transport: socket transport to connect |
1891 | * @create_sock: function to create a socket of the correct type | ||
1892 | */ | 1880 | */ |
1893 | static int xs_local_setup_socket(struct sock_xprt *transport) | 1881 | static int xs_local_setup_socket(struct sock_xprt *transport) |
1894 | { | 1882 | { |
@@ -1960,43 +1948,84 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) | |||
1960 | msleep_interruptible(15000); | 1948 | msleep_interruptible(15000); |
1961 | } | 1949 | } |
1962 | 1950 | ||
1963 | #ifdef CONFIG_SUNRPC_SWAP | 1951 | #if IS_ENABLED(CONFIG_SUNRPC_SWAP) |
1952 | /* | ||
1953 | * Note that this should be called with XPRT_LOCKED held (or when we otherwise | ||
1954 | * know that we have exclusive access to the socket), to guard against | ||
1955 | * races with xs_reset_transport. | ||
1956 | */ | ||
1964 | static void xs_set_memalloc(struct rpc_xprt *xprt) | 1957 | static void xs_set_memalloc(struct rpc_xprt *xprt) |
1965 | { | 1958 | { |
1966 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, | 1959 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, |
1967 | xprt); | 1960 | xprt); |
1968 | 1961 | ||
1969 | if (xprt->swapper) | 1962 | /* |
1963 | * If there's no sock, then we have nothing to set. The | ||
1964 | * reconnecting process will get it for us. | ||
1965 | */ | ||
1966 | if (!transport->inet) | ||
1967 | return; | ||
1968 | if (atomic_read(&xprt->swapper)) | ||
1970 | sk_set_memalloc(transport->inet); | 1969 | sk_set_memalloc(transport->inet); |
1971 | } | 1970 | } |
1972 | 1971 | ||
1973 | /** | 1972 | /** |
1974 | * xs_swapper - Tag this transport as being used for swap. | 1973 | * xs_enable_swap - Tag this transport as being used for swap. |
1975 | * @xprt: transport to tag | 1974 | * @xprt: transport to tag |
1976 | * @enable: enable/disable | ||
1977 | * | 1975 | * |
1976 | * Take a reference to this transport on behalf of the rpc_clnt, and | ||
1977 | * optionally mark it for swapping if it wasn't already. | ||
1978 | */ | 1978 | */ |
1979 | int xs_swapper(struct rpc_xprt *xprt, int enable) | 1979 | static int |
1980 | xs_enable_swap(struct rpc_xprt *xprt) | ||
1980 | { | 1981 | { |
1981 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, | 1982 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); |
1982 | xprt); | ||
1983 | int err = 0; | ||
1984 | 1983 | ||
1985 | if (enable) { | 1984 | if (atomic_inc_return(&xprt->swapper) != 1) |
1986 | xprt->swapper++; | 1985 | return 0; |
1987 | xs_set_memalloc(xprt); | 1986 | if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) |
1988 | } else if (xprt->swapper) { | 1987 | return -ERESTARTSYS; |
1989 | xprt->swapper--; | 1988 | if (xs->inet) |
1990 | sk_clear_memalloc(transport->inet); | 1989 | sk_set_memalloc(xs->inet); |
1991 | } | 1990 | xprt_release_xprt(xprt, NULL); |
1991 | return 0; | ||
1992 | } | ||
1992 | 1993 | ||
1993 | return err; | 1994 | /** |
1995 | * xs_disable_swap - Untag this transport as being used for swap. | ||
1996 | * @xprt: transport to untag | ||
1997 | * | ||
1998 | * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the | ||
1999 | * swapper refcount goes to 0, untag the socket as a memalloc socket. | ||
2000 | */ | ||
2001 | static void | ||
2002 | xs_disable_swap(struct rpc_xprt *xprt) | ||
2003 | { | ||
2004 | struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); | ||
2005 | |||
2006 | if (!atomic_dec_and_test(&xprt->swapper)) | ||
2007 | return; | ||
2008 | if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) | ||
2009 | return; | ||
2010 | if (xs->inet) | ||
2011 | sk_clear_memalloc(xs->inet); | ||
2012 | xprt_release_xprt(xprt, NULL); | ||
1994 | } | 2013 | } |
1995 | EXPORT_SYMBOL_GPL(xs_swapper); | ||
1996 | #else | 2014 | #else |
1997 | static void xs_set_memalloc(struct rpc_xprt *xprt) | 2015 | static void xs_set_memalloc(struct rpc_xprt *xprt) |
1998 | { | 2016 | { |
1999 | } | 2017 | } |
2018 | |||
2019 | static int | ||
2020 | xs_enable_swap(struct rpc_xprt *xprt) | ||
2021 | { | ||
2022 | return -EINVAL; | ||
2023 | } | ||
2024 | |||
2025 | static void | ||
2026 | xs_disable_swap(struct rpc_xprt *xprt) | ||
2027 | { | ||
2028 | } | ||
2000 | #endif | 2029 | #endif |
2001 | 2030 | ||
2002 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | 2031 | static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
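The rewritten swap path above turns the old open-coded `xprt->swapper++`/`--` counter into an atomic refcount, and serializes the socket-state change against xs_reset_transport() by taking XPRT_LOCKED on the first and last reference only. A minimal userspace sketch of that first/last-reference pattern, using hypothetical transport/lock/memalloc stand-ins rather than the real sunrpc types:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct sock_xprt: this models the
 * locking pattern, not the sunrpc API itself. */
struct transport {
	atomic_int swapper;      /* users that need swap over this socket */
	pthread_mutex_t lock;    /* stands in for the XPRT_LOCKED bit */
	bool memalloc;           /* stands in for sk_set_memalloc() state */
};

/* As in xs_enable_swap(): only the 0 -> 1 transition touches the
 * socket, and it does so with exclusive access held. */
static int transport_enable_swap(struct transport *t)
{
	if (atomic_fetch_add(&t->swapper, 1) != 0)
		return 0;               /* not the first user: nothing to do */
	pthread_mutex_lock(&t->lock);
	t->memalloc = true;             /* kernel: sk_set_memalloc(xs->inet) */
	pthread_mutex_unlock(&t->lock);
	return 0;
}

/* As in xs_disable_swap(): only the 1 -> 0 transition clears the flag. */
static void transport_disable_swap(struct transport *t)
{
	if (atomic_fetch_sub(&t->swapper, 1) != 1)
		return;                 /* other users still need memalloc */
	pthread_mutex_lock(&t->lock);
	t->memalloc = false;            /* kernel: sk_clear_memalloc(xs->inet) */
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct transport t = { .swapper = 0, .memalloc = false };

	pthread_mutex_init(&t.lock, NULL);
	transport_enable_swap(&t);      /* first user tags the socket */
	transport_enable_swap(&t);      /* second user: refcount only */
	transport_disable_swap(&t);     /* still one user left */
	transport_disable_swap(&t);     /* last user untags the socket */
	printf("memalloc=%d swapper=%d\n",
	       t.memalloc, atomic_load(&t.swapper));
	return 0;
}

The matching hunk in xs_reset_transport() (sk_clear_memalloc() when the transport still holds swapper references) covers teardown, so a socket destroyed while swap is active cannot leave the memalloc tag dangling.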
@@ -2057,6 +2086,27 @@ out: | |||
2057 | xprt_wake_pending_tasks(xprt, status); | 2086 | xprt_wake_pending_tasks(xprt, status); |
2058 | } | 2087 | } |
2059 | 2088 | ||
2089 | /** | ||
2090 | * xs_tcp_shutdown - gracefully shut down a TCP socket | ||
2091 | * @xprt: transport | ||
2092 | * | ||
2093 | * Initiates a graceful shutdown of the TCP socket by calling the | ||
2094 | * equivalent of shutdown(SHUT_RDWR). | ||
2095 | */ | ||
2096 | static void xs_tcp_shutdown(struct rpc_xprt *xprt) | ||
2097 | { | ||
2098 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | ||
2099 | struct socket *sock = transport->sock; | ||
2100 | |||
2101 | if (sock == NULL) | ||
2102 | return; | ||
2103 | if (xprt_connected(xprt)) { | ||
2104 | kernel_sock_shutdown(sock, SHUT_RDWR); | ||
2105 | trace_rpc_socket_shutdown(xprt, sock); | ||
2106 | } else | ||
2107 | xs_reset_transport(transport); | ||
2108 | } | ||
2109 | |||
2060 | static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | 2110 | static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) |
2061 | { | 2111 | { |
2062 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); | 2112 | struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); |
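xs_tcp_shutdown() gives connected transports a graceful close: the peer sees a FIN and in-flight data can drain, while an unconnected socket is simply reset. The userspace equivalent of kernel_sock_shutdown(sock, SHUT_RDWR) is shutdown(2), sketched here on an ordinary descriptor (assumed already connected):

#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

/* Graceful TCP teardown: the userspace analog of
 * kernel_sock_shutdown(sock, SHUT_RDWR). */
static void tcp_graceful_shutdown(int fd)
{
	/* Send FIN and stop both directions; the connection drains
	 * instead of being reset. */
	if (shutdown(fd, SHUT_RDWR) < 0)
		perror("shutdown");
	/* shutdown() ends the connection but not the descriptor. */
	close(fd);
}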
@@ -2067,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
2067 | unsigned int keepidle = xprt->timeout->to_initval / HZ; | 2117 | unsigned int keepidle = xprt->timeout->to_initval / HZ; |
2068 | unsigned int keepcnt = xprt->timeout->to_retries + 1; | 2118 | unsigned int keepcnt = xprt->timeout->to_retries + 1; |
2069 | unsigned int opt_on = 1; | 2119 | unsigned int opt_on = 1; |
2120 | unsigned int timeo; | ||
2070 | 2121 | ||
2071 | /* TCP Keepalive options */ | 2122 | /* TCP Keepalive options */ |
2072 | kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, | 2123 | kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, |
@@ -2078,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) | |||
2078 | kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, | 2129 | kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, |
2079 | (char *)&keepcnt, sizeof(keepcnt)); | 2130 | (char *)&keepcnt, sizeof(keepcnt)); |
2080 | 2131 | ||
2132 | /* TCP user timeout (see RFC5482) */ | ||
2133 | timeo = jiffies_to_msecs(xprt->timeout->to_initval) * | ||
2134 | (xprt->timeout->to_retries + 1); | ||
2135 | kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, | ||
2136 | (char *)&timeo, sizeof(timeo)); | ||
2137 | |||
2081 | write_lock_bh(&sk->sk_callback_lock); | 2138 | write_lock_bh(&sk->sk_callback_lock); |
2082 | 2139 | ||
2083 | xs_save_old_callbacks(transport, sk); | 2140 | xs_save_old_callbacks(transport, sk); |
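The new TCP_USER_TIMEOUT setting (RFC 5482) in the hunk above caps how long written data may sit unacknowledged before TCP aborts the connection; deriving it from to_initval and to_retries keeps TCP from holding a dead connection open longer than the RPC layer is willing to wait. A userspace sketch of the same computation, where the 60-second initial value and 2 retries are illustrative assumptions, not the transport defaults:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	unsigned int initval_ms = 60 * 1000;	/* assumed per-try timeout */
	unsigned int retries = 2;		/* assumed retry count */
	/* Same shape as the kernel hunk: per-try timeout in
	 * milliseconds times the total number of tries (retries + 1). */
	unsigned int timeo = initval_ms * (retries + 1);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
		       &timeo, sizeof(timeo)) < 0)
		perror("setsockopt(TCP_USER_TIMEOUT)");
	return 0;
}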
@@ -2125,9 +2182,6 @@ out: | |||
2125 | 2182 | ||
2126 | /** | 2183 | /** |
2127 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint | 2184 | * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint |
2128 | * @xprt: RPC transport to connect | ||
2129 | * @transport: socket transport to connect | ||
2130 | * @create_sock: function to create a socket of the correct type | ||
2131 | * | 2185 | * |
2132 | * Invoked by a work queue tasklet. | 2186 | * Invoked by a work queue tasklet. |
2133 | */ | 2187 | */ |
@@ -2463,6 +2517,8 @@ static struct rpc_xprt_ops xs_local_ops = { | |||
2463 | .close = xs_close, | 2517 | .close = xs_close, |
2464 | .destroy = xs_destroy, | 2518 | .destroy = xs_destroy, |
2465 | .print_stats = xs_local_print_stats, | 2519 | .print_stats = xs_local_print_stats, |
2520 | .enable_swap = xs_enable_swap, | ||
2521 | .disable_swap = xs_disable_swap, | ||
2466 | }; | 2522 | }; |
2467 | 2523 | ||
2468 | static struct rpc_xprt_ops xs_udp_ops = { | 2524 | static struct rpc_xprt_ops xs_udp_ops = { |
@@ -2482,6 +2538,9 @@ static struct rpc_xprt_ops xs_udp_ops = { | |||
2482 | .close = xs_close, | 2538 | .close = xs_close, |
2483 | .destroy = xs_destroy, | 2539 | .destroy = xs_destroy, |
2484 | .print_stats = xs_udp_print_stats, | 2540 | .print_stats = xs_udp_print_stats, |
2541 | .enable_swap = xs_enable_swap, | ||
2542 | .disable_swap = xs_disable_swap, | ||
2543 | .inject_disconnect = xs_inject_disconnect, | ||
2485 | }; | 2544 | }; |
2486 | 2545 | ||
2487 | static struct rpc_xprt_ops xs_tcp_ops = { | 2546 | static struct rpc_xprt_ops xs_tcp_ops = { |
@@ -2498,6 +2557,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { | |||
2498 | .close = xs_tcp_shutdown, | 2557 | .close = xs_tcp_shutdown, |
2499 | .destroy = xs_destroy, | 2558 | .destroy = xs_destroy, |
2500 | .print_stats = xs_tcp_print_stats, | 2559 | .print_stats = xs_tcp_print_stats, |
2560 | .enable_swap = xs_enable_swap, | ||
2561 | .disable_swap = xs_disable_swap, | ||
2562 | .inject_disconnect = xs_inject_disconnect, | ||
2501 | }; | 2563 | }; |
2502 | 2564 | ||
2503 | /* | 2565 | /* |
@@ -2515,6 +2577,9 @@ static struct rpc_xprt_ops bc_tcp_ops = { | |||
2515 | .close = bc_close, | 2577 | .close = bc_close, |
2516 | .destroy = bc_destroy, | 2578 | .destroy = bc_destroy, |
2517 | .print_stats = xs_tcp_print_stats, | 2579 | .print_stats = xs_tcp_print_stats, |
2580 | .enable_swap = xs_enable_swap, | ||
2581 | .disable_swap = xs_disable_swap, | ||
2582 | .inject_disconnect = xs_inject_disconnect, | ||
2518 | }; | 2583 | }; |
2519 | 2584 | ||
2520 | static int xs_init_anyaddr(const int family, struct sockaddr *sap) | 2585 | static int xs_init_anyaddr(const int family, struct sockaddr *sap) |
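The three new ops wired into the tables above (enable_swap, disable_swap, inject_disconnect) are plain function pointers in rpc_xprt_ops, so callers dispatch through the table and can treat an absent slot as "transport does not support this". A hedged sketch of that optional-op pattern, with illustrative names rather than the sunrpc declarations:

#include <errno.h>
#include <stddef.h>

struct xprt;				/* opaque transport, illustrative */

/* Per-transport method table; a NULL slot means "not supported". */
struct xprt_ops {
	int  (*enable_swap)(struct xprt *xprt);
	void (*disable_swap)(struct xprt *xprt);
	void (*inject_disconnect)(struct xprt *xprt);
};

struct xprt {
	const struct xprt_ops *ops;
};

/* Illustrative caller: route swap activation through the table. */
static int xprt_enable_swap(struct xprt *xprt)
{
	if (!xprt->ops->enable_swap)
		return -EINVAL;	/* mirrors the !CONFIG_SUNRPC_SWAP stub */
	return xprt->ops->enable_swap(xprt);
}

/* Illustrative fault hook: fire only where the transport wired it up. */
static void xprt_inject_disconnect(struct xprt *xprt)
{
	if (xprt->ops->inject_disconnect)
		xprt->ops->inject_disconnect(xprt);
}

This is why the UDP, TCP, and backchannel tables each name xs_inject_disconnect explicitly while xs_local_ops leaves that slot out: other transports (such as RDMA) can wire different handlers, or none, into the same slots.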