about | summary | refs | log | tree | commit | diff | stats
path: root/net/smc
diff options
context:
space:
mode:
author Jason Gunthorpe <jgg@mellanox.com> 2018-08-16 16:13:03 -0400
committer Jason Gunthorpe <jgg@mellanox.com> 2018-08-16 16:21:29 -0400
commit 0a3173a5f09bc58a3638ecfd0a80bdbae55e123c (patch)
tree d6c0bc84863cca54dfbde3b7463e5d49c82af9f1 /net/smc
parent 92f4e77c85918eab5e5803d7e28ab89a7e6bd3a2 (diff)
parent 5c60a7389d795e001c8748b458eb76e3a5b6008c (diff)
Merge branch 'linus/master' into rdma.git for-next
rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
   - Use the rdma code and revise with the new spelling for
     atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
   - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
   - Use the blk code and revise to use NULL for ib_post_recv when
     appropriate
   - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
   - Use the net code and revise to use NULL for ib_post_recv when
     appropriate

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'net/smc')
-rw-r--r-- net/smc/Makefile 2
-rw-r--r-- net/smc/af_smc.c 310
-rw-r--r-- net/smc/smc.h 9
-rw-r--r-- net/smc/smc_cdc.c 113
-rw-r--r-- net/smc/smc_cdc.h 86
-rw-r--r-- net/smc/smc_clc.c 197
-rw-r--r-- net/smc/smc_clc.h 99
-rw-r--r-- net/smc/smc_core.c 349
-rw-r--r-- net/smc/smc_core.h 85
-rw-r--r-- net/smc/smc_diag.c 33
-rw-r--r-- net/smc/smc_ib.c 170
-rw-r--r-- net/smc/smc_ib.h 7
-rw-r--r-- net/smc/smc_ism.c 348
-rw-r--r-- net/smc/smc_ism.h 48
-rw-r--r-- net/smc/smc_llc.c 80
-rw-r--r-- net/smc/smc_llc.h 7
-rw-r--r-- net/smc/smc_pnet.c 171
-rw-r--r-- net/smc/smc_pnet.h 19
-rw-r--r-- net/smc/smc_rx.c 21
-rw-r--r-- net/smc/smc_tx.c 239
-rw-r--r-- net/smc/smc_tx.h 6
-rw-r--r-- net/smc/smc_wr.c 32
22 files changed, 1853 insertions, 578 deletions
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 188104654b54..4df96b4b8130 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -1,4 +1,4 @@
1obj-$(CONFIG_SMC) += smc.o 1obj-$(CONFIG_SMC) += smc.o
2obj-$(CONFIG_SMC_DIAG) += smc_diag.o 2obj-$(CONFIG_SMC_DIAG) += smc_diag.o
3smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o 3smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
4smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o 4smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e7de5f282722..2d8a1e15e4f9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -23,6 +23,7 @@
23#include <linux/workqueue.h> 23#include <linux/workqueue.h>
24#include <linux/in.h> 24#include <linux/in.h>
25#include <linux/sched/signal.h> 25#include <linux/sched/signal.h>
26#include <linux/if_vlan.h>
26 27
27#include <net/sock.h> 28#include <net/sock.h>
28#include <net/tcp.h> 29#include <net/tcp.h>
@@ -35,6 +36,7 @@
35#include "smc_cdc.h" 36#include "smc_cdc.h"
36#include "smc_core.h" 37#include "smc_core.h"
37#include "smc_ib.h" 38#include "smc_ib.h"
39#include "smc_ism.h"
38#include "smc_pnet.h" 40#include "smc_pnet.h"
39#include "smc_tx.h" 41#include "smc_tx.h"
40#include "smc_rx.h" 42#include "smc_rx.h"
@@ -342,20 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
342 344
343 rc = smc_ib_modify_qp_rts(link); 345 rc = smc_ib_modify_qp_rts(link);
344 if (rc) 346 if (rc)
345 return SMC_CLC_DECL_INTERR; 347 return SMC_CLC_DECL_ERR_RDYLNK;
346 348
347 smc_wr_remember_qp_attr(link); 349 smc_wr_remember_qp_attr(link);
348 350
349 if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) 351 if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
350 return SMC_CLC_DECL_INTERR; 352 return SMC_CLC_DECL_ERR_REGRMB;
351 353
352 /* send CONFIRM LINK response over RoCE fabric */ 354 /* send CONFIRM LINK response over RoCE fabric */
353 rc = smc_llc_send_confirm_link(link, 355 rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
354 link->smcibdev->mac[link->ibport - 1],
355 &link->smcibdev->gid[link->ibport - 1],
356 SMC_LLC_RESP);
357 if (rc < 0) 356 if (rc < 0)
358 return SMC_CLC_DECL_TCL; 357 return SMC_CLC_DECL_TIMEOUT_CL;
359 358
360 /* receive ADD LINK request from server over RoCE fabric */ 359 /* receive ADD LINK request from server over RoCE fabric */
361 rest = wait_for_completion_interruptible_timeout(&link->llc_add, 360 rest = wait_for_completion_interruptible_timeout(&link->llc_add,
@@ -371,18 +370,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
371 /* send add link reject message, only one link supported for now */ 370 /* send add link reject message, only one link supported for now */
372 rc = smc_llc_send_add_link(link, 371 rc = smc_llc_send_add_link(link,
373 link->smcibdev->mac[link->ibport - 1], 372 link->smcibdev->mac[link->ibport - 1],
374 &link->smcibdev->gid[link->ibport - 1], 373 link->gid, SMC_LLC_RESP);
375 SMC_LLC_RESP);
376 if (rc < 0) 374 if (rc < 0)
377 return SMC_CLC_DECL_TCL; 375 return SMC_CLC_DECL_TIMEOUT_AL;
378 376
379 smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); 377 smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
380 378
381 return 0; 379 return 0;
382} 380}
383 381
384static void smc_conn_save_peer_info(struct smc_sock *smc, 382static void smcr_conn_save_peer_info(struct smc_sock *smc,
385 struct smc_clc_msg_accept_confirm *clc) 383 struct smc_clc_msg_accept_confirm *clc)
386{ 384{
387 int bufsize = smc_uncompress_bufsize(clc->rmbe_size); 385 int bufsize = smc_uncompress_bufsize(clc->rmbe_size);
388 386
@@ -393,6 +391,28 @@ static void smc_conn_save_peer_info(struct smc_sock *smc,
393 smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); 391 smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
394} 392}
395 393
394static void smcd_conn_save_peer_info(struct smc_sock *smc,
395 struct smc_clc_msg_accept_confirm *clc)
396{
397 int bufsize = smc_uncompress_bufsize(clc->dmbe_size);
398
399 smc->conn.peer_rmbe_idx = clc->dmbe_idx;
400 smc->conn.peer_token = clc->token;
401 /* msg header takes up space in the buffer */
402 smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
403 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
404 smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
405}
406
407static void smc_conn_save_peer_info(struct smc_sock *smc,
408 struct smc_clc_msg_accept_confirm *clc)
409{
410 if (smc->conn.lgr->is_smcd)
411 smcd_conn_save_peer_info(smc, clc);
412 else
413 smcr_conn_save_peer_info(smc, clc);
414}
415
396static void smc_link_save_peer_info(struct smc_link *link, 416static void smc_link_save_peer_info(struct smc_link *link,
397 struct smc_clc_msg_accept_confirm *clc) 417 struct smc_clc_msg_accept_confirm *clc)
398{ 418{
@@ -404,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,
404} 424}
405 425
406/* fall back during connect */ 426/* fall back during connect */
407static int smc_connect_fallback(struct smc_sock *smc) 427static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
408{ 428{
409 smc->use_fallback = true; 429 smc->use_fallback = true;
430 smc->fallback_rsn = reason_code;
410 smc_copy_sock_settings_to_clc(smc); 431 smc_copy_sock_settings_to_clc(smc);
411 if (smc->sk.sk_state == SMC_INIT) 432 if (smc->sk.sk_state == SMC_INIT)
412 smc->sk.sk_state = SMC_ACTIVE; 433 smc->sk.sk_state = SMC_ACTIVE;
@@ -423,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
423 sock_put(&smc->sk); /* passive closing */ 444 sock_put(&smc->sk); /* passive closing */
424 return reason_code; 445 return reason_code;
425 } 446 }
426 if (reason_code != SMC_CLC_DECL_REPLY) { 447 if (reason_code != SMC_CLC_DECL_PEERDECL) {
427 rc = smc_clc_send_decline(smc, reason_code); 448 rc = smc_clc_send_decline(smc, reason_code);
428 if (rc < 0) { 449 if (rc < 0) {
429 if (smc->sk.sk_state == SMC_INIT) 450 if (smc->sk.sk_state == SMC_INIT)
@@ -431,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
431 return rc; 452 return rc;
432 } 453 }
433 } 454 }
434 return smc_connect_fallback(smc); 455 return smc_connect_fallback(smc, reason_code);
435} 456}
436 457
437/* abort connecting */ 458/* abort connecting */
@@ -448,7 +469,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
448/* check if there is a rdma device available for this connection. */ 469/* check if there is a rdma device available for this connection. */
449/* called for connect and listen */ 470/* called for connect and listen */
450static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev, 471static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
451 u8 *ibport) 472 u8 *ibport, unsigned short vlan_id, u8 gid[])
452{ 473{
453 int reason_code = 0; 474 int reason_code = 0;
454 475
@@ -456,22 +477,59 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
456 * within same PNETID that also contains the ethernet device 477 * within same PNETID that also contains the ethernet device
457 * used for the internal TCP socket 478 * used for the internal TCP socket
458 */ 479 */
459 smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport); 480 smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
481 gid);
460 if (!(*ibdev)) 482 if (!(*ibdev))
461 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 483 reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
462 484
463 return reason_code; 485 return reason_code;
464} 486}
465 487
488/* check if there is an ISM device available for this connection. */
489/* called for connect and listen */
490static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
491{
492 /* Find ISM device with same PNETID as connecting interface */
493 smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
494 if (!(*ismdev))
495 return SMC_CLC_DECL_CNFERR; /* configuration error */
496 return 0;
497}
498
499/* Check for VLAN ID and register it on ISM device just for CLC handshake */
500static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
501 struct smcd_dev *ismdev,
502 unsigned short vlan_id)
503{
504 if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
505 return SMC_CLC_DECL_CNFERR;
506 return 0;
507}
508
509/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
510 * used, the VLAN ID will be registered again during the connection setup.
511 */
512static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
513 struct smcd_dev *ismdev,
514 unsigned short vlan_id)
515{
516 if (!is_smcd)
517 return 0;
518 if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
519 return SMC_CLC_DECL_CNFERR;
520 return 0;
521}
522
466/* CLC handshake during connect */ 523/* CLC handshake during connect */
467static int smc_connect_clc(struct smc_sock *smc, 524static int smc_connect_clc(struct smc_sock *smc, int smc_type,
468 struct smc_clc_msg_accept_confirm *aclc, 525 struct smc_clc_msg_accept_confirm *aclc,
469 struct smc_ib_device *ibdev, u8 ibport) 526 struct smc_ib_device *ibdev, u8 ibport,
527 u8 gid[], struct smcd_dev *ismdev)
470{ 528{
471 int rc = 0; 529 int rc = 0;
472 530
473 /* do inband token exchange */ 531 /* do inband token exchange */
474 rc = smc_clc_send_proposal(smc, ibdev, ibport); 532 rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
475 if (rc) 533 if (rc)
476 return rc; 534 return rc;
477 /* receive SMC Accept CLC message */ 535 /* receive SMC Accept CLC message */
@@ -488,8 +546,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
488 int reason_code = 0; 546 int reason_code = 0;
489 547
490 mutex_lock(&smc_create_lgr_pending); 548 mutex_lock(&smc_create_lgr_pending);
491 local_contact = smc_conn_create(smc, ibdev, ibport, &aclc->lcl, 549 local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
492 aclc->hdr.flag); 550 ibport, &aclc->lcl, NULL, 0);
493 if (local_contact < 0) { 551 if (local_contact < 0) {
494 if (local_contact == -ENOMEM) 552 if (local_contact == -ENOMEM)
495 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ 553 reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -504,14 +562,14 @@ static int smc_connect_rdma(struct smc_sock *smc,
504 smc_conn_save_peer_info(smc, aclc); 562 smc_conn_save_peer_info(smc, aclc);
505 563
506 /* create send buffer and rmb */ 564 /* create send buffer and rmb */
507 if (smc_buf_create(smc)) 565 if (smc_buf_create(smc, false))
508 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact); 566 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
509 567
510 if (local_contact == SMC_FIRST_CONTACT) 568 if (local_contact == SMC_FIRST_CONTACT)
511 smc_link_save_peer_info(link, aclc); 569 smc_link_save_peer_info(link, aclc);
512 570
513 if (smc_rmb_rtoken_handling(&smc->conn, aclc)) 571 if (smc_rmb_rtoken_handling(&smc->conn, aclc))
514 return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, 572 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
515 local_contact); 573 local_contact);
516 574
517 smc_close_init(smc); 575 smc_close_init(smc);
@@ -519,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,
519 577
520 if (local_contact == SMC_FIRST_CONTACT) { 578 if (local_contact == SMC_FIRST_CONTACT) {
521 if (smc_ib_ready_link(link)) 579 if (smc_ib_ready_link(link))
522 return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, 580 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
523 local_contact); 581 local_contact);
524 } else { 582 } else {
525 if (!smc->conn.rmb_desc->reused && 583 if (!smc->conn.rmb_desc->reused &&
526 smc_reg_rmb(link, smc->conn.rmb_desc, true)) 584 smc_reg_rmb(link, smc->conn.rmb_desc, true))
527 return smc_connect_abort(smc, SMC_CLC_DECL_INTERR, 585 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
528 local_contact); 586 local_contact);
529 } 587 }
530 smc_rmb_sync_sg_for_device(&smc->conn); 588 smc_rmb_sync_sg_for_device(&smc->conn);
@@ -551,41 +609,113 @@ static int smc_connect_rdma(struct smc_sock *smc,
551 return 0; 609 return 0;
552} 610}
553 611
612/* setup for ISM connection of client */
613static int smc_connect_ism(struct smc_sock *smc,
614 struct smc_clc_msg_accept_confirm *aclc,
615 struct smcd_dev *ismdev)
616{
617 int local_contact = SMC_FIRST_CONTACT;
618 int rc = 0;
619
620 mutex_lock(&smc_create_lgr_pending);
621 local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
622 NULL, ismdev, aclc->gid);
623 if (local_contact < 0)
624 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
625
626 /* Create send and receive buffers */
627 if (smc_buf_create(smc, true))
628 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);
629
630 smc_conn_save_peer_info(smc, aclc);
631 smc_close_init(smc);
632 smc_rx_init(smc);
633 smc_tx_init(smc);
634
635 rc = smc_clc_send_confirm(smc);
636 if (rc)
637 return smc_connect_abort(smc, rc, local_contact);
638 mutex_unlock(&smc_create_lgr_pending);
639
640 smc_copy_sock_settings_to_clc(smc);
641 if (smc->sk.sk_state == SMC_INIT)
642 smc->sk.sk_state = SMC_ACTIVE;
643
644 return 0;
645}
646
554/* perform steps before actually connecting */ 647/* perform steps before actually connecting */
555static int __smc_connect(struct smc_sock *smc) 648static int __smc_connect(struct smc_sock *smc)
556{ 649{
650 bool ism_supported = false, rdma_supported = false;
557 struct smc_clc_msg_accept_confirm aclc; 651 struct smc_clc_msg_accept_confirm aclc;
558 struct smc_ib_device *ibdev; 652 struct smc_ib_device *ibdev;
653 struct smcd_dev *ismdev;
654 u8 gid[SMC_GID_SIZE];
655 unsigned short vlan;
656 int smc_type;
559 int rc = 0; 657 int rc = 0;
560 u8 ibport; 658 u8 ibport;
561 659
562 sock_hold(&smc->sk); /* sock put in passive closing */ 660 sock_hold(&smc->sk); /* sock put in passive closing */
563 661
564 if (smc->use_fallback) 662 if (smc->use_fallback)
565 return smc_connect_fallback(smc); 663 return smc_connect_fallback(smc, smc->fallback_rsn);
566 664
567 /* if peer has not signalled SMC-capability, fall back */ 665 /* if peer has not signalled SMC-capability, fall back */
568 if (!tcp_sk(smc->clcsock->sk)->syn_smc) 666 if (!tcp_sk(smc->clcsock->sk)->syn_smc)
569 return smc_connect_fallback(smc); 667 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
570 668
571 /* IPSec connections opt out of SMC-R optimizations */ 669 /* IPSec connections opt out of SMC-R optimizations */
572 if (using_ipsec(smc)) 670 if (using_ipsec(smc))
573 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); 671 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);
574 672
575 /* check if a RDMA device is available; if not, fall back */ 673 /* check for VLAN ID */
576 if (smc_check_rdma(smc, &ibdev, &ibport)) 674 if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
577 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR); 675 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
578 676
677 /* check if there is an ism device available */
678 if (!smc_check_ism(smc, &ismdev) &&
679 !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
680 /* ISM is supported for this connection */
681 ism_supported = true;
682 smc_type = SMC_TYPE_D;
683 }
684
685 /* check if there is a rdma device available */
686 if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
687 /* RDMA is supported for this connection */
688 rdma_supported = true;
689 if (ism_supported)
690 smc_type = SMC_TYPE_B; /* both */
691 else
692 smc_type = SMC_TYPE_R; /* only RDMA */
693 }
694
695 /* if neither ISM nor RDMA are supported, fallback */
696 if (!rdma_supported && !ism_supported)
697 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
698
579 /* perform CLC handshake */ 699 /* perform CLC handshake */
580 rc = smc_connect_clc(smc, &aclc, ibdev, ibport); 700 rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
581 if (rc) 701 if (rc) {
702 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
582 return smc_connect_decline_fallback(smc, rc); 703 return smc_connect_decline_fallback(smc, rc);
704 }
583 705
584 /* connect using rdma */ 706 /* depending on previous steps, connect using rdma or ism */
585 rc = smc_connect_rdma(smc, &aclc, ibdev, ibport); 707 if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
586 if (rc) 708 rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
709 else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
710 rc = smc_connect_ism(smc, &aclc, ismdev);
711 else
712 rc = SMC_CLC_DECL_MODEUNSUPP;
713 if (rc) {
714 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
587 return smc_connect_decline_fallback(smc, rc); 715 return smc_connect_decline_fallback(smc, rc);
716 }
588 717
718 smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
589 return 0; 719 return 0;
590} 720}
591 721
@@ -817,15 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
817 link = &lgr->lnk[SMC_SINGLE_LINK]; 947 link = &lgr->lnk[SMC_SINGLE_LINK];
818 948
819 if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) 949 if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
820 return SMC_CLC_DECL_INTERR; 950 return SMC_CLC_DECL_ERR_REGRMB;
821 951
822 /* send CONFIRM LINK request to client over the RoCE fabric */ 952 /* send CONFIRM LINK request to client over the RoCE fabric */
823 rc = smc_llc_send_confirm_link(link, 953 rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
824 link->smcibdev->mac[link->ibport - 1],
825 &link->smcibdev->gid[link->ibport - 1],
826 SMC_LLC_REQ);
827 if (rc < 0) 954 if (rc < 0)
828 return SMC_CLC_DECL_TCL; 955 return SMC_CLC_DECL_TIMEOUT_CL;
829 956
830 /* receive CONFIRM LINK response from client over the RoCE fabric */ 957 /* receive CONFIRM LINK response from client over the RoCE fabric */
831 rest = wait_for_completion_interruptible_timeout( 958 rest = wait_for_completion_interruptible_timeout(
@@ -845,10 +972,9 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
845 /* send ADD LINK request to client over the RoCE fabric */ 972 /* send ADD LINK request to client over the RoCE fabric */
846 rc = smc_llc_send_add_link(link, 973 rc = smc_llc_send_add_link(link,
847 link->smcibdev->mac[link->ibport - 1], 974 link->smcibdev->mac[link->ibport - 1],
848 &link->smcibdev->gid[link->ibport - 1], 975 link->gid, SMC_LLC_REQ);
849 SMC_LLC_REQ);
850 if (rc < 0) 976 if (rc < 0)
851 return SMC_CLC_DECL_TCL; 977 return SMC_CLC_DECL_TIMEOUT_AL;
852 978
853 /* receive ADD LINK response from client over the RoCE fabric */ 979 /* receive ADD LINK response from client over the RoCE fabric */
854 rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, 980 rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
@@ -923,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
923 } 1049 }
924 smc_conn_free(&new_smc->conn); 1050 smc_conn_free(&new_smc->conn);
925 new_smc->use_fallback = true; 1051 new_smc->use_fallback = true;
926 if (reason_code && reason_code != SMC_CLC_DECL_REPLY) { 1052 new_smc->fallback_rsn = reason_code;
1053 if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
927 if (smc_clc_send_decline(new_smc, reason_code) < 0) { 1054 if (smc_clc_send_decline(new_smc, reason_code) < 0) {
928 smc_listen_out_err(new_smc); 1055 smc_listen_out_err(new_smc);
929 return; 1056 return;
@@ -953,7 +1080,8 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
953 int *local_contact) 1080 int *local_contact)
954{ 1081{
955 /* allocate connection / link group */ 1082 /* allocate connection / link group */
956 *local_contact = smc_conn_create(new_smc, ibdev, ibport, &pclc->lcl, 0); 1083 *local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
1084 &pclc->lcl, NULL, 0);
957 if (*local_contact < 0) { 1085 if (*local_contact < 0) {
958 if (*local_contact == -ENOMEM) 1086 if (*local_contact == -ENOMEM)
959 return SMC_CLC_DECL_MEM;/* insufficient memory*/ 1087 return SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -961,12 +1089,50 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
961 } 1089 }
962 1090
963 /* create send buffer and rmb */ 1091 /* create send buffer and rmb */
964 if (smc_buf_create(new_smc)) 1092 if (smc_buf_create(new_smc, false))
965 return SMC_CLC_DECL_MEM; 1093 return SMC_CLC_DECL_MEM;
966 1094
967 return 0; 1095 return 0;
968} 1096}
969 1097
1098/* listen worker: initialize connection and buffers for SMC-D */
1099static int smc_listen_ism_init(struct smc_sock *new_smc,
1100 struct smc_clc_msg_proposal *pclc,
1101 struct smcd_dev *ismdev,
1102 int *local_contact)
1103{
1104 struct smc_clc_msg_smcd *pclc_smcd;
1105
1106 pclc_smcd = smc_get_clc_msg_smcd(pclc);
1107 *local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
1108 ismdev, pclc_smcd->gid);
1109 if (*local_contact < 0) {
1110 if (*local_contact == -ENOMEM)
1111 return SMC_CLC_DECL_MEM;/* insufficient memory*/
1112 return SMC_CLC_DECL_INTERR; /* other error */
1113 }
1114
1115 /* Check if peer can be reached via ISM device */
1116 if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
1117 new_smc->conn.lgr->vlan_id,
1118 new_smc->conn.lgr->smcd)) {
1119 if (*local_contact == SMC_FIRST_CONTACT)
1120 smc_lgr_forget(new_smc->conn.lgr);
1121 smc_conn_free(&new_smc->conn);
1122 return SMC_CLC_DECL_CNFERR;
1123 }
1124
1125 /* Create send and receive buffers */
1126 if (smc_buf_create(new_smc, true)) {
1127 if (*local_contact == SMC_FIRST_CONTACT)
1128 smc_lgr_forget(new_smc->conn.lgr);
1129 smc_conn_free(&new_smc->conn);
1130 return SMC_CLC_DECL_MEM;
1131 }
1132
1133 return 0;
1134}
1135
970/* listen worker: register buffers */ 1136/* listen worker: register buffers */
971static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) 1137static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
972{ 1138{
@@ -975,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
975 if (local_contact != SMC_FIRST_CONTACT) { 1141 if (local_contact != SMC_FIRST_CONTACT) {
976 if (!new_smc->conn.rmb_desc->reused) { 1142 if (!new_smc->conn.rmb_desc->reused) {
977 if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) 1143 if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
978 return SMC_CLC_DECL_INTERR; 1144 return SMC_CLC_DECL_ERR_REGRMB;
979 } 1145 }
980 } 1146 }
981 smc_rmb_sync_sg_for_device(&new_smc->conn); 1147 smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -995,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,
995 smc_link_save_peer_info(link, cclc); 1161 smc_link_save_peer_info(link, cclc);
996 1162
997 if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { 1163 if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
998 reason_code = SMC_CLC_DECL_INTERR; 1164 reason_code = SMC_CLC_DECL_ERR_RTOK;
999 goto decline; 1165 goto decline;
1000 } 1166 }
1001 1167
1002 if (local_contact == SMC_FIRST_CONTACT) { 1168 if (local_contact == SMC_FIRST_CONTACT) {
1003 if (smc_ib_ready_link(link)) { 1169 if (smc_ib_ready_link(link)) {
1004 reason_code = SMC_CLC_DECL_INTERR; 1170 reason_code = SMC_CLC_DECL_ERR_RDYLNK;
1005 goto decline; 1171 goto decline;
1006 } 1172 }
1007 /* QP confirmation over RoCE fabric */ 1173 /* QP confirmation over RoCE fabric */
@@ -1025,8 +1191,11 @@ static void smc_listen_work(struct work_struct *work)
1025 struct smc_clc_msg_accept_confirm cclc; 1191 struct smc_clc_msg_accept_confirm cclc;
1026 struct smc_clc_msg_proposal *pclc; 1192 struct smc_clc_msg_proposal *pclc;
1027 struct smc_ib_device *ibdev; 1193 struct smc_ib_device *ibdev;
1194 bool ism_supported = false;
1195 struct smcd_dev *ismdev;
1028 u8 buf[SMC_CLC_MAX_LEN]; 1196 u8 buf[SMC_CLC_MAX_LEN];
1029 int local_contact = 0; 1197 int local_contact = 0;
1198 unsigned short vlan;
1030 int reason_code = 0; 1199 int reason_code = 0;
1031 int rc = 0; 1200 int rc = 0;
1032 u8 ibport; 1201 u8 ibport;
@@ -1039,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)
1039 /* check if peer is smc capable */ 1208 /* check if peer is smc capable */
1040 if (!tcp_sk(newclcsock->sk)->syn_smc) { 1209 if (!tcp_sk(newclcsock->sk)->syn_smc) {
1041 new_smc->use_fallback = true; 1210 new_smc->use_fallback = true;
1211 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
1042 smc_listen_out_connected(new_smc); 1212 smc_listen_out_connected(new_smc);
1043 return; 1213 return;
1044 } 1214 }
@@ -1065,15 +1235,26 @@ static void smc_listen_work(struct work_struct *work)
1065 smc_rx_init(new_smc); 1235 smc_rx_init(new_smc);
1066 smc_tx_init(new_smc); 1236 smc_tx_init(new_smc);
1067 1237
1238 /* check if ISM is available */
1239 if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
1240 !smc_check_ism(new_smc, &ismdev) &&
1241 !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
1242 ism_supported = true;
1243 }
1244
1068 /* check if RDMA is available */ 1245 /* check if RDMA is available */
1069 if (smc_check_rdma(new_smc, &ibdev, &ibport) || 1246 if (!ism_supported &&
1070 smc_listen_rdma_check(new_smc, pclc) || 1247 ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
1071 smc_listen_rdma_init(new_smc, pclc, ibdev, ibport, 1248 smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
1072 &local_contact) || 1249 smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
1073 smc_listen_rdma_reg(new_smc, local_contact)) { 1250 smc_listen_rdma_check(new_smc, pclc) ||
1251 smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
1252 &local_contact) ||
1253 smc_listen_rdma_reg(new_smc, local_contact))) {
1074 /* SMC not supported, decline */ 1254 /* SMC not supported, decline */
1075 mutex_unlock(&smc_create_lgr_pending); 1255 mutex_unlock(&smc_create_lgr_pending);
1076 smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact); 1256 smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
1257 local_contact);
1077 return; 1258 return;
1078 } 1259 }
1079 1260
@@ -1095,7 +1276,8 @@ static void smc_listen_work(struct work_struct *work)
1095 } 1276 }
1096 1277
1097 /* finish worker */ 1278 /* finish worker */
1098 smc_listen_rdma_finish(new_smc, &cclc, local_contact); 1279 if (!ism_supported)
1280 smc_listen_rdma_finish(new_smc, &cclc, local_contact);
1099 smc_conn_save_peer_info(new_smc, &cclc); 1281 smc_conn_save_peer_info(new_smc, &cclc);
1100 mutex_unlock(&smc_create_lgr_pending); 1282 mutex_unlock(&smc_create_lgr_pending);
1101 smc_listen_out_connected(new_smc); 1283 smc_listen_out_connected(new_smc);
@@ -1119,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
1119 1301
1120 new_smc->listen_smc = lsmc; 1302 new_smc->listen_smc = lsmc;
1121 new_smc->use_fallback = lsmc->use_fallback; 1303 new_smc->use_fallback = lsmc->use_fallback;
1304 new_smc->fallback_rsn = lsmc->fallback_rsn;
1122 sock_hold(lsk); /* sock_put in smc_listen_work */ 1305 sock_hold(lsk); /* sock_put in smc_listen_work */
1123 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 1306 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
1124 smc_copy_sock_settings_to_smc(new_smc); 1307 smc_copy_sock_settings_to_smc(new_smc);
@@ -1275,6 +1458,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
1275 if (msg->msg_flags & MSG_FASTOPEN) { 1458 if (msg->msg_flags & MSG_FASTOPEN) {
1276 if (sk->sk_state == SMC_INIT) { 1459 if (sk->sk_state == SMC_INIT) {
1277 smc->use_fallback = true; 1460 smc->use_fallback = true;
1461 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1278 } else { 1462 } else {
1279 rc = -EINVAL; 1463 rc = -EINVAL;
1280 goto out; 1464 goto out;
@@ -1353,7 +1537,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
1353 mask |= EPOLLERR; 1537 mask |= EPOLLERR;
1354 } else { 1538 } else {
1355 if (sk->sk_state != SMC_CLOSED) 1539 if (sk->sk_state != SMC_CLOSED)
1356 sock_poll_wait(file, sk_sleep(sk), wait); 1540 sock_poll_wait(file, wait);
1357 if (sk->sk_err) 1541 if (sk->sk_err)
1358 mask |= EPOLLERR; 1542 mask |= EPOLLERR;
1359 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 1543 if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1471,6 +1655,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
1471 /* option not supported by SMC */ 1655 /* option not supported by SMC */
1472 if (sk->sk_state == SMC_INIT) { 1656 if (sk->sk_state == SMC_INIT) {
1473 smc->use_fallback = true; 1657 smc->use_fallback = true;
1658 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
1474 } else { 1659 } else {
1475 if (!smc->use_fallback) 1660 if (!smc->use_fallback)
1476 rc = -EINVAL; 1661 rc = -EINVAL;
@@ -1578,12 +1763,8 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
1578 smc->sk.sk_state == SMC_CLOSED) { 1763 smc->sk.sk_state == SMC_CLOSED) {
1579 answ = 0; 1764 answ = 0;
1580 } else { 1765 } else {
1581 smc_curs_write(&cons, 1766 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1582 smc_curs_read(&conn->local_tx_ctrl.cons, conn), 1767 smc_curs_copy(&urg, &conn->urg_curs, conn);
1583 conn);
1584 smc_curs_write(&urg,
1585 smc_curs_read(&conn->urg_curs, conn),
1586 conn);
1587 answ = smc_curs_diff(conn->rmb_desc->len, 1768 answ = smc_curs_diff(conn->rmb_desc->len,
1588 &cons, &urg) == 1; 1769 &cons, &urg) == 1;
1589 } 1770 }
@@ -1716,6 +1897,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
1716 /* create internal TCP socket for CLC handshake and fallback */ 1897 /* create internal TCP socket for CLC handshake and fallback */
1717 smc = smc_sk(sk); 1898 smc = smc_sk(sk);
1718 smc->use_fallback = false; /* assume rdma capability first */ 1899 smc->use_fallback = false; /* assume rdma capability first */
1900 smc->fallback_rsn = 0;
1719 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, 1901 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1720 &smc->clcsock); 1902 &smc->clcsock);
1721 if (rc) { 1903 if (rc) {
diff --git a/net/smc/smc.h b/net/smc/smc.h
index d7ca26570482..08786ace6010 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -21,8 +21,6 @@
21#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ 21#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
22#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ 22#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
23 23
24#define SMC_MAX_PORTS 2 /* Max # of ports */
25
26extern struct proto smc_proto; 24extern struct proto smc_proto;
27extern struct proto smc_proto6; 25extern struct proto smc_proto6;
28 26
@@ -185,6 +183,11 @@ struct smc_connection {
185 spinlock_t acurs_lock; /* protect cursors */ 183 spinlock_t acurs_lock; /* protect cursors */
186#endif 184#endif
187 struct work_struct close_work; /* peer sent some closing */ 185 struct work_struct close_work; /* peer sent some closing */
186 struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */
187 u8 rx_off; /* receive offset:
188 * 0 for SMC-R, 32 for SMC-D
189 */
190 u64 peer_token; /* SMC-D token of peer */
188}; 191};
189 192
190struct smc_connect_info { 193struct smc_connect_info {
@@ -205,6 +208,8 @@ struct smc_sock { /* smc sock container */
205 struct list_head accept_q; /* sockets to be accepted */ 208 struct list_head accept_q; /* sockets to be accepted */
206 spinlock_t accept_q_lock; /* protects accept_q */ 209 spinlock_t accept_q_lock; /* protects accept_q */
207 bool use_fallback; /* fallback to tcp */ 210 bool use_fallback; /* fallback to tcp */
211 int fallback_rsn; /* reason for fallback */
212 u32 peer_diagnosis; /* decline reason from peer */
208 int sockopt_defer_accept; 213 int sockopt_defer_accept;
209 /* sockopt TCP_DEFER_ACCEPT 214 /* sockopt TCP_DEFER_ACCEPT
210 * value 215 * value
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 9bde1e4ca288..ed5dcf03fe0b 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -34,14 +34,15 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
34 enum ib_wc_status wc_status) 34 enum ib_wc_status wc_status)
35{ 35{
36 struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd; 36 struct smc_cdc_tx_pend *cdcpend = (struct smc_cdc_tx_pend *)pnd_snd;
37 struct smc_connection *conn = cdcpend->conn;
37 struct smc_sock *smc; 38 struct smc_sock *smc;
38 int diff; 39 int diff;
39 40
40 if (!cdcpend->conn) 41 if (!conn)
41 /* already dismissed */ 42 /* already dismissed */
42 return; 43 return;
43 44
44 smc = container_of(cdcpend->conn, struct smc_sock, conn); 45 smc = container_of(conn, struct smc_sock, conn);
45 bh_lock_sock(&smc->sk); 46 bh_lock_sock(&smc->sk);
46 if (!wc_status) { 47 if (!wc_status) {
47 diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len, 48 diff = smc_curs_diff(cdcpend->conn->sndbuf_desc->len,
@@ -52,9 +53,7 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
52 atomic_add(diff, &cdcpend->conn->sndbuf_space); 53 atomic_add(diff, &cdcpend->conn->sndbuf_space);
53 /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ 54 /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
54 smp_mb__after_atomic(); 55 smp_mb__after_atomic();
55 smc_curs_write(&cdcpend->conn->tx_curs_fin, 56 smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);
56 smc_curs_read(&cdcpend->cursor, cdcpend->conn),
57 cdcpend->conn);
58 } 57 }
59 smc_tx_sndbuf_nonfull(smc); 58 smc_tx_sndbuf_nonfull(smc);
60 bh_unlock_sock(&smc->sk); 59 bh_unlock_sock(&smc->sk);
@@ -110,14 +109,13 @@ int smc_cdc_msg_send(struct smc_connection *conn,
110 &conn->local_tx_ctrl, conn); 109 &conn->local_tx_ctrl, conn);
111 rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); 110 rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
112 if (!rc) 111 if (!rc)
113 smc_curs_write(&conn->rx_curs_confirmed, 112 smc_curs_copy(&conn->rx_curs_confirmed,
114 smc_curs_read(&conn->local_tx_ctrl.cons, conn), 113 &conn->local_tx_ctrl.cons, conn);
115 conn);
116 114
117 return rc; 115 return rc;
118} 116}
119 117
120int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) 118static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
121{ 119{
122 struct smc_cdc_tx_pend *pend; 120 struct smc_cdc_tx_pend *pend;
123 struct smc_wr_buf *wr_buf; 121 struct smc_wr_buf *wr_buf;
@@ -130,6 +128,21 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
130 return smc_cdc_msg_send(conn, wr_buf, pend); 128 return smc_cdc_msg_send(conn, wr_buf, pend);
131} 129}
132 130
131int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
132{
133 int rc;
134
135 if (conn->lgr->is_smcd) {
136 spin_lock_bh(&conn->send_lock);
137 rc = smcd_cdc_msg_send(conn);
138 spin_unlock_bh(&conn->send_lock);
139 } else {
140 rc = smcr_cdc_get_slot_and_msg_send(conn);
141 }
142
143 return rc;
144}
145
133static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, 146static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend,
134 unsigned long data) 147 unsigned long data)
135{ 148{
@@ -157,6 +170,44 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
157 (unsigned long)conn); 170 (unsigned long)conn);
158} 171}
159 172
173/* Send a SMC-D CDC header.
174 * This increments the free space available in our send buffer.
175 * Also update the confirmed receive buffer with what was sent to the peer.
176 */
177int smcd_cdc_msg_send(struct smc_connection *conn)
178{
179 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
180 struct smcd_cdc_msg cdc;
181 int rc, diff;
182
183 memset(&cdc, 0, sizeof(cdc));
184 cdc.common.type = SMC_CDC_MSG_TYPE;
185 cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
186 cdc.prod_count = conn->local_tx_ctrl.prod.count;
187
188 cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
189 cdc.cons_count = conn->local_tx_ctrl.cons.count;
190 cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
191 cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
192 rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
193 if (rc)
194 return rc;
195 smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
196 conn);
197 /* Calculate transmitted data and increment free send buffer space */
198 diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
199 &conn->tx_curs_sent);
200 /* increased by confirmed number of bytes */
201 smp_mb__before_atomic();
202 atomic_add(diff, &conn->sndbuf_space);
203 /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
204 smp_mb__after_atomic();
205 smc_curs_copy(&conn->tx_curs_fin, &conn->tx_curs_sent, conn);
206
207 smc_tx_sndbuf_nonfull(smc);
208 return rc;
209}
210
160/********************************* receive ***********************************/ 211/********************************* receive ***********************************/
161 212
162static inline bool smc_cdc_before(u16 seq1, u16 seq2) 213static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -171,14 +222,12 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
171 char *base; 222 char *base;
172 223
173 /* new data included urgent business */ 224 /* new data included urgent business */
174 smc_curs_write(&conn->urg_curs, 225 smc_curs_copy(&conn->urg_curs, &conn->local_rx_ctrl.prod, conn);
175 smc_curs_read(&conn->local_rx_ctrl.prod, conn),
176 conn);
177 conn->urg_state = SMC_URG_VALID; 226 conn->urg_state = SMC_URG_VALID;
178 if (!sock_flag(&smc->sk, SOCK_URGINLINE)) 227 if (!sock_flag(&smc->sk, SOCK_URGINLINE))
179 /* we'll skip the urgent byte, so don't account for it */ 228 /* we'll skip the urgent byte, so don't account for it */
180 (*diff_prod)--; 229 (*diff_prod)--;
181 base = (char *)conn->rmb_desc->cpu_addr; 230 base = (char *)conn->rmb_desc->cpu_addr + conn->rx_off;
182 if (conn->urg_curs.count) 231 if (conn->urg_curs.count)
183 conn->urg_rx_byte = *(base + conn->urg_curs.count - 1); 232 conn->urg_rx_byte = *(base + conn->urg_curs.count - 1);
184 else 233 else
@@ -193,12 +242,8 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
193 struct smc_connection *conn = &smc->conn; 242 struct smc_connection *conn = &smc->conn;
194 int diff_cons, diff_prod; 243 int diff_cons, diff_prod;
195 244
196 smc_curs_write(&prod_old, 245 smc_curs_copy(&prod_old, &conn->local_rx_ctrl.prod, conn);
197 smc_curs_read(&conn->local_rx_ctrl.prod, conn), 246 smc_curs_copy(&cons_old, &conn->local_rx_ctrl.cons, conn);
198 conn);
199 smc_curs_write(&cons_old,
200 smc_curs_read(&conn->local_rx_ctrl.cons, conn),
201 conn);
202 smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn); 247 smc_cdc_msg_to_host(&conn->local_rx_ctrl, cdc, conn);
203 248
204 diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old, 249 diff_cons = smc_curs_diff(conn->peer_rmbe_size, &cons_old,
@@ -277,6 +322,34 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
277 sock_put(&smc->sk); /* no free sk in softirq-context */ 322 sock_put(&smc->sk); /* no free sk in softirq-context */
278} 323}
279 324
325/* Schedule a tasklet for this connection. Triggered from the ISM device IRQ
326 * handler to indicate update in the DMBE.
327 *
328 * Context:
329 * - tasklet context
330 */
331static void smcd_cdc_rx_tsklet(unsigned long data)
332{
333 struct smc_connection *conn = (struct smc_connection *)data;
334 struct smcd_cdc_msg cdc;
335 struct smc_sock *smc;
336
337 if (!conn)
338 return;
339
340 memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
341 smc = container_of(conn, struct smc_sock, conn);
342 smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
343}
344
345/* Initialize receive tasklet. Called from ISM device IRQ handler to start
346 * receiver side.
347 */
348void smcd_cdc_rx_init(struct smc_connection *conn)
349{
350 tasklet_init(&conn->rx_tsklet, smcd_cdc_rx_tsklet, (unsigned long)conn);
351}
352
280/***************************** init, exit, misc ******************************/ 353/***************************** init, exit, misc ******************************/
281 354
282static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) 355static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
@@ -293,7 +366,7 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
293 return; /* invalid message */ 366 return; /* invalid message */
294 367
295 /* lookup connection */ 368 /* lookup connection */
296 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); 369 lgr = smc_get_lgr(link);
297 read_lock_bh(&lgr->conns_lock); 370 read_lock_bh(&lgr->conns_lock);
298 conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); 371 conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
299 read_unlock_bh(&lgr->conns_lock); 372 read_unlock_bh(&lgr->conns_lock);
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index f60082fee5b8..934df4473a7c 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -50,6 +50,20 @@ struct smc_cdc_msg {
50 u8 reserved[18]; 50 u8 reserved[18];
51} __packed; /* format defined in RFC7609 */ 51} __packed; /* format defined in RFC7609 */
52 52
53/* CDC message for SMC-D */
54struct smcd_cdc_msg {
55 struct smc_wr_rx_hdr common; /* Type = 0xFE */
56 u8 res1[7];
57 u16 prod_wrap;
58 u32 prod_count;
59 u8 res2[2];
60 u16 cons_wrap;
61 u32 cons_count;
62 struct smc_cdc_producer_flags prod_flags;
63 struct smc_cdc_conn_state_flags conn_state_flags;
64 u8 res3[8];
65} __packed;
66
53static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn) 67static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
54{ 68{
55 return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort || 69 return conn->local_rx_ctrl.conn_state_flags.peer_conn_abort ||
@@ -90,47 +104,34 @@ static inline u64 smc_curs_read(union smc_host_cursor *curs,
90#endif 104#endif
91} 105}
92 106
93static inline u64 smc_curs_read_net(union smc_cdc_cursor *curs, 107/* Copy cursor src into tgt */
94 struct smc_connection *conn) 108static inline void smc_curs_copy(union smc_host_cursor *tgt,
95{ 109 union smc_host_cursor *src,
96#ifndef KERNEL_HAS_ATOMIC64 110 struct smc_connection *conn)
97 unsigned long flags;
98 u64 ret;
99
100 spin_lock_irqsave(&conn->acurs_lock, flags);
101 ret = curs->acurs;
102 spin_unlock_irqrestore(&conn->acurs_lock, flags);
103 return ret;
104#else
105 return atomic64_read(&curs->acurs);
106#endif
107}
108
109static inline void smc_curs_write(union smc_host_cursor *curs, u64 val,
110 struct smc_connection *conn)
111{ 111{
112#ifndef KERNEL_HAS_ATOMIC64 112#ifndef KERNEL_HAS_ATOMIC64
113 unsigned long flags; 113 unsigned long flags;
114 114
115 spin_lock_irqsave(&conn->acurs_lock, flags); 115 spin_lock_irqsave(&conn->acurs_lock, flags);
116 curs->acurs = val; 116 tgt->acurs = src->acurs;
117 spin_unlock_irqrestore(&conn->acurs_lock, flags); 117 spin_unlock_irqrestore(&conn->acurs_lock, flags);
118#else 118#else
119 atomic64_set(&curs->acurs, val); 119 atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
120#endif 120#endif
121} 121}
122 122
123static inline void smc_curs_write_net(union smc_cdc_cursor *curs, u64 val, 123static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
124 struct smc_connection *conn) 124 union smc_cdc_cursor *src,
125 struct smc_connection *conn)
125{ 126{
126#ifndef KERNEL_HAS_ATOMIC64 127#ifndef KERNEL_HAS_ATOMIC64
127 unsigned long flags; 128 unsigned long flags;
128 129
129 spin_lock_irqsave(&conn->acurs_lock, flags); 130 spin_lock_irqsave(&conn->acurs_lock, flags);
130 curs->acurs = val; 131 tgt->acurs = src->acurs;
131 spin_unlock_irqrestore(&conn->acurs_lock, flags); 132 spin_unlock_irqrestore(&conn->acurs_lock, flags);
132#else 133#else
133 atomic64_set(&curs->acurs, val); 134 atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
134#endif 135#endif
135} 136}
136 137
@@ -165,7 +166,7 @@ static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer,
165{ 166{
166 union smc_host_cursor temp; 167 union smc_host_cursor temp;
167 168
168 smc_curs_write(&temp, smc_curs_read(local, conn), conn); 169 smc_curs_copy(&temp, local, conn);
169 peer->count = htonl(temp.count); 170 peer->count = htonl(temp.count);
170 peer->wrap = htons(temp.wrap); 171 peer->wrap = htons(temp.wrap);
171 /* peer->reserved = htons(0); must be ensured by caller */ 172 /* peer->reserved = htons(0); must be ensured by caller */
@@ -192,8 +193,8 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
192 union smc_host_cursor temp, old; 193 union smc_host_cursor temp, old;
193 union smc_cdc_cursor net; 194 union smc_cdc_cursor net;
194 195
195 smc_curs_write(&old, smc_curs_read(local, conn), conn); 196 smc_curs_copy(&old, local, conn);
196 smc_curs_write_net(&net, smc_curs_read_net(peer, conn), conn); 197 smc_curs_copy_net(&net, peer, conn);
197 temp.count = ntohl(net.count); 198 temp.count = ntohl(net.count);
198 temp.wrap = ntohs(net.wrap); 199 temp.wrap = ntohs(net.wrap);
199 if ((old.wrap > temp.wrap) && temp.wrap) 200 if ((old.wrap > temp.wrap) && temp.wrap)
@@ -201,12 +202,12 @@ static inline void smc_cdc_cursor_to_host(union smc_host_cursor *local,
201 if ((old.wrap == temp.wrap) && 202 if ((old.wrap == temp.wrap) &&
202 (old.count > temp.count)) 203 (old.count > temp.count))
203 return; 204 return;
204 smc_curs_write(local, smc_curs_read(&temp, conn), conn); 205 smc_curs_copy(local, &temp, conn);
205} 206}
206 207
207static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, 208static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
208 struct smc_cdc_msg *peer, 209 struct smc_cdc_msg *peer,
209 struct smc_connection *conn) 210 struct smc_connection *conn)
210{ 211{
211 local->common.type = peer->common.type; 212 local->common.type = peer->common.type;
212 local->len = peer->len; 213 local->len = peer->len;
@@ -218,6 +219,27 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
218 local->conn_state_flags = peer->conn_state_flags; 219 local->conn_state_flags = peer->conn_state_flags;
219} 220}
220 221
222static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
223 struct smcd_cdc_msg *peer)
224{
225 local->prod.wrap = peer->prod_wrap;
226 local->prod.count = peer->prod_count;
227 local->cons.wrap = peer->cons_wrap;
228 local->cons.count = peer->cons_count;
229 local->prod_flags = peer->prod_flags;
230 local->conn_state_flags = peer->conn_state_flags;
231}
232
233static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
234 struct smc_cdc_msg *peer,
235 struct smc_connection *conn)
236{
237 if (conn->lgr->is_smcd)
238 smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer);
239 else
240 smcr_cdc_msg_to_host(local, peer, conn);
241}
242
221struct smc_cdc_tx_pend; 243struct smc_cdc_tx_pend;
222 244
223int smc_cdc_get_free_slot(struct smc_connection *conn, 245int smc_cdc_get_free_slot(struct smc_connection *conn,
@@ -227,6 +249,8 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
227int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, 249int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
228 struct smc_cdc_tx_pend *pend); 250 struct smc_cdc_tx_pend *pend);
229int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); 251int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
252int smcd_cdc_msg_send(struct smc_connection *conn);
230int smc_cdc_init(void) __init; 253int smc_cdc_init(void) __init;
254void smcd_cdc_rx_init(struct smc_connection *conn);
231 255
232#endif /* SMC_CDC_H */ 256#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ae5d168653ce..83aba9ade060 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -23,9 +23,15 @@
23#include "smc_core.h" 23#include "smc_core.h"
24#include "smc_clc.h" 24#include "smc_clc.h"
25#include "smc_ib.h" 25#include "smc_ib.h"
26#include "smc_ism.h"
27
28#define SMCR_CLC_ACCEPT_CONFIRM_LEN 68
29#define SMCD_CLC_ACCEPT_CONFIRM_LEN 48
26 30
27/* eye catcher "SMCR" EBCDIC for CLC messages */ 31/* eye catcher "SMCR" EBCDIC for CLC messages */
28static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; 32static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'};
33/* eye catcher "SMCD" EBCDIC for CLC messages */
34static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'};
29 35
30/* check if received message has a correct header length and contains valid 36/* check if received message has a correct header length and contains valid
31 * heading and trailing eyecatchers 37 * heading and trailing eyecatchers
@@ -38,10 +44,14 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
38 struct smc_clc_msg_decline *dclc; 44 struct smc_clc_msg_decline *dclc;
39 struct smc_clc_msg_trail *trl; 45 struct smc_clc_msg_trail *trl;
40 46
41 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 47 if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
48 memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
42 return false; 49 return false;
43 switch (clcm->type) { 50 switch (clcm->type) {
44 case SMC_CLC_PROPOSAL: 51 case SMC_CLC_PROPOSAL:
52 if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
53 clcm->path != SMC_TYPE_B)
54 return false;
45 pclc = (struct smc_clc_msg_proposal *)clcm; 55 pclc = (struct smc_clc_msg_proposal *)clcm;
46 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 56 pclc_prfx = smc_clc_proposal_get_prefix(pclc);
47 if (ntohs(pclc->hdr.length) != 57 if (ntohs(pclc->hdr.length) !=
@@ -56,10 +66,16 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
56 break; 66 break;
57 case SMC_CLC_ACCEPT: 67 case SMC_CLC_ACCEPT:
58 case SMC_CLC_CONFIRM: 68 case SMC_CLC_CONFIRM:
69 if (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D)
70 return false;
59 clc = (struct smc_clc_msg_accept_confirm *)clcm; 71 clc = (struct smc_clc_msg_accept_confirm *)clcm;
60 if (ntohs(clc->hdr.length) != sizeof(*clc)) 72 if ((clcm->path == SMC_TYPE_R &&
73 ntohs(clc->hdr.length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) ||
74 (clcm->path == SMC_TYPE_D &&
75 ntohs(clc->hdr.length) != SMCD_CLC_ACCEPT_CONFIRM_LEN))
61 return false; 76 return false;
62 trl = &clc->trl; 77 trl = (struct smc_clc_msg_trail *)
78 ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
63 break; 79 break;
64 case SMC_CLC_DECLINE: 80 case SMC_CLC_DECLINE:
65 dclc = (struct smc_clc_msg_decline *)clcm; 81 dclc = (struct smc_clc_msg_decline *)clcm;
@@ -70,7 +86,8 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
70 default: 86 default:
71 return false; 87 return false;
72 } 88 }
73 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) 89 if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) &&
90 memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)))
74 return false; 91 return false;
75 return true; 92 return true;
76} 93}
@@ -296,6 +313,9 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
296 datlen = ntohs(clcm->length); 313 datlen = ntohs(clcm->length);
297 if ((len < sizeof(struct smc_clc_msg_hdr)) || 314 if ((len < sizeof(struct smc_clc_msg_hdr)) ||
298 (datlen > buflen) || 315 (datlen > buflen) ||
316 (clcm->version != SMC_CLC_V1) ||
317 (clcm->path != SMC_TYPE_R && clcm->path != SMC_TYPE_D &&
318 clcm->path != SMC_TYPE_B) ||
299 ((clcm->type != SMC_CLC_DECLINE) && 319 ((clcm->type != SMC_CLC_DECLINE) &&
300 (clcm->type != expected_type))) { 320 (clcm->type != expected_type))) {
301 smc->sk.sk_err = EPROTO; 321 smc->sk.sk_err = EPROTO;
@@ -314,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
314 goto out; 334 goto out;
315 } 335 }
316 if (clcm->type == SMC_CLC_DECLINE) { 336 if (clcm->type == SMC_CLC_DECLINE) {
317 reason_code = SMC_CLC_DECL_REPLY; 337 struct smc_clc_msg_decline *dclc;
338
339 dclc = (struct smc_clc_msg_decline *)clcm;
340 reason_code = SMC_CLC_DECL_PEERDECL;
341 smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
318 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { 342 if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
319 smc->conn.lgr->sync_err = 1; 343 smc->conn.lgr->sync_err = 1;
320 smc_lgr_terminate(smc->conn.lgr); 344 smc_lgr_terminate(smc->conn.lgr);
@@ -357,17 +381,18 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
357} 381}
358 382
359/* send CLC PROPOSAL message across internal TCP socket */ 383/* send CLC PROPOSAL message across internal TCP socket */
360int smc_clc_send_proposal(struct smc_sock *smc, 384int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
361 struct smc_ib_device *smcibdev, 385 struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
362 u8 ibport) 386 struct smcd_dev *ismdev)
363{ 387{
364 struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX]; 388 struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
365 struct smc_clc_msg_proposal_prefix pclc_prfx; 389 struct smc_clc_msg_proposal_prefix pclc_prfx;
390 struct smc_clc_msg_smcd pclc_smcd;
366 struct smc_clc_msg_proposal pclc; 391 struct smc_clc_msg_proposal pclc;
367 struct smc_clc_msg_trail trl; 392 struct smc_clc_msg_trail trl;
368 int len, i, plen, rc; 393 int len, i, plen, rc;
369 int reason_code = 0; 394 int reason_code = 0;
370 struct kvec vec[4]; 395 struct kvec vec[5];
371 struct msghdr msg; 396 struct msghdr msg;
372 397
373 /* retrieve ip prefixes for CLC proposal msg */ 398 /* retrieve ip prefixes for CLC proposal msg */
@@ -382,18 +407,34 @@ int smc_clc_send_proposal(struct smc_sock *smc,
382 memset(&pclc, 0, sizeof(pclc)); 407 memset(&pclc, 0, sizeof(pclc));
383 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 408 memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
384 pclc.hdr.type = SMC_CLC_PROPOSAL; 409 pclc.hdr.type = SMC_CLC_PROPOSAL;
385 pclc.hdr.length = htons(plen);
386 pclc.hdr.version = SMC_CLC_V1; /* SMC version */ 410 pclc.hdr.version = SMC_CLC_V1; /* SMC version */
387 memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 411 pclc.hdr.path = smc_type;
388 memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); 412 if (smc_type == SMC_TYPE_R || smc_type == SMC_TYPE_B) {
389 memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); 413 /* add SMC-R specifics */
390 pclc.iparea_offset = htons(0); 414 memcpy(pclc.lcl.id_for_peer, local_systemid,
415 sizeof(local_systemid));
416 memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
417 memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
418 pclc.iparea_offset = htons(0);
419 }
420 if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
421 /* add SMC-D specifics */
422 memset(&pclc_smcd, 0, sizeof(pclc_smcd));
423 plen += sizeof(pclc_smcd);
424 pclc.iparea_offset = htons(SMC_CLC_PROPOSAL_MAX_OFFSET);
425 pclc_smcd.gid = ismdev->local_gid;
426 }
427 pclc.hdr.length = htons(plen);
391 428
392 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 429 memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
393 memset(&msg, 0, sizeof(msg)); 430 memset(&msg, 0, sizeof(msg));
394 i = 0; 431 i = 0;
395 vec[i].iov_base = &pclc; 432 vec[i].iov_base = &pclc;
396 vec[i++].iov_len = sizeof(pclc); 433 vec[i++].iov_len = sizeof(pclc);
434 if (smc_type == SMC_TYPE_D || smc_type == SMC_TYPE_B) {
435 vec[i].iov_base = &pclc_smcd;
436 vec[i++].iov_len = sizeof(pclc_smcd);
437 }
397 vec[i].iov_base = &pclc_prfx; 438 vec[i].iov_base = &pclc_prfx;
398 vec[i++].iov_len = sizeof(pclc_prfx); 439 vec[i++].iov_len = sizeof(pclc_prfx);
399 if (pclc_prfx.ipv6_prefixes_cnt > 0) { 440 if (pclc_prfx.ipv6_prefixes_cnt > 0) {
@@ -429,35 +470,55 @@ int smc_clc_send_confirm(struct smc_sock *smc)
429 struct kvec vec; 470 struct kvec vec;
430 int len; 471 int len;
431 472
432 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
433 /* send SMC Confirm CLC msg */ 473 /* send SMC Confirm CLC msg */
434 memset(&cclc, 0, sizeof(cclc)); 474 memset(&cclc, 0, sizeof(cclc));
435 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
436 cclc.hdr.type = SMC_CLC_CONFIRM; 475 cclc.hdr.type = SMC_CLC_CONFIRM;
437 cclc.hdr.length = htons(sizeof(cclc));
438 cclc.hdr.version = SMC_CLC_V1; /* SMC version */ 476 cclc.hdr.version = SMC_CLC_V1; /* SMC version */
439 memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 477 if (smc->conn.lgr->is_smcd) {
440 memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 478 /* SMC-D specific settings */
441 SMC_GID_SIZE); 479 memcpy(cclc.hdr.eyecatcher, SMCD_EYECATCHER,
442 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 480 sizeof(SMCD_EYECATCHER));
443 hton24(cclc.qpn, link->roce_qp->qp_num); 481 cclc.hdr.path = SMC_TYPE_D;
444 cclc.rmb_rkey = 482 cclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
445 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 483 cclc.gid = conn->lgr->smcd->local_gid;
446 cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ 484 cclc.token = conn->rmb_desc->token;
447 cclc.rmbe_alert_token = htonl(conn->alert_token_local); 485 cclc.dmbe_size = conn->rmbe_size_short;
448 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); 486 cclc.dmbe_idx = 0;
449 cclc.rmbe_size = conn->rmbe_size_short; 487 memcpy(&cclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
450 cclc.rmb_dma_addr = cpu_to_be64( 488 memcpy(cclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
451 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 489 sizeof(SMCD_EYECATCHER));
452 hton24(cclc.psn, link->psn_initial); 490 } else {
453 491 /* SMC-R specific settings */
454 memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 492 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
493 memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
494 sizeof(SMC_EYECATCHER));
495 cclc.hdr.path = SMC_TYPE_R;
496 cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
497 memcpy(cclc.lcl.id_for_peer, local_systemid,
498 sizeof(local_systemid));
499 memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE);
500 memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
501 ETH_ALEN);
502 hton24(cclc.qpn, link->roce_qp->qp_num);
503 cclc.rmb_rkey =
504 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
505 cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
506 cclc.rmbe_alert_token = htonl(conn->alert_token_local);
507 cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
508 cclc.rmbe_size = conn->rmbe_size_short;
509 cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
510 (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
511 hton24(cclc.psn, link->psn_initial);
512 memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
513 sizeof(SMC_EYECATCHER));
514 }
455 515
456 memset(&msg, 0, sizeof(msg)); 516 memset(&msg, 0, sizeof(msg));
457 vec.iov_base = &cclc; 517 vec.iov_base = &cclc;
458 vec.iov_len = sizeof(cclc); 518 vec.iov_len = ntohs(cclc.hdr.length);
459 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc)); 519 len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
460 if (len < sizeof(cclc)) { 520 ntohs(cclc.hdr.length));
521 if (len < ntohs(cclc.hdr.length)) {
461 if (len >= 0) { 522 if (len >= 0) {
462 reason_code = -ENETUNREACH; 523 reason_code = -ENETUNREACH;
463 smc->sk.sk_err = -reason_code; 524 smc->sk.sk_err = -reason_code;
@@ -480,35 +541,57 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
480 int rc = 0; 541 int rc = 0;
481 int len; 542 int len;
482 543
483 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
484 memset(&aclc, 0, sizeof(aclc)); 544 memset(&aclc, 0, sizeof(aclc));
485 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
486 aclc.hdr.type = SMC_CLC_ACCEPT; 545 aclc.hdr.type = SMC_CLC_ACCEPT;
487 aclc.hdr.length = htons(sizeof(aclc));
488 aclc.hdr.version = SMC_CLC_V1; /* SMC version */ 546 aclc.hdr.version = SMC_CLC_V1; /* SMC version */
489 if (srv_first_contact) 547 if (srv_first_contact)
490 aclc.hdr.flag = 1; 548 aclc.hdr.flag = 1;
491 memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); 549
492 memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1], 550 if (new_smc->conn.lgr->is_smcd) {
493 SMC_GID_SIZE); 551 /* SMC-D specific settings */
494 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); 552 aclc.hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
495 hton24(aclc.qpn, link->roce_qp->qp_num); 553 memcpy(aclc.hdr.eyecatcher, SMCD_EYECATCHER,
496 aclc.rmb_rkey = 554 sizeof(SMCD_EYECATCHER));
497 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); 555 aclc.hdr.path = SMC_TYPE_D;
498 aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */ 556 aclc.gid = conn->lgr->smcd->local_gid;
499 aclc.rmbe_alert_token = htonl(conn->alert_token_local); 557 aclc.token = conn->rmb_desc->token;
500 aclc.qp_mtu = link->path_mtu; 558 aclc.dmbe_size = conn->rmbe_size_short;
501 aclc.rmbe_size = conn->rmbe_size_short, 559 aclc.dmbe_idx = 0;
502 aclc.rmb_dma_addr = cpu_to_be64( 560 memcpy(&aclc.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
503 (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); 561 memcpy(aclc.smcd_trl.eyecatcher, SMCD_EYECATCHER,
504 hton24(aclc.psn, link->psn_initial); 562 sizeof(SMCD_EYECATCHER));
505 memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); 563 } else {
564 /* SMC-R specific settings */
565 aclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
566 memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
567 sizeof(SMC_EYECATCHER));
568 aclc.hdr.path = SMC_TYPE_R;
569 link = &conn->lgr->lnk[SMC_SINGLE_LINK];
570 memcpy(aclc.lcl.id_for_peer, local_systemid,
571 sizeof(local_systemid));
572 memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
573 memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
574 ETH_ALEN);
575 hton24(aclc.qpn, link->roce_qp->qp_num);
576 aclc.rmb_rkey =
577 htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
578 aclc.rmbe_idx = 1; /* as long as 1 RMB = 1 RMBE */
579 aclc.rmbe_alert_token = htonl(conn->alert_token_local);
580 aclc.qp_mtu = link->path_mtu;
581 aclc.rmbe_size = conn->rmbe_size_short,
582 aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
583 (conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
584 hton24(aclc.psn, link->psn_initial);
585 memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
586 sizeof(SMC_EYECATCHER));
587 }
506 588
507 memset(&msg, 0, sizeof(msg)); 589 memset(&msg, 0, sizeof(msg));
508 vec.iov_base = &aclc; 590 vec.iov_base = &aclc;
509 vec.iov_len = sizeof(aclc); 591 vec.iov_len = ntohs(aclc.hdr.length);
510 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc)); 592 len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1,
511 if (len < sizeof(aclc)) { 593 ntohs(aclc.hdr.length));
594 if (len < ntohs(aclc.hdr.length)) {
512 if (len >= 0) 595 if (len >= 0)
513 new_smc->sk.sk_err = EPROTO; 596 new_smc->sk.sk_err = EPROTO;
514 else 597 else
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 41ff9ea96139..18da89b681c2 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -23,17 +23,26 @@
23#define SMC_CLC_DECLINE 0x04 23#define SMC_CLC_DECLINE 0x04
24 24
25#define SMC_CLC_V1 0x1 /* SMC version */ 25#define SMC_CLC_V1 0x1 /* SMC version */
26#define SMC_TYPE_R 0 /* SMC-R only */
27#define SMC_TYPE_D 1 /* SMC-D only */
28#define SMC_TYPE_B 3 /* SMC-R and SMC-D */
26#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ 29#define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */
27#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ 30#define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */
28#define SMC_CLC_DECL_TIMEOUT 0x02000000 /* timeout */ 31#define SMC_CLC_DECL_TIMEOUT_CL 0x02010000 /* timeout w4 QP confirm link */
32#define SMC_CLC_DECL_TIMEOUT_AL 0x02020000 /* timeout w4 QP add link */
29#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */ 33#define SMC_CLC_DECL_CNFERR 0x03000000 /* configuration error */
30#define SMC_CLC_DECL_IPSEC 0x03030000 /* IPsec usage */ 34#define SMC_CLC_DECL_PEERNOSMC 0x03010000 /* peer did not indicate SMC */
35#define SMC_CLC_DECL_IPSEC 0x03020000 /* IPsec usage */
36#define SMC_CLC_DECL_NOSMCDEV 0x03030000 /* no SMC device found */
37#define SMC_CLC_DECL_MODEUNSUPP 0x03040000 /* smc modes do not match (R or D)*/
38#define SMC_CLC_DECL_RMBE_EC 0x03050000 /* peer has eyecatcher in RMBE */
39#define SMC_CLC_DECL_OPTUNSUPP 0x03060000 /* fastopen sockopt not supported */
31#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */ 40#define SMC_CLC_DECL_SYNCERR 0x04000000 /* synchronization error */
32#define SMC_CLC_DECL_REPLY 0x06000000 /* reply to a received decline */ 41#define SMC_CLC_DECL_PEERDECL 0x05000000 /* peer declined during handshake */
33#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ 42#define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */
34#define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */ 43#define SMC_CLC_DECL_ERR_RTOK 0x99990001 /* rtoken handling failed */
35#define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */ 44#define SMC_CLC_DECL_ERR_RDYLNK 0x99990002 /* ib ready link failed */
36#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */ 45#define SMC_CLC_DECL_ERR_REGRMB 0x99990003 /* reg rmb failed */
37 46
38struct smc_clc_msg_hdr { /* header1 of clc messages */ 47struct smc_clc_msg_hdr { /* header1 of clc messages */
39 u8 eyecatcher[4]; /* eye catcher */ 48 u8 eyecatcher[4]; /* eye catcher */
@@ -42,9 +51,11 @@ struct smc_clc_msg_hdr { /* header1 of clc messages */
42#if defined(__BIG_ENDIAN_BITFIELD) 51#if defined(__BIG_ENDIAN_BITFIELD)
43 u8 version : 4, 52 u8 version : 4,
44 flag : 1, 53 flag : 1,
45 rsvd : 3; 54 rsvd : 1,
55 path : 2;
46#elif defined(__LITTLE_ENDIAN_BITFIELD) 56#elif defined(__LITTLE_ENDIAN_BITFIELD)
47 u8 rsvd : 3, 57 u8 path : 2,
58 rsvd : 1,
48 flag : 1, 59 flag : 1,
49 version : 4; 60 version : 4;
50#endif 61#endif
@@ -77,6 +88,11 @@ struct smc_clc_msg_proposal_prefix { /* prefix part of clc proposal message*/
77 u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */ 88 u8 ipv6_prefixes_cnt; /* number of IPv6 prefixes in prefix array */
78} __aligned(4); 89} __aligned(4);
79 90
91struct smc_clc_msg_smcd { /* SMC-D GID information */
92 u64 gid; /* ISM GID of requestor */
93 u8 res[32];
94};
95
80struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */ 96struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
81 struct smc_clc_msg_hdr hdr; 97 struct smc_clc_msg_hdr hdr;
82 struct smc_clc_msg_local lcl; 98 struct smc_clc_msg_local lcl;
@@ -94,23 +110,45 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
94 110
95struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */ 111struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
96 struct smc_clc_msg_hdr hdr; 112 struct smc_clc_msg_hdr hdr;
97 struct smc_clc_msg_local lcl; 113 union {
98 u8 qpn[3]; /* QP number */ 114 struct { /* SMC-R */
99 __be32 rmb_rkey; /* RMB rkey */ 115 struct smc_clc_msg_local lcl;
100 u8 rmbe_idx; /* Index of RMBE in RMB */ 116 u8 qpn[3]; /* QP number */
101 __be32 rmbe_alert_token;/* unique connection id */ 117 __be32 rmb_rkey; /* RMB rkey */
118 u8 rmbe_idx; /* Index of RMBE in RMB */
119 __be32 rmbe_alert_token;/* unique connection id */
102#if defined(__BIG_ENDIAN_BITFIELD) 120#if defined(__BIG_ENDIAN_BITFIELD)
103 u8 rmbe_size : 4, /* RMBE buf size (compressed notation) */ 121 u8 rmbe_size : 4, /* buf size (compressed) */
104 qp_mtu : 4; /* QP mtu */ 122 qp_mtu : 4; /* QP mtu */
105#elif defined(__LITTLE_ENDIAN_BITFIELD) 123#elif defined(__LITTLE_ENDIAN_BITFIELD)
106 u8 qp_mtu : 4, 124 u8 qp_mtu : 4,
107 rmbe_size : 4; 125 rmbe_size : 4;
108#endif 126#endif
109 u8 reserved; 127 u8 reserved;
110 __be64 rmb_dma_addr; /* RMB virtual address */ 128 __be64 rmb_dma_addr; /* RMB virtual address */
111 u8 reserved2; 129 u8 reserved2;
112 u8 psn[3]; /* initial packet sequence number */ 130 u8 psn[3]; /* packet sequence number */
113 struct smc_clc_msg_trail trl; /* eye catcher "SMCR" EBCDIC */ 131 struct smc_clc_msg_trail smcr_trl;
132 /* eye catcher "SMCR" EBCDIC */
133 } __packed;
134 struct { /* SMC-D */
135 u64 gid; /* Sender GID */
136 u64 token; /* DMB token */
137 u8 dmbe_idx; /* DMBE index */
138#if defined(__BIG_ENDIAN_BITFIELD)
139 u8 dmbe_size : 4, /* buf size (compressed) */
140 reserved3 : 4;
141#elif defined(__LITTLE_ENDIAN_BITFIELD)
142 u8 reserved3 : 4,
143 dmbe_size : 4;
144#endif
145 u16 reserved4;
146 u32 linkid; /* Link identifier */
147 u32 reserved5[3];
148 struct smc_clc_msg_trail smcd_trl;
149 /* eye catcher "SMCD" EBCDIC */
150 } __packed;
151 };
114} __packed; /* format defined in RFC7609 */ 152} __packed; /* format defined in RFC7609 */
115 153
116struct smc_clc_msg_decline { /* clc decline message */ 154struct smc_clc_msg_decline { /* clc decline message */
@@ -129,13 +167,26 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc)
129 ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); 167 ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset));
130} 168}
131 169
170/* get SMC-D info from proposal message */
171static inline struct smc_clc_msg_smcd *
172smc_get_clc_msg_smcd(struct smc_clc_msg_proposal *prop)
173{
174 if (ntohs(prop->iparea_offset) != sizeof(struct smc_clc_msg_smcd))
175 return NULL;
176
177 return (struct smc_clc_msg_smcd *)(prop + 1);
178}
179
180struct smcd_dev;
181
132int smc_clc_prfx_match(struct socket *clcsock, 182int smc_clc_prfx_match(struct socket *clcsock,
133 struct smc_clc_msg_proposal_prefix *prop); 183 struct smc_clc_msg_proposal_prefix *prop);
134int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, 184int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
135 u8 expected_type); 185 u8 expected_type);
136int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); 186int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
137int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, 187int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
138 u8 ibport); 188 struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
189 struct smcd_dev *ismdev);
139int smc_clc_send_confirm(struct smc_sock *smc); 190int smc_clc_send_confirm(struct smc_sock *smc);
140int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact); 191int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);
141 192
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 15bad268f37d..e871368500e3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -26,10 +26,12 @@
26#include "smc_llc.h" 26#include "smc_llc.h"
27#include "smc_cdc.h" 27#include "smc_cdc.h"
28#include "smc_close.h" 28#include "smc_close.h"
29#include "smc_ism.h"
29 30
30#define SMC_LGR_NUM_INCR 256 31#define SMC_LGR_NUM_INCR 256
31#define SMC_LGR_FREE_DELAY_SERV (600 * HZ) 32#define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
32#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) 33#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
34#define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
33 35
34static struct smc_lgr_list smc_lgr_list = { /* established link groups */ 36static struct smc_lgr_list smc_lgr_list = { /* established link groups */
35 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), 37 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
@@ -47,8 +49,13 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
47 * otherwise there is a risk of out-of-sync link groups. 49 * otherwise there is a risk of out-of-sync link groups.
48 */ 50 */
49 mod_delayed_work(system_wq, &lgr->free_work, 51 mod_delayed_work(system_wq, &lgr->free_work,
50 lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : 52 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
51 SMC_LGR_FREE_DELAY_SERV); 53 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
54}
55
56void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
57{
58 mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
52} 59}
53 60
54/* Register connection's alert token in our lookup structure. 61/* Register connection's alert token in our lookup structure.
@@ -133,6 +140,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
133 smc_lgr_schedule_free_work(lgr); 140 smc_lgr_schedule_free_work(lgr);
134} 141}
135 142
143/* Send delete link, either as client to request the initiation
144 * of the DELETE LINK sequence from server; or as server to
145 * initiate the delete processing. See smc_llc_rx_delete_link().
146 */
147static int smc_link_send_delete(struct smc_link *lnk)
148{
149 if (lnk->state == SMC_LNK_ACTIVE &&
150 !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
151 smc_llc_link_deleting(lnk);
152 return 0;
153 }
154 return -ENOTCONN;
155}
156
136static void smc_lgr_free_work(struct work_struct *work) 157static void smc_lgr_free_work(struct work_struct *work)
137{ 158{
138 struct smc_link_group *lgr = container_of(to_delayed_work(work), 159 struct smc_link_group *lgr = container_of(to_delayed_work(work),
@@ -153,17 +174,30 @@ static void smc_lgr_free_work(struct work_struct *work)
153 list_del_init(&lgr->list); /* remove from smc_lgr_list */ 174 list_del_init(&lgr->list); /* remove from smc_lgr_list */
154free: 175free:
155 spin_unlock_bh(&smc_lgr_list.lock); 176 spin_unlock_bh(&smc_lgr_list.lock);
177
178 if (!lgr->is_smcd && !lgr->terminating) {
179 /* try to send del link msg, on error free lgr immediately */
180 if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) {
181 /* reschedule in case we never receive a response */
182 smc_lgr_schedule_free_work(lgr);
183 return;
184 }
185 }
186
156 if (!delayed_work_pending(&lgr->free_work)) { 187 if (!delayed_work_pending(&lgr->free_work)) {
157 if (lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE) 188 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
158 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 189
190 if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
191 smc_llc_link_inactive(lnk);
159 smc_lgr_free(lgr); 192 smc_lgr_free(lgr);
160 } 193 }
161} 194}
162 195
163/* create a new SMC link group */ 196/* create a new SMC link group */
164static int smc_lgr_create(struct smc_sock *smc, 197static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
165 struct smc_ib_device *smcibdev, u8 ibport, 198 struct smc_ib_device *smcibdev, u8 ibport,
166 char *peer_systemid, unsigned short vlan_id) 199 char *peer_systemid, unsigned short vlan_id,
200 struct smcd_dev *smcismdev, u64 peer_gid)
167{ 201{
168 struct smc_link_group *lgr; 202 struct smc_link_group *lgr;
169 struct smc_link *lnk; 203 struct smc_link *lnk;
@@ -171,17 +205,23 @@ static int smc_lgr_create(struct smc_sock *smc,
171 int rc = 0; 205 int rc = 0;
172 int i; 206 int i;
173 207
208 if (is_smcd && vlan_id) {
209 rc = smc_ism_get_vlan(smcismdev, vlan_id);
210 if (rc)
211 goto out;
212 }
213
174 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); 214 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
175 if (!lgr) { 215 if (!lgr) {
176 rc = -ENOMEM; 216 rc = -ENOMEM;
177 goto out; 217 goto out;
178 } 218 }
179 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; 219 lgr->is_smcd = is_smcd;
180 lgr->sync_err = 0; 220 lgr->sync_err = 0;
181 memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
182 lgr->vlan_id = vlan_id; 221 lgr->vlan_id = vlan_id;
183 rwlock_init(&lgr->sndbufs_lock); 222 rwlock_init(&lgr->sndbufs_lock);
184 rwlock_init(&lgr->rmbs_lock); 223 rwlock_init(&lgr->rmbs_lock);
224 rwlock_init(&lgr->conns_lock);
185 for (i = 0; i < SMC_RMBE_SIZES; i++) { 225 for (i = 0; i < SMC_RMBE_SIZES; i++) {
186 INIT_LIST_HEAD(&lgr->sndbufs[i]); 226 INIT_LIST_HEAD(&lgr->sndbufs[i]);
187 INIT_LIST_HEAD(&lgr->rmbs[i]); 227 INIT_LIST_HEAD(&lgr->rmbs[i]);
@@ -190,36 +230,48 @@ static int smc_lgr_create(struct smc_sock *smc,
190 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); 230 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
191 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); 231 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
192 lgr->conns_all = RB_ROOT; 232 lgr->conns_all = RB_ROOT;
193 233 if (is_smcd) {
194 lnk = &lgr->lnk[SMC_SINGLE_LINK]; 234 /* SMC-D specific settings */
195 /* initialize link */ 235 lgr->peer_gid = peer_gid;
196 lnk->state = SMC_LNK_ACTIVATING; 236 lgr->smcd = smcismdev;
197 lnk->link_id = SMC_SINGLE_LINK; 237 } else {
198 lnk->smcibdev = smcibdev; 238 /* SMC-R specific settings */
199 lnk->ibport = ibport; 239 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
200 lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; 240 memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN);
201 if (!smcibdev->initialized) 241
202 smc_ib_setup_per_ibdev(smcibdev); 242 lnk = &lgr->lnk[SMC_SINGLE_LINK];
203 get_random_bytes(rndvec, sizeof(rndvec)); 243 /* initialize link */
204 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + (rndvec[2] << 16); 244 lnk->state = SMC_LNK_ACTIVATING;
205 rc = smc_llc_link_init(lnk); 245 lnk->link_id = SMC_SINGLE_LINK;
206 if (rc) 246 lnk->smcibdev = smcibdev;
207 goto free_lgr; 247 lnk->ibport = ibport;
208 rc = smc_wr_alloc_link_mem(lnk); 248 lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu;
209 if (rc) 249 if (!smcibdev->initialized)
210 goto clear_llc_lnk; 250 smc_ib_setup_per_ibdev(smcibdev);
211 rc = smc_ib_create_protection_domain(lnk); 251 get_random_bytes(rndvec, sizeof(rndvec));
212 if (rc) 252 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
213 goto free_link_mem; 253 (rndvec[2] << 16);
214 rc = smc_ib_create_queue_pair(lnk); 254 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
215 if (rc) 255 vlan_id, lnk->gid, &lnk->sgid_index);
216 goto dealloc_pd; 256 if (rc)
217 rc = smc_wr_create_link(lnk); 257 goto free_lgr;
218 if (rc) 258 rc = smc_llc_link_init(lnk);
219 goto destroy_qp; 259 if (rc)
220 260 goto free_lgr;
261 rc = smc_wr_alloc_link_mem(lnk);
262 if (rc)
263 goto clear_llc_lnk;
264 rc = smc_ib_create_protection_domain(lnk);
265 if (rc)
266 goto free_link_mem;
267 rc = smc_ib_create_queue_pair(lnk);
268 if (rc)
269 goto dealloc_pd;
270 rc = smc_wr_create_link(lnk);
271 if (rc)
272 goto destroy_qp;
273 }
221 smc->conn.lgr = lgr; 274 smc->conn.lgr = lgr;
222 rwlock_init(&lgr->conns_lock);
223 spin_lock_bh(&smc_lgr_list.lock); 275 spin_lock_bh(&smc_lgr_list.lock);
224 list_add(&lgr->list, &smc_lgr_list.list); 276 list_add(&lgr->list, &smc_lgr_list.list);
225 spin_unlock_bh(&smc_lgr_list.lock); 277 spin_unlock_bh(&smc_lgr_list.lock);
@@ -265,7 +317,12 @@ void smc_conn_free(struct smc_connection *conn)
265{ 317{
266 if (!conn->lgr) 318 if (!conn->lgr)
267 return; 319 return;
268 smc_cdc_tx_dismiss_slots(conn); 320 if (conn->lgr->is_smcd) {
321 smc_ism_unset_conn(conn);
322 tasklet_kill(&conn->rx_tsklet);
323 } else {
324 smc_cdc_tx_dismiss_slots(conn);
325 }
269 smc_lgr_unregister_conn(conn); 326 smc_lgr_unregister_conn(conn);
270 smc_buf_unuse(conn); 327 smc_buf_unuse(conn);
271} 328}
@@ -281,8 +338,8 @@ static void smc_link_clear(struct smc_link *lnk)
281 smc_wr_free_link_mem(lnk); 338 smc_wr_free_link_mem(lnk);
282} 339}
283 340
284static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, 341static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
285 struct smc_buf_desc *buf_desc) 342 struct smc_buf_desc *buf_desc)
286{ 343{
287 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 344 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
288 345
@@ -302,6 +359,28 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
302 kfree(buf_desc); 359 kfree(buf_desc);
303} 360}
304 361
362static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
363 struct smc_buf_desc *buf_desc)
364{
365 if (is_dmb) {
366 /* restore original buf len */
367 buf_desc->len += sizeof(struct smcd_cdc_msg);
368 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
369 } else {
370 kfree(buf_desc->cpu_addr);
371 }
372 kfree(buf_desc);
373}
374
375static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
376 struct smc_buf_desc *buf_desc)
377{
378 if (lgr->is_smcd)
379 smcd_buf_free(lgr, is_rmb, buf_desc);
380 else
381 smcr_buf_free(lgr, is_rmb, buf_desc);
382}
383
305static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) 384static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
306{ 385{
307 struct smc_buf_desc *buf_desc, *bf_desc; 386 struct smc_buf_desc *buf_desc, *bf_desc;
@@ -333,7 +412,10 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
333void smc_lgr_free(struct smc_link_group *lgr) 412void smc_lgr_free(struct smc_link_group *lgr)
334{ 413{
335 smc_lgr_free_bufs(lgr); 414 smc_lgr_free_bufs(lgr);
336 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); 415 if (lgr->is_smcd)
416 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
417 else
418 smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
337 kfree(lgr); 419 kfree(lgr);
338} 420}
339 421
@@ -358,7 +440,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
358 lgr->terminating = 1; 440 lgr->terminating = 1;
359 if (!list_empty(&lgr->list)) /* forget lgr */ 441 if (!list_empty(&lgr->list)) /* forget lgr */
360 list_del_init(&lgr->list); 442 list_del_init(&lgr->list);
361 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 443 if (!lgr->is_smcd)
444 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
362 445
363 write_lock_bh(&lgr->conns_lock); 446 write_lock_bh(&lgr->conns_lock);
364 node = rb_first(&lgr->conns_all); 447 node = rb_first(&lgr->conns_all);
@@ -375,7 +458,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr)
375 node = rb_first(&lgr->conns_all); 458 node = rb_first(&lgr->conns_all);
376 } 459 }
377 write_unlock_bh(&lgr->conns_lock); 460 write_unlock_bh(&lgr->conns_lock);
378 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); 461 if (!lgr->is_smcd)
462 wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
379 smc_lgr_schedule_free_work(lgr); 463 smc_lgr_schedule_free_work(lgr);
380} 464}
381 465
@@ -393,17 +477,44 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
393 477
394 spin_lock_bh(&smc_lgr_list.lock); 478 spin_lock_bh(&smc_lgr_list.lock);
395 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { 479 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
396 if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && 480 if (!lgr->is_smcd &&
481 lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
397 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) 482 lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
398 __smc_lgr_terminate(lgr); 483 __smc_lgr_terminate(lgr);
399 } 484 }
400 spin_unlock_bh(&smc_lgr_list.lock); 485 spin_unlock_bh(&smc_lgr_list.lock);
401} 486}
402 487
488/* Called when SMC-D device is terminated or peer is lost */
489void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
490{
491 struct smc_link_group *lgr, *l;
492 LIST_HEAD(lgr_free_list);
493
494 /* run common cleanup function and build free list */
495 spin_lock_bh(&smc_lgr_list.lock);
496 list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
497 if (lgr->is_smcd && lgr->smcd == dev &&
498 (!peer_gid || lgr->peer_gid == peer_gid) &&
499 !list_empty(&lgr->list)) {
500 __smc_lgr_terminate(lgr);
501 list_move(&lgr->list, &lgr_free_list);
502 }
503 }
504 spin_unlock_bh(&smc_lgr_list.lock);
505
506 /* cancel the regular free workers and actually free lgrs */
507 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
508 list_del_init(&lgr->list);
509 cancel_delayed_work_sync(&lgr->free_work);
510 smc_lgr_free(lgr);
511 }
512}
513
403/* Determine vlan of internal TCP socket. 514/* Determine vlan of internal TCP socket.
404 * @vlan_id: address to store the determined vlan id into 515 * @vlan_id: address to store the determined vlan id into
405 */ 516 */
406static int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id) 517int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
407{ 518{
408 struct dst_entry *dst = sk_dst_get(clcsock->sk); 519 struct dst_entry *dst = sk_dst_get(clcsock->sk);
409 struct net_device *ndev; 520 struct net_device *ndev;
@@ -447,41 +558,30 @@ out:
447 return rc; 558 return rc;
448} 559}
449 560
450/* determine the link gid matching the vlan id of the link group */ 561static bool smcr_lgr_match(struct smc_link_group *lgr,
451static int smc_link_determine_gid(struct smc_link_group *lgr) 562 struct smc_clc_msg_local *lcl,
563 enum smc_lgr_role role)
452{ 564{
453 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; 565 return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
454 struct ib_gid_attr gattr; 566 SMC_SYSTEMID_LEN) &&
455 union ib_gid gid; 567 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
456 int i; 568 SMC_GID_SIZE) &&
457 569 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
458 if (!lgr->vlan_id) { 570 sizeof(lcl->mac)) &&
459 lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1]; 571 lgr->role == role;
460 return 0; 572}
461 }
462 573
463 for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len; 574static bool smcd_lgr_match(struct smc_link_group *lgr,
464 i++) { 575 struct smcd_dev *smcismdev, u64 peer_gid)
465 if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, 576{
466 &gattr)) 577 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
467 continue;
468 if (gattr.ndev) {
469 if (is_vlan_dev(gattr.ndev) &&
470 vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
471 lnk->gid = gid;
472 dev_put(gattr.ndev);
473 return 0;
474 }
475 dev_put(gattr.ndev);
476 }
477 }
478 return -ENODEV;
479} 578}
480 579
481/* create a new SMC connection (and a new link group if necessary) */ 580/* create a new SMC connection (and a new link group if necessary) */
482int smc_conn_create(struct smc_sock *smc, 581int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
483 struct smc_ib_device *smcibdev, u8 ibport, 582 struct smc_ib_device *smcibdev, u8 ibport,
484 struct smc_clc_msg_local *lcl, int srv_first_contact) 583 struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
584 u64 peer_gid)
485{ 585{
486 struct smc_connection *conn = &smc->conn; 586 struct smc_connection *conn = &smc->conn;
487 int local_contact = SMC_FIRST_CONTACT; 587 int local_contact = SMC_FIRST_CONTACT;
@@ -503,17 +603,12 @@ int smc_conn_create(struct smc_sock *smc,
503 spin_lock_bh(&smc_lgr_list.lock); 603 spin_lock_bh(&smc_lgr_list.lock);
504 list_for_each_entry(lgr, &smc_lgr_list.list, list) { 604 list_for_each_entry(lgr, &smc_lgr_list.list, list) {
505 write_lock_bh(&lgr->conns_lock); 605 write_lock_bh(&lgr->conns_lock);
506 if (!memcmp(lgr->peer_systemid, lcl->id_for_peer, 606 if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
507 SMC_SYSTEMID_LEN) && 607 smcr_lgr_match(lgr, lcl, role)) &&
508 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
509 SMC_GID_SIZE) &&
510 !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
511 sizeof(lcl->mac)) &&
512 !lgr->sync_err && 608 !lgr->sync_err &&
513 (lgr->role == role) && 609 lgr->vlan_id == vlan_id &&
514 (lgr->vlan_id == vlan_id) && 610 (role == SMC_CLNT ||
515 ((role == SMC_CLNT) || 611 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
516 (lgr->conns_num < SMC_RMBS_PER_LGR_MAX))) {
517 /* link group found */ 612 /* link group found */
518 local_contact = SMC_REUSE_CONTACT; 613 local_contact = SMC_REUSE_CONTACT;
519 conn->lgr = lgr; 614 conn->lgr = lgr;
@@ -536,16 +631,19 @@ int smc_conn_create(struct smc_sock *smc,
536 631
537create: 632create:
538 if (local_contact == SMC_FIRST_CONTACT) { 633 if (local_contact == SMC_FIRST_CONTACT) {
539 rc = smc_lgr_create(smc, smcibdev, ibport, 634 rc = smc_lgr_create(smc, is_smcd, smcibdev, ibport,
540 lcl->id_for_peer, vlan_id); 635 lcl->id_for_peer, vlan_id, smcd, peer_gid);
541 if (rc) 636 if (rc)
542 goto out; 637 goto out;
543 smc_lgr_register_conn(conn); /* add smc conn to lgr */ 638 smc_lgr_register_conn(conn); /* add smc conn to lgr */
544 rc = smc_link_determine_gid(conn->lgr);
545 } 639 }
546 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; 640 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
547 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; 641 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
548 conn->urg_state = SMC_URG_READ; 642 conn->urg_state = SMC_URG_READ;
643 if (is_smcd) {
644 conn->rx_off = sizeof(struct smcd_cdc_msg);
645 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
646 }
549#ifndef KERNEL_HAS_ATOMIC64 647#ifndef KERNEL_HAS_ATOMIC64
550 spin_lock_init(&conn->acurs_lock); 648 spin_lock_init(&conn->acurs_lock);
551#endif 649#endif
@@ -610,8 +708,8 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
610 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); 708 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
611} 709}
612 710
613static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, 711static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
614 bool is_rmb, int bufsize) 712 bool is_rmb, int bufsize)
615{ 713{
616 struct smc_buf_desc *buf_desc; 714 struct smc_buf_desc *buf_desc;
617 struct smc_link *lnk; 715 struct smc_link *lnk;
@@ -669,7 +767,44 @@ static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr,
669 return buf_desc; 767 return buf_desc;
670} 768}
671 769
672static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) 770#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
771
772static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
773 bool is_dmb, int bufsize)
774{
775 struct smc_buf_desc *buf_desc;
776 int rc;
777
778 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
779 return ERR_PTR(-EAGAIN);
780
781 /* try to alloc a new DMB */
782 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
783 if (!buf_desc)
784 return ERR_PTR(-ENOMEM);
785 if (is_dmb) {
786 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
787 if (rc) {
788 kfree(buf_desc);
789 return ERR_PTR(-EAGAIN);
790 }
791 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
792 /* CDC header stored in buf. So, pretend it was smaller */
793 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
794 } else {
795 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
796 __GFP_NOWARN | __GFP_NORETRY |
797 __GFP_NOMEMALLOC);
798 if (!buf_desc->cpu_addr) {
799 kfree(buf_desc);
800 return ERR_PTR(-EAGAIN);
801 }
802 buf_desc->len = bufsize;
803 }
804 return buf_desc;
805}
806
807static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
673{ 808{
674 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); 809 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
675 struct smc_connection *conn = &smc->conn; 810 struct smc_connection *conn = &smc->conn;
@@ -707,7 +842,11 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
707 break; /* found reusable slot */ 842 break; /* found reusable slot */
708 } 843 }
709 844
710 buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); 845 if (is_smcd)
846 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
847 else
848 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
849
711 if (PTR_ERR(buf_desc) == -ENOMEM) 850 if (PTR_ERR(buf_desc) == -ENOMEM)
712 break; 851 break;
713 if (IS_ERR(buf_desc)) 852 if (IS_ERR(buf_desc))
@@ -728,7 +867,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_rmb)
728 conn->rmbe_size_short = bufsize_short; 867 conn->rmbe_size_short = bufsize_short;
729 smc->sk.sk_rcvbuf = bufsize * 2; 868 smc->sk.sk_rcvbuf = bufsize * 2;
730 atomic_set(&conn->bytes_to_rcv, 0); 869 atomic_set(&conn->bytes_to_rcv, 0);
731 conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); 870 conn->rmbe_update_limit =
871 smc_rmb_wnd_update_limit(buf_desc->len);
872 if (is_smcd)
873 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
732 } else { 874 } else {
733 conn->sndbuf_desc = buf_desc; 875 conn->sndbuf_desc = buf_desc;
734 smc->sk.sk_sndbuf = bufsize * 2; 876 smc->sk.sk_sndbuf = bufsize * 2;
@@ -741,6 +883,8 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
741{ 883{
742 struct smc_link_group *lgr = conn->lgr; 884 struct smc_link_group *lgr = conn->lgr;
743 885
886 if (!conn->lgr || conn->lgr->is_smcd)
887 return;
744 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 888 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
745 conn->sndbuf_desc, DMA_TO_DEVICE); 889 conn->sndbuf_desc, DMA_TO_DEVICE);
746} 890}
@@ -749,6 +893,8 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
749{ 893{
750 struct smc_link_group *lgr = conn->lgr; 894 struct smc_link_group *lgr = conn->lgr;
751 895
896 if (!conn->lgr || conn->lgr->is_smcd)
897 return;
752 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 898 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
753 conn->sndbuf_desc, DMA_TO_DEVICE); 899 conn->sndbuf_desc, DMA_TO_DEVICE);
754} 900}
@@ -757,6 +903,8 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
757{ 903{
758 struct smc_link_group *lgr = conn->lgr; 904 struct smc_link_group *lgr = conn->lgr;
759 905
906 if (!conn->lgr || conn->lgr->is_smcd)
907 return;
760 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 908 smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
761 conn->rmb_desc, DMA_FROM_DEVICE); 909 conn->rmb_desc, DMA_FROM_DEVICE);
762} 910}
@@ -765,6 +913,8 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
765{ 913{
766 struct smc_link_group *lgr = conn->lgr; 914 struct smc_link_group *lgr = conn->lgr;
767 915
916 if (!conn->lgr || conn->lgr->is_smcd)
917 return;
768 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, 918 smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
769 conn->rmb_desc, DMA_FROM_DEVICE); 919 conn->rmb_desc, DMA_FROM_DEVICE);
770} 920}
@@ -775,16 +925,16 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
775 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an 925 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
776 * extra RMB for every connection in a link group 926 * extra RMB for every connection in a link group
777 */ 927 */
778int smc_buf_create(struct smc_sock *smc) 928int smc_buf_create(struct smc_sock *smc, bool is_smcd)
779{ 929{
780 int rc; 930 int rc;
781 931
782 /* create send buffer */ 932 /* create send buffer */
783 rc = __smc_buf_create(smc, false); 933 rc = __smc_buf_create(smc, is_smcd, false);
784 if (rc) 934 if (rc)
785 return rc; 935 return rc;
786 /* create rmb */ 936 /* create rmb */
787 rc = __smc_buf_create(smc, true); 937 rc = __smc_buf_create(smc, is_smcd, true);
788 if (rc) 938 if (rc)
789 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); 939 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
790 return rc; 940 return rc;
@@ -866,7 +1016,14 @@ void smc_core_exit(void)
866 spin_unlock_bh(&smc_lgr_list.lock); 1016 spin_unlock_bh(&smc_lgr_list.lock);
867 list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { 1017 list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
868 list_del_init(&lgr->list); 1018 list_del_init(&lgr->list);
869 smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); 1019 if (!lgr->is_smcd) {
1020 struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
1021
1022 if (lnk->state == SMC_LNK_ACTIVE)
1023 smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
1024 false);
1025 smc_llc_link_inactive(lnk);
1026 }
870 cancel_delayed_work_sync(&lgr->free_work); 1027 cancel_delayed_work_sync(&lgr->free_work);
871 smc_lgr_free(lgr); /* free link group */ 1028 smc_lgr_free(lgr); /* free link group */
872 } 1029 }
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 93cb3523bf50..c156674733c9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -34,7 +34,8 @@ enum smc_lgr_role { /* possible roles of a link group */
34enum smc_link_state { /* possible states of a link */ 34enum smc_link_state { /* possible states of a link */
35 SMC_LNK_INACTIVE, /* link is inactive */ 35 SMC_LNK_INACTIVE, /* link is inactive */
36 SMC_LNK_ACTIVATING, /* link is being activated */ 36 SMC_LNK_ACTIVATING, /* link is being activated */
37 SMC_LNK_ACTIVE /* link is active */ 37 SMC_LNK_ACTIVE, /* link is active */
38 SMC_LNK_DELETING, /* link is being deleted */
38}; 39};
39 40
40#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ 41#define SMC_WR_BUF_SIZE 48 /* size of work request buffer */
@@ -84,14 +85,15 @@ struct smc_link {
84 wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ 85 wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */
85 enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ 86 enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */
86 87
87 union ib_gid gid; /* gid matching used vlan id */ 88 u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/
89 u8 sgid_index; /* gid index for vlan id */
88 u32 peer_qpn; /* QP number of peer */ 90 u32 peer_qpn; /* QP number of peer */
89 enum ib_mtu path_mtu; /* used mtu */ 91 enum ib_mtu path_mtu; /* used mtu */
90 enum ib_mtu peer_mtu; /* mtu size of peer */ 92 enum ib_mtu peer_mtu; /* mtu size of peer */
91 u32 psn_initial; /* QP tx initial packet seqno */ 93 u32 psn_initial; /* QP tx initial packet seqno */
92 u32 peer_psn; /* QP rx initial packet seqno */ 94 u32 peer_psn; /* QP rx initial packet seqno */
93 u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ 95 u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */
94 u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ 96 u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/
95 u8 link_id; /* unique # within link group */ 97 u8 link_id; /* unique # within link group */
96 98
97 enum smc_link_state state; /* state of link */ 99 enum smc_link_state state; /* state of link */
@@ -124,15 +126,28 @@ struct smc_buf_desc {
124 void *cpu_addr; /* virtual address of buffer */ 126 void *cpu_addr; /* virtual address of buffer */
125 struct page *pages; 127 struct page *pages;
126 int len; /* length of buffer */ 128 int len; /* length of buffer */
127 struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */
128 struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
129 /* for rmb only: memory region
130 * incl. rkey provided to peer
131 */
132 u32 order; /* allocation order */
133 u32 used; /* currently used / unused */ 129 u32 used; /* currently used / unused */
134 u8 reused : 1; /* new created / reused */ 130 u8 reused : 1; /* new created / reused */
135 u8 regerr : 1; /* err during registration */ 131 u8 regerr : 1; /* err during registration */
132 union {
133 struct { /* SMC-R */
134 struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];
135 /* virtual buffer */
136 struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX];
137 /* for rmb only: memory region
138 * incl. rkey provided to peer
139 */
140 u32 order; /* allocation order */
141 };
142 struct { /* SMC-D */
143 unsigned short sba_idx;
144 /* SBA index number */
145 u64 token;
146 /* DMB token number */
147 dma_addr_t dma_addr;
148 /* DMA address */
149 };
150 };
136}; 151};
137 152
138struct smc_rtoken { /* address/key of remote RMB */ 153struct smc_rtoken { /* address/key of remote RMB */
@@ -148,12 +163,10 @@ struct smc_rtoken { /* address/key of remote RMB */
148 * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15) 163 * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
149 */ 164 */
150 165
166struct smcd_dev;
167
151struct smc_link_group { 168struct smc_link_group {
152 struct list_head list; 169 struct list_head list;
153 enum smc_lgr_role role; /* client or server */
154 struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */
155 char peer_systemid[SMC_SYSTEMID_LEN];
156 /* unique system_id of peer */
157 struct rb_root conns_all; /* connection tree */ 170 struct rb_root conns_all; /* connection tree */
158 rwlock_t conns_lock; /* protects conns_all */ 171 rwlock_t conns_lock; /* protects conns_all */
159 unsigned int conns_num; /* current # of connections */ 172 unsigned int conns_num; /* current # of connections */
@@ -163,17 +176,34 @@ struct smc_link_group {
163 rwlock_t sndbufs_lock; /* protects tx buffers */ 176 rwlock_t sndbufs_lock; /* protects tx buffers */
164 struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */ 177 struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
165 rwlock_t rmbs_lock; /* protects rx buffers */ 178 rwlock_t rmbs_lock; /* protects rx buffers */
166 struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
167 [SMC_LINKS_PER_LGR_MAX];
168 /* remote addr/key pairs */
169 unsigned long rtokens_used_mask[BITS_TO_LONGS(
170 SMC_RMBS_PER_LGR_MAX)];
171 /* used rtoken elements */
172 179
173 u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ 180 u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
174 struct delayed_work free_work; /* delayed freeing of an lgr */ 181 struct delayed_work free_work; /* delayed freeing of an lgr */
175 u8 sync_err : 1; /* lgr no longer fits to peer */ 182 u8 sync_err : 1; /* lgr no longer fits to peer */
176 u8 terminating : 1;/* lgr is terminating */ 183 u8 terminating : 1;/* lgr is terminating */
184
185 bool is_smcd; /* SMC-R or SMC-D */
186 union {
187 struct { /* SMC-R */
188 enum smc_lgr_role role;
189 /* client or server */
190 struct smc_link lnk[SMC_LINKS_PER_LGR_MAX];
191 /* smc link */
192 char peer_systemid[SMC_SYSTEMID_LEN];
193 /* unique system_id of peer */
194 struct smc_rtoken rtokens[SMC_RMBS_PER_LGR_MAX]
195 [SMC_LINKS_PER_LGR_MAX];
196 /* remote addr/key pairs */
197 DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
198 /* used rtoken elements */
199 };
200 struct { /* SMC-D */
201 u64 peer_gid;
202 /* Peer GID (remote) */
203 struct smcd_dev *smcd;
204 /* ISM device for VLAN reg. */
205 };
206 };
177}; 207};
178 208
179/* Find the connection associated with the given alert token in the link group. 209/* Find the connection associated with the given alert token in the link group.
@@ -217,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
217void smc_lgr_forget(struct smc_link_group *lgr); 247void smc_lgr_forget(struct smc_link_group *lgr);
218void smc_lgr_terminate(struct smc_link_group *lgr); 248void smc_lgr_terminate(struct smc_link_group *lgr);
219void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); 249void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
220int smc_buf_create(struct smc_sock *smc); 250void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
251int smc_buf_create(struct smc_sock *smc, bool is_smcd);
221int smc_uncompress_bufsize(u8 compressed); 252int smc_uncompress_bufsize(u8 compressed);
222int smc_rmb_rtoken_handling(struct smc_connection *conn, 253int smc_rmb_rtoken_handling(struct smc_connection *conn,
223 struct smc_clc_msg_accept_confirm *clc); 254 struct smc_clc_msg_accept_confirm *clc);
@@ -227,9 +258,19 @@ void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
227void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); 258void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
228void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); 259void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
229void smc_rmb_sync_sg_for_device(struct smc_connection *conn); 260void smc_rmb_sync_sg_for_device(struct smc_connection *conn);
261int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
262
230void smc_conn_free(struct smc_connection *conn); 263void smc_conn_free(struct smc_connection *conn);
231int smc_conn_create(struct smc_sock *smc, 264int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
232 struct smc_ib_device *smcibdev, u8 ibport, 265 struct smc_ib_device *smcibdev, u8 ibport,
233 struct smc_clc_msg_local *lcl, int srv_first_contact); 266 struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
267 u64 peer_gid);
268void smcd_conn_free(struct smc_connection *conn);
269void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
234void smc_core_exit(void); 270void smc_core_exit(void);
271
272static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
273{
274 return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
275}
235#endif 276#endif
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 839354402215..dbf64a93d68a 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
79 struct nlattr *bc) 79 struct nlattr *bc)
80{ 80{
81 struct smc_sock *smc = smc_sk(sk); 81 struct smc_sock *smc = smc_sk(sk);
82 struct smc_diag_fallback fallback;
82 struct user_namespace *user_ns; 83 struct user_namespace *user_ns;
83 struct smc_diag_msg *r; 84 struct smc_diag_msg *r;
84 struct nlmsghdr *nlh; 85 struct nlmsghdr *nlh;
@@ -91,11 +92,21 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
91 r = nlmsg_data(nlh); 92 r = nlmsg_data(nlh);
92 smc_diag_msg_common_fill(r, sk); 93 smc_diag_msg_common_fill(r, sk);
93 r->diag_state = sk->sk_state; 94 r->diag_state = sk->sk_state;
94 r->diag_fallback = smc->use_fallback; 95 if (smc->use_fallback)
96 r->diag_mode = SMC_DIAG_MODE_FALLBACK_TCP;
97 else if (smc->conn.lgr && smc->conn.lgr->is_smcd)
98 r->diag_mode = SMC_DIAG_MODE_SMCD;
99 else
100 r->diag_mode = SMC_DIAG_MODE_SMCR;
95 user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk); 101 user_ns = sk_user_ns(NETLINK_CB(cb->skb).sk);
96 if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns)) 102 if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
97 goto errout; 103 goto errout;
98 104
105 fallback.reason = smc->fallback_rsn;
106 fallback.peer_diagnosis = smc->peer_diagnosis;
107 if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0)
108 goto errout;
109
99 if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && 110 if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
100 smc->conn.alert_token_local) { 111 smc->conn.alert_token_local) {
101 struct smc_connection *conn = &smc->conn; 112 struct smc_connection *conn = &smc->conn;
@@ -136,7 +147,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
136 goto errout; 147 goto errout;
137 } 148 }
138 149
139 if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr && 150 if (smc->conn.lgr && !smc->conn.lgr->is_smcd &&
151 (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) &&
140 !list_empty(&smc->conn.lgr->list)) { 152 !list_empty(&smc->conn.lgr->list)) {
141 struct smc_diag_lgrinfo linfo = { 153 struct smc_diag_lgrinfo linfo = {
142 .role = smc->conn.lgr->role, 154 .role = smc->conn.lgr->role,
@@ -148,13 +160,28 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
148 smc->conn.lgr->lnk[0].smcibdev->ibdev->name, 160 smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
149 sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name)); 161 sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
150 smc_gid_be16_convert(linfo.lnk[0].gid, 162 smc_gid_be16_convert(linfo.lnk[0].gid,
151 smc->conn.lgr->lnk[0].gid.raw); 163 smc->conn.lgr->lnk[0].gid);
152 smc_gid_be16_convert(linfo.lnk[0].peer_gid, 164 smc_gid_be16_convert(linfo.lnk[0].peer_gid,
153 smc->conn.lgr->lnk[0].peer_gid); 165 smc->conn.lgr->lnk[0].peer_gid);
154 166
155 if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0) 167 if (nla_put(skb, SMC_DIAG_LGRINFO, sizeof(linfo), &linfo) < 0)
156 goto errout; 168 goto errout;
157 } 169 }
170 if (smc->conn.lgr && smc->conn.lgr->is_smcd &&
171 (req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
172 !list_empty(&smc->conn.lgr->list)) {
173 struct smc_connection *conn = &smc->conn;
174 struct smcd_diag_dmbinfo dinfo = {
175 .linkid = *((u32 *)conn->lgr->id),
176 .peer_gid = conn->lgr->peer_gid,
177 .my_gid = conn->lgr->smcd->local_gid,
178 .token = conn->rmb_desc->token,
179 .peer_token = conn->peer_token
180 };
181
182 if (nla_put(skb, SMC_DIAG_DMBINFO, sizeof(dinfo), &dinfo) < 0)
183 goto errout;
184 }
158 185
159 nlmsg_end(skb, nlh); 186 nlmsg_end(skb, nlh);
160 return 0; 187 return 0;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 117b05f1a494..9bb5274a244e 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -69,7 +69,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
69 qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu); 69 qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
70 qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; 70 qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
71 rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport); 71 rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
72 rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0); 72 rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
73 rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid); 73 rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
74 memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac, 74 memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
75 sizeof(lnk->peer_mac)); 75 sizeof(lnk->peer_mac));
@@ -113,8 +113,7 @@ int smc_ib_modify_qp_reset(struct smc_link *lnk)
113 113
114int smc_ib_ready_link(struct smc_link *lnk) 114int smc_ib_ready_link(struct smc_link *lnk)
115{ 115{
116 struct smc_link_group *lgr = 116 struct smc_link_group *lgr = smc_get_lgr(lnk);
117 container_of(lnk, struct smc_link_group, lnk[0]);
118 int rc = 0; 117 int rc = 0;
119 118
120 rc = smc_ib_modify_qp_init(lnk); 119 rc = smc_ib_modify_qp_init(lnk);
@@ -144,6 +143,93 @@ out:
144 return rc; 143 return rc;
145} 144}
146 145
146static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
147{
148 struct ib_gid_attr gattr;
149 union ib_gid gid;
150 int rc;
151
152 rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr);
153 if (rc || !gattr.ndev)
154 return -ENODEV;
155
156 memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
157 dev_put(gattr.ndev);
158 return 0;
159}
160
161/* Create an identifier unique for this instance of SMC-R.
162 * The MAC-address of the first active registered IB device
163 * plus a random 2-byte number is used to create this identifier.
164 * This name is delivered to the peer during connection initialization.
165 */
166static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
167 u8 ibport)
168{
169 memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
170 sizeof(smcibdev->mac[ibport - 1]));
171 get_random_bytes(&local_systemid[0], 2);
172}
173
174bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
175{
176 return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
177}
178
179/* determine the gid for an ib-device port and vlan id */
180int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
181 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
182{
183 struct ib_gid_attr gattr;
184 union ib_gid _gid;
185 int i;
186
187 for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
188 memset(&_gid, 0, SMC_GID_SIZE);
189 memset(&gattr, 0, sizeof(gattr));
190 if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr))
191 continue;
192 if (!gattr.ndev)
193 continue;
194 if (((!vlan_id && !is_vlan_dev(gattr.ndev)) ||
195 (vlan_id && is_vlan_dev(gattr.ndev) &&
196 vlan_dev_vlan_id(gattr.ndev) == vlan_id)) &&
197 gattr.gid_type == IB_GID_TYPE_IB) {
198 if (gid)
199 memcpy(gid, &_gid, SMC_GID_SIZE);
200 if (sgid_index)
201 *sgid_index = i;
202 dev_put(gattr.ndev);
203 return 0;
204 }
205 dev_put(gattr.ndev);
206 }
207 return -ENODEV;
208}
209
210static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
211{
212 int rc;
213
214 memset(&smcibdev->pattr[ibport - 1], 0,
215 sizeof(smcibdev->pattr[ibport - 1]));
216 rc = ib_query_port(smcibdev->ibdev, ibport,
217 &smcibdev->pattr[ibport - 1]);
218 if (rc)
219 goto out;
220 /* the SMC protocol requires specification of the RoCE MAC address */
221 rc = smc_ib_fill_mac(smcibdev, ibport);
222 if (rc)
223 goto out;
224 if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
225 sizeof(local_systemid)) &&
226 smc_ib_port_active(smcibdev, ibport))
227 /* create unique system identifier */
228 smc_ib_define_local_systemid(smcibdev, ibport);
229out:
230 return rc;
231}
232
147/* process context wrapper for might_sleep smc_ib_remember_port_attr */ 233/* process context wrapper for might_sleep smc_ib_remember_port_attr */
148static void smc_ib_port_event_work(struct work_struct *work) 234static void smc_ib_port_event_work(struct work_struct *work)
149{ 235{
@@ -371,62 +457,6 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
371 buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; 457 buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
372} 458}
373 459
374static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
375{
376 struct ib_gid_attr gattr;
377 int rc;
378
379 rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
380 &smcibdev->gid[ibport - 1], &gattr);
381 if (rc || !gattr.ndev)
382 return -ENODEV;
383
384 memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
385 dev_put(gattr.ndev);
386 return 0;
387}
388
389/* Create an identifier unique for this instance of SMC-R.
390 * The MAC-address of the first active registered IB device
391 * plus a random 2-byte number is used to create this identifier.
392 * This name is delivered to the peer during connection initialization.
393 */
394static inline void smc_ib_define_local_systemid(struct smc_ib_device *smcibdev,
395 u8 ibport)
396{
397 memcpy(&local_systemid[2], &smcibdev->mac[ibport - 1],
398 sizeof(smcibdev->mac[ibport - 1]));
399 get_random_bytes(&local_systemid[0], 2);
400}
401
402bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
403{
404 return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
405}
406
407int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
408{
409 int rc;
410
411 memset(&smcibdev->pattr[ibport - 1], 0,
412 sizeof(smcibdev->pattr[ibport - 1]));
413 rc = ib_query_port(smcibdev->ibdev, ibport,
414 &smcibdev->pattr[ibport - 1]);
415 if (rc)
416 goto out;
417 /* the SMC protocol requires specification of the RoCE MAC address */
418 rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
419 if (rc)
420 goto out;
421 if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,
422 sizeof(local_systemid)) &&
423 smc_ib_port_active(smcibdev, ibport))
424 /* create unique system identifier */
425 smc_ib_define_local_systemid(smcibdev, ibport);
426out:
427 return rc;
428}
429
430long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) 460long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
431{ 461{
432 struct ib_cq_init_attr cqattr = { 462 struct ib_cq_init_attr cqattr = {
@@ -455,9 +485,6 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
455 smcibdev->roce_cq_recv = NULL; 485 smcibdev->roce_cq_recv = NULL;
456 goto err; 486 goto err;
457 } 487 }
458 INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
459 smc_ib_global_event_handler);
460 ib_register_event_handler(&smcibdev->event_handler);
461 smc_wr_add_dev(smcibdev); 488 smc_wr_add_dev(smcibdev);
462 smcibdev->initialized = 1; 489 smcibdev->initialized = 1;
463 return rc; 490 return rc;
@@ -473,7 +500,6 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
473 return; 500 return;
474 smcibdev->initialized = 0; 501 smcibdev->initialized = 0;
475 smc_wr_remove_dev(smcibdev); 502 smc_wr_remove_dev(smcibdev);
476 ib_unregister_event_handler(&smcibdev->event_handler);
477 ib_destroy_cq(smcibdev->roce_cq_recv); 503 ib_destroy_cq(smcibdev->roce_cq_recv);
478 ib_destroy_cq(smcibdev->roce_cq_send); 504 ib_destroy_cq(smcibdev->roce_cq_send);
479} 505}
@@ -484,6 +510,8 @@ static struct ib_client smc_ib_client;
484static void smc_ib_add_dev(struct ib_device *ibdev) 510static void smc_ib_add_dev(struct ib_device *ibdev)
485{ 511{
486 struct smc_ib_device *smcibdev; 512 struct smc_ib_device *smcibdev;
513 u8 port_cnt;
514 int i;
487 515
488 if (ibdev->node_type != RDMA_NODE_IB_CA) 516 if (ibdev->node_type != RDMA_NODE_IB_CA)
489 return; 517 return;
@@ -499,6 +527,21 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
499 list_add_tail(&smcibdev->list, &smc_ib_devices.list); 527 list_add_tail(&smcibdev->list, &smc_ib_devices.list);
500 spin_unlock(&smc_ib_devices.lock); 528 spin_unlock(&smc_ib_devices.lock);
501 ib_set_client_data(ibdev, &smc_ib_client, smcibdev); 529 ib_set_client_data(ibdev, &smc_ib_client, smcibdev);
530 INIT_IB_EVENT_HANDLER(&smcibdev->event_handler, smcibdev->ibdev,
531 smc_ib_global_event_handler);
532 ib_register_event_handler(&smcibdev->event_handler);
533
534 /* trigger reading of the port attributes */
535 port_cnt = smcibdev->ibdev->phys_port_cnt;
536 for (i = 0;
537 i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
538 i++) {
539 set_bit(i, &smcibdev->port_event_mask);
540 /* determine pnetids of the port */
541 smc_pnetid_by_dev_port(ibdev->dev.parent, i,
542 smcibdev->pnetid[i]);
543 }
544 schedule_work(&smcibdev->port_event_work);
502} 545}
503 546
504/* callback function for ib_register_client() */ 547/* callback function for ib_register_client() */
@@ -513,6 +556,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
513 spin_unlock(&smc_ib_devices.lock); 556 spin_unlock(&smc_ib_devices.lock);
514 smc_pnet_remove_by_ibdev(smcibdev); 557 smc_pnet_remove_by_ibdev(smcibdev);
515 smc_ib_cleanup_per_ibdev(smcibdev); 558 smc_ib_cleanup_per_ibdev(smcibdev);
559 ib_unregister_event_handler(&smcibdev->event_handler);
516 kfree(smcibdev); 560 kfree(smcibdev);
517} 561}
518 562
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index e90630dadf8e..bac7fd65a4c0 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -15,6 +15,7 @@
15#include <linux/interrupt.h> 15#include <linux/interrupt.h>
16#include <linux/if_ether.h> 16#include <linux/if_ether.h>
17#include <rdma/ib_verbs.h> 17#include <rdma/ib_verbs.h>
18#include <net/smc.h>
18 19
19#define SMC_MAX_PORTS 2 /* Max # of ports */ 20#define SMC_MAX_PORTS 2 /* Max # of ports */
20#define SMC_GID_SIZE sizeof(union ib_gid) 21#define SMC_GID_SIZE sizeof(union ib_gid)
@@ -39,7 +40,8 @@ struct smc_ib_device { /* ib-device infos for smc */
39 struct tasklet_struct recv_tasklet; /* called by recv cq handler */ 40 struct tasklet_struct recv_tasklet; /* called by recv cq handler */
40 char mac[SMC_MAX_PORTS][ETH_ALEN]; 41 char mac[SMC_MAX_PORTS][ETH_ALEN];
41 /* mac address per port*/ 42 /* mac address per port*/
42 union ib_gid gid[SMC_MAX_PORTS]; /* gid per port */ 43 u8 pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
44 /* pnetid per port */
43 u8 initialized : 1; /* ib dev CQ, evthdl done */ 45 u8 initialized : 1; /* ib dev CQ, evthdl done */
44 struct work_struct port_event_work; 46 struct work_struct port_event_work;
45 unsigned long port_event_mask; 47 unsigned long port_event_mask;
@@ -51,7 +53,6 @@ struct smc_link;
51int smc_ib_register_client(void) __init; 53int smc_ib_register_client(void) __init;
52void smc_ib_unregister_client(void); 54void smc_ib_unregister_client(void);
53bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); 55bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
54int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport);
55int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, 56int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
56 struct smc_buf_desc *buf_slot, 57 struct smc_buf_desc *buf_slot,
57 enum dma_data_direction data_direction); 58 enum dma_data_direction data_direction);
@@ -75,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
75void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, 76void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
76 struct smc_buf_desc *buf_slot, 77 struct smc_buf_desc *buf_slot,
77 enum dma_data_direction data_direction); 78 enum dma_data_direction data_direction);
79int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
80 unsigned short vlan_id, u8 gid[], u8 *sgid_index);
78#endif 81#endif
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
new file mode 100644
index 000000000000..e36f21ce7252
--- /dev/null
+++ b/net/smc/smc_ism.c
@@ -0,0 +1,348 @@
1// SPDX-License-Identifier: GPL-2.0
2/* Shared Memory Communications Direct over ISM devices (SMC-D)
3 *
4 * Functions for ISM device.
5 *
6 * Copyright IBM Corp. 2018
7 */
8
9#include <linux/spinlock.h>
10#include <linux/slab.h>
11#include <asm/page.h>
12
13#include "smc.h"
14#include "smc_core.h"
15#include "smc_ism.h"
16#include "smc_pnet.h"
17
18struct smcd_dev_list smcd_dev_list = {
19 .list = LIST_HEAD_INIT(smcd_dev_list.list),
20 .lock = __SPIN_LOCK_UNLOCKED(smcd_dev_list.lock)
21};
22
23/* Test if an ISM communication is possible. */
24int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
25{
26 return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
27 vlan_id);
28}
29
30int smc_ism_write(struct smcd_dev *smcd, const struct smc_ism_position *pos,
31 void *data, size_t len)
32{
33 int rc;
34
35 rc = smcd->ops->move_data(smcd, pos->token, pos->index, pos->signal,
36 pos->offset, data, len);
37
38 return rc < 0 ? rc : 0;
39}
40
41/* Set a connection using this DMBE. */
42void smc_ism_set_conn(struct smc_connection *conn)
43{
44 unsigned long flags;
45
46 spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
47 conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = conn;
48 spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
49}
50
51/* Unset a connection using this DMBE. */
52void smc_ism_unset_conn(struct smc_connection *conn)
53{
54 unsigned long flags;
55
56 if (!conn->rmb_desc)
57 return;
58
59 spin_lock_irqsave(&conn->lgr->smcd->lock, flags);
60 conn->lgr->smcd->conn[conn->rmb_desc->sba_idx] = NULL;
61 spin_unlock_irqrestore(&conn->lgr->smcd->lock, flags);
62}
63
64/* Register a VLAN identifier with the ISM device. Use a reference count
65 * and add a VLAN identifier only when the first DMB using this VLAN is
66 * registered.
67 */
68int smc_ism_get_vlan(struct smcd_dev *smcd, unsigned short vlanid)
69{
70 struct smc_ism_vlanid *new_vlan, *vlan;
71 unsigned long flags;
72 int rc = 0;
73
74 if (!vlanid) /* No valid vlan id */
75 return -EINVAL;
76
77 /* create new vlan entry, in case we need it */
78 new_vlan = kzalloc(sizeof(*new_vlan), GFP_KERNEL);
79 if (!new_vlan)
80 return -ENOMEM;
81 new_vlan->vlanid = vlanid;
82 refcount_set(&new_vlan->refcnt, 1);
83
84 /* if there is an existing entry, increase count and return */
85 spin_lock_irqsave(&smcd->lock, flags);
86 list_for_each_entry(vlan, &smcd->vlan, list) {
87 if (vlan->vlanid == vlanid) {
88 refcount_inc(&vlan->refcnt);
89 kfree(new_vlan);
90 goto out;
91 }
92 }
93
94 /* no existing entry found.
95 * add new entry to device; might fail, e.g., if HW limit reached
96 */
97 if (smcd->ops->add_vlan_id(smcd, vlanid)) {
98 kfree(new_vlan);
99 rc = -EIO;
100 goto out;
101 }
102 list_add_tail(&new_vlan->list, &smcd->vlan);
103out:
104 spin_unlock_irqrestore(&smcd->lock, flags);
105 return rc;
106}
107
108/* Unregister a VLAN identifier with the ISM device. Use a reference count
109 * and remove a VLAN identifier only when the last DMB using this VLAN is
110 * unregistered.
111 */
112int smc_ism_put_vlan(struct smcd_dev *smcd, unsigned short vlanid)
113{
114 struct smc_ism_vlanid *vlan;
115 unsigned long flags;
116 bool found = false;
117 int rc = 0;
118
119 if (!vlanid) /* No valid vlan id */
120 return -EINVAL;
121
122 spin_lock_irqsave(&smcd->lock, flags);
123 list_for_each_entry(vlan, &smcd->vlan, list) {
124 if (vlan->vlanid == vlanid) {
125 if (!refcount_dec_and_test(&vlan->refcnt))
126 goto out;
127 found = true;
128 break;
129 }
130 }
131 if (!found) {
132 rc = -ENOENT;
133 goto out; /* VLAN id not in table */
134 }
135
136 /* Found and the last reference just gone */
137 if (smcd->ops->del_vlan_id(smcd, vlanid))
138 rc = -EIO;
139 list_del(&vlan->list);
140 kfree(vlan);
141out:
142 spin_unlock_irqrestore(&smcd->lock, flags);
143 return rc;
144}
145
146int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
147{
148 struct smcd_dmb dmb;
149
150 memset(&dmb, 0, sizeof(dmb));
151 dmb.dmb_tok = dmb_desc->token;
152 dmb.sba_idx = dmb_desc->sba_idx;
153 dmb.cpu_addr = dmb_desc->cpu_addr;
154 dmb.dma_addr = dmb_desc->dma_addr;
155 dmb.dmb_len = dmb_desc->len;
156 return smcd->ops->unregister_dmb(smcd, &dmb);
157}
158
159int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
160 struct smc_buf_desc *dmb_desc)
161{
162 struct smcd_dmb dmb;
163 int rc;
164
165 memset(&dmb, 0, sizeof(dmb));
166 dmb.dmb_len = dmb_len;
167 dmb.sba_idx = dmb_desc->sba_idx;
168 dmb.vlan_id = lgr->vlan_id;
169 dmb.rgid = lgr->peer_gid;
170 rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
171 if (!rc) {
172 dmb_desc->sba_idx = dmb.sba_idx;
173 dmb_desc->token = dmb.dmb_tok;
174 dmb_desc->cpu_addr = dmb.cpu_addr;
175 dmb_desc->dma_addr = dmb.dma_addr;
176 dmb_desc->len = dmb.dmb_len;
177 }
178 return rc;
179}
180
181struct smc_ism_event_work {
182 struct work_struct work;
183 struct smcd_dev *smcd;
184 struct smcd_event event;
185};
186
187#define ISM_EVENT_REQUEST 0x0001
188#define ISM_EVENT_RESPONSE 0x0002
189#define ISM_EVENT_REQUEST_IR 0x00000001
190#define ISM_EVENT_CODE_TESTLINK 0x83
191
192static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
193{
194 union {
195 u64 info;
196 struct {
197 u32 uid;
198 unsigned short vlanid;
199 u16 code;
200 };
201 } ev_info;
202
203 switch (wrk->event.code) {
204 case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
205 ev_info.info = wrk->event.info;
206 if (ev_info.code == ISM_EVENT_REQUEST) {
207 ev_info.code = ISM_EVENT_RESPONSE;
208 wrk->smcd->ops->signal_event(wrk->smcd,
209 wrk->event.tok,
210 ISM_EVENT_REQUEST_IR,
211 ISM_EVENT_CODE_TESTLINK,
212 ev_info.info);
213 }
214 break;
215 }
216}
217
218/* worker for SMC-D events */
219static void smc_ism_event_work(struct work_struct *work)
220{
221 struct smc_ism_event_work *wrk =
222 container_of(work, struct smc_ism_event_work, work);
223
224 switch (wrk->event.type) {
225 case ISM_EVENT_GID: /* GID event, token is peer GID */
226 smc_smcd_terminate(wrk->smcd, wrk->event.tok);
227 break;
228 case ISM_EVENT_DMB:
229 break;
230 case ISM_EVENT_SWR: /* Software defined event */
231 smcd_handle_sw_event(wrk);
232 break;
233 }
234 kfree(wrk);
235}
236
237static void smcd_release(struct device *dev)
238{
239 struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
240
241 kfree(smcd->conn);
242 kfree(smcd);
243}
244
245struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
246 const struct smcd_ops *ops, int max_dmbs)
247{
248 struct smcd_dev *smcd;
249
250 smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
251 if (!smcd)
252 return NULL;
253 smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
254 GFP_KERNEL);
255 if (!smcd->conn) {
256 kfree(smcd);
257 return NULL;
258 }
259
260 smcd->dev.parent = parent;
261 smcd->dev.release = smcd_release;
262 device_initialize(&smcd->dev);
263 dev_set_name(&smcd->dev, name);
264 smcd->ops = ops;
265 smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
266
267 spin_lock_init(&smcd->lock);
268 INIT_LIST_HEAD(&smcd->vlan);
269 smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
270 WQ_MEM_RECLAIM, name);
271 return smcd;
272}
273EXPORT_SYMBOL_GPL(smcd_alloc_dev);
274
275int smcd_register_dev(struct smcd_dev *smcd)
276{
277 spin_lock(&smcd_dev_list.lock);
278 list_add_tail(&smcd->list, &smcd_dev_list.list);
279 spin_unlock(&smcd_dev_list.lock);
280
281 return device_add(&smcd->dev);
282}
283EXPORT_SYMBOL_GPL(smcd_register_dev);
284
285void smcd_unregister_dev(struct smcd_dev *smcd)
286{
287 spin_lock(&smcd_dev_list.lock);
288 list_del(&smcd->list);
289 spin_unlock(&smcd_dev_list.lock);
290 flush_workqueue(smcd->event_wq);
291 destroy_workqueue(smcd->event_wq);
292 smc_smcd_terminate(smcd, 0);
293
294 device_del(&smcd->dev);
295}
296EXPORT_SYMBOL_GPL(smcd_unregister_dev);
297
298void smcd_free_dev(struct smcd_dev *smcd)
299{
300 put_device(&smcd->dev);
301}
302EXPORT_SYMBOL_GPL(smcd_free_dev);
303
304/* SMCD Device event handler. Called from ISM device interrupt handler.
305 * Parameters are smcd device pointer,
306 * - event->type (0 --> DMB, 1 --> GID),
307 * - event->code (event code),
308 * - event->tok (either DMB token when event type 0, or GID when event type 1)
309 * - event->time (time of day)
310 * - event->info (debug info).
311 *
312 * Context:
313 * - Function called in IRQ context from ISM device driver event handler.
314 */
315void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
316{
317 struct smc_ism_event_work *wrk;
318
319 /* copy event to event work queue, and let it be handled there */
320 wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
321 if (!wrk)
322 return;
323 INIT_WORK(&wrk->work, smc_ism_event_work);
324 wrk->smcd = smcd;
325 wrk->event = *event;
326 queue_work(smcd->event_wq, &wrk->work);
327}
328EXPORT_SYMBOL_GPL(smcd_handle_event);
329
330/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
331 * Parameters are smcd device pointer and DMB number. Find the connection and
332 * schedule the tasklet for this connection.
333 *
334 * Context:
335 * - Function called in IRQ context from ISM device driver IRQ handler.
336 */
337void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno)
338{
339 struct smc_connection *conn = NULL;
340 unsigned long flags;
341
342 spin_lock_irqsave(&smcd->lock, flags);
343 conn = smcd->conn[dmbno];
344 if (conn)
345 tasklet_schedule(&conn->rx_tsklet);
346 spin_unlock_irqrestore(&smcd->lock, flags);
347}
348EXPORT_SYMBOL_GPL(smcd_handle_irq);
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
new file mode 100644
index 000000000000..aee45b860b79
--- /dev/null
+++ b/net/smc/smc_ism.h
@@ -0,0 +1,48 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* Shared Memory Communications Direct over ISM devices (SMC-D)
3 *
4 * SMC-D ISM device structure definitions.
5 *
6 * Copyright IBM Corp. 2018
7 */
8
9#ifndef SMCD_ISM_H
10#define SMCD_ISM_H
11
12#include <linux/uio.h>
13
14#include "smc.h"
15
16struct smcd_dev_list { /* List of SMCD devices */
17 struct list_head list;
18 spinlock_t lock; /* Protects list of devices */
19};
20
21extern struct smcd_dev_list smcd_dev_list; /* list of smcd devices */
22
23struct smc_ism_vlanid { /* VLAN id set on ISM device */
24 struct list_head list;
25 unsigned short vlanid; /* Vlan id */
26 refcount_t refcnt; /* Reference count */
27};
28
29struct smc_ism_position { /* ISM device position to write to */
30 u64 token; /* Token of DMB */
31 u32 offset; /* Offset into DMBE */
32 u8 index; /* Index of DMBE */
33 u8 signal; /* Generate interrupt on owner side */
34};
35
36struct smcd_dev;
37
38int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
39void smc_ism_set_conn(struct smc_connection *conn);
40void smc_ism_unset_conn(struct smc_connection *conn);
41int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
42int smc_ism_put_vlan(struct smcd_dev *dev, unsigned short vlan_id);
43int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
44 struct smc_buf_desc *dmb_desc);
45int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
46int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
47 void *data, size_t len);
48#endif
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 5800a6b43d83..9c916c709ca7 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -182,12 +182,10 @@ static int smc_llc_add_pending_send(struct smc_link *link,
182} 182}
183 183
184/* high-level API to send LLC confirm link */ 184/* high-level API to send LLC confirm link */
185int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], 185int smc_llc_send_confirm_link(struct smc_link *link,
186 union ib_gid *gid,
187 enum smc_llc_reqresp reqresp) 186 enum smc_llc_reqresp reqresp)
188{ 187{
189 struct smc_link_group *lgr = container_of(link, struct smc_link_group, 188 struct smc_link_group *lgr = smc_get_lgr(link);
190 lnk[SMC_SINGLE_LINK]);
191 struct smc_llc_msg_confirm_link *confllc; 189 struct smc_llc_msg_confirm_link *confllc;
192 struct smc_wr_tx_pend_priv *pend; 190 struct smc_wr_tx_pend_priv *pend;
193 struct smc_wr_buf *wr_buf; 191 struct smc_wr_buf *wr_buf;
@@ -203,8 +201,9 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
203 confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; 201 confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
204 if (reqresp == SMC_LLC_RESP) 202 if (reqresp == SMC_LLC_RESP)
205 confllc->hd.flags |= SMC_LLC_FLAG_RESP; 203 confllc->hd.flags |= SMC_LLC_FLAG_RESP;
206 memcpy(confllc->sender_mac, mac, ETH_ALEN); 204 memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
207 memcpy(confllc->sender_gid, gid, SMC_GID_SIZE); 205 ETH_ALEN);
206 memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
208 hton24(confllc->sender_qp_num, link->roce_qp->qp_num); 207 hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
209 confllc->link_num = link->link_id; 208 confllc->link_num = link->link_id;
210 memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); 209 memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
@@ -241,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
241 240
242/* prepare an add link message */ 241/* prepare an add link message */
243static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, 242static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
244 struct smc_link *link, u8 mac[], 243 struct smc_link *link, u8 mac[], u8 gid[],
245 union ib_gid *gid,
246 enum smc_llc_reqresp reqresp) 244 enum smc_llc_reqresp reqresp)
247{ 245{
248 memset(addllc, 0, sizeof(*addllc)); 246 memset(addllc, 0, sizeof(*addllc));
@@ -259,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
259} 257}
260 258
261/* send ADD LINK request or response */ 259/* send ADD LINK request or response */
262int smc_llc_send_add_link(struct smc_link *link, u8 mac[], 260int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
263 union ib_gid *gid,
264 enum smc_llc_reqresp reqresp) 261 enum smc_llc_reqresp reqresp)
265{ 262{
266 struct smc_llc_msg_add_link *addllc; 263 struct smc_llc_msg_add_link *addllc;
@@ -281,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
281/* prepare a delete link message */ 278/* prepare a delete link message */
282static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc, 279static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
283 struct smc_link *link, 280 struct smc_link *link,
284 enum smc_llc_reqresp reqresp) 281 enum smc_llc_reqresp reqresp, bool orderly)
285{ 282{
286 memset(delllc, 0, sizeof(*delllc)); 283 memset(delllc, 0, sizeof(*delllc));
287 delllc->hd.common.type = SMC_LLC_DELETE_LINK; 284 delllc->hd.common.type = SMC_LLC_DELETE_LINK;
@@ -290,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
290 delllc->hd.flags |= SMC_LLC_FLAG_RESP; 287 delllc->hd.flags |= SMC_LLC_FLAG_RESP;
291 /* DEL_LINK_ALL because only 1 link supported */ 288 /* DEL_LINK_ALL because only 1 link supported */
292 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; 289 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
293 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; 290 if (orderly)
291 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
294 delllc->link_num = link->link_id; 292 delllc->link_num = link->link_id;
295} 293}
296 294
297/* send DELETE LINK request or response */ 295/* send DELETE LINK request or response */
298int smc_llc_send_delete_link(struct smc_link *link, 296int smc_llc_send_delete_link(struct smc_link *link,
299 enum smc_llc_reqresp reqresp) 297 enum smc_llc_reqresp reqresp, bool orderly)
300{ 298{
301 struct smc_llc_msg_del_link *delllc; 299 struct smc_llc_msg_del_link *delllc;
302 struct smc_wr_tx_pend_priv *pend; 300 struct smc_wr_tx_pend_priv *pend;
@@ -307,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link,
307 if (rc) 305 if (rc)
308 return rc; 306 return rc;
309 delllc = (struct smc_llc_msg_del_link *)wr_buf; 307 delllc = (struct smc_llc_msg_del_link *)wr_buf;
310 smc_llc_prep_delete_link(delllc, link, reqresp); 308 smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
311 /* send llc message */ 309 /* send llc message */
312 rc = smc_wr_tx_send(link, pend); 310 rc = smc_wr_tx_send(link, pend);
313 return rc; 311 return rc;
@@ -381,11 +379,9 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
381static void smc_llc_rx_confirm_link(struct smc_link *link, 379static void smc_llc_rx_confirm_link(struct smc_link *link,
382 struct smc_llc_msg_confirm_link *llc) 380 struct smc_llc_msg_confirm_link *llc)
383{ 381{
384 struct smc_link_group *lgr; 382 struct smc_link_group *lgr = smc_get_lgr(link);
385 int conf_rc; 383 int conf_rc;
386 384
387 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
388
389 /* RMBE eyecatchers are not supported */ 385 /* RMBE eyecatchers are not supported */
390 if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) 386 if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
391 conf_rc = 0; 387 conf_rc = 0;
@@ -411,8 +407,7 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
411static void smc_llc_rx_add_link(struct smc_link *link, 407static void smc_llc_rx_add_link(struct smc_link *link,
412 struct smc_llc_msg_add_link *llc) 408 struct smc_llc_msg_add_link *llc)
413{ 409{
414 struct smc_link_group *lgr = container_of(link, struct smc_link_group, 410 struct smc_link_group *lgr = smc_get_lgr(link);
415 lnk[SMC_SINGLE_LINK]);
416 411
417 if (llc->hd.flags & SMC_LLC_FLAG_RESP) { 412 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
418 if (link->state == SMC_LNK_ACTIVATING) 413 if (link->state == SMC_LNK_ACTIVATING)
@@ -426,14 +421,12 @@ static void smc_llc_rx_add_link(struct smc_link *link,
426 if (lgr->role == SMC_SERV) { 421 if (lgr->role == SMC_SERV) {
427 smc_llc_prep_add_link(llc, link, 422 smc_llc_prep_add_link(llc, link,
428 link->smcibdev->mac[link->ibport - 1], 423 link->smcibdev->mac[link->ibport - 1],
429 &link->smcibdev->gid[link->ibport - 1], 424 link->gid, SMC_LLC_REQ);
430 SMC_LLC_REQ);
431 425
432 } else { 426 } else {
433 smc_llc_prep_add_link(llc, link, 427 smc_llc_prep_add_link(llc, link,
434 link->smcibdev->mac[link->ibport - 1], 428 link->smcibdev->mac[link->ibport - 1],
435 &link->smcibdev->gid[link->ibport - 1], 429 link->gid, SMC_LLC_RESP);
436 SMC_LLC_RESP);
437 } 430 }
438 smc_llc_send_message(link, llc, sizeof(*llc)); 431 smc_llc_send_message(link, llc, sizeof(*llc));
439 } 432 }
@@ -442,22 +435,23 @@ static void smc_llc_rx_add_link(struct smc_link *link,
442static void smc_llc_rx_delete_link(struct smc_link *link, 435static void smc_llc_rx_delete_link(struct smc_link *link,
443 struct smc_llc_msg_del_link *llc) 436 struct smc_llc_msg_del_link *llc)
444{ 437{
445 struct smc_link_group *lgr = container_of(link, struct smc_link_group, 438 struct smc_link_group *lgr = smc_get_lgr(link);
446 lnk[SMC_SINGLE_LINK]);
447 439
448 if (llc->hd.flags & SMC_LLC_FLAG_RESP) { 440 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
449 if (lgr->role == SMC_SERV) 441 if (lgr->role == SMC_SERV)
450 smc_lgr_terminate(lgr); 442 smc_lgr_schedule_free_work_fast(lgr);
451 } else { 443 } else {
444 smc_lgr_forget(lgr);
445 smc_llc_link_deleting(link);
452 if (lgr->role == SMC_SERV) { 446 if (lgr->role == SMC_SERV) {
453 smc_lgr_forget(lgr); 447 /* client asks to delete this link, send request */
454 smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ); 448 smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
455 smc_llc_send_message(link, llc, sizeof(*llc));
456 } else { 449 } else {
457 smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP); 450 /* server requests to delete this link, send response */
458 smc_llc_send_message(link, llc, sizeof(*llc)); 451 smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
459 smc_lgr_terminate(lgr);
460 } 452 }
453 smc_llc_send_message(link, llc, sizeof(*llc));
454 smc_lgr_schedule_free_work_fast(lgr);
461 } 455 }
462} 456}
463 457
@@ -476,17 +470,14 @@ static void smc_llc_rx_test_link(struct smc_link *link,
476static void smc_llc_rx_confirm_rkey(struct smc_link *link, 470static void smc_llc_rx_confirm_rkey(struct smc_link *link,
477 struct smc_llc_msg_confirm_rkey *llc) 471 struct smc_llc_msg_confirm_rkey *llc)
478{ 472{
479 struct smc_link_group *lgr;
480 int rc; 473 int rc;
481 474
482 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
483
484 if (llc->hd.flags & SMC_LLC_FLAG_RESP) { 475 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
485 link->llc_confirm_rkey_rc = llc->hd.flags & 476 link->llc_confirm_rkey_rc = llc->hd.flags &
486 SMC_LLC_FLAG_RKEY_NEG; 477 SMC_LLC_FLAG_RKEY_NEG;
487 complete(&link->llc_confirm_rkey); 478 complete(&link->llc_confirm_rkey);
488 } else { 479 } else {
489 rc = smc_rtoken_add(lgr, 480 rc = smc_rtoken_add(smc_get_lgr(link),
490 llc->rtoken[0].rmb_vaddr, 481 llc->rtoken[0].rmb_vaddr,
491 llc->rtoken[0].rmb_key); 482 llc->rtoken[0].rmb_key);
492 483
@@ -514,18 +505,15 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
514static void smc_llc_rx_delete_rkey(struct smc_link *link, 505static void smc_llc_rx_delete_rkey(struct smc_link *link,
515 struct smc_llc_msg_delete_rkey *llc) 506 struct smc_llc_msg_delete_rkey *llc)
516{ 507{
517 struct smc_link_group *lgr;
518 u8 err_mask = 0; 508 u8 err_mask = 0;
519 int i, max; 509 int i, max;
520 510
521 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
522
523 if (llc->hd.flags & SMC_LLC_FLAG_RESP) { 511 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
524 /* unused as long as we don't send this type of msg */ 512 /* unused as long as we don't send this type of msg */
525 } else { 513 } else {
526 max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); 514 max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
527 for (i = 0; i < max; i++) { 515 for (i = 0; i < max; i++) {
528 if (smc_rtoken_delete(lgr, llc->rkey[i])) 516 if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
529 err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); 517 err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
530 } 518 }
531 519
@@ -583,12 +571,10 @@ static void smc_llc_testlink_work(struct work_struct *work)
583 struct smc_link *link = container_of(to_delayed_work(work), 571 struct smc_link *link = container_of(to_delayed_work(work),
584 struct smc_link, llc_testlink_wrk); 572 struct smc_link, llc_testlink_wrk);
585 unsigned long next_interval; 573 unsigned long next_interval;
586 struct smc_link_group *lgr;
587 unsigned long expire_time; 574 unsigned long expire_time;
588 u8 user_data[16] = { 0 }; 575 u8 user_data[16] = { 0 };
589 int rc; 576 int rc;
590 577
591 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
592 if (link->state != SMC_LNK_ACTIVE) 578 if (link->state != SMC_LNK_ACTIVE)
593 return; /* don't reschedule worker */ 579 return; /* don't reschedule worker */
594 expire_time = link->wr_rx_tstamp + link->llc_testlink_time; 580 expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
@@ -602,7 +588,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
602 rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, 588 rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
603 SMC_LLC_WAIT_TIME); 589 SMC_LLC_WAIT_TIME);
604 if (rc <= 0) { 590 if (rc <= 0) {
605 smc_lgr_terminate(lgr); 591 smc_lgr_terminate(smc_get_lgr(link));
606 return; 592 return;
607 } 593 }
608 next_interval = link->llc_testlink_time; 594 next_interval = link->llc_testlink_time;
@@ -613,8 +599,7 @@ out:
613 599
614int smc_llc_link_init(struct smc_link *link) 600int smc_llc_link_init(struct smc_link *link)
615{ 601{
616 struct smc_link_group *lgr = container_of(link, struct smc_link_group, 602 struct smc_link_group *lgr = smc_get_lgr(link);
617 lnk[SMC_SINGLE_LINK]);
618 link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM, 603 link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
619 *((u32 *)lgr->id), 604 *((u32 *)lgr->id),
620 link->link_id); 605 link->link_id);
@@ -640,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)
640 } 625 }
641} 626}
642 627
628void smc_llc_link_deleting(struct smc_link *link)
629{
630 link->state = SMC_LNK_DELETING;
631}
632
643/* called in tasklet context */ 633/* called in tasklet context */
644void smc_llc_link_inactive(struct smc_link *link) 634void smc_llc_link_inactive(struct smc_link *link)
645{ 635{
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 65c8645e96a1..9e2ff088e301 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -36,14 +36,15 @@ enum smc_llc_msg_type {
36}; 36};
37 37
38/* transmit */ 38/* transmit */
39int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid, 39int smc_llc_send_confirm_link(struct smc_link *lnk,
40 enum smc_llc_reqresp reqresp); 40 enum smc_llc_reqresp reqresp);
41int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, 41int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
42 enum smc_llc_reqresp reqresp); 42 enum smc_llc_reqresp reqresp);
43int smc_llc_send_delete_link(struct smc_link *link, 43int smc_llc_send_delete_link(struct smc_link *link,
44 enum smc_llc_reqresp reqresp); 44 enum smc_llc_reqresp reqresp, bool orderly);
45int smc_llc_link_init(struct smc_link *link); 45int smc_llc_link_init(struct smc_link *link);
46void smc_llc_link_active(struct smc_link *link, int testlink_time); 46void smc_llc_link_active(struct smc_link *link, int testlink_time);
47void smc_llc_link_deleting(struct smc_link *link);
47void smc_llc_link_inactive(struct smc_link *link); 48void smc_llc_link_inactive(struct smc_link *link);
48void smc_llc_link_clear(struct smc_link *link); 49void smc_llc_link_clear(struct smc_link *link);
49int smc_llc_do_confirm_rkey(struct smc_link *link, 50int smc_llc_do_confirm_rkey(struct smc_link *link,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index d7b88b2d1b22..01c6ce042a1c 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -22,13 +22,12 @@
22 22
23#include "smc_pnet.h" 23#include "smc_pnet.h"
24#include "smc_ib.h" 24#include "smc_ib.h"
25 25#include "smc_ism.h"
26#define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
27 26
28static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = { 27static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
29 [SMC_PNETID_NAME] = { 28 [SMC_PNETID_NAME] = {
30 .type = NLA_NUL_STRING, 29 .type = NLA_NUL_STRING,
31 .len = SMC_MAX_PNET_ID_LEN - 1 30 .len = SMC_MAX_PNETID_LEN - 1
32 }, 31 },
33 [SMC_PNETID_ETHNAME] = { 32 [SMC_PNETID_ETHNAME] = {
34 .type = NLA_NUL_STRING, 33 .type = NLA_NUL_STRING,
@@ -65,7 +64,7 @@ static struct smc_pnettable {
65 */ 64 */
66struct smc_pnetentry { 65struct smc_pnetentry {
67 struct list_head list; 66 struct list_head list;
68 char pnet_name[SMC_MAX_PNET_ID_LEN + 1]; 67 char pnet_name[SMC_MAX_PNETID_LEN + 1];
69 struct net_device *ndev; 68 struct net_device *ndev;
70 struct smc_ib_device *smcibdev; 69 struct smc_ib_device *smcibdev;
71 u8 ib_port; 70 u8 ib_port;
@@ -209,7 +208,7 @@ static bool smc_pnetid_valid(const char *pnet_name, char *pnetid)
209 return false; 208 return false;
210 while (--end >= bf && isspace(*end)) 209 while (--end >= bf && isspace(*end))
211 ; 210 ;
212 if (end - bf >= SMC_MAX_PNET_ID_LEN) 211 if (end - bf >= SMC_MAX_PNETID_LEN)
213 return false; 212 return false;
214 while (bf <= end) { 213 while (bf <= end) {
215 if (!isalnum(*bf)) 214 if (!isalnum(*bf))
@@ -358,9 +357,6 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
358 kfree(pnetelem); 357 kfree(pnetelem);
359 return rc; 358 return rc;
360 } 359 }
361 rc = smc_ib_remember_port_attr(pnetelem->smcibdev, pnetelem->ib_port);
362 if (rc)
363 smc_pnet_remove_by_pnetid(pnetelem->pnet_name);
364 return rc; 360 return rc;
365} 361}
366 362
@@ -485,10 +481,10 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
485 case NETDEV_REBOOT: 481 case NETDEV_REBOOT:
486 case NETDEV_UNREGISTER: 482 case NETDEV_UNREGISTER:
487 smc_pnet_remove_by_ndev(event_dev); 483 smc_pnet_remove_by_ndev(event_dev);
484 return NOTIFY_OK;
488 default: 485 default:
489 break; 486 return NOTIFY_DONE;
490 } 487 }
491 return NOTIFY_DONE;
492} 488}
493 489
494static struct notifier_block smc_netdev_notifier = { 490static struct notifier_block smc_netdev_notifier = {
@@ -515,28 +511,104 @@ void smc_pnet_exit(void)
515 genl_unregister_family(&smc_pnet_nl_family); 511 genl_unregister_family(&smc_pnet_nl_family);
516} 512}
517 513
518/* PNET table analysis for a given sock: 514/* Determine one base device for stacked net devices.
519 * determine ib_device and port belonging to used internal TCP socket 515 * If the lower device level contains more than one devices
520 * ethernet interface. 516 * (for instance with bonding slaves), just the first device
517 * is used to reach a base device.
521 */ 518 */
522void smc_pnet_find_roce_resource(struct sock *sk, 519static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
523 struct smc_ib_device **smcibdev, u8 *ibport)
524{ 520{
525 struct dst_entry *dst = sk_dst_get(sk); 521 int i, nest_lvl;
526 struct smc_pnetentry *pnetelem;
527 522
528 *smcibdev = NULL; 523 rtnl_lock();
529 *ibport = 0; 524 nest_lvl = dev_get_nest_level(ndev);
525 for (i = 0; i < nest_lvl; i++) {
526 struct list_head *lower = &ndev->adj_list.lower;
527
528 if (list_empty(lower))
529 break;
530 lower = lower->next;
531 ndev = netdev_lower_get_next(ndev, &lower);
532 }
533 rtnl_unlock();
534 return ndev;
535}
536
537/* Determine the corresponding IB device port based on the hardware PNETID.
538 * Searching stops at the first matching active IB device port with vlan_id
539 * configured.
540 */
541static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
542 struct smc_ib_device **smcibdev,
543 u8 *ibport, unsigned short vlan_id,
544 u8 gid[])
545{
546 u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
547 struct smc_ib_device *ibdev;
548 int i;
549
550 ndev = pnet_find_base_ndev(ndev);
551 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
552 ndev_pnetid))
553 return; /* pnetid could not be determined */
554
555 spin_lock(&smc_ib_devices.lock);
556 list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
557 for (i = 1; i <= SMC_MAX_PORTS; i++) {
558 if (!rdma_is_port_valid(ibdev->ibdev, i))
559 continue;
560 if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
561 SMC_MAX_PNETID_LEN) &&
562 smc_ib_port_active(ibdev, i) &&
563 !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
564 NULL)) {
565 *smcibdev = ibdev;
566 *ibport = i;
567 goto out;
568 }
569 }
570 }
571out:
572 spin_unlock(&smc_ib_devices.lock);
573}
574
575static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
576 struct smcd_dev **smcismdev)
577{
578 u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
579 struct smcd_dev *ismdev;
580
581 ndev = pnet_find_base_ndev(ndev);
582 if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
583 ndev_pnetid))
584 return; /* pnetid could not be determined */
585
586 spin_lock(&smcd_dev_list.lock);
587 list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
588 if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
589 *smcismdev = ismdev;
590 break;
591 }
592 }
593 spin_unlock(&smcd_dev_list.lock);
594}
595
596/* Lookup of coupled ib_device via SMC pnet table */
597static void smc_pnet_find_roce_by_table(struct net_device *netdev,
598 struct smc_ib_device **smcibdev,
599 u8 *ibport, unsigned short vlan_id,
600 u8 gid[])
601{
602 struct smc_pnetentry *pnetelem;
530 603
531 if (!dst)
532 return;
533 if (!dst->dev)
534 goto out_rel;
535 read_lock(&smc_pnettable.lock); 604 read_lock(&smc_pnettable.lock);
536 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) { 605 list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
537 if (dst->dev == pnetelem->ndev) { 606 if (netdev == pnetelem->ndev) {
538 if (smc_ib_port_active(pnetelem->smcibdev, 607 if (smc_ib_port_active(pnetelem->smcibdev,
539 pnetelem->ib_port)) { 608 pnetelem->ib_port) &&
609 !smc_ib_determine_gid(pnetelem->smcibdev,
610 pnetelem->ib_port, vlan_id,
611 gid, NULL)) {
540 *smcibdev = pnetelem->smcibdev; 612 *smcibdev = pnetelem->smcibdev;
541 *ibport = pnetelem->ib_port; 613 *ibport = pnetelem->ib_port;
542 } 614 }
@@ -544,6 +616,55 @@ void smc_pnet_find_roce_resource(struct sock *sk,
544 } 616 }
545 } 617 }
546 read_unlock(&smc_pnettable.lock); 618 read_unlock(&smc_pnettable.lock);
619}
620
621/* PNET table analysis for a given sock:
622 * determine ib_device and port belonging to used internal TCP socket
623 * ethernet interface.
624 */
625void smc_pnet_find_roce_resource(struct sock *sk,
626 struct smc_ib_device **smcibdev, u8 *ibport,
627 unsigned short vlan_id, u8 gid[])
628{
629 struct dst_entry *dst = sk_dst_get(sk);
630
631 *smcibdev = NULL;
632 *ibport = 0;
633
634 if (!dst)
635 goto out;
636 if (!dst->dev)
637 goto out_rel;
638
639 /* if possible, lookup via hardware-defined pnetid */
640 smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
641 if (*smcibdev)
642 goto out_rel;
643
644 /* lookup via SMC PNET table */
645 smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
646
647out_rel:
648 dst_release(dst);
649out:
650 return;
651}
652
653void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
654{
655 struct dst_entry *dst = sk_dst_get(sk);
656
657 *smcismdev = NULL;
658 if (!dst)
659 goto out;
660 if (!dst->dev)
661 goto out_rel;
662
663 /* if possible, lookup via hardware-defined pnetid */
664 smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
665
547out_rel: 666out_rel:
548 dst_release(dst); 667 dst_release(dst);
668out:
669 return;
549} 670}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 5a29519db976..8ff777636e32 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -12,12 +12,29 @@
12#ifndef _SMC_PNET_H 12#ifndef _SMC_PNET_H
13#define _SMC_PNET_H 13#define _SMC_PNET_H
14 14
15#if IS_ENABLED(CONFIG_HAVE_PNETID)
16#include <asm/pnet.h>
17#endif
18
15struct smc_ib_device; 19struct smc_ib_device;
20struct smcd_dev;
21
22static inline int smc_pnetid_by_dev_port(struct device *dev,
23 unsigned short port, u8 *pnetid)
24{
25#if IS_ENABLED(CONFIG_HAVE_PNETID)
26 return pnet_id_by_dev_port(dev, port, pnetid);
27#else
28 return -ENOENT;
29#endif
30}
16 31
17int smc_pnet_init(void) __init; 32int smc_pnet_init(void) __init;
18void smc_pnet_exit(void); 33void smc_pnet_exit(void);
19int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev); 34int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
20void smc_pnet_find_roce_resource(struct sock *sk, 35void smc_pnet_find_roce_resource(struct sock *sk,
21 struct smc_ib_device **smcibdev, u8 *ibport); 36 struct smc_ib_device **smcibdev, u8 *ibport,
37 unsigned short vlan_id, u8 gid[]);
38void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
22 39
23#endif 40#endif
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 3d77b383cccd..bbcf0fe4ae10 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -82,8 +82,7 @@ static int smc_rx_update_consumer(struct smc_sock *smc,
82 } 82 }
83 } 83 }
84 84
85 smc_curs_write(&conn->local_tx_ctrl.cons, smc_curs_read(&cons, conn), 85 smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);
86 conn);
87 86
88 /* send consumer cursor update if required */ 87 /* send consumer cursor update if required */
89 /* similar to advertising new TCP rcv_wnd if required */ 88 /* similar to advertising new TCP rcv_wnd if required */
@@ -97,8 +96,7 @@ static void smc_rx_update_cons(struct smc_sock *smc, size_t len)
97 struct smc_connection *conn = &smc->conn; 96 struct smc_connection *conn = &smc->conn;
98 union smc_host_cursor cons; 97 union smc_host_cursor cons;
99 98
100 smc_curs_write(&cons, smc_curs_read(&conn->local_tx_ctrl.cons, conn), 99 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
101 conn);
102 smc_rx_update_consumer(smc, cons, len); 100 smc_rx_update_consumer(smc, cons, len);
103} 101}
104 102
@@ -157,10 +155,8 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
157 struct splice_pipe_desc spd; 155 struct splice_pipe_desc spd;
158 struct partial_page partial; 156 struct partial_page partial;
159 struct smc_spd_priv *priv; 157 struct smc_spd_priv *priv;
160 struct page *page;
161 int bytes; 158 int bytes;
162 159
163 page = virt_to_page(smc->conn.rmb_desc->cpu_addr);
164 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 160 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
165 if (!priv) 161 if (!priv)
166 return -ENOMEM; 162 return -ENOMEM;
@@ -172,7 +168,7 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
172 168
173 spd.nr_pages_max = 1; 169 spd.nr_pages_max = 1;
174 spd.nr_pages = 1; 170 spd.nr_pages = 1;
175 spd.pages = &page; 171 spd.pages = &smc->conn.rmb_desc->pages;
176 spd.partial = &partial; 172 spd.partial = &partial;
177 spd.ops = &smc_pipe_ops; 173 spd.ops = &smc_pipe_ops;
178 spd.spd_release = smc_rx_spd_release; 174 spd.spd_release = smc_rx_spd_release;
@@ -245,10 +241,7 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
245 if (!(flags & MSG_TRUNC)) 241 if (!(flags & MSG_TRUNC))
246 rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1); 242 rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);
247 len = 1; 243 len = 1;
248 smc_curs_write(&cons, 244 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
249 smc_curs_read(&conn->local_tx_ctrl.cons,
250 conn),
251 conn);
252 if (smc_curs_diff(conn->rmb_desc->len, &cons, 245 if (smc_curs_diff(conn->rmb_desc->len, &cons,
253 &conn->urg_curs) > 1) 246 &conn->urg_curs) > 1)
254 conn->urg_rx_skip_pend = true; 247 conn->urg_rx_skip_pend = true;
@@ -305,7 +298,7 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
305 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 298 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
306 299
307 /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */ 300 /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
308 rcvbuf_base = conn->rmb_desc->cpu_addr; 301 rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;
309 302
310 do { /* while (read_remaining) */ 303 do { /* while (read_remaining) */
311 if (read_done >= target || (pipe && read_done)) 304 if (read_done >= target || (pipe && read_done))
@@ -370,9 +363,7 @@ copy:
370 continue; 363 continue;
371 } 364 }
372 365
373 smc_curs_write(&cons, 366 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
374 smc_curs_read(&conn->local_tx_ctrl.cons, conn),
375 conn);
376 /* subsequent splice() calls pick up where previous left */ 367 /* subsequent splice() calls pick up where previous left */
377 if (splbytes) 368 if (splbytes)
378 smc_curs_add(conn->rmb_desc->len, &cons, splbytes); 369 smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 72e1a2782fe8..d8366ed51757 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -24,6 +24,7 @@
24#include "smc.h" 24#include "smc.h"
25#include "smc_wr.h" 25#include "smc_wr.h"
26#include "smc_cdc.h" 26#include "smc_cdc.h"
27#include "smc_ism.h"
27#include "smc_tx.h" 28#include "smc_tx.h"
28 29
29#define SMC_TX_WORK_DELAY HZ 30#define SMC_TX_WORK_DELAY HZ
@@ -180,9 +181,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
180 copylen = min_t(size_t, send_remaining, writespace); 181 copylen = min_t(size_t, send_remaining, writespace);
181 /* determine start of sndbuf */ 182 /* determine start of sndbuf */
182 sndbuf_base = conn->sndbuf_desc->cpu_addr; 183 sndbuf_base = conn->sndbuf_desc->cpu_addr;
183 smc_curs_write(&prep, 184 smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
184 smc_curs_read(&conn->tx_curs_prep, conn),
185 conn);
186 tx_cnt_prep = prep.count; 185 tx_cnt_prep = prep.count;
187 /* determine chunks where to write into sndbuf */ 186 /* determine chunks where to write into sndbuf */
188 /* either unwrapped case, or 1st chunk of wrapped case */ 187 /* either unwrapped case, or 1st chunk of wrapped case */
@@ -213,9 +212,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
213 smc_sndbuf_sync_sg_for_device(conn); 212 smc_sndbuf_sync_sg_for_device(conn);
214 /* update cursors */ 213 /* update cursors */
215 smc_curs_add(conn->sndbuf_desc->len, &prep, copylen); 214 smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
216 smc_curs_write(&conn->tx_curs_prep, 215 smc_curs_copy(&conn->tx_curs_prep, &prep, conn);
217 smc_curs_read(&prep, conn),
218 conn);
219 /* increased in send tasklet smc_cdc_tx_handler() */ 216 /* increased in send tasklet smc_cdc_tx_handler() */
220 smp_mb__before_atomic(); 217 smp_mb__before_atomic();
221 atomic_sub(copylen, &conn->sndbuf_space); 218 atomic_sub(copylen, &conn->sndbuf_space);
@@ -250,6 +247,24 @@ out_err:
250 247
251/***************************** sndbuf consumer *******************************/ 248/***************************** sndbuf consumer *******************************/
252 249
250/* sndbuf consumer: actual data transfer of one target chunk with ISM write */
251int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
252 u32 offset, int signal)
253{
254 struct smc_ism_position pos;
255 int rc;
256
257 memset(&pos, 0, sizeof(pos));
258 pos.token = conn->peer_token;
259 pos.index = conn->peer_rmbe_idx;
260 pos.offset = conn->tx_off + offset;
261 pos.signal = signal;
262 rc = smc_ism_write(conn->lgr->smcd, &pos, data, len);
263 if (rc)
264 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
265 return rc;
266}
267
253/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */ 268/* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
254static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, 269static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
255 int num_sges, struct ib_sge sges[]) 270 int num_sges, struct ib_sge sges[])
@@ -296,26 +311,109 @@ static inline void smc_tx_advance_cursors(struct smc_connection *conn,
296 smc_curs_add(conn->sndbuf_desc->len, sent, len); 311 smc_curs_add(conn->sndbuf_desc->len, sent, len);
297} 312}
298 313
314/* SMC-R helper for smc_tx_rdma_writes() */
315static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
316 size_t src_off, size_t src_len,
317 size_t dst_off, size_t dst_len)
318{
319 dma_addr_t dma_addr =
320 sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
321 struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
322 int src_len_sum = src_len, dst_len_sum = dst_len;
323 struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
324 int sent_count = src_off;
325 int srcchunk, dstchunk;
326 int num_sges;
327 int rc;
328
329 for (dstchunk = 0; dstchunk < 2; dstchunk++) {
330 num_sges = 0;
331 for (srcchunk = 0; srcchunk < 2; srcchunk++) {
332 sges[srcchunk].addr = dma_addr + src_off;
333 sges[srcchunk].length = src_len;
334 sges[srcchunk].lkey = link->roce_pd->local_dma_lkey;
335 num_sges++;
336
337 src_off += src_len;
338 if (src_off >= conn->sndbuf_desc->len)
339 src_off -= conn->sndbuf_desc->len;
340 /* modulo in send ring */
341 if (src_len_sum == dst_len)
342 break; /* either on 1st or 2nd iteration */
343 /* prepare next (== 2nd) iteration */
344 src_len = dst_len - src_len; /* remainder */
345 src_len_sum += src_len;
346 }
347 rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
348 if (rc)
349 return rc;
350 if (dst_len_sum == len)
351 break; /* either on 1st or 2nd iteration */
352 /* prepare next (== 2nd) iteration */
353 dst_off = 0; /* modulo offset in RMBE ring buffer */
354 dst_len = len - dst_len; /* remainder */
355 dst_len_sum += dst_len;
356 src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
357 sent_count);
358 src_len_sum = src_len;
359 }
360 return 0;
361}
362
363/* SMC-D helper for smc_tx_rdma_writes() */
364static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
365 size_t src_off, size_t src_len,
366 size_t dst_off, size_t dst_len)
367{
368 int src_len_sum = src_len, dst_len_sum = dst_len;
369 int srcchunk, dstchunk;
370 int rc;
371
372 for (dstchunk = 0; dstchunk < 2; dstchunk++) {
373 for (srcchunk = 0; srcchunk < 2; srcchunk++) {
374 void *data = conn->sndbuf_desc->cpu_addr + src_off;
375
376 rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
377 sizeof(struct smcd_cdc_msg), 0);
378 if (rc)
379 return rc;
380 dst_off += src_len;
381 src_off += src_len;
382 if (src_off >= conn->sndbuf_desc->len)
383 src_off -= conn->sndbuf_desc->len;
384 /* modulo in send ring */
385 if (src_len_sum == dst_len)
386 break; /* either on 1st or 2nd iteration */
387 /* prepare next (== 2nd) iteration */
388 src_len = dst_len - src_len; /* remainder */
389 src_len_sum += src_len;
390 }
391 if (dst_len_sum == len)
392 break; /* either on 1st or 2nd iteration */
393 /* prepare next (== 2nd) iteration */
394 dst_off = 0; /* modulo offset in RMBE ring buffer */
395 dst_len = len - dst_len; /* remainder */
396 dst_len_sum += dst_len;
397 src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
398 src_len_sum = src_len;
399 }
400 return 0;
401}
402
299/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit; 403/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
300 * usable snd_wnd as max transmit 404 * usable snd_wnd as max transmit
301 */ 405 */
302static int smc_tx_rdma_writes(struct smc_connection *conn) 406static int smc_tx_rdma_writes(struct smc_connection *conn)
303{ 407{
304 size_t src_off, src_len, dst_off, dst_len; /* current chunk values */ 408 size_t len, src_len, dst_off, dst_len; /* current chunk values */
305 size_t len, dst_len_sum, src_len_sum, dstchunk, srcchunk;
306 union smc_host_cursor sent, prep, prod, cons; 409 union smc_host_cursor sent, prep, prod, cons;
307 struct ib_sge sges[SMC_IB_MAX_SEND_SGE];
308 struct smc_link_group *lgr = conn->lgr;
309 struct smc_cdc_producer_flags *pflags; 410 struct smc_cdc_producer_flags *pflags;
310 int to_send, rmbespace; 411 int to_send, rmbespace;
311 struct smc_link *link;
312 dma_addr_t dma_addr;
313 int num_sges;
314 int rc; 412 int rc;
315 413
316 /* source: sndbuf */ 414 /* source: sndbuf */
317 smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); 415 smc_curs_copy(&sent, &conn->tx_curs_sent, conn);
318 smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); 416 smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
319 /* cf. wmem_alloc - (snd_max - snd_una) */ 417 /* cf. wmem_alloc - (snd_max - snd_una) */
320 to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); 418 to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
321 if (to_send <= 0) 419 if (to_send <= 0)
@@ -326,12 +424,8 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
326 rmbespace = atomic_read(&conn->peer_rmbe_space); 424 rmbespace = atomic_read(&conn->peer_rmbe_space);
327 if (rmbespace <= 0) 425 if (rmbespace <= 0)
328 return 0; 426 return 0;
329 smc_curs_write(&prod, 427 smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
330 smc_curs_read(&conn->local_tx_ctrl.prod, conn), 428 smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
331 conn);
332 smc_curs_write(&cons,
333 smc_curs_read(&conn->local_rx_ctrl.cons, conn),
334 conn);
335 429
336 /* if usable snd_wnd closes ask peer to advertise once it opens again */ 430 /* if usable snd_wnd closes ask peer to advertise once it opens again */
337 pflags = &conn->local_tx_ctrl.prod_flags; 431 pflags = &conn->local_tx_ctrl.prod_flags;
@@ -340,7 +434,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
340 len = min(to_send, rmbespace); 434 len = min(to_send, rmbespace);
341 435
342 /* initialize variables for first iteration of subsequent nested loop */ 436 /* initialize variables for first iteration of subsequent nested loop */
343 link = &lgr->lnk[SMC_SINGLE_LINK];
344 dst_off = prod.count; 437 dst_off = prod.count;
345 if (prod.wrap == cons.wrap) { 438 if (prod.wrap == cons.wrap) {
346 /* the filled destination area is unwrapped, 439 /* the filled destination area is unwrapped,
@@ -357,8 +450,6 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
357 */ 450 */
358 dst_len = len; 451 dst_len = len;
359 } 452 }
360 dst_len_sum = dst_len;
361 src_off = sent.count;
362 /* dst_len determines the maximum src_len */ 453 /* dst_len determines the maximum src_len */
363 if (sent.count + dst_len <= conn->sndbuf_desc->len) { 454 if (sent.count + dst_len <= conn->sndbuf_desc->len) {
364 /* unwrapped src case: single chunk of entire dst_len */ 455 /* unwrapped src case: single chunk of entire dst_len */
@@ -367,51 +458,23 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
367 /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */ 458 /* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
368 src_len = conn->sndbuf_desc->len - sent.count; 459 src_len = conn->sndbuf_desc->len - sent.count;
369 } 460 }
370 src_len_sum = src_len; 461
371 dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); 462 if (conn->lgr->is_smcd)
372 for (dstchunk = 0; dstchunk < 2; dstchunk++) { 463 rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
373 num_sges = 0; 464 dst_off, dst_len);
374 for (srcchunk = 0; srcchunk < 2; srcchunk++) { 465 else
375 sges[srcchunk].addr = dma_addr + src_off; 466 rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
376 sges[srcchunk].length = src_len; 467 dst_off, dst_len);
377 sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; 468 if (rc)
378 num_sges++; 469 return rc;
379 src_off += src_len;
380 if (src_off >= conn->sndbuf_desc->len)
381 src_off -= conn->sndbuf_desc->len;
382 /* modulo in send ring */
383 if (src_len_sum == dst_len)
384 break; /* either on 1st or 2nd iteration */
385 /* prepare next (== 2nd) iteration */
386 src_len = dst_len - src_len; /* remainder */
387 src_len_sum += src_len;
388 }
389 rc = smc_tx_rdma_write(conn, dst_off, num_sges, sges);
390 if (rc)
391 return rc;
392 if (dst_len_sum == len)
393 break; /* either on 1st or 2nd iteration */
394 /* prepare next (== 2nd) iteration */
395 dst_off = 0; /* modulo offset in RMBE ring buffer */
396 dst_len = len - dst_len; /* remainder */
397 dst_len_sum += dst_len;
398 src_len = min_t(int,
399 dst_len, conn->sndbuf_desc->len - sent.count);
400 src_len_sum = src_len;
401 }
402 470
403 if (conn->urg_tx_pend && len == to_send) 471 if (conn->urg_tx_pend && len == to_send)
404 pflags->urg_data_present = 1; 472 pflags->urg_data_present = 1;
405 smc_tx_advance_cursors(conn, &prod, &sent, len); 473 smc_tx_advance_cursors(conn, &prod, &sent, len);
406 /* update connection's cursors with advanced local cursors */ 474 /* update connection's cursors with advanced local cursors */
407 smc_curs_write(&conn->local_tx_ctrl.prod, 475 smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn);
408 smc_curs_read(&prod, conn),
409 conn);
410 /* dst: peer RMBE */ 476 /* dst: peer RMBE */
411 smc_curs_write(&conn->tx_curs_sent, 477 smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */
412 smc_curs_read(&sent, conn),
413 conn);
414 /* src: local sndbuf */
415 478
416 return 0; 479 return 0;
417} 480}
@@ -419,7 +482,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn)
419/* Wakeup sndbuf consumers from any context (IRQ or process) 482/* Wakeup sndbuf consumers from any context (IRQ or process)
420 * since there is more data to transmit; usable snd_wnd as max transmit 483 * since there is more data to transmit; usable snd_wnd as max transmit
421 */ 484 */
422int smc_tx_sndbuf_nonempty(struct smc_connection *conn) 485static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
423{ 486{
424 struct smc_cdc_producer_flags *pflags; 487 struct smc_cdc_producer_flags *pflags;
425 struct smc_cdc_tx_pend *pend; 488 struct smc_cdc_tx_pend *pend;
@@ -466,6 +529,37 @@ out_unlock:
466 return rc; 529 return rc;
467} 530}
468 531
532static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
533{
534 struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
535 int rc = 0;
536
537 spin_lock_bh(&conn->send_lock);
538 if (!pflags->urg_data_present)
539 rc = smc_tx_rdma_writes(conn);
540 if (!rc)
541 rc = smcd_cdc_msg_send(conn);
542
543 if (!rc && pflags->urg_data_present) {
544 pflags->urg_data_pending = 0;
545 pflags->urg_data_present = 0;
546 }
547 spin_unlock_bh(&conn->send_lock);
548 return rc;
549}
550
551int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
552{
553 int rc;
554
555 if (conn->lgr->is_smcd)
556 rc = smcd_tx_sndbuf_nonempty(conn);
557 else
558 rc = smcr_tx_sndbuf_nonempty(conn);
559
560 return rc;
561}
562
469/* Wakeup sndbuf consumers from process context 563/* Wakeup sndbuf consumers from process context
470 * since there is more data to transmit 564 * since there is more data to transmit
471 */ 565 */
@@ -498,17 +592,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
498 int sender_free = conn->rmb_desc->len; 592 int sender_free = conn->rmb_desc->len;
499 int to_confirm; 593 int to_confirm;
500 594
501 smc_curs_write(&cons, 595 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
502 smc_curs_read(&conn->local_tx_ctrl.cons, conn), 596 smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn);
503 conn);
504 smc_curs_write(&cfed,
505 smc_curs_read(&conn->rx_curs_confirmed, conn),
506 conn);
507 to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons); 597 to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
508 if (to_confirm > conn->rmbe_update_limit) { 598 if (to_confirm > conn->rmbe_update_limit) {
509 smc_curs_write(&prod, 599 smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
510 smc_curs_read(&conn->local_rx_ctrl.prod, conn),
511 conn);
512 sender_free = conn->rmb_desc->len - 600 sender_free = conn->rmb_desc->len -
513 smc_curs_diff(conn->rmb_desc->len, &prod, &cfed); 601 smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
514 } 602 }
@@ -524,9 +612,8 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
524 SMC_TX_WORK_DELAY); 612 SMC_TX_WORK_DELAY);
525 return; 613 return;
526 } 614 }
527 smc_curs_write(&conn->rx_curs_confirmed, 615 smc_curs_copy(&conn->rx_curs_confirmed,
528 smc_curs_read(&conn->local_tx_ctrl.cons, conn), 616 &conn->local_tx_ctrl.cons, conn);
529 conn);
530 conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; 617 conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
531 } 618 }
532 if (conn->local_rx_ctrl.prod_flags.write_blocked && 619 if (conn->local_rx_ctrl.prod_flags.write_blocked &&
diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h
index 9d2238909fa0..07e6ad76224a 100644
--- a/net/smc/smc_tx.h
+++ b/net/smc/smc_tx.h
@@ -22,8 +22,8 @@ static inline int smc_tx_prepared_sends(struct smc_connection *conn)
22{ 22{
23 union smc_host_cursor sent, prep; 23 union smc_host_cursor sent, prep;
24 24
25 smc_curs_write(&sent, smc_curs_read(&conn->tx_curs_sent, conn), conn); 25 smc_curs_copy(&sent, &conn->tx_curs_sent, conn);
26 smc_curs_write(&prep, smc_curs_read(&conn->tx_curs_prep, conn), conn); 26 smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
27 return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep); 27 return smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
28} 28}
29 29
@@ -33,5 +33,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len);
33int smc_tx_sndbuf_nonempty(struct smc_connection *conn); 33int smc_tx_sndbuf_nonempty(struct smc_connection *conn);
34void smc_tx_sndbuf_nonfull(struct smc_sock *smc); 34void smc_tx_sndbuf_nonfull(struct smc_sock *smc);
35void smc_tx_consumer_update(struct smc_connection *conn, bool force); 35void smc_tx_consumer_update(struct smc_connection *conn, bool force);
36int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
37 u32 offset, int signal);
36 38
37#endif /* SMC_TX_H */ 39#endif /* SMC_TX_H */
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de1a438cf977..3c458d279855 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -92,8 +92,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
92 if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) 92 if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
93 return; 93 return;
94 if (wc->status) { 94 if (wc->status) {
95 struct smc_link_group *lgr;
96
97 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { 95 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
98 /* clear full struct smc_wr_tx_pend including .priv */ 96 /* clear full struct smc_wr_tx_pend including .priv */
99 memset(&link->wr_tx_pends[i], 0, 97 memset(&link->wr_tx_pends[i], 0,
@@ -103,9 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
103 clear_bit(i, link->wr_tx_mask); 101 clear_bit(i, link->wr_tx_mask);
104 } 102 }
105 /* terminate connections of this link group abnormally */ 103 /* terminate connections of this link group abnormally */
106 lgr = container_of(link, struct smc_link_group, 104 smc_lgr_terminate(smc_get_lgr(link));
107 lnk[SMC_SINGLE_LINK]);
108 smc_lgr_terminate(lgr);
109 } 105 }
110 if (pnd_snd.handler) 106 if (pnd_snd.handler)
111 pnd_snd.handler(&pnd_snd.priv, link, wc->status); 107 pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -186,18 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
186 if (rc) 182 if (rc)
187 return rc; 183 return rc;
188 } else { 184 } else {
189 struct smc_link_group *lgr;
190
191 lgr = container_of(link, struct smc_link_group,
192 lnk[SMC_SINGLE_LINK]);
193 rc = wait_event_timeout( 185 rc = wait_event_timeout(
194 link->wr_tx_wait, 186 link->wr_tx_wait,
195 list_empty(&lgr->list) || /* lgr terminated */ 187 link->state == SMC_LNK_INACTIVE ||
196 (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), 188 (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
197 SMC_WR_TX_WAIT_FREE_SLOT_TIME); 189 SMC_WR_TX_WAIT_FREE_SLOT_TIME);
198 if (!rc) { 190 if (!rc) {
199 /* timeout - terminate connections */ 191 /* timeout - terminate connections */
200 smc_lgr_terminate(lgr); 192 smc_lgr_terminate(smc_get_lgr(link));
201 return -EPIPE; 193 return -EPIPE;
202 } 194 }
203 if (idx == link->wr_tx_cnt) 195 if (idx == link->wr_tx_cnt)
@@ -248,12 +240,8 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
248 pend = container_of(priv, struct smc_wr_tx_pend, priv); 240 pend = container_of(priv, struct smc_wr_tx_pend, priv);
249 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); 241 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
250 if (rc) { 242 if (rc) {
251 struct smc_link_group *lgr =
252 container_of(link, struct smc_link_group,
253 lnk[SMC_SINGLE_LINK]);
254
255 smc_wr_tx_put_slot(link, priv); 243 smc_wr_tx_put_slot(link, priv);
256 smc_lgr_terminate(lgr); 244 smc_lgr_terminate(smc_get_lgr(link));
257 } 245 }
258 return rc; 246 return rc;
259} 247}
@@ -278,11 +266,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
278 SMC_WR_REG_MR_WAIT_TIME); 266 SMC_WR_REG_MR_WAIT_TIME);
279 if (!rc) { 267 if (!rc) {
280 /* timeout - terminate connections */ 268 /* timeout - terminate connections */
281 struct smc_link_group *lgr; 269 smc_lgr_terminate(smc_get_lgr(link));
282
283 lgr = container_of(link, struct smc_link_group,
284 lnk[SMC_SINGLE_LINK]);
285 smc_lgr_terminate(lgr);
286 return -EPIPE; 270 return -EPIPE;
287 } 271 }
288 if (rc == -ERESTARTSYS) 272 if (rc == -ERESTARTSYS)
@@ -375,8 +359,6 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
375 smc_wr_rx_demultiplex(&wc[i]); 359 smc_wr_rx_demultiplex(&wc[i]);
376 smc_wr_rx_post(link); /* refill WR RX */ 360 smc_wr_rx_post(link); /* refill WR RX */
377 } else { 361 } else {
378 struct smc_link_group *lgr;
379
380 /* handle status errors */ 362 /* handle status errors */
381 switch (wc[i].status) { 363 switch (wc[i].status) {
382 case IB_WC_RETRY_EXC_ERR: 364 case IB_WC_RETRY_EXC_ERR:
@@ -385,9 +367,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
385 /* terminate connections of this link group 367 /* terminate connections of this link group
386 * abnormally 368 * abnormally
387 */ 369 */
388 lgr = container_of(link, struct smc_link_group, 370 smc_lgr_terminate(smc_get_lgr(link));
389 lnk[SMC_SINGLE_LINK]);
390 smc_lgr_terminate(lgr);
391 break; 371 break;
392 default: 372 default:
393 smc_wr_rx_post(link); /* refill WR RX */ 373 smc_wr_rx_post(link); /* refill WR RX */