diff options
author | Vladimir Sokolovsky <vlad@mellanox.co.il> | 2010-04-14 10:23:39 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-04-21 19:37:49 -0400 |
commit | 6fa8f719844b8455033e295f720e739c1dc3804a (patch) | |
tree | f3080e38beb16b836ec5eb3ba6cb99dfc07316a0 /drivers | |
parent | 5e80ba8ff0bd33ff4af2365969a231cbdb98cafb (diff) |
IB/mlx4: Add support for masked atomic operations

Add support for masked atomic operations (masked compare and swap,
masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/infiniband/hw/mlx4/cq.c | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/main.c | 1 | ||||
-rw-r--r-- | drivers/infiniband/hw/mlx4/qp.c | 50 |
3 files changed, 48 insertions, 11 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index cc2ddd29ac57..5a219a2fdf16 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -661,6 +661,14 @@ repoll: | |||
661 | wc->opcode = IB_WC_FETCH_ADD; | 661 | wc->opcode = IB_WC_FETCH_ADD; |
662 | wc->byte_len = 8; | 662 | wc->byte_len = 8; |
663 | break; | 663 | break; |
664 | case MLX4_OPCODE_MASKED_ATOMIC_CS: | ||
665 | wc->opcode = IB_WC_MASKED_COMP_SWAP; | ||
666 | wc->byte_len = 8; | ||
667 | break; | ||
668 | case MLX4_OPCODE_MASKED_ATOMIC_FA: | ||
669 | wc->opcode = IB_WC_MASKED_FETCH_ADD; | ||
670 | wc->byte_len = 8; | ||
671 | break; | ||
664 | case MLX4_OPCODE_BIND_MW: | 672 | case MLX4_OPCODE_BIND_MW: |
665 | wc->opcode = IB_WC_BIND_MW; | 673 | wc->opcode = IB_WC_BIND_MW; |
666 | break; | 674 | break; |
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 01f2a3f93355..39051417054c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, | |||
139 | props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; | 139 | props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; |
140 | props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? | 140 | props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? |
141 | IB_ATOMIC_HCA : IB_ATOMIC_NONE; | 141 | IB_ATOMIC_HCA : IB_ATOMIC_NONE; |
142 | props->masked_atomic_cap = IB_ATOMIC_HCA; | ||
142 | props->max_pkeys = dev->dev->caps.pkey_table_len[1]; | 143 | props->max_pkeys = dev->dev->caps.pkey_table_len[1]; |
143 | props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; | 144 | props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; |
144 | props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; | 145 | props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; |
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5643f4a8ffef..6a60827b2301 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -74,17 +74,19 @@ enum { | |||
74 | }; | 74 | }; |
75 | 75 | ||
76 | static const __be32 mlx4_ib_opcode[] = { | 76 | static const __be32 mlx4_ib_opcode[] = { |
77 | [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), | 77 | [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), |
78 | [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), | 78 | [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), |
79 | [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), | 79 | [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), |
80 | [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), | 80 | [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), |
81 | [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), | 81 | [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), |
82 | [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), | 82 | [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), |
83 | [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), | 83 | [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), |
84 | [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), | 84 | [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), |
85 | [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), | 85 | [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), |
86 | [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), | 86 | [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), |
87 | [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), | 87 | [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), |
88 | [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), | ||
89 | [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), | ||
88 | }; | 90 | }; |
89 | 91 | ||
90 | static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) | 92 | static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) |
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * | |||
1407 | if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { | 1409 | if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { |
1408 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); | 1410 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); |
1409 | aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); | 1411 | aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); |
1412 | } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) { | ||
1413 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); | ||
1414 | aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask); | ||
1410 | } else { | 1415 | } else { |
1411 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); | 1416 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); |
1412 | aseg->compare = 0; | 1417 | aseg->compare = 0; |
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr * | |||
1414 | 1419 | ||
1415 | } | 1420 | } |
1416 | 1421 | ||
1422 | static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg, | ||
1423 | struct ib_send_wr *wr) | ||
1424 | { | ||
1425 | aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); | ||
1426 | aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask); | ||
1427 | aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); | ||
1428 | aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask); | ||
1429 | } | ||
1430 | |||
1417 | static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, | 1431 | static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, |
1418 | struct ib_send_wr *wr) | 1432 | struct ib_send_wr *wr) |
1419 | { | 1433 | { |
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1567 | switch (wr->opcode) { | 1581 | switch (wr->opcode) { |
1568 | case IB_WR_ATOMIC_CMP_AND_SWP: | 1582 | case IB_WR_ATOMIC_CMP_AND_SWP: |
1569 | case IB_WR_ATOMIC_FETCH_AND_ADD: | 1583 | case IB_WR_ATOMIC_FETCH_AND_ADD: |
1584 | case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: | ||
1570 | set_raddr_seg(wqe, wr->wr.atomic.remote_addr, | 1585 | set_raddr_seg(wqe, wr->wr.atomic.remote_addr, |
1571 | wr->wr.atomic.rkey); | 1586 | wr->wr.atomic.rkey); |
1572 | wqe += sizeof (struct mlx4_wqe_raddr_seg); | 1587 | wqe += sizeof (struct mlx4_wqe_raddr_seg); |
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | |||
1579 | 1594 | ||
1580 | break; | 1595 | break; |
1581 | 1596 | ||
1597 | case IB_WR_MASKED_ATOMIC_CMP_AND_SWP: | ||
1598 | set_raddr_seg(wqe, wr->wr.atomic.remote_addr, | ||
1599 | wr->wr.atomic.rkey); | ||
1600 | wqe += sizeof (struct mlx4_wqe_raddr_seg); | ||
1601 | |||
1602 | set_masked_atomic_seg(wqe, wr); | ||
1603 | wqe += sizeof (struct mlx4_wqe_masked_atomic_seg); | ||
1604 | |||
1605 | size += (sizeof (struct mlx4_wqe_raddr_seg) + | ||
1606 | sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16; | ||
1607 | |||
1608 | break; | ||
1609 | |||
1582 | case IB_WR_RDMA_READ: | 1610 | case IB_WR_RDMA_READ: |
1583 | case IB_WR_RDMA_WRITE: | 1611 | case IB_WR_RDMA_WRITE: |
1584 | case IB_WR_RDMA_WRITE_WITH_IMM: | 1612 | case IB_WR_RDMA_WRITE_WITH_IMM: |