about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Vladimir Sokolovsky <vlad@mellanox.co.il> 2010-04-14 10:23:39 -0400
committer: Roland Dreier <rolandd@cisco.com> 2010-04-21 19:37:49 -0400
commit: 6fa8f719844b8455033e295f720e739c1dc3804a (patch)
tree: f3080e38beb16b836ec5eb3ba6cb99dfc07316a0
parent: 5e80ba8ff0bd33ff4af2365969a231cbdb98cafb (diff)
IB/mlx4: Add support for masked atomic operations
Add support for masked atomic operations (masked compare and swap,
masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c    |  8
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c  |  1
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c    | 50
-rw-r--r--  include/linux/mlx4/device.h        |  4
-rw-r--r--  include/linux/mlx4/qp.h            |  7
5 files changed, 57 insertions, 13 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index cc2ddd29ac57..5a219a2fdf16 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -661,6 +661,14 @@ repoll:
661 wc->opcode = IB_WC_FETCH_ADD; 661 wc->opcode = IB_WC_FETCH_ADD;
662 wc->byte_len = 8; 662 wc->byte_len = 8;
663 break; 663 break;
664 case MLX4_OPCODE_MASKED_ATOMIC_CS:
665 wc->opcode = IB_WC_MASKED_COMP_SWAP;
666 wc->byte_len = 8;
667 break;
668 case MLX4_OPCODE_MASKED_ATOMIC_FA:
669 wc->opcode = IB_WC_MASKED_FETCH_ADD;
670 wc->byte_len = 8;
671 break;
664 case MLX4_OPCODE_BIND_MW: 672 case MLX4_OPCODE_BIND_MW:
665 wc->opcode = IB_WC_BIND_MW; 673 wc->opcode = IB_WC_BIND_MW;
666 break; 674 break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 01f2a3f93355..39051417054c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
139 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; 139 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
140 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 140 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
141 IB_ATOMIC_HCA : IB_ATOMIC_NONE; 141 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
142 props->masked_atomic_cap = IB_ATOMIC_HCA;
142 props->max_pkeys = dev->dev->caps.pkey_table_len[1]; 143 props->max_pkeys = dev->dev->caps.pkey_table_len[1];
143 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; 144 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
144 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; 145 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5643f4a8ffef..6a60827b2301 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -74,17 +74,19 @@ enum {
74}; 74};
75 75
76static const __be32 mlx4_ib_opcode[] = { 76static const __be32 mlx4_ib_opcode[] = {
77 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), 77 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
78 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), 78 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
79 [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM), 79 [IB_WR_SEND_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_SEND_IMM),
80 [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), 80 [IB_WR_RDMA_WRITE] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
81 [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), 81 [IB_WR_RDMA_WRITE_WITH_IMM] = cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
82 [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ), 82 [IB_WR_RDMA_READ] = cpu_to_be32(MLX4_OPCODE_RDMA_READ),
83 [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), 83 [IB_WR_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
84 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), 84 [IB_WR_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
85 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL), 85 [IB_WR_SEND_WITH_INV] = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
86 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL), 86 [IB_WR_LOCAL_INV] = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
87 [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), 87 [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
88 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
89 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
88}; 90};
89 91
90static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) 92static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
1407 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { 1409 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1408 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap); 1410 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1409 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add); 1411 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1412 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
1413 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1414 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1410 } else { 1415 } else {
1411 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add); 1416 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1412 aseg->compare = 0; 1417 aseg->compare = 0;
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
1414 1419
1415} 1420}
1416 1421
1422static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
1423 struct ib_send_wr *wr)
1424{
1425 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1426 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
1427 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1428 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1429}
1430
1417static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, 1431static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1418 struct ib_send_wr *wr) 1432 struct ib_send_wr *wr)
1419{ 1433{
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1567 switch (wr->opcode) { 1581 switch (wr->opcode) {
1568 case IB_WR_ATOMIC_CMP_AND_SWP: 1582 case IB_WR_ATOMIC_CMP_AND_SWP:
1569 case IB_WR_ATOMIC_FETCH_AND_ADD: 1583 case IB_WR_ATOMIC_FETCH_AND_ADD:
1584 case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
1570 set_raddr_seg(wqe, wr->wr.atomic.remote_addr, 1585 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
1571 wr->wr.atomic.rkey); 1586 wr->wr.atomic.rkey);
1572 wqe += sizeof (struct mlx4_wqe_raddr_seg); 1587 wqe += sizeof (struct mlx4_wqe_raddr_seg);
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1579 1594
1580 break; 1595 break;
1581 1596
1597 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
1598 set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
1599 wr->wr.atomic.rkey);
1600 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1601
1602 set_masked_atomic_seg(wqe, wr);
1603 wqe += sizeof (struct mlx4_wqe_masked_atomic_seg);
1604
1605 size += (sizeof (struct mlx4_wqe_raddr_seg) +
1606 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
1607
1608 break;
1609
1582 case IB_WR_RDMA_READ: 1610 case IB_WR_RDMA_READ:
1583 case IB_WR_RDMA_WRITE: 1611 case IB_WR_RDMA_WRITE:
1584 case IB_WR_RDMA_WRITE_WITH_IMM: 1612 case IB_WR_RDMA_WRITE_WITH_IMM:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e92d1bfdb330..7a7f9c1e679a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -123,8 +123,8 @@ enum {
123 MLX4_OPCODE_RDMA_READ = 0x10, 123 MLX4_OPCODE_RDMA_READ = 0x10,
124 MLX4_OPCODE_ATOMIC_CS = 0x11, 124 MLX4_OPCODE_ATOMIC_CS = 0x11,
125 MLX4_OPCODE_ATOMIC_FA = 0x12, 125 MLX4_OPCODE_ATOMIC_FA = 0x12,
126 MLX4_OPCODE_ATOMIC_MASK_CS = 0x14, 126 MLX4_OPCODE_MASKED_ATOMIC_CS = 0x14,
127 MLX4_OPCODE_ATOMIC_MASK_FA = 0x15, 127 MLX4_OPCODE_MASKED_ATOMIC_FA = 0x15,
128 MLX4_OPCODE_BIND_MW = 0x18, 128 MLX4_OPCODE_BIND_MW = 0x18,
129 MLX4_OPCODE_FMR = 0x19, 129 MLX4_OPCODE_FMR = 0x19,
130 MLX4_OPCODE_LOCAL_INVAL = 0x1b, 130 MLX4_OPCODE_LOCAL_INVAL = 0x1b,
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 9f29d86e5dc9..7abe64326f72 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
285 __be64 compare; 285 __be64 compare;
286}; 286};
287 287
288struct mlx4_wqe_masked_atomic_seg {
289 __be64 swap_add;
290 __be64 compare;
291 __be64 swap_add_mask;
292 __be64 compare_mask;
293};
294
288struct mlx4_wqe_data_seg { 295struct mlx4_wqe_data_seg {
289 __be32 byte_count; 296 __be32 byte_count;
290 __be32 lkey; 297 __be32 lkey;