author		Or Gerlitz <ogerlitz@mellanox.com>	2012-10-21 10:59:24 -0400
committer	Roland Dreier <roland@purestorage.com>	2012-11-26 13:19:17 -0500
commit		08ff32352d6ff7083533dc1c25618d42f92ec28e (patch)
tree		00cc99b260b03c7c3e66a017f9dafcfa56b53364
parent		f4a75d2eb7b1e2206094b901be09adb31ba63681 (diff)
mlx4: 64-byte CQE/EQE support
ConnectX-3 devices can use either 64- or 32-byte completion queue entries (CQEs) and event queue entries (EQEs). Using 64-byte EQEs/CQEs performs better because each entry is aligned to a complete cacheline. This patch queries the HCA's capabilities, and if the device supports 64-byte CQEs and EQEs, the driver configures the HW to work in 64-byte mode.

The 32-byte vs. 64-byte mode is global per HCA, not per CQ or EQ. Since the mode is global, userspace (libmlx4) must be updated to work with the configured CQE size, and guests using SR-IOV virtual functions need to know both the EQE and CQE size.

In case one of the 64-byte CQE/EQE capabilities is activated, the patch makes sure that older guest drivers that use the QUERY_FUNC_CAP command (e.g. as done in mlx4_core of Linux 3.3..3.6) will notice that they need an update to be able to work with the PPF. This is done by changing the returned pf_context_behaviour so that it is no longer zero. If neither capability is activated, that value remains zero and older guest drivers keep working unmodified.

The SR-IOV related flow is as follows:

1. The PPF detects the new capabilities using the QUERY_DEV_CAP command.
2. The PPF activates the new capabilities using INIT_HCA.
3. The VF detects whether the PPF activated the capabilities using QUERY_HCA, and if so activates them for itself too.

Note that the VF detects that it must be aware of the new PF behaviour using QUERY_FUNC_CAP. Steps 1 and 2 also apply in native mode.

Userspace notification is done through a new field introduced in struct mlx4_ib_alloc_ucontext_resp which holds the device capabilities for which userspace must take action. This changes the binary interface, so the ABI towards libmlx4 exposed through uverbs is bumped from 3 to 4, but only when needed, i.e. only when the driver actually uses 64-byte CQEs or future device capabilities which must be kept in sync with userspace. This allows unmodified libmlx4 to keep working on older devices (e.g. A0, B0) which don't support 64-byte CQEs.

In order to keep existing systems functional when they update to a newer kernel that contains these changes in the VF and userspace ABI, a module parameter enable_64b_cqe_eqe must be set to enable 64-byte mode; the default is currently false.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
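As a reading aid, not part of the patch: a minimal sketch of how an updated, libmlx4-style consumer might act on the ABI v4 response described above. The response struct mirrors struct mlx4_ib_alloc_ucontext_resp from this patch; the context type, helper name, and error convention are hypothetical. On capable hardware, 64-byte mode is then opted into by loading mlx4_core with enable_64b_cqe_eqe=1.

#include <linux/types.h>

/* Mirrors struct mlx4_ib_alloc_ucontext_resp (uverbs ABI v4) below. */
struct mlx4_alloc_ucontext_resp {
	__u32 dev_caps;
	__u32 qp_tab_size;
	__u16 bf_reg_size;
	__u16 bf_regs_per_page;
	__u32 cqe_size;
};

/* Hypothetical userspace context, for illustration only. */
struct mlx4_context {
	int cqe_size;
};

static int mlx4_parse_ucontext_resp(struct mlx4_context *ctx,
				    const struct mlx4_alloc_ucontext_resp *resp)
{
	/* The kernel reports 32 or 64 depending on what INIT_HCA enabled. */
	if (resp->cqe_size != 32 && resp->cqe_size != 64)
		return -1;

	ctx->cqe_size = resp->cqe_size;
	return 0;
}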
-rw-r--r--	drivers/infiniband/hw/mlx4/cq.c			| 34
-rw-r--r--	drivers/infiniband/hw/mlx4/main.c		| 27
-rw-r--r--	drivers/infiniband/hw/mlx4/mlx4_ib.h		|  1
-rw-r--r--	drivers/infiniband/hw/mlx4/user.h		| 12
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/cmd.c	|  2
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_cq.c	|  2
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_netdev.c	|  1
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_rx.c	|  5
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/en_tx.c	|  5
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/eq.c		| 26
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/fw.c		| 30
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/fw.h		|  1
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/main.c	| 38
-rw-r--r--	drivers/net/ethernet/mellanox/mlx4/mlx4_en.h	|  1
-rw-r--r--	include/linux/mlx4/device.h			| 21
15 files changed, 175 insertions, 31 deletions
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index c9eb6a6815c..ae67df35dd4 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
 
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 			    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-		      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ out:
 
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ err_buf:
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
 
@@ -586,6 +596,9 @@ repoll:
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |
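A note on the recurring "cqe += cqe_inc" pattern in this file, restated as a standalone helper. This is illustrative only; it assumes that the layout the eq.c comment below describes for EQEs, namely that in 64-byte mode the legacy 32-byte entry occupies the upper half, applies to CQEs as well, which is what get_sw_cqe()'s "cqe + 1" relies on:

/* Locate the legacy 32-byte CQE within entry n of the CQ buffer.
 * With 64-byte entries the valid fields live in the second 32 bytes,
 * hence the "+ 1" on a 32-byte struct mlx4_cqe pointer. */
static struct mlx4_cqe *legacy_cqe(struct mlx4_ib_cq *cq, int n)
{
	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);

	return (cq->buf.entry_size == 64) ? cqe + 1 : cqe;
}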
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 718ec6b2bad..e7d81c0d1ac 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps	      = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size      = dev->dev->caps.num_qps;
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size	      = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask	=
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e04cbc9a54a..dcd845bc30f 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 13beedeeef9..07e6769ef43 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 3d1899ff107..e791e705f7b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1755,7 +1755,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 		spin_lock_init(&s_state->lock);
 	}
 
-	memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+	memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
 	priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
 	INIT_WORK(&priv->mfunc.master.comm_work,
 		  mlx4_master_comm_channel);
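Related: elsewhere in this file, slave_event() copies "dev->caps.eqe_size - 1" bytes, deliberately excluding the final byte of the EQE, which carries the ownership flag. A minimal sketch of that publish pattern; the helper is hypothetical and assumes the ownership flag is the last byte of the entry:

/* Copy all but the ownership byte, then flip ownership last so a
 * polling slave never observes a half-written event. */
static void publish_eqe(struct mlx4_eqe *dst, const struct mlx4_eqe *src,
			u32 eqe_size, u8 owner)
{
	memcpy(dst, src, eqe_size - 1);	/* everything except the last byte */
	wmb();				/* payload visible before ownership */
	dst->owner = owner;		/* ownership byte written last */
}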
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index aa9c2f6cf3c..b8d0854a7ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
 	cq->is_tx = mode;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index edd9cb8d3e1..93a32566958 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1600,6 +1600,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		goto out;
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
+	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5aba5ecdf1e..6fa106f6c0e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct ethhdr *ethh;
 	dma_addr_t dma;
 	u64 s_mac;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[index];
+	cqe = &cq->buf[(index << factor) + factor];
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ next:
 
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[index];
+		cqe = &cq->buf[(index << factor) + factor];
 		if (++polled == budget) {
 			/* We are here because we reached the NAPI budget -
 			 * flush only pending LRO sessions */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index b35094c590b..25c157abdd9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_cqe *buf = cq->buf;
 	u32 packets = 0;
 	u32 bytes = 0;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return;
 
 	index = cons_index & size_mask;
-	cqe = &buf[index];
+	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
 
 	/* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[index];
+		cqe = &buf[(index << factor) + factor];
 	}
 
 
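The "(index << factor) + factor" indexing used by both en_rx.c and en_tx.c above is plain pointer arithmetic on the 32-byte struct mlx4_cqe type: with factor = 1 (64-byte CQEs) entry i begins at element 2*i, and its legacy CQE is element 2*i + 1. A standalone restatement, for illustration only:

/* Entry i of an mlx4_en CQ ring, adjusted for 64-byte CQEs.  buf is
 * typed as 32-byte struct mlx4_cqe, so factor = 1 doubles the stride
 * and the trailing "+ factor" selects the valid upper half. */
static struct mlx4_cqe *en_get_cqe(struct mlx4_cqe *buf, u32 index, int factor)
{
	return &buf[(index << factor) + factor];
}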
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index b84a88bc44d..c509a86db61 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
 {
-	unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
-	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+	/* (entry & (eq->nent - 1)) gives us a cyclic array */
+	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
+	 * When this feature is enabled, the first (in the lower addresses)
+	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+	 * contain the legacy EQE information.
+	 */
+	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
 
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
 		return;
 	}
 
-	memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
 	s_eqe->slave_id = slave;
 	/* ensure all information is written before setting the ownersip bit */
 	wmb();
@@ -441,7 +447,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	int i;
 	enum slave_port_gen_event gen_event;
 
-	while ((eqe = next_eqe_sw(eq))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -864,7 +870,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -966,8 +973,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
 	int i;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
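For reference, the get_eqe() offset math above restated as a self-contained function. This is a sketch under stated assumptions: MLX4_EQ_ENTRY_SIZE is 32 bytes, nent is a power of two, and the page_list lookup is dropped:

/* Byte offset of the legacy EQE for a given consumer index.  With
 * eqe_factor = 1 each entry spans 64 bytes and the legacy data sits
 * in the upper 32 bytes; with eqe_factor = 0 this degenerates to the
 * old 32-byte stride. */
static unsigned long legacy_eqe_offset(u32 entry, u32 nent, u8 eqe_factor)
{
	unsigned long off = (entry & (nent - 1)) * (32UL << eqe_factor);

	return off + (eqe_factor ? 32 : 0);
}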
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 4f30b99324c..9a9de51ecc9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
 		[59] = "Port management change event support",
+		[61] = "64 byte EQE support",
+		[62] = "64 byte CQE support",
 	};
 	int i;
 
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 	field = dev->caps.num_ports;
 	MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
 
-	size = 0; /* no PF behaviour is set for now */
+	size = dev->caps.function_caps; /* set PF behaviours */
 	MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
 
 	field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
 	int err;
+	u8 byte_field;
 
 #define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
 
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+	if (byte_field & 0x20) /* 64-bytes eqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+	if (byte_field & 0x40) /* 64-bytes cqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
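The INIT_HCA and QUERY_HCA hunks above are two views of the same dword: host bits 29 and 30 of the big-endian word at INIT_HCA_EQE_CQE_OFFSETS land in its first byte as 0x20 and 0x40, which is exactly what QUERY_HCA tests. A small sketch of that correspondence (the helper names are illustrative, not from the patch):

/* cpu_to_be32(1 << 29) places bit 29 in the word's most significant
 * byte, the byte QUERY_HCA reads back, where it shows up as 0x20;
 * bit 30 likewise shows up as 0x40. */
static inline int eqe_64b_enabled(u8 byte_field)
{
	return byte_field & 0x20;	/* set by cpu_to_be32(1 << 29) */
}

static inline int cqe_64b_enabled(u8 byte_field)
{
	return byte_field & 0x40;	/* set by cpu_to_be32(1 << 30) */
}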
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 85abe9c11a2..2c2e7ade2a3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
 	u8  log_uar_sz;
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  fs_hash_enable_bits;
+	u64 dev_cap_enabled;
 };
 
 struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2aa80afd98d..4337f685175 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
95 " Not in use with device managed" 95 " Not in use with device managed"
96 " flow steering"); 96 " flow steering");
97 97
98static bool enable_64b_cqe_eqe;
99module_param(enable_64b_cqe_eqe, bool, 0444);
100MODULE_PARM_DESC(enable_64b_cqe_eqe,
101 "Enable 64 byte CQEs/EQEs when the the FW supports this");
102
98#define HCA_GLOBAL_CAP_MASK 0 103#define HCA_GLOBAL_CAP_MASK 0
99#define PF_CONTEXT_BEHAVIOUR_MASK 0 104
105#define PF_CONTEXT_BEHAVIOUR_MASK MLX4_FUNC_CAP_64B_EQE_CQE
100 106
101static char mlx4_version[] __devinitdata = 107static char mlx4_version[] __devinitdata =
102 DRV_NAME ": Mellanox ConnectX core driver v" 108 DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+	if (!enable_64b_cqe_eqe) {
+		if (dev_cap->flags &
+		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+		}
+	}
+
+	if ((dev_cap->flags &
+	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+	    mlx4_is_master(dev))
+		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
 	return 0;
 }
 /*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		goto err_mem;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	return 0;
 
 err_mem:
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 9d27e42264e..73b5c2ac5bd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -487,6 +487,7 @@ struct mlx4_en_priv {
 	int mac_index;
 	unsigned max_mtu;
 	int base_qpn;
+	int cqe_factor;
 
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6d1acb04cd1..21821da2abf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -142,6 +142,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
+	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
 };
 
 enum {
@@ -151,6 +153,20 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FS_EN	= 1LL << 3
 };
 
+enum {
+	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+};
+
+enum {
+	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+};
+
+enum {
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+};
+
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
 	u32			max_counters;
 	u8			port_ib_mtu[MLX4_MAX_PORTS + 1];
 	u16			sqp_demux;
+	u32			eqe_size;
+	u32			cqe_size;
+	u8			eqe_factor;
+	u32			userspace_caps; /* userspace must be aware of these */
+	u32			function_caps; /* VFs must be aware of these */
 };
 
 struct mlx4_buf_list {