-rw-r--r--  Documentation/ABI/stable/sysfs-driver-ib_srp | 7
-rw-r--r--  MAINTAINERS | 22
-rw-r--r--  drivers/infiniband/Kconfig | 1
-rw-r--r--  drivers/infiniband/Makefile | 1
-rw-r--r--  drivers/infiniband/core/addr.c | 20
-rw-r--r--  drivers/infiniband/core/cma.c | 906
-rw-r--r--  drivers/infiniband/core/sa_query.c | 6
-rw-r--r--  drivers/infiniband/core/sysfs.c | 8
-rw-r--r--  drivers/infiniband/core/ucma.c | 321
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c | 3
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c | 1
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig | 10
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 92
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 843
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c | 100
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 139
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 1504
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 162
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 545
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 1007
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 2524
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 473
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h | 121
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h | 63
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 86
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c | 6
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 35
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 135
-rw-r--r--  drivers/infiniband/hw/qib/Kconfig | 8
-rw-r--r--  drivers/infiniband/hw/qib/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 63
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_cq.c | 67
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.c | 283
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.h | 45
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 176
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 507
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 145
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c | 123
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c | 56
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 8
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 33
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 89
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/Kconfig | 1
-rw-r--r--  drivers/net/ethernet/mellanox/Makefile | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 18
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 5
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 238
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1515
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/cq.c | 224
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 583
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/eq.c | 521
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/fw.c | 185
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/health.c | 227
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mad.c | 78
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/main.c | 475
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 106
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 73
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/mr.c | 136
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 435
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/pd.c | 101
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/port.c | 104
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/qp.c | 301
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/srq.c | 223
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/uar.c | 223
-rw-r--r--  include/linux/mlx5/cmd.h | 51
-rw-r--r--  include/linux/mlx5/cq.h | 165
-rw-r--r--  include/linux/mlx5/device.h | 893
-rw-r--r--  include/linux/mlx5/doorbell.h | 79
-rw-r--r--  include/linux/mlx5/driver.h | 769
-rw-r--r--  include/linux/mlx5/qp.h | 467
-rw-r--r--  include/linux/mlx5/srq.h | 41
-rw-r--r--  include/linux/socket.h | 2
-rw-r--r--  include/rdma/ib.h | 89
-rw-r--r--  include/rdma/ib_addr.h | 6
-rw-r--r--  include/rdma/ib_sa.h | 7
-rw-r--r--  include/rdma/ib_verbs.h | 35
-rw-r--r--  include/rdma/rdma_cm.h | 13
-rw-r--r--  include/uapi/rdma/rdma_user_cm.h | 73
85 files changed, 18436 insertions(+), 803 deletions(-)
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
index 481aae95c7d1..5c53d28f775c 100644
--- a/Documentation/ABI/stable/sysfs-driver-ib_srp
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -54,6 +54,13 @@ Description: Interface for making ib_srp connect to a new target.
 		    ib_srp. Specifying a value that exceeds cmd_sg_entries is
 		    only safe with partial memory descriptor list support enabled
 		    (allow_ext_sg=1).
+		    * comp_vector, a number in the range 0..n-1 specifying the
+		      MSI-X completion vector. Some HCA's allocate multiple (n)
+		      MSI-X vectors per HCA port. If the IRQ affinity masks of
+		      these interrupts have been configured such that each MSI-X
+		      interrupt is handled by a different CPU then the comp_vector
+		      parameter can be used to spread the SRP completion workload
+		      over multiple CPU's.
 
 What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
 Date:		January 2, 2006
diff --git a/MAINTAINERS b/MAINTAINERS
index b41a9fce3211..705681e5a6bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5430,6 +5430,28 @@ W: http://linuxtv.org
 S:	Odd Fixes
 F:	drivers/media/radio/radio-miropcm20*
 
+Mellanox MLX5 core VPI driver
+M:	Eli Cohen <eli@mellanox.com>
+L:	netdev@vger.kernel.org
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.ozlabs.org/project/netdev/list/
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+T:	git://openfabrics.org/~eli/connect-ib.git
+S:	Supported
+F:	drivers/net/ethernet/mellanox/mlx5/core/
+F:	include/linux/mlx5/
+
+Mellanox MLX5 IB driver
+M:	Eli Cohen <eli@mellanox.com>
+L:	linux-rdma@vger.kernel.org
+W:	http://www.mellanox.com
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+T:	git://openfabrics.org/~eli/connect-ib.git
+S:	Supported
+F:	include/linux/mlx5/
+F:	drivers/infiniband/hw/mlx5/
+
 MODULE SUPPORT
 M:	Rusty Russell <rusty@rustcorp.com.au>
 S:	Maintained
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c85b56c28099..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig"
50source "drivers/infiniband/hw/cxgb3/Kconfig" 50source "drivers/infiniband/hw/cxgb3/Kconfig"
51source "drivers/infiniband/hw/cxgb4/Kconfig" 51source "drivers/infiniband/hw/cxgb4/Kconfig"
52source "drivers/infiniband/hw/mlx4/Kconfig" 52source "drivers/infiniband/hw/mlx4/Kconfig"
53source "drivers/infiniband/hw/mlx5/Kconfig"
53source "drivers/infiniband/hw/nes/Kconfig" 54source "drivers/infiniband/hw/nes/Kconfig"
54source "drivers/infiniband/hw/ocrdma/Kconfig" 55source "drivers/infiniband/hw/ocrdma/Kconfig"
55 56
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index b126fefe0b1c..1fe69888515f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)		+= hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)		+= hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index eaec8d7a3b73..e90f2b2eabd7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -70,6 +71,21 @@ static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
+int rdma_addr_size(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	case AF_IB:
+		return sizeof(struct sockaddr_ib);
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -369,12 +385,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 			goto err;
 		}
 
-		memcpy(src_in, src_addr, ip_addr_size(src_addr));
+		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 	} else {
 		src_in->sa_family = dst_addr->sa_family;
 	}
 
-	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 34fbc2f60a09..f1c279fabe64 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -50,6 +50,7 @@
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_cm_ib.h>
 #include <rdma/rdma_netlink.h>
+#include <rdma/ib.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
@@ -79,7 +80,6 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
 static DEFINE_IDR(ipoib_ps);
@@ -195,24 +195,7 @@ struct cma_hdr {
 	union cma_ip_addr dst_addr;
 };
 
-struct sdp_hh {
-	u8 bsdh[16];
-	u8 sdp_version; /* Major version: 7:4 */
-	u8 ip_version; /* IP version: 7:4 */
-	u8 sdp_specific1[10];
-	__be16 port;
-	__be16 sdp_specific2;
-	union cma_ip_addr src_addr;
-	union cma_ip_addr dst_addr;
-};
-
-struct sdp_hah {
-	u8 bsdh[16];
-	u8 sdp_version;
-};
-
 #define CMA_VERSION 0x00
-#define SDP_MAJ_VERSION 0x2
 
 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
 {
@@ -261,21 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
 	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
 }
 
-static inline u8 sdp_get_majv(u8 sdp_version)
-{
-	return sdp_version >> 4;
-}
-
-static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
-{
-	return hh->ip_version >> 4;
-}
-
-static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
-{
-	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
-}
-
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 			      struct cma_device *cma_dev)
 {
@@ -310,16 +278,40 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
-static int cma_set_qkey(struct rdma_id_private *id_priv)
+static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+}
+
+static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
+{
+	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+}
+
+static inline unsigned short cma_family(struct rdma_id_private *id_priv)
+{
+	return id_priv->id.route.addr.src_addr.ss_family;
+}
+
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
 {
 	struct ib_sa_mcmember_rec rec;
 	int ret = 0;
 
-	if (id_priv->qkey)
+	if (id_priv->qkey) {
+		if (qkey && id_priv->qkey != qkey)
+			return -EINVAL;
+		return 0;
+	}
+
+	if (qkey) {
+		id_priv->qkey = qkey;
 		return 0;
+	}
 
 	switch (id_priv->id.ps) {
 	case RDMA_PS_UDP:
+	case RDMA_PS_IB:
 		id_priv->qkey = RDMA_UDP_QKEY;
 		break;
 	case RDMA_PS_IPOIB:
@@ -358,6 +350,27 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
 	return -EADDRNOTAVAIL;
 }
 
+static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
+{
+	dev_addr->dev_type = ARPHRD_INFINIBAND;
+	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
+	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
+}
+
+static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+{
+	int ret;
+
+	if (addr->sa_family != AF_IB) {
+		ret = rdma_translate_ip(addr, dev_addr);
+	} else {
+		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -401,6 +414,61 @@ out:
 	return ret;
 }
 
+/*
+ * Select the source IB device and address to reach the destination IB address.
+ */
+static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
+{
+	struct cma_device *cma_dev, *cur_dev;
+	struct sockaddr_ib *addr;
+	union ib_gid gid, sgid, *dgid;
+	u16 pkey, index;
+	u8 port, p;
+	int i;
+
+	cma_dev = NULL;
+	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
+	dgid = (union ib_gid *) &addr->sib_addr;
+	pkey = ntohs(addr->sib_pkey);
+
+	list_for_each_entry(cur_dev, &dev_list, list) {
+		if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+			continue;
+
+		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
+				continue;
+
+			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+				if (!memcmp(&gid, dgid, sizeof(gid))) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+					goto found;
+				}
+
+				if (!cma_dev && (gid.global.subnet_prefix ==
+						 dgid->global.subnet_prefix)) {
+					cma_dev = cur_dev;
+					sgid = gid;
+					port = p;
+				}
+			}
+		}
+	}
+
+	if (!cma_dev)
+		return -ENODEV;
+
+found:
+	cma_attach_to_dev(id_priv, cma_dev);
+	id_priv->id.port_num = port;
+	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
+	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
+	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
+	return 0;
+}
+
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
 	if (atomic_dec_and_test(&id_priv->refcount))
@@ -630,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
 
 	if (id_priv->id.qp_type == IB_QPT_UD) {
-		ret = cma_set_qkey(id_priv);
+		ret = cma_set_qkey(id_priv, 0);
 		if (ret)
 			return ret;
 
@@ -679,26 +747,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr);
 
 static inline int cma_zero_addr(struct sockaddr *addr)
 {
-	struct in6_addr *ip6;
-
-	if (addr->sa_family == AF_INET)
-		return ipv4_is_zeronet(
-			((struct sockaddr_in *)addr)->sin_addr.s_addr);
-	else {
-		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
-		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
-			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
-	}
+	switch (addr->sa_family) {
+	case AF_INET:
+		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
+	case AF_INET6:
+		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+	case AF_IB:
+		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+	default:
+		return 0;
+	}
 }
 
 static inline int cma_loopback_addr(struct sockaddr *addr)
 {
-	if (addr->sa_family == AF_INET)
-		return ipv4_is_loopback(
-			((struct sockaddr_in *) addr)->sin_addr.s_addr);
-	else
-		return ipv6_addr_loopback(
-			&((struct sockaddr_in6 *) addr)->sin6_addr);
+	switch (addr->sa_family) {
+	case AF_INET:
+		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+	case AF_INET6:
+		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+	case AF_IB:
+		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+	default:
+		return 0;
+	}
 }
 
 static inline int cma_any_addr(struct sockaddr *addr)
@@ -715,18 +787,31 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 	case AF_INET:
 		return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
 		       ((struct sockaddr_in *) dst)->sin_addr.s_addr;
-	default:
+	case AF_INET6:
 		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
 				     &((struct sockaddr_in6 *) dst)->sin6_addr);
+	default:
+		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
+				   &((struct sockaddr_ib *) dst)->sib_addr);
 	}
 }
 
-static inline __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(struct sockaddr *addr)
 {
-	if (addr->sa_family == AF_INET)
+	struct sockaddr_ib *sib;
+
+	switch (addr->sa_family) {
+	case AF_INET:
 		return ((struct sockaddr_in *) addr)->sin_port;
-	else
+	case AF_INET6:
 		return ((struct sockaddr_in6 *) addr)->sin6_port;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) addr;
+		return htons((u16) (be64_to_cpu(sib->sib_sid) &
+				    be64_to_cpu(sib->sib_sid_mask)));
+	default:
+		return 0;
+	}
 }
 
 static inline int cma_any_port(struct sockaddr *addr)
@@ -734,83 +819,92 @@ static inline int cma_any_port(struct sockaddr *addr)
 	return !cma_port(addr);
 }
 
-static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
-			    u8 *ip_ver, __be16 *port,
-			    union cma_ip_addr **src, union cma_ip_addr **dst)
+static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			     struct ib_sa_path_rec *path)
 {
-	switch (ps) {
-	case RDMA_PS_SDP:
-		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
-		    SDP_MAJ_VERSION)
-			return -EINVAL;
+	struct sockaddr_ib *listen_ib, *ib;
 
-		*ip_ver = sdp_get_ip_ver(hdr);
-		*port = ((struct sdp_hh *) hdr)->port;
-		*src = &((struct sdp_hh *) hdr)->src_addr;
-		*dst = &((struct sdp_hh *) hdr)->dst_addr;
-		break;
-	default:
-		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
-			return -EINVAL;
+	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
+	ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
+	ib->sib_family = listen_ib->sib_family;
+	ib->sib_pkey = path->pkey;
+	ib->sib_flowinfo = path->flow_label;
+	memcpy(&ib->sib_addr, &path->sgid, 16);
+	ib->sib_sid = listen_ib->sib_sid;
+	ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
+	ib->sib_scope_id = listen_ib->sib_scope_id;
 
-		*ip_ver = cma_get_ip_ver(hdr);
-		*port = ((struct cma_hdr *) hdr)->port;
-		*src = &((struct cma_hdr *) hdr)->src_addr;
-		*dst = &((struct cma_hdr *) hdr)->dst_addr;
-		break;
-	}
-
-	if (*ip_ver != 4 && *ip_ver != 6)
-		return -EINVAL;
-	return 0;
+	ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
+	ib->sib_family = listen_ib->sib_family;
+	ib->sib_pkey = path->pkey;
+	ib->sib_flowinfo = path->flow_label;
+	memcpy(&ib->sib_addr, &path->dgid, 16);
 }
 
-static void cma_save_net_info(struct rdma_addr *addr,
-			      struct rdma_addr *listen_addr,
-			      u8 ip_ver, __be16 port,
-			      union cma_ip_addr *src, union cma_ip_addr *dst)
+static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			      struct cma_hdr *hdr)
 {
 	struct sockaddr_in *listen4, *ip4;
+
+	listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
+	ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
+	ip4->sin_family = listen4->sin_family;
+	ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
+	ip4->sin_port = listen4->sin_port;
+
+	ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
+	ip4->sin_family = listen4->sin_family;
+	ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
+	ip4->sin_port = hdr->port;
+}
+
+static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			      struct cma_hdr *hdr)
+{
 	struct sockaddr_in6 *listen6, *ip6;
 
-	switch (ip_ver) {
+	listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
+	ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
+	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_addr = hdr->dst_addr.ip6;
+	ip6->sin6_port = listen6->sin6_port;
+
+	ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
+	ip6->sin6_family = listen6->sin6_family;
+	ip6->sin6_addr = hdr->src_addr.ip6;
+	ip6->sin6_port = hdr->port;
+}
+
+static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
+			     struct ib_cm_event *ib_event)
+{
+	struct cma_hdr *hdr;
+
+	if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
+		cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
+		return 0;
+	}
+
+	hdr = ib_event->private_data;
+	if (hdr->cma_version != CMA_VERSION)
+		return -EINVAL;
+
+	switch (cma_get_ip_ver(hdr)) {
 	case 4:
-		listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
-		ip4 = (struct sockaddr_in *) &addr->src_addr;
-		ip4->sin_family = listen4->sin_family;
-		ip4->sin_addr.s_addr = dst->ip4.addr;
-		ip4->sin_port = listen4->sin_port;
-
-		ip4 = (struct sockaddr_in *) &addr->dst_addr;
-		ip4->sin_family = listen4->sin_family;
-		ip4->sin_addr.s_addr = src->ip4.addr;
-		ip4->sin_port = port;
+		cma_save_ip4_info(id, listen_id, hdr);
 		break;
 	case 6:
-		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
-		ip6 = (struct sockaddr_in6 *) &addr->src_addr;
-		ip6->sin6_family = listen6->sin6_family;
-		ip6->sin6_addr = dst->ip6;
-		ip6->sin6_port = listen6->sin6_port;
-
-		ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
-		ip6->sin6_family = listen6->sin6_family;
-		ip6->sin6_addr = src->ip6;
-		ip6->sin6_port = port;
+		cma_save_ip6_info(id, listen_id, hdr);
 		break;
 	default:
-		break;
+		return -EINVAL;
 	}
+	return 0;
 }
 
-static inline int cma_user_data_offset(enum rdma_port_space ps)
+static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
 {
-	switch (ps) {
-	case RDMA_PS_SDP:
-		return 0;
-	default:
-		return sizeof(struct cma_hdr);
-	}
+	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
 }
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -861,8 +955,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
 		cma_cancel_route(id_priv);
 		break;
 	case RDMA_CM_LISTEN:
-		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)
-				&& !id_priv->cma_dev)
+		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
 			cma_cancel_listens(id_priv);
 		break;
 	default:
@@ -977,16 +1070,6 @@ reject:
 	return ret;
 }
 
-static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
-{
-	if (id_priv->id.ps == RDMA_PS_SDP &&
-	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
-	    SDP_MAJ_VERSION)
-		return -EINVAL;
-
-	return 0;
-}
-
 static void cma_set_rep_event_data(struct rdma_cm_event *event,
 				   struct ib_cm_rep_event_param *rep_data,
 				   void *private_data)
@@ -1021,15 +1104,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.status = -ETIMEDOUT;
 		break;
 	case IB_CM_REP_RECEIVED:
-		event.status = cma_verify_rep(id_priv, ib_event->private_data);
-		if (event.status)
-			event.event = RDMA_CM_EVENT_CONNECT_ERROR;
-		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
+		if (id_priv->id.qp) {
 			event.status = cma_rep_recv(id_priv);
 			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
 						     RDMA_CM_EVENT_ESTABLISHED;
-		} else
+		} else {
 			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
+		}
 		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
 				       ib_event->private_data);
 		break;
@@ -1085,22 +1166,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
 	struct rdma_route *rt;
-	union cma_ip_addr *src, *dst;
-	__be16 port;
-	u8 ip_ver;
 	int ret;
 
-	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
-			     &ip_ver, &port, &src, &dst))
-		return NULL;
-
 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
 			    listen_id->ps, ib_event->param.req_rcvd.qp_type);
 	if (IS_ERR(id))
 		return NULL;
 
-	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
-			  ip_ver, port, src, dst);
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (cma_save_net_info(id, listen_id, ib_event))
+		goto err;
 
 	rt = &id->route;
 	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
@@ -1113,19 +1188,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	if (rt->num_paths == 2)
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+	if (cma_any_addr(cma_src_addr(id_priv))) {
 		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
 		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
 		ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
 	} else {
-		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
-					&rt->addr.dev_addr);
+		ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
 		if (ret)
 			goto err;
 	}
 	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
-	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 
@@ -1139,9 +1212,6 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 {
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
-	union cma_ip_addr *src, *dst;
-	__be16 port;
-	u8 ip_ver;
 	int ret;
 
 	id = rdma_create_id(listen_id->event_handler, listen_id->context,
@@ -1149,22 +1219,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 	if (IS_ERR(id))
 		return NULL;
 
-
-	if (cma_get_net_info(ib_event->private_data, listen_id->ps,
-			     &ip_ver, &port, &src, &dst))
+	id_priv = container_of(id, struct rdma_id_private, id);
+	if (cma_save_net_info(id, listen_id, ib_event))
 		goto err;
 
-	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
-			  ip_ver, port, src, dst);
-
 	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
-		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-					&id->route.addr.dev_addr);
+		ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
 		if (ret)
 			goto err;
 	}
 
-	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = RDMA_CM_CONNECT;
 	return id_priv;
 err:
@@ -1210,7 +1274,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		return -ECONNABORTED;
 
 	memset(&event, 0, sizeof event);
-	offset = cma_user_data_offset(listen_id->id.ps);
+	offset = cma_user_data_offset(listen_id);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
 		conn_id = cma_new_udp_id(&listen_id->id, ib_event);
@@ -1272,58 +1336,44 @@ err1:
 	return ret;
 }
 
-static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
 {
-	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
+	if (addr->sa_family == AF_IB)
+		return ((struct sockaddr_ib *) addr)->sib_sid;
+
+	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
 }
+EXPORT_SYMBOL(rdma_get_service_id);
 
 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
 				 struct ib_cm_compare_data *compare)
 {
 	struct cma_hdr *cma_data, *cma_mask;
-	struct sdp_hh *sdp_data, *sdp_mask;
 	__be32 ip4_addr;
 	struct in6_addr ip6_addr;
 
 	memset(compare, 0, sizeof *compare);
 	cma_data = (void *) compare->data;
 	cma_mask = (void *) compare->mask;
-	sdp_data = (void *) compare->data;
-	sdp_mask = (void *) compare->mask;
 
 	switch (addr->sa_family) {
 	case AF_INET:
 		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
-		if (ps == RDMA_PS_SDP) {
-			sdp_set_ip_ver(sdp_data, 4);
-			sdp_set_ip_ver(sdp_mask, 0xF);
-			sdp_data->dst_addr.ip4.addr = ip4_addr;
-			sdp_mask->dst_addr.ip4.addr = htonl(~0);
-		} else {
-			cma_set_ip_ver(cma_data, 4);
-			cma_set_ip_ver(cma_mask, 0xF);
-			if (!cma_any_addr(addr)) {
-				cma_data->dst_addr.ip4.addr = ip4_addr;
-				cma_mask->dst_addr.ip4.addr = htonl(~0);
-			}
+		cma_set_ip_ver(cma_data, 4);
+		cma_set_ip_ver(cma_mask, 0xF);
+		if (!cma_any_addr(addr)) {
+			cma_data->dst_addr.ip4.addr = ip4_addr;
+			cma_mask->dst_addr.ip4.addr = htonl(~0);
 		}
 		break;
 	case AF_INET6:
 		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
-		if (ps == RDMA_PS_SDP) {
-			sdp_set_ip_ver(sdp_data, 6);
-			sdp_set_ip_ver(sdp_mask, 0xF);
-			sdp_data->dst_addr.ip6 = ip6_addr;
-			memset(&sdp_mask->dst_addr.ip6, 0xFF,
-			       sizeof sdp_mask->dst_addr.ip6);
-		} else {
-			cma_set_ip_ver(cma_data, 6);
-			cma_set_ip_ver(cma_mask, 0xF);
-			if (!cma_any_addr(addr)) {
-				cma_data->dst_addr.ip6 = ip6_addr;
-				memset(&cma_mask->dst_addr.ip6, 0xFF,
-				       sizeof cma_mask->dst_addr.ip6);
-			}
+		cma_set_ip_ver(cma_data, 6);
+		cma_set_ip_ver(cma_mask, 0xF);
+		if (!cma_any_addr(addr)) {
+			cma_data->dst_addr.ip6 = ip6_addr;
+			memset(&cma_mask->dst_addr.ip6, 0xFF,
+			       sizeof cma_mask->dst_addr.ip6);
 		}
 		break;
 	default:
@@ -1347,9 +1397,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
 		break;
 	case IW_CM_EVENT_CONNECT_REPLY:
-		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+		sin = (struct sockaddr_in *) cma_src_addr(id_priv);
 		*sin = iw_event->local_addr;
-		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+		sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
 		*sin = iw_event->remote_addr;
 		switch (iw_event->status) {
 		case 0:
@@ -1447,9 +1497,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	cm_id->context = conn_id;
 	cm_id->cm_handler = cma_iw_handler;
 
-	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+	sin = (struct sockaddr_in *) cma_src_addr(conn_id);
 	*sin = iw_event->local_addr;
-	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+	sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
 	*sin = iw_event->remote_addr;
 
 	ret = ib_query_device(conn_id->id.device, &attr);
@@ -1506,8 +1556,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 
 	id_priv->cm_id.ib = id;
 
-	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	svc_id = cma_get_service_id(id_priv->id.ps, addr);
+	addr = cma_src_addr(id_priv);
+	svc_id = rdma_get_service_id(&id_priv->id, addr);
 	if (cma_any_addr(addr) && !id_priv->afonly)
 		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
 	else {
@@ -1537,7 +1587,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 
 	id_priv->cm_id.iw = id;
 
-	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+	sin = (struct sockaddr_in *) cma_src_addr(id_priv);
 	id_priv->cm_id.iw->local_addr = *sin;
 
 	ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -1567,6 +1617,10 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	struct rdma_cm_id *id;
 	int ret;
 
+	if (cma_family(id_priv) == AF_IB &&
+	    rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
+		return;
+
 	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
 			    id_priv->id.qp_type);
 	if (IS_ERR(id))
@@ -1575,8 +1629,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 	dev_id_priv = container_of(id, struct rdma_id_private, id);
 
 	dev_id_priv->state = RDMA_CM_ADDR_BOUND;
-	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
-	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
+	       rdma_addr_size(cma_src_addr(id_priv)));
 
 	cma_attach_to_dev(dev_id_priv, cma_dev);
 	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1634,31 +1688,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
 			      struct cma_work *work)
 {
-	struct rdma_addr *addr = &id_priv->id.route.addr;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct ib_sa_path_rec path_rec;
 	ib_sa_comp_mask comp_mask;
 	struct sockaddr_in6 *sin6;
+	struct sockaddr_ib *sib;
 
 	memset(&path_rec, 0, sizeof path_rec);
-	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
-	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
-	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
+	rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
+	rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
+	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	path_rec.numb_path = 1;
 	path_rec.reversible = 1;
-	path_rec.service_id = cma_get_service_id(id_priv->id.ps,
-						 (struct sockaddr *) &addr->dst_addr);
+	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 
 	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
 		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
 		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
 
-	if (addr->src_addr.ss_family == AF_INET) {
+	switch (cma_family(id_priv)) {
+	case AF_INET:
 		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
 		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
-	} else {
-		sin6 = (struct sockaddr_in6 *) &addr->src_addr;
+		break;
+	case AF_INET6:
+		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
 		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
 		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+		break;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
+		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
+		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
+		break;
 	}
 
 	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
@@ -1800,14 +1862,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	struct rdma_addr *addr = &route->addr;
 	struct cma_work *work;
 	int ret;
-	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
-	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
 	struct net_device *ndev = NULL;
 	u16 vid;
 
-	if (src_addr->sin_family != dst_addr->sin_family)
-		return -EINVAL;
-
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (!work)
 		return -ENOMEM;
@@ -1913,28 +1970,57 @@ err:
 }
 EXPORT_SYMBOL(rdma_resolve_route);
 
+static void cma_set_loopback(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		break;
+	case AF_INET6:
+		ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
+			      0, 0, 0, htonl(1));
+		break;
+	default:
+		ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
+			    0, 0, 0, htonl(1));
+		break;
+	}
+}
+
 static int cma_bind_loopback(struct rdma_id_private *id_priv)
 {
-	struct cma_device *cma_dev;
+	struct cma_device *cma_dev, *cur_dev;
 	struct ib_port_attr port_attr;
 	union ib_gid gid;
 	u16 pkey;
 	int ret;
 	u8 p;
 
+	cma_dev = NULL;
 	mutex_lock(&lock);
-	if (list_empty(&dev_list)) {
+	list_for_each_entry(cur_dev, &dev_list, list) {
+		if (cma_family(id_priv) == AF_IB &&
+		    rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
+			continue;
+
+		if (!cma_dev)
+			cma_dev = cur_dev;
+
+		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
+			if (!ib_query_port(cur_dev->device, p, &port_attr) &&
+			    port_attr.state == IB_PORT_ACTIVE) {
+				cma_dev = cur_dev;
+				goto port_found;
+			}
+		}
+	}
+
+	if (!cma_dev) {
 		ret = -ENODEV;
 		goto out;
 	}
-	list_for_each_entry(cma_dev, &dev_list, list)
-		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
-			if (!ib_query_port(cma_dev->device, p, &port_attr) &&
-			    port_attr.state == IB_PORT_ACTIVE)
-				goto port_found;
 
 	p = 1;
-	cma_dev = list_entry(dev_list.next, struct cma_device, list);
 
 port_found:
 	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
@@ -1953,6 +2039,7 @@ port_found:
 	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
 	id_priv->id.port_num = p;
 	cma_attach_to_dev(id_priv, cma_dev);
+	cma_set_loopback(cma_src_addr(id_priv));
 out:
 	mutex_unlock(&lock);
 	return ret;
@@ -1980,8 +2067,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
 		event.status = status;
 	} else {
-		memcpy(&id_priv->id.route.addr.src_addr, src_addr,
-		       ip_addr_size(src_addr));
+		memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
 	}
 
@@ -2000,7 +2086,6 @@ out:
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 {
 	struct cma_work *work;
-	struct sockaddr *src, *dst;
 	union ib_gid gid;
 	int ret;
 
@@ -2017,18 +2102,36 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
 	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
-	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
-	if (cma_zero_addr(src)) {
-		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
-		if ((src->sa_family = dst->sa_family) == AF_INET) {
-			((struct sockaddr_in *)src)->sin_addr =
-				((struct sockaddr_in *)dst)->sin_addr;
-		} else {
-			((struct sockaddr_in6 *)src)->sin6_addr =
-				((struct sockaddr_in6 *)dst)->sin6_addr;
-		}
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler);
+	work->old_state = RDMA_CM_ADDR_QUERY;
+	work->new_state = RDMA_CM_ADDR_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
+	queue_work(cma_wq, &work->work);
+	return 0;
+err:
+	kfree(work);
+	return ret;
+}
+
+static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
+{
+	struct cma_work *work;
+	int ret;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	if (!id_priv->cma_dev) {
+		ret = cma_resolve_ib_dev(id_priv);
+		if (ret)
+			goto err;
 	}
 
+	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
+		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
+
 	work->id = id_priv;
 	INIT_WORK(&work->work, cma_work_handler);
 	work->old_state = RDMA_CM_ADDR_QUERY;
@@ -2046,9 +2149,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 {
 	if (!src_addr || !src_addr->sa_family) {
 		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
-		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+		src_addr->sa_family = dst_addr->sa_family;
+		if (dst_addr->sa_family == AF_INET6) {
 			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
 				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+		} else if (dst_addr->sa_family == AF_IB) {
+			((struct sockaddr_ib *) src_addr)->sib_pkey =
+				((struct sockaddr_ib *) dst_addr)->sib_pkey;
 		}
 	}
 	return rdma_bind_addr(id, src_addr);
@@ -2067,17 +2174,25 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 		return ret;
 	}
 
+	if (cma_family(id_priv) != dst_addr->sa_family)
+		return -EINVAL;
+
 	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
 		return -EINVAL;
 
 	atomic_inc(&id_priv->refcount);
-	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
-	if (cma_any_addr(dst_addr))
+	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
+	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
-	else
-		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr,
-				      dst_addr, &id->route.addr.dev_addr,
-				      timeout_ms, addr_handler, id_priv);
+	} else {
+		if (dst_addr->sa_family == AF_IB) {
+			ret = cma_resolve_ib_addr(id_priv);
+		} else {
+			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+					      dst_addr, &id->route.addr.dev_addr,
+					      timeout_ms, addr_handler, id_priv);
+		}
+	}
 	if (ret)
 		goto err;
 
@@ -2097,7 +2212,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	spin_lock_irqsave(&id_priv->lock, flags);
-	if (id_priv->state == RDMA_CM_IDLE) {
+	if (reuse || id_priv->state == RDMA_CM_IDLE) {
 		id_priv->reuseaddr = reuse;
 		ret = 0;
 	} else {
@@ -2131,10 +2246,29 @@ EXPORT_SYMBOL(rdma_set_afonly);
 static void cma_bind_port(struct rdma_bind_list *bind_list,
 			  struct rdma_id_private *id_priv)
 {
-	struct sockaddr_in *sin;
+	struct sockaddr *addr;
+	struct sockaddr_ib *sib;
+	u64 sid, mask;
+	__be16 port;
 
-	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
-	sin->sin_port = htons(bind_list->port);
+	addr = cma_src_addr(id_priv);
+	port = htons(bind_list->port);
+
+	switch (addr->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *) addr)->sin_port = port;
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *) addr)->sin6_port = port;
+		break;
+	case AF_IB:
+		sib = (struct sockaddr_ib *) addr;
+		sid = be64_to_cpu(sib->sib_sid);
+		mask = be64_to_cpu(sib->sib_sid_mask);
+		sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
+		sib->sib_sid_mask = cpu_to_be64(~0ULL);
+		break;
+	}
 	id_priv->bind_list = bind_list;
 	hlist_add_head(&id_priv->node, &bind_list->owners);
 }
@@ -2205,7 +2339,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 	struct rdma_id_private *cur_id;
 	struct sockaddr *addr, *cur_addr;
 
-	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+	addr = cma_src_addr(id_priv);
 	hlist_for_each_entry(cur_id, &bind_list->owners, node) {
 		if (id_priv == cur_id)
 			continue;
@@ -2214,7 +2348,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
 		    cur_id->reuseaddr)
 			continue;
 
-		cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
+		cur_addr = cma_src_addr(cur_id);
 		if (id_priv->afonly && cur_id->afonly &&
 		    (addr->sa_family != cur_addr->sa_family))
 			continue;
@@ -2234,7 +2368,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
 	unsigned short snum;
 	int ret;
 
-	snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr));
+	snum = ntohs(cma_port(cma_src_addr(id_priv)));
 	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
@@ -2261,33 +2395,67 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
2261 return ret; 2395 return ret;
2262} 2396}
2263 2397
2264static int cma_get_port(struct rdma_id_private *id_priv) 2398static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
2265{ 2399{
2266 struct idr *ps;
2267 int ret;
2268
2269 switch (id_priv->id.ps) { 2400 switch (id_priv->id.ps) {
2270 case RDMA_PS_SDP:
2271 ps = &sdp_ps;
2272 break;
2273 case RDMA_PS_TCP: 2401 case RDMA_PS_TCP:
2274 ps = &tcp_ps; 2402 return &tcp_ps;
2275 break;
2276 case RDMA_PS_UDP: 2403 case RDMA_PS_UDP:
2277 ps = &udp_ps; 2404 return &udp_ps;
2278 break;
2279 case RDMA_PS_IPOIB: 2405 case RDMA_PS_IPOIB:
2280 ps = &ipoib_ps; 2406 return &ipoib_ps;
2281 break;
2282 case RDMA_PS_IB: 2407 case RDMA_PS_IB:
2283 ps = &ib_ps; 2408 return &ib_ps;
2284 break;
2285 default: 2409 default:
2286 return -EPROTONOSUPPORT; 2410 return NULL;
2411 }
2412}
2413
2414static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
2415{
2416 struct idr *ps = NULL;
2417 struct sockaddr_ib *sib;
2418 u64 sid_ps, mask, sid;
2419
2420 sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2421 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
2422 sid = be64_to_cpu(sib->sib_sid) & mask;
2423
2424 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
2425 sid_ps = RDMA_IB_IP_PS_IB;
2426 ps = &ib_ps;
2427 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
2428 (sid == (RDMA_IB_IP_PS_TCP & mask))) {
2429 sid_ps = RDMA_IB_IP_PS_TCP;
2430 ps = &tcp_ps;
2431 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
2432 (sid == (RDMA_IB_IP_PS_UDP & mask))) {
2433 sid_ps = RDMA_IB_IP_PS_UDP;
2434 ps = &udp_ps;
2287 } 2435 }
2288 2436
2437 if (ps) {
2438 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
2439 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
2440 be64_to_cpu(sib->sib_sid_mask));
2441 }
2442 return ps;
2443}
2444
2445static int cma_get_port(struct rdma_id_private *id_priv)
2446{
2447 struct idr *ps;
2448 int ret;
2449
2450 if (cma_family(id_priv) != AF_IB)
2451 ps = cma_select_inet_ps(id_priv);
2452 else
2453 ps = cma_select_ib_ps(id_priv);
2454 if (!ps)
2455 return -EPROTONOSUPPORT;
2456
2289 mutex_lock(&lock); 2457 mutex_lock(&lock);
2290 if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) 2458 if (cma_any_port(cma_src_addr(id_priv)))
2291 ret = cma_alloc_any_port(ps, id_priv); 2459 ret = cma_alloc_any_port(ps, id_priv);
2292 else 2460 else
2293 ret = cma_use_port(ps, id_priv); 2461 ret = cma_use_port(ps, id_priv);
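Note on the hunk above: for AF_IB bindings, cma_get_port() now derives the port space from the service ID instead of the socket port space alone; the masked upper bits of sib_sid name the service (IB, TCP or UDP semantics) and the low 16 bits carry the port, which cma_select_ib_ps() then folds back into sib_sid. Below is a minimal userspace sketch of that mapping. The prefix constants and the dropped cross-check against the id's requested port space are simplifications, not the kernel's RDMA_IB_IP_PS_* definitions.

#include <stdint.h>
#include <stdio.h>

#define TOY_PS_MASK 0xFFFFFFFFFFFF0000ULL  /* assumed: upper bits name the service, low 16 the port */
#define TOY_PS_IB   0x0ULL                 /* stand-in prefix for native IB service IDs */
#define TOY_PS_TCP  0x0000000001060000ULL  /* stand-in, not the kernel's RDMA_IB_IP_PS_TCP */
#define TOY_PS_UDP  0x0000000001110000ULL  /* stand-in, not the kernel's RDMA_IB_IP_PS_UDP */

static const char *select_ib_ps(uint64_t sid, uint64_t sid_mask)
{
	uint64_t mask = sid_mask & TOY_PS_MASK;
	uint64_t masked = sid & mask;

	/* The kernel also checks the id's requested port space here;
	 * omitted in this sketch, so a wildcard mask maps to ib_ps. */
	if (masked == (TOY_PS_IB & mask))
		return "ib_ps";
	if (masked == (TOY_PS_TCP & mask))
		return "tcp_ps";
	if (masked == (TOY_PS_UDP & mask))
		return "udp_ps";
	return NULL;	/* no matching port space: cma_get_port() returns -EPROTONOSUPPORT */
}

int main(void)
{
	uint64_t sid = TOY_PS_TCP | 4791;	/* example: TCP-semantics service ID, port 4791 */
	printf("%s, port %u\n", select_ib_ps(sid, ~0ULL), (unsigned)(sid & 0xffff));
	return 0;
}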
@@ -2322,8 +2490,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
2322 2490
2323 id_priv = container_of(id, struct rdma_id_private, id); 2491 id_priv = container_of(id, struct rdma_id_private, id);
2324 if (id_priv->state == RDMA_CM_IDLE) { 2492 if (id_priv->state == RDMA_CM_IDLE) {
2325 ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; 2493 id->route.addr.src_addr.ss_family = AF_INET;
2326 ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); 2494 ret = rdma_bind_addr(id, cma_src_addr(id_priv));
2327 if (ret) 2495 if (ret)
2328 return ret; 2496 return ret;
2329 } 2497 }
@@ -2370,7 +2538,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2370 struct rdma_id_private *id_priv; 2538 struct rdma_id_private *id_priv;
2371 int ret; 2539 int ret;
2372 2540
2373 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) 2541 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
2542 addr->sa_family != AF_IB)
2374 return -EAFNOSUPPORT; 2543 return -EAFNOSUPPORT;
2375 2544
2376 id_priv = container_of(id, struct rdma_id_private, id); 2545 id_priv = container_of(id, struct rdma_id_private, id);
@@ -2382,7 +2551,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2382 goto err1; 2551 goto err1;
2383 2552
2384 if (!cma_any_addr(addr)) { 2553 if (!cma_any_addr(addr)) {
2385 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); 2554 ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
2386 if (ret) 2555 if (ret)
2387 goto err1; 2556 goto err1;
2388 2557
@@ -2391,7 +2560,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2391 goto err1; 2560 goto err1;
2392 } 2561 }
2393 2562
2394 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); 2563 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
2395 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 2564 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
2396 if (addr->sa_family == AF_INET) 2565 if (addr->sa_family == AF_INET)
2397 id_priv->afonly = 1; 2566 id_priv->afonly = 1;
@@ -2414,62 +2583,32 @@ err1:
2414} 2583}
2415EXPORT_SYMBOL(rdma_bind_addr); 2584EXPORT_SYMBOL(rdma_bind_addr);
2416 2585
2417static int cma_format_hdr(void *hdr, enum rdma_port_space ps, 2586static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
2418 struct rdma_route *route)
2419{ 2587{
2420 struct cma_hdr *cma_hdr; 2588 struct cma_hdr *cma_hdr;
2421 struct sdp_hh *sdp_hdr;
2422 2589
2423 if (route->addr.src_addr.ss_family == AF_INET) { 2590 cma_hdr = hdr;
2591 cma_hdr->cma_version = CMA_VERSION;
2592 if (cma_family(id_priv) == AF_INET) {
2424 struct sockaddr_in *src4, *dst4; 2593 struct sockaddr_in *src4, *dst4;
2425 2594
2426 src4 = (struct sockaddr_in *) &route->addr.src_addr; 2595 src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
2427 dst4 = (struct sockaddr_in *) &route->addr.dst_addr; 2596 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
2428 2597
2429 switch (ps) { 2598 cma_set_ip_ver(cma_hdr, 4);
2430 case RDMA_PS_SDP: 2599 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2431 sdp_hdr = hdr; 2600 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2432 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) 2601 cma_hdr->port = src4->sin_port;
2433 return -EINVAL; 2602 } else if (cma_family(id_priv) == AF_INET6) {
2434 sdp_set_ip_ver(sdp_hdr, 4);
2435 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2436 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2437 sdp_hdr->port = src4->sin_port;
2438 break;
2439 default:
2440 cma_hdr = hdr;
2441 cma_hdr->cma_version = CMA_VERSION;
2442 cma_set_ip_ver(cma_hdr, 4);
2443 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2444 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2445 cma_hdr->port = src4->sin_port;
2446 break;
2447 }
2448 } else {
2449 struct sockaddr_in6 *src6, *dst6; 2603 struct sockaddr_in6 *src6, *dst6;
2450 2604
2451 src6 = (struct sockaddr_in6 *) &route->addr.src_addr; 2605 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2452 dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; 2606 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
2453 2607
2454 switch (ps) { 2608 cma_set_ip_ver(cma_hdr, 6);
2455 case RDMA_PS_SDP: 2609 cma_hdr->src_addr.ip6 = src6->sin6_addr;
2456 sdp_hdr = hdr; 2610 cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2457 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) 2611 cma_hdr->port = src6->sin6_port;
2458 return -EINVAL;
2459 sdp_set_ip_ver(sdp_hdr, 6);
2460 sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2461 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2462 sdp_hdr->port = src6->sin6_port;
2463 break;
2464 default:
2465 cma_hdr = hdr;
2466 cma_hdr->cma_version = CMA_VERSION;
2467 cma_set_ip_ver(cma_hdr, 6);
2468 cma_hdr->src_addr.ip6 = src6->sin6_addr;
2469 cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2470 cma_hdr->port = src6->sin6_port;
2471 break;
2472 }
2473 } 2612 }
2474 return 0; 2613 return 0;
2475} 2614}
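With SDP support removed, cma_format_hdr() above always emits a single CMA header whose layout is chosen purely by address family. The toy program below models that flow for IPv4 only; struct toy_hdr is a simplified stand-in, not the on-the-wire struct cma_hdr, and the addresses and port are arbitrary example values.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_hdr {
	uint8_t  ip_version;		/* 4 or 6; the kernel packs this via cma_set_ip_ver() */
	uint16_t port;			/* copied in network byte order, as in the hunk above */
	uint32_t src_v4, dst_v4;	/* IPv4 only in this sketch */
};

static void toy_format_hdr(struct toy_hdr *hdr,
			   const struct sockaddr_in *src,
			   const struct sockaddr_in *dst)
{
	memset(hdr, 0, sizeof(*hdr));
	hdr->ip_version = 4;
	hdr->port = src->sin_port;
	hdr->src_v4 = src->sin_addr.s_addr;
	hdr->dst_v4 = dst->sin_addr.s_addr;
}

int main(void)
{
	struct sockaddr_in src = { .sin_family = AF_INET, .sin_port = htons(18515) };
	struct sockaddr_in dst = src;
	struct toy_hdr hdr;

	inet_pton(AF_INET, "192.0.2.1", &src.sin_addr);
	inet_pton(AF_INET, "192.0.2.2", &dst.sin_addr);
	toy_format_hdr(&hdr, &src, &dst);
	printf("v%u port %u\n", hdr.ip_version, ntohs(hdr.port));
	return 0;
}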
@@ -2499,15 +2638,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2499 event.status = ib_event->param.sidr_rep_rcvd.status; 2638 event.status = ib_event->param.sidr_rep_rcvd.status;
2500 break; 2639 break;
2501 } 2640 }
2502 ret = cma_set_qkey(id_priv); 2641 ret = cma_set_qkey(id_priv, rep->qkey);
2503 if (ret) { 2642 if (ret) {
2504 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2643 event.event = RDMA_CM_EVENT_ADDR_ERROR;
2505 event.status = -EINVAL; 2644 event.status = ret;
2506 break;
2507 }
2508 if (id_priv->qkey != rep->qkey) {
2509 event.event = RDMA_CM_EVENT_UNREACHABLE;
2510 event.status = -EINVAL;
2511 break; 2645 break;
2512 } 2646 }
2513 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, 2647 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
@@ -2542,27 +2676,31 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2542 struct rdma_conn_param *conn_param) 2676 struct rdma_conn_param *conn_param)
2543{ 2677{
2544 struct ib_cm_sidr_req_param req; 2678 struct ib_cm_sidr_req_param req;
2545 struct rdma_route *route;
2546 struct ib_cm_id *id; 2679 struct ib_cm_id *id;
2547 int ret; 2680 int offset, ret;
2548 2681
2549 req.private_data_len = sizeof(struct cma_hdr) + 2682 offset = cma_user_data_offset(id_priv);
2550 conn_param->private_data_len; 2683 req.private_data_len = offset + conn_param->private_data_len;
2551 if (req.private_data_len < conn_param->private_data_len) 2684 if (req.private_data_len < conn_param->private_data_len)
2552 return -EINVAL; 2685 return -EINVAL;
2553 2686
2554 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2687 if (req.private_data_len) {
2555 if (!req.private_data) 2688 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2556 return -ENOMEM; 2689 if (!req.private_data)
2690 return -ENOMEM;
2691 } else {
2692 req.private_data = NULL;
2693 }
2557 2694
2558 if (conn_param->private_data && conn_param->private_data_len) 2695 if (conn_param->private_data && conn_param->private_data_len)
2559 memcpy((void *) req.private_data + sizeof(struct cma_hdr), 2696 memcpy((void *) req.private_data + offset,
2560 conn_param->private_data, conn_param->private_data_len); 2697 conn_param->private_data, conn_param->private_data_len);
2561 2698
2562 route = &id_priv->id.route; 2699 if (req.private_data) {
2563 ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); 2700 ret = cma_format_hdr((void *) req.private_data, id_priv);
2564 if (ret) 2701 if (ret)
2565 goto out; 2702 goto out;
2703 }
2566 2704
2567 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 2705 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
2568 id_priv); 2706 id_priv);
@@ -2572,9 +2710,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2572 } 2710 }
2573 id_priv->cm_id.ib = id; 2711 id_priv->cm_id.ib = id;
2574 2712
2575 req.path = route->path_rec; 2713 req.path = id_priv->id.route.path_rec;
2576 req.service_id = cma_get_service_id(id_priv->id.ps, 2714 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2577 (struct sockaddr *) &route->addr.dst_addr);
2578 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 2715 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2579 req.max_cm_retries = CMA_MAX_CM_RETRIES; 2716 req.max_cm_retries = CMA_MAX_CM_RETRIES;
2580 2717
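The SIDR request path above now only allocates a private-data buffer when there is something to put in it: with an AF_IB destination cma_user_data_offset() contributes no IP header, so a connect with no user data sends no private data at all. A small sketch of that pattern, assuming a hypothetical build_private_data() helper for illustration:

#include <stdlib.h>
#include <string.h>

/* Returns a zeroed buffer of hdr_len + user_len bytes with the user data
 * copied at hdr_len, or NULL.  *total == 0 means "nothing to send" (valid);
 * NULL with *total > 0 means allocation failure. */
static void *build_private_data(size_t hdr_len, const void *user,
				size_t user_len, size_t *total)
{
	void *buf;

	*total = hdr_len + user_len;
	if (!*total)
		return NULL;

	buf = calloc(1, *total);
	if (!buf)
		return NULL;
	if (user && user_len)
		memcpy((char *)buf + hdr_len, user, user_len);
	return buf;
}

int main(void)
{
	size_t total;
	void *pd = build_private_data(0, NULL, 0, &total);	/* AF_IB, no user data */
	free(pd);
	return total != 0;
}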
@@ -2598,14 +2735,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2598 int offset, ret; 2735 int offset, ret;
2599 2736
2600 memset(&req, 0, sizeof req); 2737 memset(&req, 0, sizeof req);
2601 offset = cma_user_data_offset(id_priv->id.ps); 2738 offset = cma_user_data_offset(id_priv);
2602 req.private_data_len = offset + conn_param->private_data_len; 2739 req.private_data_len = offset + conn_param->private_data_len;
2603 if (req.private_data_len < conn_param->private_data_len) 2740 if (req.private_data_len < conn_param->private_data_len)
2604 return -EINVAL; 2741 return -EINVAL;
2605 2742
2606 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2743 if (req.private_data_len) {
2607 if (!private_data) 2744 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2608 return -ENOMEM; 2745 if (!private_data)
2746 return -ENOMEM;
2747 } else {
2748 private_data = NULL;
2749 }
2609 2750
2610 if (conn_param->private_data && conn_param->private_data_len) 2751 if (conn_param->private_data && conn_param->private_data_len)
2611 memcpy(private_data + offset, conn_param->private_data, 2752 memcpy(private_data + offset, conn_param->private_data,
@@ -2619,17 +2760,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2619 id_priv->cm_id.ib = id; 2760 id_priv->cm_id.ib = id;
2620 2761
2621 route = &id_priv->id.route; 2762 route = &id_priv->id.route;
2622 ret = cma_format_hdr(private_data, id_priv->id.ps, route); 2763 if (private_data) {
2623 if (ret) 2764 ret = cma_format_hdr(private_data, id_priv);
2624 goto out; 2765 if (ret)
2625 req.private_data = private_data; 2766 goto out;
2767 req.private_data = private_data;
2768 }
2626 2769
2627 req.primary_path = &route->path_rec[0]; 2770 req.primary_path = &route->path_rec[0];
2628 if (route->num_paths == 2) 2771 if (route->num_paths == 2)
2629 req.alternate_path = &route->path_rec[1]; 2772 req.alternate_path = &route->path_rec[1];
2630 2773
2631 req.service_id = cma_get_service_id(id_priv->id.ps, 2774 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2632 (struct sockaddr *) &route->addr.dst_addr);
2633 req.qp_num = id_priv->qp_num; 2775 req.qp_num = id_priv->qp_num;
2634 req.qp_type = id_priv->id.qp_type; 2776 req.qp_type = id_priv->id.qp_type;
2635 req.starting_psn = id_priv->seq_num; 2777 req.starting_psn = id_priv->seq_num;
@@ -2668,10 +2810,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
2668 2810
2669 id_priv->cm_id.iw = cm_id; 2811 id_priv->cm_id.iw = cm_id;
2670 2812
2671 sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; 2813 sin = (struct sockaddr_in *) cma_src_addr(id_priv);
2672 cm_id->local_addr = *sin; 2814 cm_id->local_addr = *sin;
2673 2815
2674 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; 2816 sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
2675 cm_id->remote_addr = *sin; 2817 cm_id->remote_addr = *sin;
2676 2818
2677 ret = cma_modify_qp_rtr(id_priv, conn_param); 2819 ret = cma_modify_qp_rtr(id_priv, conn_param);
@@ -2789,7 +2931,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
2789} 2931}
2790 2932
2791static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 2933static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2792 enum ib_cm_sidr_status status, 2934 enum ib_cm_sidr_status status, u32 qkey,
2793 const void *private_data, int private_data_len) 2935 const void *private_data, int private_data_len)
2794{ 2936{
2795 struct ib_cm_sidr_rep_param rep; 2937 struct ib_cm_sidr_rep_param rep;
@@ -2798,7 +2940,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2798 memset(&rep, 0, sizeof rep); 2940 memset(&rep, 0, sizeof rep);
2799 rep.status = status; 2941 rep.status = status;
2800 if (status == IB_SIDR_SUCCESS) { 2942 if (status == IB_SIDR_SUCCESS) {
2801 ret = cma_set_qkey(id_priv); 2943 ret = cma_set_qkey(id_priv, qkey);
2802 if (ret) 2944 if (ret)
2803 return ret; 2945 return ret;
2804 rep.qp_num = id_priv->qp_num; 2946 rep.qp_num = id_priv->qp_num;
@@ -2832,11 +2974,12 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2832 if (id->qp_type == IB_QPT_UD) { 2974 if (id->qp_type == IB_QPT_UD) {
2833 if (conn_param) 2975 if (conn_param)
2834 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2976 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2977 conn_param->qkey,
2835 conn_param->private_data, 2978 conn_param->private_data,
2836 conn_param->private_data_len); 2979 conn_param->private_data_len);
2837 else 2980 else
2838 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2981 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2839 NULL, 0); 2982 0, NULL, 0);
2840 } else { 2983 } else {
2841 if (conn_param) 2984 if (conn_param)
2842 ret = cma_accept_ib(id_priv, conn_param); 2985 ret = cma_accept_ib(id_priv, conn_param);
@@ -2897,7 +3040,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2897 switch (rdma_node_get_transport(id->device->node_type)) { 3040 switch (rdma_node_get_transport(id->device->node_type)) {
2898 case RDMA_TRANSPORT_IB: 3041 case RDMA_TRANSPORT_IB:
2899 if (id->qp_type == IB_QPT_UD) 3042 if (id->qp_type == IB_QPT_UD)
2900 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 3043 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
2901 private_data, private_data_len); 3044 private_data, private_data_len);
2902 else 3045 else
2903 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3046 ret = ib_send_cm_rej(id_priv->cm_id.ib,
@@ -2958,6 +3101,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2958 cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) 3101 cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
2959 return 0; 3102 return 0;
2960 3103
3104 if (!status)
3105 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
2961 mutex_lock(&id_priv->qp_mutex); 3106 mutex_lock(&id_priv->qp_mutex);
2962 if (!status && id_priv->id.qp) 3107 if (!status && id_priv->id.qp)
2963 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3108 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -3004,6 +3149,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
3004 0xFF10A01B)) { 3149 0xFF10A01B)) {
3005 /* IPv6 address is an SA assigned MGID. */ 3150 /* IPv6 address is an SA assigned MGID. */
3006 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3151 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3152 } else if (addr->sa_family == AF_IB) {
3153 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
3007 } else if ((addr->sa_family == AF_INET6)) { 3154 } else if ((addr->sa_family == AF_INET6)) {
3008 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 3155 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3009 if (id_priv->id.ps == RDMA_PS_UDP) 3156 if (id_priv->id.ps == RDMA_PS_UDP)
@@ -3031,9 +3178,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3031 if (ret) 3178 if (ret)
3032 return ret; 3179 return ret;
3033 3180
3181 ret = cma_set_qkey(id_priv, 0);
3182 if (ret)
3183 return ret;
3184
3034 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 3185 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3035 if (id_priv->id.ps == RDMA_PS_UDP) 3186 rec.qkey = cpu_to_be32(id_priv->qkey);
3036 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3037 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3187 rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3038 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3188 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3039 rec.join_state = 1; 3189 rec.join_state = 1;
@@ -3170,7 +3320,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3170 if (!mc) 3320 if (!mc)
3171 return -ENOMEM; 3321 return -ENOMEM;
3172 3322
3173 memcpy(&mc->addr, addr, ip_addr_size(addr)); 3323 memcpy(&mc->addr, addr, rdma_addr_size(addr));
3174 mc->context = context; 3324 mc->context = context;
3175 mc->id_priv = id_priv; 3325 mc->id_priv = id_priv;
3176 3326
@@ -3215,7 +3365,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3215 id_priv = container_of(id, struct rdma_id_private, id); 3365 id_priv = container_of(id, struct rdma_id_private, id);
3216 spin_lock_irq(&id_priv->lock); 3366 spin_lock_irq(&id_priv->lock);
3217 list_for_each_entry(mc, &id_priv->mc_list, list) { 3367 list_for_each_entry(mc, &id_priv->mc_list, list) {
3218 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { 3368 if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
3219 list_del(&mc->list); 3369 list_del(&mc->list);
3220 spin_unlock_irq(&id_priv->lock); 3370 spin_unlock_irq(&id_priv->lock);
3221 3371
@@ -3436,33 +3586,16 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
3436 id_stats->bound_dev_if = 3586 id_stats->bound_dev_if =
3437 id->route.addr.dev_addr.bound_dev_if; 3587 id->route.addr.dev_addr.bound_dev_if;
3438 3588
3439 if (id->route.addr.src_addr.ss_family == AF_INET) { 3589 if (ibnl_put_attr(skb, nlh,
3440 if (ibnl_put_attr(skb, nlh, 3590 rdma_addr_size(cma_src_addr(id_priv)),
3441 sizeof(struct sockaddr_in), 3591 cma_src_addr(id_priv),
3442 &id->route.addr.src_addr, 3592 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
3443 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { 3593 goto out;
3444 goto out; 3594 if (ibnl_put_attr(skb, nlh,
3445 } 3595 rdma_addr_size(cma_src_addr(id_priv)),
3446 if (ibnl_put_attr(skb, nlh, 3596 cma_dst_addr(id_priv),
3447 sizeof(struct sockaddr_in), 3597 RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
3448 &id->route.addr.dst_addr, 3598 goto out;
3449 RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3450 goto out;
3451 }
3452 } else if (id->route.addr.src_addr.ss_family == AF_INET6) {
3453 if (ibnl_put_attr(skb, nlh,
3454 sizeof(struct sockaddr_in6),
3455 &id->route.addr.src_addr,
3456 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
3457 goto out;
3458 }
3459 if (ibnl_put_attr(skb, nlh,
3460 sizeof(struct sockaddr_in6),
3461 &id->route.addr.dst_addr,
3462 RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3463 goto out;
3464 }
3465 }
3466 3599
3467 id_stats->pid = id_priv->owner; 3600 id_stats->pid = id_priv->owner;
3468 id_stats->port_space = id->ps; 3601 id_stats->port_space = id->ps;
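The netlink stats hunk above collapses the per-family branches because rdma_addr_size() returns the right sockaddr length for AF_INET, AF_INET6 and AF_IB. A userspace model of such a helper is sketched below; the sockaddr_ib stand-in and the AF_IB value are assumptions, since the real definitions live in kernel headers.

#include <netinet/in.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/socket.h>

#define TOY_AF_IB 27	/* assumed value of AF_IB; see <linux/socket.h> */

struct toy_sockaddr_ib {	/* stand-in for struct sockaddr_ib */
	unsigned short sib_family;
	unsigned char  sib_raw[46];
};

static size_t addr_size(const struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:   return sizeof(struct sockaddr_in);
	case AF_INET6:  return sizeof(struct sockaddr_in6);
	case TOY_AF_IB: return sizeof(struct toy_sockaddr_ib);
	default:        return 0;	/* unsupported family */
	}
}

int main(void)
{
	struct sockaddr_in v4 = { .sin_family = AF_INET };
	printf("%zu\n", addr_size((struct sockaddr *)&v4));
	return 0;
}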
@@ -3527,7 +3660,6 @@ static void __exit cma_cleanup(void)
3527 rdma_addr_unregister_client(&addr_client); 3660 rdma_addr_unregister_client(&addr_client);
3528 ib_sa_unregister_client(&sa_client); 3661 ib_sa_unregister_client(&sa_client);
3529 destroy_workqueue(cma_wq); 3662 destroy_workqueue(cma_wq);
3530 idr_destroy(&sdp_ps);
3531 idr_destroy(&tcp_ps); 3663 idr_destroy(&tcp_ps);
3532 idr_destroy(&udp_ps); 3664 idr_destroy(&udp_ps);
3533 idr_destroy(&ipoib_ps); 3665 idr_destroy(&ipoib_ps);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 934f45e79e5e..9838ca484389 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -652,6 +652,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
652} 652}
653EXPORT_SYMBOL(ib_sa_unpack_path); 653EXPORT_SYMBOL(ib_sa_unpack_path);
654 654
655void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
656{
657 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
658}
659EXPORT_SYMBOL(ib_sa_pack_path);
660
655static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, 661static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
656 int status, 662 int status,
657 struct ib_sa_mad *mad) 663 struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 99904f7d59e3..cde1e7b5b85d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -545,8 +545,10 @@ static int add_port(struct ib_device *device, int port_num,
545 545
546 p->gid_group.name = "gids"; 546 p->gid_group.name = "gids";
547 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); 547 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
548 if (!p->gid_group.attrs) 548 if (!p->gid_group.attrs) {
549 ret = -ENOMEM;
549 goto err_remove_pma; 550 goto err_remove_pma;
551 }
550 552
551 ret = sysfs_create_group(&p->kobj, &p->gid_group); 553 ret = sysfs_create_group(&p->kobj, &p->gid_group);
552 if (ret) 554 if (ret)
@@ -555,8 +557,10 @@ static int add_port(struct ib_device *device, int port_num,
555 p->pkey_group.name = "pkeys"; 557 p->pkey_group.name = "pkeys";
556 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, 558 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
557 attr.pkey_tbl_len); 559 attr.pkey_tbl_len);
558 if (!p->pkey_group.attrs) 560 if (!p->pkey_group.attrs) {
561 ret = -ENOMEM;
559 goto err_remove_gid; 562 goto err_remove_gid;
563 }
560 564
561 ret = sysfs_create_group(&p->kobj, &p->pkey_group); 565 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
562 if (ret) 566 if (ret)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 5ca44cd9b00c..b0f189be543b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -47,6 +47,8 @@
47#include <rdma/ib_marshall.h> 47#include <rdma/ib_marshall.h>
48#include <rdma/rdma_cm.h> 48#include <rdma/rdma_cm.h>
49#include <rdma/rdma_cm_ib.h> 49#include <rdma/rdma_cm_ib.h>
50#include <rdma/ib_addr.h>
51#include <rdma/ib.h>
50 52
51MODULE_AUTHOR("Sean Hefty"); 53MODULE_AUTHOR("Sean Hefty");
52MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 54MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -510,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
510 return ret; 512 return ret;
511} 513}
512 514
513static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, 515static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
514 int in_len, int out_len) 516 int in_len, int out_len)
515{ 517{
516 struct rdma_ucm_bind_addr cmd; 518 struct rdma_ucm_bind_ip cmd;
517 struct ucma_context *ctx; 519 struct ucma_context *ctx;
518 int ret; 520 int ret;
519 521
@@ -529,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
529 return ret; 531 return ret;
530} 532}
531 533
534static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
535 int in_len, int out_len)
536{
537 struct rdma_ucm_bind cmd;
538 struct sockaddr *addr;
539 struct ucma_context *ctx;
540 int ret;
541
542 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
543 return -EFAULT;
544
545 addr = (struct sockaddr *) &cmd.addr;
546 if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
547 return -EINVAL;
548
549 ctx = ucma_get_ctx(file, cmd.id);
550 if (IS_ERR(ctx))
551 return PTR_ERR(ctx);
552
553 ret = rdma_bind_addr(ctx->cm_id, addr);
554 ucma_put_ctx(ctx);
555 return ret;
556}
557
558static ssize_t ucma_resolve_ip(struct ucma_file *file,
559 const char __user *inbuf,
560 int in_len, int out_len)
561{
562 struct rdma_ucm_resolve_ip cmd;
563 struct ucma_context *ctx;
564 int ret;
565
566 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
567 return -EFAULT;
568
569 ctx = ucma_get_ctx(file, cmd.id);
570 if (IS_ERR(ctx))
571 return PTR_ERR(ctx);
572
573 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
574 (struct sockaddr *) &cmd.dst_addr,
575 cmd.timeout_ms);
576 ucma_put_ctx(ctx);
577 return ret;
578}
579
532static ssize_t ucma_resolve_addr(struct ucma_file *file, 580static ssize_t ucma_resolve_addr(struct ucma_file *file,
533 const char __user *inbuf, 581 const char __user *inbuf,
534 int in_len, int out_len) 582 int in_len, int out_len)
535{ 583{
536 struct rdma_ucm_resolve_addr cmd; 584 struct rdma_ucm_resolve_addr cmd;
585 struct sockaddr *src, *dst;
537 struct ucma_context *ctx; 586 struct ucma_context *ctx;
538 int ret; 587 int ret;
539 588
540 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 589 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
541 return -EFAULT; 590 return -EFAULT;
542 591
592 src = (struct sockaddr *) &cmd.src_addr;
593 dst = (struct sockaddr *) &cmd.dst_addr;
594 if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
595 !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
596 return -EINVAL;
597
543 ctx = ucma_get_ctx(file, cmd.id); 598 ctx = ucma_get_ctx(file, cmd.id);
544 if (IS_ERR(ctx)) 599 if (IS_ERR(ctx))
545 return PTR_ERR(ctx); 600 return PTR_ERR(ctx);
546 601
547 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 602 ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
548 (struct sockaddr *) &cmd.dst_addr,
549 cmd.timeout_ms);
550 ucma_put_ctx(ctx); 603 ucma_put_ctx(ctx);
551 return ret; 604 return ret;
552} 605}
@@ -649,7 +702,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
649 const char __user *inbuf, 702 const char __user *inbuf,
650 int in_len, int out_len) 703 int in_len, int out_len)
651{ 704{
652 struct rdma_ucm_query_route cmd; 705 struct rdma_ucm_query cmd;
653 struct rdma_ucm_query_route_resp resp; 706 struct rdma_ucm_query_route_resp resp;
654 struct ucma_context *ctx; 707 struct ucma_context *ctx;
655 struct sockaddr *addr; 708 struct sockaddr *addr;
@@ -709,7 +762,162 @@ out:
709 return ret; 762 return ret;
710} 763}
711 764
712static void ucma_copy_conn_param(struct rdma_conn_param *dst, 765static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
766 struct rdma_ucm_query_addr_resp *resp)
767{
768 if (!cm_id->device)
769 return;
770
771 resp->node_guid = (__force __u64) cm_id->device->node_guid;
772 resp->port_num = cm_id->port_num;
773 resp->pkey = (__force __u16) cpu_to_be16(
774 ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
775}
776
777static ssize_t ucma_query_addr(struct ucma_context *ctx,
778 void __user *response, int out_len)
779{
780 struct rdma_ucm_query_addr_resp resp;
781 struct sockaddr *addr;
782 int ret = 0;
783
784 if (out_len < sizeof(resp))
785 return -ENOSPC;
786
787 memset(&resp, 0, sizeof resp);
788
789 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
790 resp.src_size = rdma_addr_size(addr);
791 memcpy(&resp.src_addr, addr, resp.src_size);
792
793 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
794 resp.dst_size = rdma_addr_size(addr);
795 memcpy(&resp.dst_addr, addr, resp.dst_size);
796
797 ucma_query_device_addr(ctx->cm_id, &resp);
798
799 if (copy_to_user(response, &resp, sizeof(resp)))
800 ret = -EFAULT;
801
802 return ret;
803}
804
805static ssize_t ucma_query_path(struct ucma_context *ctx,
806 void __user *response, int out_len)
807{
808 struct rdma_ucm_query_path_resp *resp;
809 int i, ret = 0;
810
811 if (out_len < sizeof(*resp))
812 return -ENOSPC;
813
814 resp = kzalloc(out_len, GFP_KERNEL);
815 if (!resp)
816 return -ENOMEM;
817
818 resp->num_paths = ctx->cm_id->route.num_paths;
819 for (i = 0, out_len -= sizeof(*resp);
820 i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
821 i++, out_len -= sizeof(struct ib_path_rec_data)) {
822
823 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
824 IB_PATH_BIDIRECTIONAL;
825 ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
826 &resp->path_data[i].path_rec);
827 }
828
829 if (copy_to_user(response, resp,
830 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
831 ret = -EFAULT;
832
833 kfree(resp);
834 return ret;
835}
836
837static ssize_t ucma_query_gid(struct ucma_context *ctx,
838 void __user *response, int out_len)
839{
840 struct rdma_ucm_query_addr_resp resp;
841 struct sockaddr_ib *addr;
842 int ret = 0;
843
844 if (out_len < sizeof(resp))
845 return -ENOSPC;
846
847 memset(&resp, 0, sizeof resp);
848
849 ucma_query_device_addr(ctx->cm_id, &resp);
850
851 addr = (struct sockaddr_ib *) &resp.src_addr;
852 resp.src_size = sizeof(*addr);
853 if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
854 memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
855 } else {
856 addr->sib_family = AF_IB;
857 addr->sib_pkey = (__force __be16) resp.pkey;
858 rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
859 (union ib_gid *) &addr->sib_addr);
860 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
861 &ctx->cm_id->route.addr.src_addr);
862 }
863
864 addr = (struct sockaddr_ib *) &resp.dst_addr;
865 resp.dst_size = sizeof(*addr);
866 if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
867 memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
868 } else {
869 addr->sib_family = AF_IB;
870 addr->sib_pkey = (__force __be16) resp.pkey;
871 rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
872 (union ib_gid *) &addr->sib_addr);
873 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
874 &ctx->cm_id->route.addr.dst_addr);
875 }
876
877 if (copy_to_user(response, &resp, sizeof(resp)))
878 ret = -EFAULT;
879
880 return ret;
881}
882
883static ssize_t ucma_query(struct ucma_file *file,
884 const char __user *inbuf,
885 int in_len, int out_len)
886{
887 struct rdma_ucm_query cmd;
888 struct ucma_context *ctx;
889 void __user *response;
890 int ret;
891
892 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
893 return -EFAULT;
894
895 response = (void __user *)(unsigned long) cmd.response;
896 ctx = ucma_get_ctx(file, cmd.id);
897 if (IS_ERR(ctx))
898 return PTR_ERR(ctx);
899
900 switch (cmd.option) {
901 case RDMA_USER_CM_QUERY_ADDR:
902 ret = ucma_query_addr(ctx, response, out_len);
903 break;
904 case RDMA_USER_CM_QUERY_PATH:
905 ret = ucma_query_path(ctx, response, out_len);
906 break;
907 case RDMA_USER_CM_QUERY_GID:
908 ret = ucma_query_gid(ctx, response, out_len);
909 break;
910 default:
911 ret = -ENOSYS;
912 break;
913 }
914
915 ucma_put_ctx(ctx);
916 return ret;
917}
918
919static void ucma_copy_conn_param(struct rdma_cm_id *id,
920 struct rdma_conn_param *dst,
713 struct rdma_ucm_conn_param *src) 921 struct rdma_ucm_conn_param *src)
714{ 922{
715 dst->private_data = src->private_data; 923 dst->private_data = src->private_data;
@@ -721,6 +929,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst,
721 dst->rnr_retry_count = src->rnr_retry_count; 929 dst->rnr_retry_count = src->rnr_retry_count;
722 dst->srq = src->srq; 930 dst->srq = src->srq;
723 dst->qp_num = src->qp_num; 931 dst->qp_num = src->qp_num;
932 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
724} 933}
725 934
726static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, 935static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -741,7 +950,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
741 if (IS_ERR(ctx)) 950 if (IS_ERR(ctx))
742 return PTR_ERR(ctx); 951 return PTR_ERR(ctx);
743 952
744 ucma_copy_conn_param(&conn_param, &cmd.conn_param); 953 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
745 ret = rdma_connect(ctx->cm_id, &conn_param); 954 ret = rdma_connect(ctx->cm_id, &conn_param);
746 ucma_put_ctx(ctx); 955 ucma_put_ctx(ctx);
747 return ret; 956 return ret;
@@ -784,7 +993,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
784 return PTR_ERR(ctx); 993 return PTR_ERR(ctx);
785 994
786 if (cmd.conn_param.valid) { 995 if (cmd.conn_param.valid) {
787 ucma_copy_conn_param(&conn_param, &cmd.conn_param); 996 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
788 mutex_lock(&file->mut); 997 mutex_lock(&file->mut);
789 ret = rdma_accept(ctx->cm_id, &conn_param); 998 ret = rdma_accept(ctx->cm_id, &conn_param);
790 if (!ret) 999 if (!ret)
@@ -1020,23 +1229,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1020 return ret; 1229 return ret;
1021} 1230}
1022 1231
1023static ssize_t ucma_join_multicast(struct ucma_file *file, 1232static ssize_t ucma_process_join(struct ucma_file *file,
1024 const char __user *inbuf, 1233 struct rdma_ucm_join_mcast *cmd, int out_len)
1025 int in_len, int out_len)
1026{ 1234{
1027 struct rdma_ucm_join_mcast cmd;
1028 struct rdma_ucm_create_id_resp resp; 1235 struct rdma_ucm_create_id_resp resp;
1029 struct ucma_context *ctx; 1236 struct ucma_context *ctx;
1030 struct ucma_multicast *mc; 1237 struct ucma_multicast *mc;
1238 struct sockaddr *addr;
1031 int ret; 1239 int ret;
1032 1240
1033 if (out_len < sizeof(resp)) 1241 if (out_len < sizeof(resp))
1034 return -ENOSPC; 1242 return -ENOSPC;
1035 1243
1036 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1244 addr = (struct sockaddr *) &cmd->addr;
1037 return -EFAULT; 1245 if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
1246 return -EINVAL;
1038 1247
1039 ctx = ucma_get_ctx(file, cmd.id); 1248 ctx = ucma_get_ctx(file, cmd->id);
1040 if (IS_ERR(ctx)) 1249 if (IS_ERR(ctx))
1041 return PTR_ERR(ctx); 1250 return PTR_ERR(ctx);
1042 1251
@@ -1047,14 +1256,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
1047 goto err1; 1256 goto err1;
1048 } 1257 }
1049 1258
1050 mc->uid = cmd.uid; 1259 mc->uid = cmd->uid;
1051 memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); 1260 memcpy(&mc->addr, addr, cmd->addr_size);
1052 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); 1261 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
1053 if (ret) 1262 if (ret)
1054 goto err2; 1263 goto err2;
1055 1264
1056 resp.id = mc->id; 1265 resp.id = mc->id;
1057 if (copy_to_user((void __user *)(unsigned long)cmd.response, 1266 if (copy_to_user((void __user *)(unsigned long) cmd->response,
1058 &resp, sizeof(resp))) { 1267 &resp, sizeof(resp))) {
1059 ret = -EFAULT; 1268 ret = -EFAULT;
1060 goto err3; 1269 goto err3;
@@ -1079,6 +1288,38 @@ err1:
1079 return ret; 1288 return ret;
1080} 1289}
1081 1290
1291static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1292 const char __user *inbuf,
1293 int in_len, int out_len)
1294{
1295 struct rdma_ucm_join_ip_mcast cmd;
1296 struct rdma_ucm_join_mcast join_cmd;
1297
1298 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1299 return -EFAULT;
1300
1301 join_cmd.response = cmd.response;
1302 join_cmd.uid = cmd.uid;
1303 join_cmd.id = cmd.id;
1304 join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
1305 join_cmd.reserved = 0;
1306 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1307
1308 return ucma_process_join(file, &join_cmd, out_len);
1309}
1310
1311static ssize_t ucma_join_multicast(struct ucma_file *file,
1312 const char __user *inbuf,
1313 int in_len, int out_len)
1314{
1315 struct rdma_ucm_join_mcast cmd;
1316
1317 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1318 return -EFAULT;
1319
1320 return ucma_process_join(file, &cmd, out_len);
1321}
1322
1082static ssize_t ucma_leave_multicast(struct ucma_file *file, 1323static ssize_t ucma_leave_multicast(struct ucma_file *file,
1083 const char __user *inbuf, 1324 const char __user *inbuf,
1084 int in_len, int out_len) 1325 int in_len, int out_len)
@@ -1221,25 +1462,29 @@ file_put:
1221static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1462static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1222 const char __user *inbuf, 1463 const char __user *inbuf,
1223 int in_len, int out_len) = { 1464 int in_len, int out_len) = {
1224 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1465 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
1225 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1466 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
1226 [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, 1467 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
1227 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1468 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
1228 [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, 1469 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
1229 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1470 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
1230 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1471 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
1231 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1472 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
1232 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1473 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
1233 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1474 [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
1234 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1475 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
1235 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1476 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
1236 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1477 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
1237 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1478 [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
1238 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1479 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
1239 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1480 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
1240 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, 1481 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
1241 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1482 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
1242 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id 1483 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
1484 [RDMA_USER_CM_CMD_QUERY] = ucma_query,
1485 [RDMA_USER_CM_CMD_BIND] = ucma_bind,
1486 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
1487 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
1243}; 1488};
1244 1489
1245static ssize_t ucma_write(struct file *filp, const char __user *buf, 1490static ssize_t ucma_write(struct file *filp, const char __user *buf,
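The command table above only grows: the legacy IP-only commands (BIND_IP, RESOLVE_IP, JOIN_IP_MCAST) keep their original slot numbers for existing userspace, and the AF_IB-capable variants (QUERY, BIND, RESOLVE_ADDR, JOIN_MCAST) are appended at new indices. A minimal sketch of the bounds-checked dispatch pattern used over such a table follows; the handler names and error constants are illustrative, not the kernel's.

#include <stddef.h>
#include <stdio.h>

typedef int (*cmd_fn)(const void *inbuf, size_t in_len);

static int do_create_id(const void *in, size_t len) { (void)in; (void)len; return 0; }
static int do_bind(const void *in, size_t len)      { (void)in; (void)len; return 0; }

static cmd_fn cmd_table[] = {
	[0] = do_create_id,	/* original command, slot preserved */
	[1] = NULL,		/* reserved slot, like GET_OPTION above */
	[2] = do_bind,		/* newer command appended at a later index */
};

static int dispatch(unsigned int cmd, const void *inbuf, size_t in_len)
{
	if (cmd >= sizeof(cmd_table) / sizeof(cmd_table[0]))
		return -22;	/* -EINVAL: command number unknown to this kernel */
	if (!cmd_table[cmd])
		return -38;	/* -ENOSYS: slot intentionally unimplemented */
	return cmd_table[cmd](inbuf, in_len);
}

int main(void)
{
	printf("%d %d %d\n", dispatch(2, NULL, 0), dispatch(1, NULL, 0), dispatch(9, NULL, 0));
	return 0;
}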
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a7d00f6b3bc1..b3c07b0c9f26 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -334,7 +334,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
334 334
335 resp.num_comp_vectors = file->device->num_comp_vectors; 335 resp.num_comp_vectors = file->device->num_comp_vectors;
336 336
337 ret = get_unused_fd(); 337 ret = get_unused_fd_flags(O_CLOEXEC);
338 if (ret < 0) 338 if (ret < 0)
339 goto err_free; 339 goto err_free;
340 resp.async_fd = ret; 340 resp.async_fd = ret;
@@ -1184,7 +1184,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1184 if (copy_from_user(&cmd, buf, sizeof cmd)) 1184 if (copy_from_user(&cmd, buf, sizeof cmd))
1185 return -EFAULT; 1185 return -EFAULT;
1186 1186
1187 ret = get_unused_fd(); 1187 ret = get_unused_fd_flags(O_CLOEXEC);
1188 if (ret < 0) 1188 if (ret < 0)
1189 return ret; 1189 return ret;
1190 resp.fd = ret; 1190 resp.fd = ret;
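The two uverbs hunks above switch the async and completion event file descriptors to get_unused_fd_flags(O_CLOEXEC), so they are no longer leaked across exec(). The effect is the same as opening with O_CLOEXEC in userspace, as this small demonstration shows:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return 1;

	/* FD_CLOEXEC is set, so the descriptor is closed automatically on exec(). */
	int fl = fcntl(fd, F_GETFD);
	printf("FD_CLOEXEC set: %s\n", (fl & FD_CLOEXEC) ? "yes" : "no");
	close(fd);
	return 0;
}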
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e5649e8b215d..b57c0befd962 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -883,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
883{ 883{
884 union t3_wr *wqe = qhp->wq.queue; 884 union t3_wr *wqe = qhp->wq.queue;
885 u16 count = 0; 885 u16 count = 0;
886 while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { 886
887 while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
887 count++; 888 count++;
888 wqe++; 889 wqe++;
889 } 890 }
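The iwch_rqes_posted() change above replaces a bound that could never trigger: count is a u16, so in "(count + 1) != 0" it is promoted to int before the addition and the sum never wraps to zero, while the new "count < USHRT_MAX" test genuinely limits the scan. A two-line demonstration of the promotion:

#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned short count = USHRT_MAX;

	/* Promoted to int: 65535 + 1 == 65536, never 0, so the old test always passes. */
	printf("(count + 1) != 0  -> %d\n", (count + 1) != 0);

	/* The new condition becomes false at USHRT_MAX and bounds the loop. */
	printf("count < USHRT_MAX -> %d\n", count < USHRT_MAX);
	return 0;
}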
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 982e3efd98d3..cd8d290a09fc 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void)
211 if (!ctblk_cache) { 211 if (!ctblk_cache) {
212 ehca_gen_err("Cannot create ctblk SLAB cache."); 212 ehca_gen_err("Cannot create ctblk SLAB cache.");
213 ehca_cleanup_small_qp_cache(); 213 ehca_cleanup_small_qp_cache();
214 ret = -ENOMEM;
214 goto create_slab_caches6; 215 goto create_slab_caches6;
215 } 216 }
216#endif 217#endif
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644
index 000000000000..8e6aebfaf8a4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -0,0 +1,10 @@
1config MLX5_INFINIBAND
2 tristate "Mellanox Connect-IB HCA support"
3 depends on NETDEVICES && ETHERNET && PCI && X86
4 select NET_VENDOR_MELLANOX
5 select MLX5_CORE
6 ---help---
7 This driver provides low-level InfiniBand support for
8 Mellanox Connect-IB PCI Express host channel adapters (HCAs).
9 This is required to use InfiniBand protocols such as
10 IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644
index 000000000000..4ea0135af484
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
2
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644
index 000000000000..39ab0caefdf9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx5_ib.h"
34
35struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
36 struct mlx5_ib_ah *ah)
37{
38 if (ah_attr->ah_flags & IB_AH_GRH) {
39 memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
40 ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
41 (1 << 30) |
42 ah_attr->grh.sgid_index << 20);
43 ah->av.hop_limit = ah_attr->grh.hop_limit;
44 ah->av.tclass = ah_attr->grh.traffic_class;
45 }
46
47 ah->av.rlid = cpu_to_be16(ah_attr->dlid);
48 ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
49 ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
50
51 return &ah->ibah;
52}
53
54struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
55{
56 struct mlx5_ib_ah *ah;
57
58 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
59 if (!ah)
60 return ERR_PTR(-ENOMEM);
61
62 return create_ib_ah(ah_attr, ah); /* never fails */
63}
64
65int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
66{
67 struct mlx5_ib_ah *ah = to_mah(ibah);
68 u32 tmp;
69
70 memset(ah_attr, 0, sizeof(*ah_attr));
71
72 tmp = be32_to_cpu(ah->av.grh_gid_fl);
73 if (tmp & (1 << 30)) {
74 ah_attr->ah_flags = IB_AH_GRH;
75 ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
76 ah_attr->grh.flow_label = tmp & 0xfffff;
77 memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
78 ah_attr->grh.hop_limit = ah->av.hop_limit;
79 ah_attr->grh.traffic_class = ah->av.tclass;
80 }
81 ah_attr->dlid = be16_to_cpu(ah->av.rlid);
82 ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
83 ah_attr->sl = ah->av.stat_rate_sl & 0xf;
84
85 return 0;
86}
87
88int mlx5_ib_destroy_ah(struct ib_ah *ah)
89{
90 kfree(to_mah(ah));
91 return 0;
92}
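The new mlx5 ah.c above packs the address handle into the hardware address vector: grh_gid_fl carries the flow label in bits 0-19, the SGID index in bits 20-27 and a "GRH present" flag in bit 30, while stat_rate_sl puts the static rate in the high nibble and the SL in the low nibble. The following self-contained sketch mirrors the grh_gid_fl packing and the inverse used by mlx5_ib_query_ah(), in host byte order only (the driver stores the field big-endian).

#include <stdint.h>
#include <stdio.h>

static uint32_t pack_grh_gid_fl(uint32_t flow_label, uint8_t sgid_index)
{
	/* flow label: bits 0-19, sgid index: bits 20-27, GRH flag: bit 30 */
	return (flow_label & 0xfffff) | (1u << 30) | ((uint32_t)sgid_index << 20);
}

static void unpack_grh_gid_fl(uint32_t v, uint32_t *flow_label,
			      uint8_t *sgid_index, int *grh)
{
	*grh = !!(v & (1u << 30));
	*sgid_index = (v >> 20) & 0xff;
	*flow_label = v & 0xfffff;
}

int main(void)
{
	uint32_t fl;
	uint8_t idx;
	int grh;

	unpack_grh_gid_fl(pack_grh_gid_fl(0x12345, 3), &fl, &idx, &grh);
	printf("grh=%d sgid_index=%u flow_label=0x%x\n", grh, (unsigned)idx, (unsigned)fl);
	return 0;
}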
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644
index 000000000000..344ab03948a3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -0,0 +1,843 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
39{
40 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
41
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
46{
47 struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
48 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
49 struct ib_cq *ibcq = &cq->ibcq;
50 struct ib_event event;
51
52 if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
53 mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
54 type, mcq->cqn);
55 return;
56 }
57
58 if (ibcq->event_handler) {
59 event.device = &dev->ib_dev;
60 event.event = IB_EVENT_CQ_ERR;
61 event.element.cq = ibcq;
62 ibcq->event_handler(&event, ibcq->cq_context);
63 }
64}
65
66static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
67{
68 return mlx5_buf_offset(&buf->buf, n * size);
69}
70
71static void *get_cqe(struct mlx5_ib_cq *cq, int n)
72{
73 return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
74}
75
76static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
77{
78 void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
79 struct mlx5_cqe64 *cqe64;
80
81 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
82 return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
83 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
84}
85
86static void *next_cqe_sw(struct mlx5_ib_cq *cq)
87{
88 return get_sw_cqe(cq, cq->mcq.cons_index);
89}
90
91static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
92{
93 switch (wq->wr_data[idx]) {
94 case MLX5_IB_WR_UMR:
95 return 0;
96
97 case IB_WR_LOCAL_INV:
98 return IB_WC_LOCAL_INV;
99
100 case IB_WR_FAST_REG_MR:
101 return IB_WC_FAST_REG_MR;
102
103 default:
104 pr_warn("unknown completion status\n");
105 return 0;
106 }
107}
108
109static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
110 struct mlx5_ib_wq *wq, int idx)
111{
112 wc->wc_flags = 0;
113 switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
114 case MLX5_OPCODE_RDMA_WRITE_IMM:
115 wc->wc_flags |= IB_WC_WITH_IMM;
116 case MLX5_OPCODE_RDMA_WRITE:
117 wc->opcode = IB_WC_RDMA_WRITE;
118 break;
119 case MLX5_OPCODE_SEND_IMM:
120 wc->wc_flags |= IB_WC_WITH_IMM;
121 case MLX5_OPCODE_SEND:
122 case MLX5_OPCODE_SEND_INVAL:
123 wc->opcode = IB_WC_SEND;
124 break;
125 case MLX5_OPCODE_RDMA_READ:
126 wc->opcode = IB_WC_RDMA_READ;
127 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
128 break;
129 case MLX5_OPCODE_ATOMIC_CS:
130 wc->opcode = IB_WC_COMP_SWAP;
131 wc->byte_len = 8;
132 break;
133 case MLX5_OPCODE_ATOMIC_FA:
134 wc->opcode = IB_WC_FETCH_ADD;
135 wc->byte_len = 8;
136 break;
137 case MLX5_OPCODE_ATOMIC_MASKED_CS:
138 wc->opcode = IB_WC_MASKED_COMP_SWAP;
139 wc->byte_len = 8;
140 break;
141 case MLX5_OPCODE_ATOMIC_MASKED_FA:
142 wc->opcode = IB_WC_MASKED_FETCH_ADD;
143 wc->byte_len = 8;
144 break;
145 case MLX5_OPCODE_BIND_MW:
146 wc->opcode = IB_WC_BIND_MW;
147 break;
148 case MLX5_OPCODE_UMR:
149 wc->opcode = get_umr_comp(wq, idx);
150 break;
151 }
152}
153
154enum {
155 MLX5_GRH_IN_BUFFER = 1,
156 MLX5_GRH_IN_CQE = 2,
157};
158
159static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
160 struct mlx5_ib_qp *qp)
161{
162 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
163 struct mlx5_ib_srq *srq;
164 struct mlx5_ib_wq *wq;
165 u16 wqe_ctr;
166 u8 g;
167
168 if (qp->ibqp.srq || qp->ibqp.xrcd) {
169 struct mlx5_core_srq *msrq = NULL;
170
171 if (qp->ibqp.xrcd) {
172 msrq = mlx5_core_get_srq(&dev->mdev,
173 be32_to_cpu(cqe->srqn));
174 srq = to_mibsrq(msrq);
175 } else {
176 srq = to_msrq(qp->ibqp.srq);
177 }
178 if (srq) {
179 wqe_ctr = be16_to_cpu(cqe->wqe_counter);
180 wc->wr_id = srq->wrid[wqe_ctr];
181 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
182 if (msrq && atomic_dec_and_test(&msrq->refcount))
183 complete(&msrq->free);
184 }
185 } else {
186 wq = &qp->rq;
187 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
188 ++wq->tail;
189 }
190 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
191
192 switch (cqe->op_own >> 4) {
193 case MLX5_CQE_RESP_WR_IMM:
194 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
195 wc->wc_flags = IB_WC_WITH_IMM;
196 wc->ex.imm_data = cqe->imm_inval_pkey;
197 break;
198 case MLX5_CQE_RESP_SEND:
199 wc->opcode = IB_WC_RECV;
200 wc->wc_flags = 0;
201 break;
202 case MLX5_CQE_RESP_SEND_IMM:
203 wc->opcode = IB_WC_RECV;
204 wc->wc_flags = IB_WC_WITH_IMM;
205 wc->ex.imm_data = cqe->imm_inval_pkey;
206 break;
207 case MLX5_CQE_RESP_SEND_INV:
208 wc->opcode = IB_WC_RECV;
209 wc->wc_flags = IB_WC_WITH_INVALIDATE;
210 wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
211 break;
212 }
213 wc->slid = be16_to_cpu(cqe->slid);
214 wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
215 wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
216 wc->dlid_path_bits = cqe->ml_path;
217 g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
218 wc->wc_flags |= g ? IB_WC_GRH : 0;
219 wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
220}
221
222static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
223{
224 __be32 *p = (__be32 *)cqe;
225 int i;
226
227 mlx5_ib_warn(dev, "dump error cqe\n");
228 for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
229 pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
230 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
231 be32_to_cpu(p[3]));
232}
233
234static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
235 struct mlx5_err_cqe *cqe,
236 struct ib_wc *wc)
237{
238 int dump = 1;
239
240 switch (cqe->syndrome) {
241 case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
242 wc->status = IB_WC_LOC_LEN_ERR;
243 break;
244 case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
245 wc->status = IB_WC_LOC_QP_OP_ERR;
246 break;
247 case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
248 wc->status = IB_WC_LOC_PROT_ERR;
249 break;
250 case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
251 dump = 0;
252 wc->status = IB_WC_WR_FLUSH_ERR;
253 break;
254 case MLX5_CQE_SYNDROME_MW_BIND_ERR:
255 wc->status = IB_WC_MW_BIND_ERR;
256 break;
257 case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
258 wc->status = IB_WC_BAD_RESP_ERR;
259 break;
260 case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
261 wc->status = IB_WC_LOC_ACCESS_ERR;
262 break;
263 case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
264 wc->status = IB_WC_REM_INV_REQ_ERR;
265 break;
266 case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
267 wc->status = IB_WC_REM_ACCESS_ERR;
268 break;
269 case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
270 wc->status = IB_WC_REM_OP_ERR;
271 break;
272 case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
273 wc->status = IB_WC_RETRY_EXC_ERR;
274 dump = 0;
275 break;
276 case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
277 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
278 dump = 0;
279 break;
280 case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
281 wc->status = IB_WC_REM_ABORT_ERR;
282 break;
283 default:
284 wc->status = IB_WC_GENERAL_ERR;
285 break;
286 }
287
288 wc->vendor_err = cqe->vendor_err_synd;
289 if (dump)
290 dump_cqe(dev, cqe);
291}
292
293static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
294{
295 /* TBD: waiting decision
296 */
297 return 0;
298}
299
300static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
301{
302 struct mlx5_wqe_data_seg *dpseg;
303 void *addr;
304
305 dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
306 sizeof(struct mlx5_wqe_raddr_seg) +
307 sizeof(struct mlx5_wqe_atomic_seg);
308 addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
309 return addr;
310}
311
312static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
313 uint16_t idx)
314{
315 void *addr;
316 int byte_count;
317 int i;
318
319 if (!is_atomic_response(qp, idx))
320 return;
321
322 byte_count = be32_to_cpu(cqe64->byte_cnt);
323 addr = mlx5_get_atomic_laddr(qp, idx);
324
325 if (byte_count == 4) {
326 *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
327 } else {
328 for (i = 0; i < byte_count; i += 8) {
329 *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
330 addr += 8;
331 }
332 }
333
334 return;
335}
336
337static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
338 u16 tail, u16 head)
339{
340 int idx;
341
342 do {
343 idx = tail & (qp->sq.wqe_cnt - 1);
344 handle_atomic(qp, cqe64, idx);
345 if (idx == head)
346 break;
347
348 tail = qp->sq.w_list[idx].next;
349 } while (1);
350 tail = qp->sq.w_list[idx].next;
351 qp->sq.last_poll = tail;
352}
353
354static int mlx5_poll_one(struct mlx5_ib_cq *cq,
355 struct mlx5_ib_qp **cur_qp,
356 struct ib_wc *wc)
357{
358 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
359 struct mlx5_err_cqe *err_cqe;
360 struct mlx5_cqe64 *cqe64;
361 struct mlx5_core_qp *mqp;
362 struct mlx5_ib_wq *wq;
363 uint8_t opcode;
364 uint32_t qpn;
365 u16 wqe_ctr;
366 void *cqe;
367 int idx;
368
369 cqe = next_cqe_sw(cq);
370 if (!cqe)
371 return -EAGAIN;
372
373 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
374
375 ++cq->mcq.cons_index;
376
377 /* Make sure we read CQ entry contents after we've checked the
378 * ownership bit.
379 */
380 rmb();
381
382 /* TBD: resize CQ */
383
384 qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
385 if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
386 /* We do not have to take the QP table lock here,
387 * because CQs will be locked while QPs are removed
388 * from the table.
389 */
390 mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
391 if (unlikely(!mqp)) {
392 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
393 cq->mcq.cqn, qpn);
394 return -EINVAL;
395 }
396
397 *cur_qp = to_mibqp(mqp);
398 }
399
400 wc->qp = &(*cur_qp)->ibqp;
401 opcode = cqe64->op_own >> 4;
402 switch (opcode) {
403 case MLX5_CQE_REQ:
404 wq = &(*cur_qp)->sq;
405 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
406 idx = wqe_ctr & (wq->wqe_cnt - 1);
407 handle_good_req(wc, cqe64, wq, idx);
408 handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
409 wc->wr_id = wq->wrid[idx];
410 wq->tail = wq->wqe_head[idx] + 1;
411 wc->status = IB_WC_SUCCESS;
412 break;
413 case MLX5_CQE_RESP_WR_IMM:
414 case MLX5_CQE_RESP_SEND:
415 case MLX5_CQE_RESP_SEND_IMM:
416 case MLX5_CQE_RESP_SEND_INV:
417 handle_responder(wc, cqe64, *cur_qp);
418 wc->status = IB_WC_SUCCESS;
419 break;
420 case MLX5_CQE_RESIZE_CQ:
421 break;
422 case MLX5_CQE_REQ_ERR:
423 case MLX5_CQE_RESP_ERR:
424 err_cqe = (struct mlx5_err_cqe *)cqe64;
425 mlx5_handle_error_cqe(dev, err_cqe, wc);
426 mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
427 opcode == MLX5_CQE_REQ_ERR ?
428 "Requestor" : "Responder", cq->mcq.cqn);
429 mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
430 err_cqe->syndrome, err_cqe->vendor_err_synd);
431 if (opcode == MLX5_CQE_REQ_ERR) {
432 wq = &(*cur_qp)->sq;
433 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
434 idx = wqe_ctr & (wq->wqe_cnt - 1);
435 wc->wr_id = wq->wrid[idx];
436 wq->tail = wq->wqe_head[idx] + 1;
437 } else {
438 struct mlx5_ib_srq *srq;
439
440 if ((*cur_qp)->ibqp.srq) {
441 srq = to_msrq((*cur_qp)->ibqp.srq);
442 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
443 wc->wr_id = srq->wrid[wqe_ctr];
444 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
445 } else {
446 wq = &(*cur_qp)->rq;
447 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
448 ++wq->tail;
449 }
450 }
451 break;
452 }
453
454 return 0;
455}
456
457int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
458{
459 struct mlx5_ib_cq *cq = to_mcq(ibcq);
460 struct mlx5_ib_qp *cur_qp = NULL;
461 unsigned long flags;
462 int npolled;
463 int err = 0;
464
465 spin_lock_irqsave(&cq->lock, flags);
466
467 for (npolled = 0; npolled < num_entries; npolled++) {
468 err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
469 if (err)
470 break;
471 }
472
473 if (npolled)
474 mlx5_cq_set_ci(&cq->mcq);
475
476 spin_unlock_irqrestore(&cq->lock, flags);
477
478 if (err == 0 || err == -EAGAIN)
479 return npolled;
480 else
481 return err;
482}
483
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
485{
486 mlx5_cq_arm(&to_mcq(ibcq)->mcq,
487 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
488 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
489 to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
490 MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));
491
492 return 0;
493}
494
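/*
 * The two entry points above implement the generic verbs poll/arm contract.
 * A minimal sketch of a consumer (hypothetical helper, batch size chosen
 * arbitrarily; any ib_cq is driven the same way):
 */
static void drain_cq_sketch(struct ib_cq *cq)
{
	struct ib_wc wc[16];
	int n, i;

	/* arm first so completions arriving after the drain raise an event */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status != IB_WC_SUCCESS)
				pr_warn("wr_id 0x%llx completed with status %d\n",
					(unsigned long long)wc[i].wr_id,
					wc[i].status);
		}
	}
}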
495static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
496 int nent, int cqe_size)
497{
498 int err;
499
500 err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
501 PAGE_SIZE * 2, &buf->buf);
502 if (err)
503 return err;
504
505 buf->cqe_size = cqe_size;
506
507 return 0;
508}
509
510static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
511{
512 mlx5_buf_free(&dev->mdev, &buf->buf);
513}
514
515static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
516 struct ib_ucontext *context, struct mlx5_ib_cq *cq,
517 int entries, struct mlx5_create_cq_mbox_in **cqb,
518 int *cqe_size, int *index, int *inlen)
519{
520 struct mlx5_ib_create_cq ucmd;
521 int page_shift;
522 int npages;
523 int ncont;
524 int err;
525
526 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
527 return -EFAULT;
528
529 if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
530 return -EINVAL;
531
532 *cqe_size = ucmd.cqe_size;
533
534 cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
535 entries * ucmd.cqe_size,
536 IB_ACCESS_LOCAL_WRITE, 1);
537 if (IS_ERR(cq->buf.umem)) {
538 err = PTR_ERR(cq->buf.umem);
539 return err;
540 }
541
542 err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
543 &cq->db);
544 if (err)
545 goto err_umem;
546
547 mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
548 &ncont, NULL);
549 mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
550 ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
551
552 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
553 *cqb = mlx5_vzalloc(*inlen);
554 if (!*cqb) {
555 err = -ENOMEM;
556 goto err_db;
557 }
558 mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
559 (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
560
561 *index = to_mucontext(context)->uuari.uars[0].index;
562
563 return 0;
564
565err_db:
566 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
567
568err_umem:
569 ib_umem_release(cq->buf.umem);
570 return err;
571}
572
573static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
574{
575 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
576 ib_umem_release(cq->buf.umem);
577}
578
579static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
580{
581 int i;
582 void *cqe;
583 struct mlx5_cqe64 *cqe64;
584
585 for (i = 0; i < nent; i++) {
586 cqe = get_cqe(cq, i);
587 cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
588 cqe64->op_own = 0xf1;
589 }
590}
591
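/*
 * The driver supports 64- and 128-byte CQEs; with 128-byte CQEs the 64-byte
 * layout sits in the second half of the entry, which is why the expression
 * "(cqe_sz == 64) ? cqe : cqe + 64" recurs in this file.  A hypothetical
 * helper capturing that convention:
 */
static inline struct mlx5_cqe64 *to_cqe64(void *cqe, int cqe_sz)
{
	return cqe_sz == 64 ? cqe : cqe + 64;
}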
592static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
593 int entries, int cqe_size,
594 struct mlx5_create_cq_mbox_in **cqb,
595 int *index, int *inlen)
596{
597 int err;
598
599 err = mlx5_db_alloc(&dev->mdev, &cq->db);
600 if (err)
601 return err;
602
603 cq->mcq.set_ci_db = cq->db.db;
604 cq->mcq.arm_db = cq->db.db + 1;
605 *cq->mcq.set_ci_db = 0;
606 *cq->mcq.arm_db = 0;
607 cq->mcq.cqe_sz = cqe_size;
608
609 err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
610 if (err)
611 goto err_db;
612
613 init_cq_buf(cq, entries);
614
615 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
616 *cqb = mlx5_vzalloc(*inlen);
617 if (!*cqb) {
618 err = -ENOMEM;
619 goto err_buf;
620 }
621 mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
622
623 (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
624 *index = dev->mdev.priv.uuari.uars[0].index;
625
626 return 0;
627
628err_buf:
629 free_cq_buf(dev, &cq->buf);
630
631err_db:
632 mlx5_db_free(&dev->mdev, &cq->db);
633 return err;
634}
635
636static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
637{
638 free_cq_buf(dev, &cq->buf);
639 mlx5_db_free(&dev->mdev, &cq->db);
640}
641
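/*
 * Create a CQ of at least @entries entries.  The requested depth is rounded
 * up to a power of two after adding one slot: e.g. a request for 100 entries
 * allocates roundup_pow_of_two(101) = 128 CQEs and reports ibcq.cqe = 127
 * back to the consumer.  User CQs take their buffer and doorbell record from
 * the caller's udata; kernel CQs get a 64-byte-CQE buffer allocated here.
 */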
642struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
643 int vector, struct ib_ucontext *context,
644 struct ib_udata *udata)
645{
646 struct mlx5_create_cq_mbox_in *cqb = NULL;
647 struct mlx5_ib_dev *dev = to_mdev(ibdev);
648 struct mlx5_ib_cq *cq;
649 int uninitialized_var(index);
650 int uninitialized_var(inlen);
651 int cqe_size;
652 int irqn;
653 int eqn;
654 int err;
655
656 entries = roundup_pow_of_two(entries + 1);
657 if (entries < 1 || entries > dev->mdev.caps.max_cqes)
658 return ERR_PTR(-EINVAL);
659
660 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
661 if (!cq)
662 return ERR_PTR(-ENOMEM);
663
664 cq->ibcq.cqe = entries - 1;
665 mutex_init(&cq->resize_mutex);
666 spin_lock_init(&cq->lock);
667 cq->resize_buf = NULL;
668 cq->resize_umem = NULL;
669
670 if (context) {
671 err = create_cq_user(dev, udata, context, cq, entries,
672 &cqb, &cqe_size, &index, &inlen);
673 if (err)
674 goto err_create;
675 } else {
676 /* for now choose 64 bytes till we have a proper interface */
677 cqe_size = 64;
678 err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
679 &index, &inlen);
680 if (err)
681 goto err_create;
682 }
683
684 cq->cqe_size = cqe_size;
685 cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
686 cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
687 err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
688 if (err)
689 goto err_cqb;
690
691 cqb->ctx.c_eqn = cpu_to_be16(eqn);
692 cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
693
694 err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
695 if (err)
696 goto err_cqb;
697
698 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
699 cq->mcq.irqn = irqn;
700 cq->mcq.comp = mlx5_ib_cq_comp;
701 cq->mcq.event = mlx5_ib_cq_event;
702
703 if (context)
704 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
705 err = -EFAULT;
706 goto err_cmd;
707 }
708
709
710 mlx5_vfree(cqb);
711 return &cq->ibcq;
712
713err_cmd:
714 mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
715
716err_cqb:
717 mlx5_vfree(cqb);
718 if (context)
719 destroy_cq_user(cq, context);
720 else
721 destroy_cq_kernel(dev, cq);
722
723err_create:
724 kfree(cq);
725
726 return ERR_PTR(err);
727}
728
729
730int mlx5_ib_destroy_cq(struct ib_cq *cq)
731{
732 struct mlx5_ib_dev *dev = to_mdev(cq->device);
733 struct mlx5_ib_cq *mcq = to_mcq(cq);
734 struct ib_ucontext *context = NULL;
735
736 if (cq->uobject)
737 context = cq->uobject->context;
738
739 mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
740 if (context)
741 destroy_cq_user(mcq, context);
742 else
743 destroy_cq_kernel(dev, mcq);
744
745 kfree(mcq);
746
747 return 0;
748}
749
750static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
751 u32 rsn)
752{
753 u32 lrsn;
754
755 if (srq)
756 lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
757 else
758 lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
759
760 return rsn == lrsn;
761}
762
763void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
764{
765 struct mlx5_cqe64 *cqe64, *dest64;
766 void *cqe, *dest;
767 u32 prod_index;
768 int nfreed = 0;
769 u8 owner_bit;
770
771 if (!cq)
772 return;
773
774 /* First we need to find the current producer index, so we
775 * know where to start cleaning from. It doesn't matter if HW
776 * adds new entries after this loop -- the QP we're worried
777 * about is already in RESET, so the new entries won't come
778 * from our QP and therefore don't need to be checked.
779 */
780 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
781 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
782 break;
783
784 /* Now sweep backwards through the CQ, removing CQ entries
785 * that match our QP by copying older entries on top of them.
786 */
787 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
788 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
789 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
790 if (is_equal_rsn(cqe64, srq, rsn)) {
791 if (srq)
792 mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
793 ++nfreed;
794 } else if (nfreed) {
795 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
796 dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
797 owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
798 memcpy(dest, cqe, cq->mcq.cqe_sz);
799 dest64->op_own = owner_bit |
800 (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
801 }
802 }
803
804 if (nfreed) {
805 cq->mcq.cons_index += nfreed;
806 /* Make sure update of buffer contents is done before
807 * updating consumer index.
808 */
809 wmb();
810 mlx5_cq_set_ci(&cq->mcq);
811 }
812}
813
814void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
815{
816 if (!cq)
817 return;
818
819 spin_lock_irq(&cq->lock);
820 __mlx5_ib_cq_clean(cq, qpn, srq);
821 spin_unlock_irq(&cq->lock);
822}
823
824int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
825{
826 return -ENOSYS;
827}
828
829int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
830{
831 return -ENOSYS;
832}
833
834int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
835{
836 struct mlx5_ib_cq *cq;
837
838 if (!ibcq)
839 return 128;
840
841 cq = to_mcq(ibcq);
842 return cq->cqe_size;
843}
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644
index 000000000000..256a23344f28
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <linux/slab.h>
35#include <rdma/ib_umem.h>
36
37#include "mlx5_ib.h"
38
39struct mlx5_ib_user_db_page {
40 struct list_head list;
41 struct ib_umem *umem;
42 unsigned long user_virt;
43 int refcnt;
44};
45
46int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
47 struct mlx5_db *db)
48{
49 struct mlx5_ib_user_db_page *page;
50 struct ib_umem_chunk *chunk;
51 int err = 0;
52
53 mutex_lock(&context->db_page_mutex);
54
55 list_for_each_entry(page, &context->db_page_list, list)
56 if (page->user_virt == (virt & PAGE_MASK))
57 goto found;
58
59 page = kmalloc(sizeof(*page), GFP_KERNEL);
60 if (!page) {
61 err = -ENOMEM;
62 goto out;
63 }
64
65 page->user_virt = (virt & PAGE_MASK);
66 page->refcnt = 0;
67 page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
68 PAGE_SIZE, 0, 0);
69 if (IS_ERR(page->umem)) {
70 err = PTR_ERR(page->umem);
71 kfree(page);
72 goto out;
73 }
74
75 list_add(&page->list, &context->db_page_list);
76
77found:
78 chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
79 db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
80 db->u.user_page = page;
81 ++page->refcnt;
82
83out:
84 mutex_unlock(&context->db_page_mutex);
85
86 return err;
87}
88
89void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
90{
91 mutex_lock(&context->db_page_mutex);
92
93 if (!--db->u.user_page->refcnt) {
94 list_del(&db->u.user_page->list);
95 ib_umem_release(db->u.user_page->umem);
96 kfree(db->u.user_page);
97 }
98
99 mutex_unlock(&context->db_page_mutex);
100}
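/*
 * Callers are expected to pair these helpers, as the CQ code does in
 * create_cq_user()/destroy_cq_user():
 *
 *	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr, &cq->db);
 *	if (err)
 *		goto err_umem;
 *	...
 *	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
 *
 * Doorbell records that share a user page share a single pinned ib_umem;
 * the page is released only when the last record on it is unmapped.
 */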
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644
index 000000000000..5c8938be0e08
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx5/cmd.h>
34#include <rdma/ib_mad.h>
35#include <rdma/ib_smi.h>
36#include "mlx5_ib.h"
37
38enum {
39 MLX5_IB_VENDOR_CLASS1 = 0x9,
40 MLX5_IB_VENDOR_CLASS2 = 0xa
41};
42
43int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
44 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
45 void *in_mad, void *response_mad)
46{
47 u8 op_modifier = 0;
48
49 /* Key check traps can't be generated unless we have in_wc to
50 * tell us where to send the trap.
51 */
52 if (ignore_mkey || !in_wc)
53 op_modifier |= 0x1;
54 if (ignore_bkey || !in_wc)
55 op_modifier |= 0x2;
56
57 return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
58}
59
60int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
61 struct ib_wc *in_wc, struct ib_grh *in_grh,
62 struct ib_mad *in_mad, struct ib_mad *out_mad)
63{
64 u16 slid;
65 int err;
66
67 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
68
69 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
70 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
71
72 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
73 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
74 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
75 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
76 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
77 return IB_MAD_RESULT_SUCCESS;
78
79 /* Don't process SMInfo queries -- the SMA can't handle them.
80 */
81 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
82 return IB_MAD_RESULT_SUCCESS;
83 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
84 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
85 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
86 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
87 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
88 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
89 return IB_MAD_RESULT_SUCCESS;
90 } else {
91 return IB_MAD_RESULT_SUCCESS;
92 }
93
94 err = mlx5_MAD_IFC(to_mdev(ibdev),
95 mad_flags & IB_MAD_IGNORE_MKEY,
96 mad_flags & IB_MAD_IGNORE_BKEY,
97 port_num, in_wc, in_grh, in_mad, out_mad);
98 if (err)
99 return IB_MAD_RESULT_FAILURE;
100
101 /* set return bit in status of directed route responses */
102 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
103 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
104
105 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
106 /* no response for trap repress */
107 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
108
109 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
110}
111
112int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
113{
114 struct ib_smp *in_mad = NULL;
115 struct ib_smp *out_mad = NULL;
116 int err = -ENOMEM;
117 u16 packet_error;
118
119 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
120 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
121 if (!in_mad || !out_mad)
122 goto out;
123
124 init_query_mad(in_mad);
125 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
126 in_mad->attr_mod = cpu_to_be32(port);
127
128 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
129
130 packet_error = be16_to_cpu(out_mad->status);
131
132 dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
133 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
134
135out:
136 kfree(in_mad);
137 kfree(out_mad);
138 return err;
139}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644
index 000000000000..8000fff4d444
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -0,0 +1,1504 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/pci.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <linux/io-mapping.h>
41#include <linux/sched.h>
42#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_smi.h>
44#include <rdma/ib_umem.h>
45#include "user.h"
46#include "mlx5_ib.h"
47
48#define DRIVER_NAME "mlx5_ib"
49#define DRIVER_VERSION "1.0"
50#define DRIVER_RELDATE "June 2013"
51
52MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
53MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
54MODULE_LICENSE("Dual BSD/GPL");
55MODULE_VERSION(DRIVER_VERSION);
56
57static int prof_sel = 2;
58module_param_named(prof_sel, prof_sel, int, 0444);
59MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
60
61static char mlx5_version[] =
62 DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
63 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
64
65static struct mlx5_profile profile[] = {
66 [0] = {
67 .mask = 0,
68 },
69 [1] = {
70 .mask = MLX5_PROF_MASK_QP_SIZE,
71 .log_max_qp = 12,
72 },
73 [2] = {
74 .mask = MLX5_PROF_MASK_QP_SIZE |
75 MLX5_PROF_MASK_MR_CACHE,
76 .log_max_qp = 17,
77 .mr_cache[0] = {
78 .size = 500,
79 .limit = 250
80 },
81 .mr_cache[1] = {
82 .size = 500,
83 .limit = 250
84 },
85 .mr_cache[2] = {
86 .size = 500,
87 .limit = 250
88 },
89 .mr_cache[3] = {
90 .size = 500,
91 .limit = 250
92 },
93 .mr_cache[4] = {
94 .size = 500,
95 .limit = 250
96 },
97 .mr_cache[5] = {
98 .size = 500,
99 .limit = 250
100 },
101 .mr_cache[6] = {
102 .size = 500,
103 .limit = 250
104 },
105 .mr_cache[7] = {
106 .size = 500,
107 .limit = 250
108 },
109 .mr_cache[8] = {
110 .size = 500,
111 .limit = 250
112 },
113 .mr_cache[9] = {
114 .size = 500,
115 .limit = 250
116 },
117 .mr_cache[10] = {
118 .size = 500,
119 .limit = 250
120 },
121 .mr_cache[11] = {
122 .size = 500,
123 .limit = 250
124 },
125 .mr_cache[12] = {
126 .size = 64,
127 .limit = 32
128 },
129 .mr_cache[13] = {
130 .size = 32,
131 .limit = 16
132 },
133 .mr_cache[14] = {
134 .size = 16,
135 .limit = 8
136 },
137 .mr_cache[15] = {
138 .size = 8,
139 .limit = 4
140 },
141 },
142};
143
144int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
145{
146 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
147 struct mlx5_eq *eq, *n;
148 int err = -ENOENT;
149
150 spin_lock(&table->lock);
151 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
152 if (eq->index == vector) {
153 *eqn = eq->eqn;
154 *irqn = eq->irqn;
155 err = 0;
156 break;
157 }
158 }
159 spin_unlock(&table->lock);
160
161 return err;
162}
163
164static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
165{
166 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
167 struct mlx5_eq *eq, *n;
168 int ncomp_vec;
169 int nent;
170 int err;
171 int i;
172
173 INIT_LIST_HEAD(&dev->eqs_list);
174 ncomp_vec = table->num_comp_vectors;
175 nent = MLX5_COMP_EQ_SIZE;
176 for (i = 0; i < ncomp_vec; i++) {
177 eq = kzalloc(sizeof(*eq), GFP_KERNEL);
178 if (!eq) {
179 err = -ENOMEM;
180 goto clean;
181 }
182
183 snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
184 err = mlx5_create_map_eq(&dev->mdev, eq,
185 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
186 eq->name,
187 &dev->mdev.priv.uuari.uars[0]);
188 if (err) {
189 kfree(eq);
190 goto clean;
191 }
192 mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
193 eq->index = i;
194 spin_lock(&table->lock);
195 list_add_tail(&eq->list, &dev->eqs_list);
196 spin_unlock(&table->lock);
197 }
198
199 dev->num_comp_vectors = ncomp_vec;
200 return 0;
201
202clean:
203 spin_lock(&table->lock);
204 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
205 list_del(&eq->list);
206 spin_unlock(&table->lock);
207 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
208 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
209 kfree(eq);
210 spin_lock(&table->lock);
211 }
212 spin_unlock(&table->lock);
213 return err;
214}
215
216static void free_comp_eqs(struct mlx5_ib_dev *dev)
217{
218 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
219 struct mlx5_eq *eq, *n;
220
221 spin_lock(&table->lock);
222 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
223 list_del(&eq->list);
224 spin_unlock(&table->lock);
225 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
226 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
227 kfree(eq);
228 spin_lock(&table->lock);
229 }
230 spin_unlock(&table->lock);
231}
232
233static int mlx5_ib_query_device(struct ib_device *ibdev,
234 struct ib_device_attr *props)
235{
236 struct mlx5_ib_dev *dev = to_mdev(ibdev);
237 struct ib_smp *in_mad = NULL;
238 struct ib_smp *out_mad = NULL;
239 int err = -ENOMEM;
240 int max_rq_sg;
241 int max_sq_sg;
242 u64 flags;
243
244 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
245 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
246 if (!in_mad || !out_mad)
247 goto out;
248
249 init_query_mad(in_mad);
250 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
251
252 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
253 if (err)
254 goto out;
255
256 memset(props, 0, sizeof(*props));
257
258 props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
259 (fw_rev_min(&dev->mdev) << 16) |
260 fw_rev_sub(&dev->mdev);
261 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
262 IB_DEVICE_PORT_ACTIVE_EVENT |
263 IB_DEVICE_SYS_IMAGE_GUID |
264 IB_DEVICE_RC_RNR_NAK_GEN |
265 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
266 flags = dev->mdev.caps.flags;
267 if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
268 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
269 if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
270 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
271 if (flags & MLX5_DEV_CAP_FLAG_APM)
272 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
273 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
274 if (flags & MLX5_DEV_CAP_FLAG_XRC)
275 props->device_cap_flags |= IB_DEVICE_XRC;
276 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
277
278 props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
279 0xffffff;
280 props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30));
281 props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32));
282 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
283
284 props->max_mr_size = ~0ull;
285 props->page_size_cap = dev->mdev.caps.min_page_sz;
286 props->max_qp = 1 << dev->mdev.caps.log_max_qp;
287 props->max_qp_wr = dev->mdev.caps.max_wqes;
288 max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
289 max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
290 sizeof(struct mlx5_wqe_data_seg);
291 props->max_sge = min(max_rq_sg, max_sq_sg);
292 props->max_cq = 1 << dev->mdev.caps.log_max_cq;
293 props->max_cqe = dev->mdev.caps.max_cqes - 1;
294 props->max_mr = 1 << dev->mdev.caps.log_max_mkey;
295 props->max_pd = 1 << dev->mdev.caps.log_max_pd;
296 props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp;
297 props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
298 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
299 props->max_srq = 1 << dev->mdev.caps.log_max_srq;
300 props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1;
301 props->max_srq_sge = max_rq_sg - 1;
302 props->max_fast_reg_page_list_len = (unsigned int)-1;
303 props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
304 props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
305 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
306 props->masked_atomic_cap = IB_ATOMIC_HCA;
307 props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
308 props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
309 props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
310 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
311 props->max_mcast_grp;
312 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
313
314out:
315 kfree(in_mad);
316 kfree(out_mad);
317
318 return err;
319}
320
321int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
322 struct ib_port_attr *props)
323{
324 struct mlx5_ib_dev *dev = to_mdev(ibdev);
325 struct ib_smp *in_mad = NULL;
326 struct ib_smp *out_mad = NULL;
327 int ext_active_speed;
328 int err = -ENOMEM;
329
330 if (port < 1 || port > dev->mdev.caps.num_ports) {
331 mlx5_ib_warn(dev, "invalid port number %d\n", port);
332 return -EINVAL;
333 }
334
335 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
336 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
337 if (!in_mad || !out_mad)
338 goto out;
339
340 memset(props, 0, sizeof(*props));
341
342 init_query_mad(in_mad);
343 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
344 in_mad->attr_mod = cpu_to_be32(port);
345
346 err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
347 if (err) {
348 mlx5_ib_warn(dev, "err %d\n", err);
349 goto out;
350 }
351
352
353 props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16));
354 props->lmc = out_mad->data[34] & 0x7;
355 props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18));
356 props->sm_sl = out_mad->data[36] & 0xf;
357 props->state = out_mad->data[32] & 0xf;
358 props->phys_state = out_mad->data[33] >> 4;
359 props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
360 props->gid_tbl_len = out_mad->data[50];
361 props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
362 props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
363 props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
364 props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
365 props->active_width = out_mad->data[31] & 0xf;
366 props->active_speed = out_mad->data[35] >> 4;
367 props->max_mtu = out_mad->data[41] & 0xf;
368 props->active_mtu = out_mad->data[36] >> 4;
369 props->subnet_timeout = out_mad->data[51] & 0x1f;
370 props->max_vl_num = out_mad->data[37] >> 4;
371 props->init_type_reply = out_mad->data[41] >> 4;
372
373 /* Check if extended speeds (EDR/FDR/...) are supported */
374 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
375 ext_active_speed = out_mad->data[62] >> 4;
376
377 switch (ext_active_speed) {
378 case 1:
379 props->active_speed = 16; /* FDR */
380 break;
381 case 2:
382 props->active_speed = 32; /* EDR */
383 break;
384 }
385 }
386
387	/* If the reported active speed is QDR, check whether it is FDR-10 */
388 if (props->active_speed == 4) {
389 if (dev->mdev.caps.ext_port_cap[port - 1] &
390 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
391 init_query_mad(in_mad);
392 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
393 in_mad->attr_mod = cpu_to_be32(port);
394
395 err = mlx5_MAD_IFC(dev, 1, 1, port,
396 NULL, NULL, in_mad, out_mad);
397 if (err)
398 goto out;
399
400 /* Checking LinkSpeedActive for FDR-10 */
401 if (out_mad->data[15] & 0x1)
402 props->active_speed = 8;
403 }
404 }
405
406out:
407 kfree(in_mad);
408 kfree(out_mad);
409
410 return err;
411}
412
413static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
414 union ib_gid *gid)
415{
416 struct ib_smp *in_mad = NULL;
417 struct ib_smp *out_mad = NULL;
418 int err = -ENOMEM;
419
420 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
421 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
422 if (!in_mad || !out_mad)
423 goto out;
424
425 init_query_mad(in_mad);
426 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
427 in_mad->attr_mod = cpu_to_be32(port);
428
429 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
430 if (err)
431 goto out;
432
433 memcpy(gid->raw, out_mad->data + 8, 8);
434
435 init_query_mad(in_mad);
436 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
437 in_mad->attr_mod = cpu_to_be32(index / 8);
438
439 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
440 if (err)
441 goto out;
442
443 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
444
445out:
446 kfree(in_mad);
447 kfree(out_mad);
448 return err;
449}
450
451static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
452 u16 *pkey)
453{
454 struct ib_smp *in_mad = NULL;
455 struct ib_smp *out_mad = NULL;
456 int err = -ENOMEM;
457
458 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
459 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
460 if (!in_mad || !out_mad)
461 goto out;
462
463 init_query_mad(in_mad);
464 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
465 in_mad->attr_mod = cpu_to_be32(index / 32);
466
467 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
468 if (err)
469 goto out;
470
471 *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]);
472
473out:
474 kfree(in_mad);
475 kfree(out_mad);
476 return err;
477}
478
479struct mlx5_reg_node_desc {
480 u8 desc[64];
481};
482
483static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
484 struct ib_device_modify *props)
485{
486 struct mlx5_ib_dev *dev = to_mdev(ibdev);
487 struct mlx5_reg_node_desc in;
488 struct mlx5_reg_node_desc out;
489 int err;
490
491 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
492 return -EOPNOTSUPP;
493
494 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
495 return 0;
496
497 /*
498	 * If possible, pass the node description to FW so it can generate
499	 * a trap 144. If the command fails, just ignore it.
500 */
501 memcpy(&in, props->node_desc, 64);
502 err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out,
503 sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
504 if (err)
505 return err;
506
507 memcpy(ibdev->node_desc, props->node_desc, 64);
508
509 return err;
510}
511
512static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
513 struct ib_port_modify *props)
514{
515 struct mlx5_ib_dev *dev = to_mdev(ibdev);
516 struct ib_port_attr attr;
517 u32 tmp;
518 int err;
519
520 mutex_lock(&dev->cap_mask_mutex);
521
522 err = mlx5_ib_query_port(ibdev, port, &attr);
523 if (err)
524 goto out;
525
526 tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
527 ~props->clr_port_cap_mask;
528
529 err = mlx5_set_port_caps(&dev->mdev, port, tmp);
530
531out:
532 mutex_unlock(&dev->cap_mask_mutex);
533 return err;
534}
535
536static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
537 struct ib_udata *udata)
538{
539 struct mlx5_ib_dev *dev = to_mdev(ibdev);
540 struct mlx5_ib_alloc_ucontext_req req;
541 struct mlx5_ib_alloc_ucontext_resp resp;
542 struct mlx5_ib_ucontext *context;
543 struct mlx5_uuar_info *uuari;
544 struct mlx5_uar *uars;
545 int num_uars;
546 int uuarn;
547 int err;
548 int i;
549
550 if (!dev->ib_active)
551 return ERR_PTR(-EAGAIN);
552
553 err = ib_copy_from_udata(&req, udata, sizeof(req));
554 if (err)
555 return ERR_PTR(err);
556
557 if (req.total_num_uuars > MLX5_MAX_UUARS)
558 return ERR_PTR(-ENOMEM);
559
560 if (req.total_num_uuars == 0)
561 return ERR_PTR(-EINVAL);
562
563 req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
564 if (req.num_low_latency_uuars > req.total_num_uuars - 1)
565 return ERR_PTR(-EINVAL);
566
567 num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
568 resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
569 resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
570 resp.cache_line_size = L1_CACHE_BYTES;
571 resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
572 resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
573 resp.max_send_wqebb = dev->mdev.caps.max_wqes;
574 resp.max_recv_wr = dev->mdev.caps.max_wqes;
575 resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
576
577 context = kzalloc(sizeof(*context), GFP_KERNEL);
578 if (!context)
579 return ERR_PTR(-ENOMEM);
580
581 uuari = &context->uuari;
582 mutex_init(&uuari->lock);
583 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
584 if (!uars) {
585 err = -ENOMEM;
586 goto out_ctx;
587 }
588
589 uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
590 sizeof(*uuari->bitmap),
591 GFP_KERNEL);
592 if (!uuari->bitmap) {
593 err = -ENOMEM;
594 goto out_uar_ctx;
595 }
596 /*
597 * clear all fast path uuars
598 */
599 for (i = 0; i < req.total_num_uuars; i++) {
600 uuarn = i & 3;
601 if (uuarn == 2 || uuarn == 3)
602 set_bit(i, uuari->bitmap);
603 }
604
605 uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
606 if (!uuari->count) {
607 err = -ENOMEM;
608 goto out_bitmap;
609 }
610
611 for (i = 0; i < num_uars; i++) {
612 err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
613 if (err)
614 goto out_count;
615 }
616
617 INIT_LIST_HEAD(&context->db_page_list);
618 mutex_init(&context->db_page_mutex);
619
620 resp.tot_uuars = req.total_num_uuars;
621 resp.num_ports = dev->mdev.caps.num_ports;
622 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
623 if (err)
624 goto out_uars;
625
626 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
627 uuari->uars = uars;
628 uuari->num_uars = num_uars;
629 return &context->ibucontext;
630
631out_uars:
632 for (i--; i >= 0; i--)
633 mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
634out_count:
635 kfree(uuari->count);
636
637out_bitmap:
638 kfree(uuari->bitmap);
639
640out_uar_ctx:
641 kfree(uars);
642
643out_ctx:
644 kfree(context);
645 return ERR_PTR(err);
646}
647
648static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
649{
650 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
651 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
652 struct mlx5_uuar_info *uuari = &context->uuari;
653 int i;
654
655 for (i = 0; i < uuari->num_uars; i++) {
656 if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index))
657 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
658 }
659
660 kfree(uuari->count);
661 kfree(uuari->bitmap);
662 kfree(uuari->uars);
663 kfree(context);
664
665 return 0;
666}
667
668static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
669{
670 return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index;
671}
672
673static int get_command(unsigned long offset)
674{
675 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
676}
677
678static int get_arg(unsigned long offset)
679{
680 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
681}
682
683static int get_index(unsigned long offset)
684{
685 return get_arg(offset);
686}
687
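/*
 * The mmap offset is expected to pack a command in the bits of vm_pgoff
 * above MLX5_IB_MMAP_CMD_SHIFT and an argument (here a UAR index) below it.
 * A hypothetical user-space caller would therefore map a UAR page roughly
 * like this (sketch; the library side is not part of this file):
 *
 *	off_t offset = (((off_t)MLX5_IB_MMAP_REGULAR_PAGE <<
 *			 MLX5_IB_MMAP_CMD_SHIFT) | uar_idx) * page_size;
 *	void *uar = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, offset);
 */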
688static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
689{
690 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
691 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
692 struct mlx5_uuar_info *uuari = &context->uuari;
693 unsigned long command;
694 unsigned long idx;
695 phys_addr_t pfn;
696
697 command = get_command(vma->vm_pgoff);
698 switch (command) {
699 case MLX5_IB_MMAP_REGULAR_PAGE:
700 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
701 return -EINVAL;
702
703 idx = get_index(vma->vm_pgoff);
704		if (idx >= uuari->num_uars)
705			return -EINVAL;
706
707		pfn = uar_index2pfn(dev, uuari->uars[idx].index);
708		mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
709			    (unsigned long long)pfn);
710
711 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
712 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
713 PAGE_SIZE, vma->vm_page_prot))
714 return -EAGAIN;
715
716 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
717 vma->vm_start,
718 (unsigned long long)pfn << PAGE_SHIFT);
719 break;
720
721 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
722 return -ENOSYS;
723
724 default:
725 return -EINVAL;
726 }
727
728 return 0;
729}
730
731static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
732{
733 struct mlx5_create_mkey_mbox_in *in;
734 struct mlx5_mkey_seg *seg;
735 struct mlx5_core_mr mr;
736 int err;
737
738 in = kzalloc(sizeof(*in), GFP_KERNEL);
739 if (!in)
740 return -ENOMEM;
741
742 seg = &in->seg;
743 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
744 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
745 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
746 seg->start_addr = 0;
747
748 err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
749 if (err) {
750 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
751 goto err_in;
752 }
753
754 kfree(in);
755 *key = mr.key;
756
757 return 0;
758
759err_in:
760 kfree(in);
761
762 return err;
763}
764
765static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
766{
767 struct mlx5_core_mr mr;
768 int err;
769
770 memset(&mr, 0, sizeof(mr));
771 mr.key = key;
772 err = mlx5_core_destroy_mkey(&dev->mdev, &mr);
773 if (err)
774 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
775}
776
777static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
778 struct ib_ucontext *context,
779 struct ib_udata *udata)
780{
781 struct mlx5_ib_alloc_pd_resp resp;
782 struct mlx5_ib_pd *pd;
783 int err;
784
785 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
786 if (!pd)
787 return ERR_PTR(-ENOMEM);
788
789 err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn);
790 if (err) {
791 kfree(pd);
792 return ERR_PTR(err);
793 }
794
795 if (context) {
796 resp.pdn = pd->pdn;
797 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
798 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
799 kfree(pd);
800 return ERR_PTR(-EFAULT);
801 }
802 } else {
803 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
804 if (err) {
805 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
806 kfree(pd);
807 return ERR_PTR(err);
808 }
809 }
810
811 return &pd->ibpd;
812}
813
814static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
815{
816 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
817 struct mlx5_ib_pd *mpd = to_mpd(pd);
818
819 if (!pd->uobject)
820 free_pa_mkey(mdev, mpd->pa_lkey);
821
822 mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn);
823 kfree(mpd);
824
825 return 0;
826}
827
828static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
829{
830 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
831 int err;
832
833 err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
834 if (err)
835 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
836 ibqp->qp_num, gid->raw);
837
838 return err;
839}
840
841static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
842{
843 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
844 int err;
845
846 err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
847 if (err)
848 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
849 ibqp->qp_num, gid->raw);
850
851 return err;
852}
853
854static int init_node_data(struct mlx5_ib_dev *dev)
855{
856 struct ib_smp *in_mad = NULL;
857 struct ib_smp *out_mad = NULL;
858 int err = -ENOMEM;
859
860 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
861 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
862 if (!in_mad || !out_mad)
863 goto out;
864
865 init_query_mad(in_mad);
866 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
867
868 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
869 if (err)
870 goto out;
871
872 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
873
874 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
875
876 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
877 if (err)
878 goto out;
879
880 dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32));
881 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
882
883out:
884 kfree(in_mad);
885 kfree(out_mad);
886 return err;
887}
888
889static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
890 char *buf)
891{
892 struct mlx5_ib_dev *dev =
893 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
894
895 return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages);
896}
897
898static ssize_t show_reg_pages(struct device *device,
899 struct device_attribute *attr, char *buf)
900{
901 struct mlx5_ib_dev *dev =
902 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
903
904 return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages);
905}
906
907static ssize_t show_hca(struct device *device, struct device_attribute *attr,
908 char *buf)
909{
910 struct mlx5_ib_dev *dev =
911 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
912 return sprintf(buf, "MT%d\n", dev->mdev.pdev->device);
913}
914
915static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
916 char *buf)
917{
918 struct mlx5_ib_dev *dev =
919 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
920 return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev),
921 fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev));
922}
923
924static ssize_t show_rev(struct device *device, struct device_attribute *attr,
925 char *buf)
926{
927 struct mlx5_ib_dev *dev =
928 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
929 return sprintf(buf, "%x\n", dev->mdev.rev_id);
930}
931
932static ssize_t show_board(struct device *device, struct device_attribute *attr,
933 char *buf)
934{
935 struct mlx5_ib_dev *dev =
936 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
937 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
938 dev->mdev.board_id);
939}
940
941static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
942static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
943static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
944static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
945static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
946static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
947
948static struct device_attribute *mlx5_class_attributes[] = {
949 &dev_attr_hw_rev,
950 &dev_attr_fw_ver,
951 &dev_attr_hca_type,
952 &dev_attr_board_id,
953 &dev_attr_fw_pages,
954 &dev_attr_reg_pages,
955};
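/*
 * These attributes are registered per device in init_one() below and
 * typically appear under /sys/class/infiniband/mlx5_<n>/, e.g. fw_ver,
 * hw_rev, board_id, fw_pages and reg_pages.
 */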
956
957static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
958 void *data)
959{
960 struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
961 struct ib_event ibev;
962 u8 port = 0;
963
964 switch (event) {
965 case MLX5_DEV_EVENT_SYS_ERROR:
966 ibdev->ib_active = false;
967 ibev.event = IB_EVENT_DEVICE_FATAL;
968 break;
969
970 case MLX5_DEV_EVENT_PORT_UP:
971 ibev.event = IB_EVENT_PORT_ACTIVE;
972 port = *(u8 *)data;
973 break;
974
975 case MLX5_DEV_EVENT_PORT_DOWN:
976 ibev.event = IB_EVENT_PORT_ERR;
977 port = *(u8 *)data;
978 break;
979
980 case MLX5_DEV_EVENT_PORT_INITIALIZED:
981 /* not used by ULPs */
982 return;
983
984 case MLX5_DEV_EVENT_LID_CHANGE:
985 ibev.event = IB_EVENT_LID_CHANGE;
986 port = *(u8 *)data;
987 break;
988
989 case MLX5_DEV_EVENT_PKEY_CHANGE:
990 ibev.event = IB_EVENT_PKEY_CHANGE;
991 port = *(u8 *)data;
992 break;
993
994 case MLX5_DEV_EVENT_GUID_CHANGE:
995 ibev.event = IB_EVENT_GID_CHANGE;
996 port = *(u8 *)data;
997 break;
998
999 case MLX5_DEV_EVENT_CLIENT_REREG:
1000 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1001 port = *(u8 *)data;
1002 break;
1003 }
1004
1005 ibev.device = &ibdev->ib_dev;
1006 ibev.element.port_num = port;
1007
1008 if (ibdev->ib_active)
1009 ib_dispatch_event(&ibev);
1010}
1011
1012static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1013{
1014 int port;
1015
1016 for (port = 1; port <= dev->mdev.caps.num_ports; port++)
1017 mlx5_query_ext_port_caps(dev, port);
1018}
1019
1020static int get_port_caps(struct mlx5_ib_dev *dev)
1021{
1022 struct ib_device_attr *dprops = NULL;
1023 struct ib_port_attr *pprops = NULL;
1024	int err = -ENOMEM;
1025 int port;
1026
1027 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1028 if (!pprops)
1029 goto out;
1030
1031 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1032 if (!dprops)
1033 goto out;
1034
1035 err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1036 if (err) {
1037 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1038 goto out;
1039 }
1040
1041 for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
1042 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1043 if (err) {
1044 mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
1045 break;
1046 }
1047 dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
1048 dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
1049 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1050 dprops->max_pkeys, pprops->gid_tbl_len);
1051 }
1052
1053out:
1054 kfree(pprops);
1055 kfree(dprops);
1056
1057 return err;
1058}
1059
1060static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1061{
1062 int err;
1063
1064 err = mlx5_mr_cache_cleanup(dev);
1065 if (err)
1066 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1067
1068 mlx5_ib_destroy_qp(dev->umrc.qp);
1069 ib_destroy_cq(dev->umrc.cq);
1070 ib_dereg_mr(dev->umrc.mr);
1071 ib_dealloc_pd(dev->umrc.pd);
1072}
1073
1074enum {
1075 MAX_UMR_WR = 128,
1076};
1077
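/*
 * Set up the resources used to post UMR (user memory registration) work
 * requests: a PD, a DMA MR, a CQ and a special MLX5_IB_QPT_REG_UMR QP that
 * is driven through INIT/RTR/RTS below, plus the MR cache.  Torn down by
 * destroy_umrc_res() above.
 */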
1078static int create_umr_res(struct mlx5_ib_dev *dev)
1079{
1080 struct ib_qp_init_attr *init_attr = NULL;
1081 struct ib_qp_attr *attr = NULL;
1082 struct ib_pd *pd;
1083 struct ib_cq *cq;
1084 struct ib_qp *qp;
1085 struct ib_mr *mr;
1086 int ret;
1087
1088 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1089 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
1090 if (!attr || !init_attr) {
1091 ret = -ENOMEM;
1092 goto error_0;
1093 }
1094
1095 pd = ib_alloc_pd(&dev->ib_dev);
1096 if (IS_ERR(pd)) {
1097 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1098 ret = PTR_ERR(pd);
1099 goto error_0;
1100 }
1101
1102 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1103 if (IS_ERR(mr)) {
1104 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1105 ret = PTR_ERR(mr);
1106 goto error_1;
1107 }
1108
1109 cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
1110 0);
1111 if (IS_ERR(cq)) {
1112 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1113 ret = PTR_ERR(cq);
1114 goto error_2;
1115 }
1116 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1117
1118 init_attr->send_cq = cq;
1119 init_attr->recv_cq = cq;
1120 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1121 init_attr->cap.max_send_wr = MAX_UMR_WR;
1122 init_attr->cap.max_send_sge = 1;
1123 init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
1124 init_attr->port_num = 1;
1125 qp = mlx5_ib_create_qp(pd, init_attr, NULL);
1126 if (IS_ERR(qp)) {
1127 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
1128 ret = PTR_ERR(qp);
1129 goto error_3;
1130 }
1131 qp->device = &dev->ib_dev;
1132 qp->real_qp = qp;
1133 qp->uobject = NULL;
1134 qp->qp_type = MLX5_IB_QPT_REG_UMR;
1135
1136 attr->qp_state = IB_QPS_INIT;
1137 attr->port_num = 1;
1138 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
1139 IB_QP_PORT, NULL);
1140 if (ret) {
1141 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
1142 goto error_4;
1143 }
1144
1145 memset(attr, 0, sizeof(*attr));
1146 attr->qp_state = IB_QPS_RTR;
1147 attr->path_mtu = IB_MTU_256;
1148
1149 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1150 if (ret) {
1151 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
1152 goto error_4;
1153 }
1154
1155 memset(attr, 0, sizeof(*attr));
1156 attr->qp_state = IB_QPS_RTS;
1157 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1158 if (ret) {
1159 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
1160 goto error_4;
1161 }
1162
1163 dev->umrc.qp = qp;
1164 dev->umrc.cq = cq;
1165 dev->umrc.mr = mr;
1166 dev->umrc.pd = pd;
1167
1168 sema_init(&dev->umrc.sem, MAX_UMR_WR);
1169 ret = mlx5_mr_cache_init(dev);
1170 if (ret) {
1171 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1172 goto error_4;
1173 }
1174
1175 kfree(attr);
1176 kfree(init_attr);
1177
1178 return 0;
1179
1180error_4:
1181 mlx5_ib_destroy_qp(qp);
1182
1183error_3:
1184 ib_destroy_cq(cq);
1185
1186error_2:
1187 ib_dereg_mr(mr);
1188
1189error_1:
1190 ib_dealloc_pd(pd);
1191
1192error_0:
1193 kfree(attr);
1194 kfree(init_attr);
1195 return ret;
1196}
1197
1198static int create_dev_resources(struct mlx5_ib_resources *devr)
1199{
1200 struct ib_srq_init_attr attr;
1201 struct mlx5_ib_dev *dev;
1202 int ret = 0;
1203
1204 dev = container_of(devr, struct mlx5_ib_dev, devr);
1205
1206 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1207 if (IS_ERR(devr->p0)) {
1208 ret = PTR_ERR(devr->p0);
1209 goto error0;
1210 }
1211 devr->p0->device = &dev->ib_dev;
1212 devr->p0->uobject = NULL;
1213 atomic_set(&devr->p0->usecnt, 0);
1214
1215 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
1216 if (IS_ERR(devr->c0)) {
1217 ret = PTR_ERR(devr->c0);
1218 goto error1;
1219 }
1220 devr->c0->device = &dev->ib_dev;
1221 devr->c0->uobject = NULL;
1222 devr->c0->comp_handler = NULL;
1223 devr->c0->event_handler = NULL;
1224 devr->c0->cq_context = NULL;
1225 atomic_set(&devr->c0->usecnt, 0);
1226
1227 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1228 if (IS_ERR(devr->x0)) {
1229 ret = PTR_ERR(devr->x0);
1230 goto error2;
1231 }
1232 devr->x0->device = &dev->ib_dev;
1233 devr->x0->inode = NULL;
1234 atomic_set(&devr->x0->usecnt, 0);
1235 mutex_init(&devr->x0->tgt_qp_mutex);
1236 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1237
1238 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1239 if (IS_ERR(devr->x1)) {
1240 ret = PTR_ERR(devr->x1);
1241 goto error3;
1242 }
1243 devr->x1->device = &dev->ib_dev;
1244 devr->x1->inode = NULL;
1245 atomic_set(&devr->x1->usecnt, 0);
1246 mutex_init(&devr->x1->tgt_qp_mutex);
1247 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1248
1249 memset(&attr, 0, sizeof(attr));
1250 attr.attr.max_sge = 1;
1251 attr.attr.max_wr = 1;
1252 attr.srq_type = IB_SRQT_XRC;
1253 attr.ext.xrc.cq = devr->c0;
1254 attr.ext.xrc.xrcd = devr->x0;
1255
1256 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1257 if (IS_ERR(devr->s0)) {
1258 ret = PTR_ERR(devr->s0);
1259 goto error4;
1260 }
1261 devr->s0->device = &dev->ib_dev;
1262 devr->s0->pd = devr->p0;
1263 devr->s0->uobject = NULL;
1264 devr->s0->event_handler = NULL;
1265 devr->s0->srq_context = NULL;
1266 devr->s0->srq_type = IB_SRQT_XRC;
1267 devr->s0->ext.xrc.xrcd = devr->x0;
1268 devr->s0->ext.xrc.cq = devr->c0;
1269 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1270 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1271 atomic_inc(&devr->p0->usecnt);
1272 atomic_set(&devr->s0->usecnt, 0);
1273
1274 return 0;
1275
1276error4:
1277 mlx5_ib_dealloc_xrcd(devr->x1);
1278error3:
1279 mlx5_ib_dealloc_xrcd(devr->x0);
1280error2:
1281 mlx5_ib_destroy_cq(devr->c0);
1282error1:
1283 mlx5_ib_dealloc_pd(devr->p0);
1284error0:
1285 return ret;
1286}
1287
1288static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1289{
1290 mlx5_ib_destroy_srq(devr->s0);
1291 mlx5_ib_dealloc_xrcd(devr->x0);
1292 mlx5_ib_dealloc_xrcd(devr->x1);
1293 mlx5_ib_destroy_cq(devr->c0);
1294 mlx5_ib_dealloc_pd(devr->p0);
1295}
1296
1297static int init_one(struct pci_dev *pdev,
1298 const struct pci_device_id *id)
1299{
1300 struct mlx5_core_dev *mdev;
1301 struct mlx5_ib_dev *dev;
1302 int err;
1303 int i;
1304
1305 printk_once(KERN_INFO "%s", mlx5_version);
1306
1307 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
1308 if (!dev)
1309 return -ENOMEM;
1310
1311 mdev = &dev->mdev;
1312 mdev->event = mlx5_ib_event;
1313 if (prof_sel >= ARRAY_SIZE(profile)) {
1314		pr_warn("selected profile out of range, selecting default\n");
1315 prof_sel = 0;
1316 }
1317 mdev->profile = &profile[prof_sel];
1318 err = mlx5_dev_init(mdev, pdev);
1319 if (err)
1320 goto err_free;
1321
1322 err = get_port_caps(dev);
1323 if (err)
1324 goto err_cleanup;
1325
1326 get_ext_port_caps(dev);
1327
1328 err = alloc_comp_eqs(dev);
1329 if (err)
1330 goto err_cleanup;
1331
1332 MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
1333
1334 strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
1335 dev->ib_dev.owner = THIS_MODULE;
1336 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1337 dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
1338 dev->num_ports = mdev->caps.num_ports;
1339 dev->ib_dev.phys_port_cnt = dev->num_ports;
1340 dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
1341 dev->ib_dev.dma_device = &mdev->pdev->dev;
1342
1343 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
1344 dev->ib_dev.uverbs_cmd_mask =
1345 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1346 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
1347 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
1348 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
1349 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
1350 (1ull << IB_USER_VERBS_CMD_REG_MR) |
1351 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1352 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1353 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1354 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
1355 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1356 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1357 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1358 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
1359 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1360 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1361 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1362 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1363 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1364 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1365 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
1366 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
1367 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1368
1369 dev->ib_dev.query_device = mlx5_ib_query_device;
1370 dev->ib_dev.query_port = mlx5_ib_query_port;
1371 dev->ib_dev.query_gid = mlx5_ib_query_gid;
1372 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
1373 dev->ib_dev.modify_device = mlx5_ib_modify_device;
1374 dev->ib_dev.modify_port = mlx5_ib_modify_port;
1375 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
1376 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
1377 dev->ib_dev.mmap = mlx5_ib_mmap;
1378 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
1379 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
1380 dev->ib_dev.create_ah = mlx5_ib_create_ah;
1381 dev->ib_dev.query_ah = mlx5_ib_query_ah;
1382 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
1383 dev->ib_dev.create_srq = mlx5_ib_create_srq;
1384 dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
1385 dev->ib_dev.query_srq = mlx5_ib_query_srq;
1386 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
1387 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
1388 dev->ib_dev.create_qp = mlx5_ib_create_qp;
1389 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
1390 dev->ib_dev.query_qp = mlx5_ib_query_qp;
1391 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
1392 dev->ib_dev.post_send = mlx5_ib_post_send;
1393 dev->ib_dev.post_recv = mlx5_ib_post_recv;
1394 dev->ib_dev.create_cq = mlx5_ib_create_cq;
1395 dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
1396 dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
1397 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
1398 dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
1399 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
1400 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
1401 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
1402 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
1403 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
1404 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
1405 dev->ib_dev.process_mad = mlx5_ib_process_mad;
1406 dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
1407 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
1408 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
1409
1410 if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
1411 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
1412 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
1413 dev->ib_dev.uverbs_cmd_mask |=
1414 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1415 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1416 }
1417
1418 err = init_node_data(dev);
1419 if (err)
1420 goto err_eqs;
1421
1422 mutex_init(&dev->cap_mask_mutex);
1423 spin_lock_init(&dev->mr_lock);
1424
1425 err = create_dev_resources(&dev->devr);
1426 if (err)
1427 goto err_eqs;
1428
1429	err = ib_register_device(&dev->ib_dev, NULL);
1430	if (err)
1431		goto err_rsrc;
1432 err = create_umr_res(dev);
1433 if (err)
1434 goto err_dev;
1435
1436	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
1437		err = device_create_file(&dev->ib_dev.dev,
1438					 mlx5_class_attributes[i]);
1439		if (err)
1440			goto err_umrc;
1441	}
1442 dev->ib_active = true;
1443
1444 return 0;
1445
1446err_umrc:
1447 destroy_umrc_res(dev);
1448
1449err_dev:
1450 ib_unregister_device(&dev->ib_dev);
1451
1452err_rsrc:
1453 destroy_dev_resources(&dev->devr);
1454
1455err_eqs:
1456 free_comp_eqs(dev);
1457
1458err_cleanup:
1459 mlx5_dev_cleanup(mdev);
1460
1461err_free:
1462 ib_dealloc_device((struct ib_device *)dev);
1463
1464 return err;
1465}
1466
1467static void remove_one(struct pci_dev *pdev)
1468{
1469 struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
1470
1471 destroy_umrc_res(dev);
1472 ib_unregister_device(&dev->ib_dev);
1473 destroy_dev_resources(&dev->devr);
1474 free_comp_eqs(dev);
1475 mlx5_dev_cleanup(&dev->mdev);
1476 ib_dealloc_device(&dev->ib_dev);
1477}
1478
1479static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = {
1480 { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
1481 { 0, }
1482};
1483
1484MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table);
1485
1486static struct pci_driver mlx5_ib_driver = {
1487 .name = DRIVER_NAME,
1488 .id_table = mlx5_ib_pci_table,
1489 .probe = init_one,
1490 .remove = remove_one
1491};
1492
1493static int __init mlx5_ib_init(void)
1494{
1495 return pci_register_driver(&mlx5_ib_driver);
1496}
1497
1498static void __exit mlx5_ib_cleanup(void)
1499{
1500 pci_unregister_driver(&mlx5_ib_driver);
1501}
1502
1503module_init(mlx5_ib_init);
1504module_exit(mlx5_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644
index 000000000000..3a5322870b96
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -0,0 +1,162 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36
37/* @umem: umem object to scan
38 * @addr: ib virtual address requested by the user
39 * @count: number of PAGE_SIZE pages covered by umem
40 * @shift: page shift for the compound pages found in the region
41 * @ncont: number of compound pages
42 * @order: log2 of the number of compound pages
43 */
44void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
45 int *ncont, int *order)
46{
47 struct ib_umem_chunk *chunk;
48 unsigned long tmp;
49 unsigned long m;
50 int i, j, k;
51 u64 base = 0;
52 int p = 0;
53 int skip;
54 int mask;
55 u64 len;
56 u64 pfn;
57
58 addr = addr >> PAGE_SHIFT;
59 tmp = (unsigned long)addr;
60 m = find_first_bit(&tmp, sizeof(tmp));
61 skip = 1 << m;
62 mask = skip - 1;
63 i = 0;
64 list_for_each_entry(chunk, &umem->chunk_list, list)
65 for (j = 0; j < chunk->nmap; j++) {
66 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
67 pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
68 for (k = 0; k < len; k++) {
69 if (!(i & mask)) {
70 tmp = (unsigned long)pfn;
71 m = min(m, find_first_bit(&tmp, sizeof(tmp)));
72 skip = 1 << m;
73 mask = skip - 1;
74 base = pfn;
75 p = 0;
76 } else {
77 if (base + p != pfn) {
78 tmp = (unsigned long)p;
79 m = find_first_bit(&tmp, sizeof(tmp));
80 skip = 1 << m;
81 mask = skip - 1;
82 base = pfn;
83 p = 0;
84 }
85 }
86 p++;
87 i++;
88 }
89 }
90
91 if (i) {
92 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
93
94 if (order)
95 *order = ilog2(roundup_pow_of_two(i) >> m);
96
97 *ncont = DIV_ROUND_UP(i, (1 << m));
98 } else {
99 m = 0;
100
101 if (order)
102 *order = 0;
103
104 *ncont = 0;
105 }
106 *shift = PAGE_SHIFT + m;
107 *count = i;
108}
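To make the scan above concrete, a hedged illustration of the outputs (the 2 MB region and its alignment are assumptions made for the example, not guarantees made by this patch):

	/*
	 * A 2 MB user region that happens to be physically contiguous and
	 * 2 MB aligned consists of 512 native 4 KB pages, so the scan yields
	 * count = 512 (PAGE_SIZE pages), shift = 21 (one 2 MB compound page),
	 * ncont = 1 and order = 0 (log2 of the compound page count).
	 * If the same region were fully scattered, m would typically collapse
	 * to 0, giving shift = PAGE_SHIFT, ncont = count = 512 and order = 9.
	 */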
109
110void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
111 int page_shift, __be64 *pas, int umr)
112{
113 int shift = page_shift - PAGE_SHIFT;
114 int mask = (1 << shift) - 1;
115 struct ib_umem_chunk *chunk;
116 int i, j, k;
117 u64 cur = 0;
118 u64 base;
119 int len;
120
121 i = 0;
122 list_for_each_entry(chunk, &umem->chunk_list, list)
123 for (j = 0; j < chunk->nmap; j++) {
124 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
125 base = sg_dma_address(&chunk->page_list[j]);
126 for (k = 0; k < len; k++) {
127 if (!(i & mask)) {
128 cur = base + (k << PAGE_SHIFT);
129 if (umr)
130 cur |= 3;
131
132 pas[i >> shift] = cpu_to_be64(cur);
133 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
134 i >> shift, be64_to_cpu(pas[i >> shift]));
135 } else
136 mlx5_ib_dbg(dev, "=====> 0x%llx\n",
137 base + (k << PAGE_SHIFT));
138 i++;
139 }
140 }
141}
142
143int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
144{
145 u64 page_size;
146 u64 page_mask;
147 u64 off_size;
148 u64 off_mask;
149 u64 buf_off;
150
151 page_size = 1 << page_shift;
152 page_mask = page_size - 1;
153 buf_off = addr & page_mask;
154 off_size = page_size >> 6;
155 off_mask = off_size - 1;
156
157 if (buf_off & off_mask)
158 return -EINVAL;
159
160 *offset = buf_off >> ilog2(off_size);
161 return 0;
162}
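A short worked example of the offset encoding above (numbers are hypothetical): with page_shift = 12 the page size is 4096 and off_size = 4096 >> 6 = 64, so a buffer starting 128 bytes into its page yields *offset = 128 >> 6 = 2, while one starting 100 bytes in fails the off_mask test (100 & 63 = 36) and returns -EINVAL. In other words, the offset is reported in 64-byte units and the buffer must begin on a 64-byte boundary within the page.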
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644
index 000000000000..836be9157242
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -0,0 +1,545 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_H
34#define MLX5_IB_H
35
36#include <linux/kernel.h>
37#include <linux/sched.h>
38#include <rdma/ib_verbs.h>
39#include <rdma/ib_smi.h>
40#include <linux/mlx5/driver.h>
41#include <linux/mlx5/cq.h>
42#include <linux/mlx5/qp.h>
43#include <linux/mlx5/srq.h>
44#include <linux/types.h>
45
46#define mlx5_ib_dbg(dev, format, arg...) \
47pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
48 __LINE__, current->pid, ##arg)
49
50#define mlx5_ib_err(dev, format, arg...) \
51pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
52 __LINE__, current->pid, ##arg)
53
54#define mlx5_ib_warn(dev, format, arg...) \
55pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
56 __LINE__, current->pid, ##arg)
57
58enum {
59 MLX5_IB_MMAP_CMD_SHIFT = 8,
60 MLX5_IB_MMAP_CMD_MASK = 0xff,
61};
62
63enum mlx5_ib_mmap_cmd {
64 MLX5_IB_MMAP_REGULAR_PAGE = 0,
65 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
66};
67
68enum {
69 MLX5_RES_SCAT_DATA32_CQE = 0x1,
70 MLX5_RES_SCAT_DATA64_CQE = 0x2,
71 MLX5_REQ_SCAT_DATA32_CQE = 0x11,
72 MLX5_REQ_SCAT_DATA64_CQE = 0x22,
73};
74
75enum mlx5_ib_latency_class {
76 MLX5_IB_LATENCY_CLASS_LOW,
77 MLX5_IB_LATENCY_CLASS_MEDIUM,
78 MLX5_IB_LATENCY_CLASS_HIGH,
79 MLX5_IB_LATENCY_CLASS_FAST_PATH
80};
81
82enum mlx5_ib_mad_ifc_flags {
83 MLX5_MAD_IFC_IGNORE_MKEY = 1,
84 MLX5_MAD_IFC_IGNORE_BKEY = 2,
85 MLX5_MAD_IFC_NET_VIEW = 4,
86};
87
88struct mlx5_ib_ucontext {
89 struct ib_ucontext ibucontext;
90 struct list_head db_page_list;
91
92 /* protect doorbell record alloc/free
93 */
94 struct mutex db_page_mutex;
95 struct mlx5_uuar_info uuari;
96};
97
98static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
99{
100 return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
101}
102
103struct mlx5_ib_pd {
104 struct ib_pd ibpd;
105 u32 pdn;
106 u32 pa_lkey;
107};
108
109/* Use macros here so that we don't have to duplicate
110 * enum ib_send_flags and enum ib_qp_type for low-level driver
111 */
112
113#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
114#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
115#define MLX5_IB_WR_UMR IB_WR_RESERVED1
116
117struct wr_list {
118 u16 opcode;
119 u16 next;
120};
121
122struct mlx5_ib_wq {
123 u64 *wrid;
124 u32 *wr_data;
125 struct wr_list *w_list;
126 unsigned *wqe_head;
127 u16 unsig_count;
128
129 /* serialize post to the work queue
130 */
131 spinlock_t lock;
132 int wqe_cnt;
133 int max_post;
134 int max_gs;
135 int offset;
136 int wqe_shift;
137 unsigned head;
138 unsigned tail;
139 u16 cur_post;
140 u16 last_poll;
141 void *qend;
142};
143
144enum {
145 MLX5_QP_USER,
146 MLX5_QP_KERNEL,
147 MLX5_QP_EMPTY
148};
149
150struct mlx5_ib_qp {
151 struct ib_qp ibqp;
152 struct mlx5_core_qp mqp;
153 struct mlx5_buf buf;
154
155 struct mlx5_db db;
156 struct mlx5_ib_wq rq;
157
158 u32 doorbell_qpn;
159 u8 sq_signal_bits;
160 u8 fm_cache;
161 int sq_max_wqes_per_wr;
162 int sq_spare_wqes;
163 struct mlx5_ib_wq sq;
164
165 struct ib_umem *umem;
166 int buf_size;
167
168 /* serialize qp state modifications
169 */
170 struct mutex mutex;
171 u16 xrcdn;
172 u32 flags;
173 u8 port;
174 u8 alt_port;
175 u8 atomic_rd_en;
176 u8 resp_depth;
177 u8 state;
178 int mlx_type;
179 int wq_sig;
180 int scat_cqe;
181 int max_inline_data;
182 struct mlx5_bf *bf;
183 int has_rq;
184
185 /* only for user space QPs. For kernel
186 * we have it from the bf object
187 */
188 int uuarn;
189
190 int create_type;
191 u32 pa_lkey;
192};
193
194struct mlx5_ib_cq_buf {
195 struct mlx5_buf buf;
196 struct ib_umem *umem;
197 int cqe_size;
198};
199
200enum mlx5_ib_qp_flags {
201 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
202 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
203};
204
205struct mlx5_shared_mr_info {
206 int mr_id;
207 struct ib_umem *umem;
208};
209
210struct mlx5_ib_cq {
211 struct ib_cq ibcq;
212 struct mlx5_core_cq mcq;
213 struct mlx5_ib_cq_buf buf;
214 struct mlx5_db db;
215
216 /* serialize access to the CQ
217 */
218 spinlock_t lock;
219
220 /* protect resize cq
221 */
222 struct mutex resize_mutex;
223 struct mlx5_ib_cq_resize *resize_buf;
224 struct ib_umem *resize_umem;
225 int cqe_size;
226};
227
228struct mlx5_ib_srq {
229 struct ib_srq ibsrq;
230 struct mlx5_core_srq msrq;
231 struct mlx5_buf buf;
232 struct mlx5_db db;
233 u64 *wrid;
234	/* protect SRQ handling
235 */
236 spinlock_t lock;
237 int head;
238 int tail;
239 u16 wqe_ctr;
240 struct ib_umem *umem;
241 /* serialize arming a SRQ
242 */
243 struct mutex mutex;
244 int wq_sig;
245};
246
247struct mlx5_ib_xrcd {
248 struct ib_xrcd ibxrcd;
249 u32 xrcdn;
250};
251
252struct mlx5_ib_mr {
253 struct ib_mr ibmr;
254 struct mlx5_core_mr mmr;
255 struct ib_umem *umem;
256 struct mlx5_shared_mr_info *smr_info;
257 struct list_head list;
258 int order;
259 int umred;
260 __be64 *pas;
261 dma_addr_t dma;
262 int npages;
263 struct completion done;
264 enum ib_wc_status status;
265};
266
267struct mlx5_ib_fast_reg_page_list {
268 struct ib_fast_reg_page_list ibfrpl;
269 __be64 *mapped_page_list;
270 dma_addr_t map;
271};
272
273struct umr_common {
274 struct ib_pd *pd;
275 struct ib_cq *cq;
276 struct ib_qp *qp;
277 struct ib_mr *mr;
278 /* control access to UMR QP
279 */
280 struct semaphore sem;
281};
282
283enum {
284 MLX5_FMR_INVALID,
285 MLX5_FMR_VALID,
286 MLX5_FMR_BUSY,
287};
288
289struct mlx5_ib_fmr {
290 struct ib_fmr ibfmr;
291 struct mlx5_core_mr mr;
292 int access_flags;
293 int state;
294 /* protect fmr state
295 */
296 spinlock_t lock;
297 u64 wrid;
298 struct ib_send_wr wr[2];
299 u8 page_shift;
300 struct ib_fast_reg_page_list page_list;
301};
302
303struct mlx5_cache_ent {
304 struct list_head head;
305	/* sync access to the cache entry
306 */
307 spinlock_t lock;
308
309
310 struct dentry *dir;
311 char name[4];
312 u32 order;
313 u32 size;
314 u32 cur;
315 u32 miss;
316 u32 limit;
317
318 struct dentry *fsize;
319 struct dentry *fcur;
320 struct dentry *fmiss;
321 struct dentry *flimit;
322
323 struct mlx5_ib_dev *dev;
324 struct work_struct work;
325 struct delayed_work dwork;
326};
327
328struct mlx5_mr_cache {
329 struct workqueue_struct *wq;
330 struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
331 int stopped;
332 struct dentry *root;
333 unsigned long last_add;
334};
335
336struct mlx5_ib_resources {
337 struct ib_cq *c0;
338 struct ib_xrcd *x0;
339 struct ib_xrcd *x1;
340 struct ib_pd *p0;
341 struct ib_srq *s0;
342};
343
344struct mlx5_ib_dev {
345 struct ib_device ib_dev;
346 struct mlx5_core_dev mdev;
347 MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
348 struct list_head eqs_list;
349 int num_ports;
350 int num_comp_vectors;
351 /* serialize update of capability mask
352 */
353 struct mutex cap_mask_mutex;
354 bool ib_active;
355 struct umr_common umrc;
356 /* sync used page count stats
357 */
358 spinlock_t mr_lock;
359 struct mlx5_ib_resources devr;
360 struct mlx5_mr_cache cache;
361};
362
363static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
364{
365 return container_of(mcq, struct mlx5_ib_cq, mcq);
366}
367
368static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
369{
370 return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
371}
372
373static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
374{
375 return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
376}
377
378static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
379{
380 return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
381}
382
383static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
384{
385 return container_of(ibcq, struct mlx5_ib_cq, ibcq);
386}
387
388static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
389{
390 return container_of(mqp, struct mlx5_ib_qp, mqp);
391}
392
393static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
394{
395 return container_of(ibpd, struct mlx5_ib_pd, ibpd);
396}
397
398static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
399{
400 return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
401}
402
403static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
404{
405 return container_of(ibqp, struct mlx5_ib_qp, ibqp);
406}
407
408static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
409{
410 return container_of(msrq, struct mlx5_ib_srq, msrq);
411}
412
413static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
414{
415 return container_of(ibmr, struct mlx5_ib_mr, ibmr);
416}
417
418static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
419{
420 return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
421}
422
423struct mlx5_ib_ah {
424 struct ib_ah ibah;
425 struct mlx5_av av;
426};
427
428static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
429{
430 return container_of(ibah, struct mlx5_ib_ah, ibah);
431}
432
433static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)
434{
435 return container_of(dev, struct mlx5_ib_dev, mdev);
436}
437
438static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)
439{
440 return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
441}
442
443int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
444 struct mlx5_db *db);
445void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
446void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
447void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
448void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
449int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
450 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
451 void *in_mad, void *response_mad);
452struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
453 struct mlx5_ib_ah *ah);
454struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
455int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
456int mlx5_ib_destroy_ah(struct ib_ah *ah);
457struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
458 struct ib_srq_init_attr *init_attr,
459 struct ib_udata *udata);
460int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
461 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
462int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
463int mlx5_ib_destroy_srq(struct ib_srq *srq);
464int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
465 struct ib_recv_wr **bad_wr);
466struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
467 struct ib_qp_init_attr *init_attr,
468 struct ib_udata *udata);
469int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
470 int attr_mask, struct ib_udata *udata);
471int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
472 struct ib_qp_init_attr *qp_init_attr);
473int mlx5_ib_destroy_qp(struct ib_qp *qp);
474int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
475 struct ib_send_wr **bad_wr);
476int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
477 struct ib_recv_wr **bad_wr);
478void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
479struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
480 int vector, struct ib_ucontext *context,
481 struct ib_udata *udata);
482int mlx5_ib_destroy_cq(struct ib_cq *cq);
483int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
485int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
486int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
487struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
488struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
489 u64 virt_addr, int access_flags,
490 struct ib_udata *udata);
491int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
492struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
493 int max_page_list_len);
494struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
495 int page_list_len);
496void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
497struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
498 struct ib_fmr_attr *fmr_attr);
499int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
500 int npages, u64 iova);
501int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
502int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
503int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
504 struct ib_wc *in_wc, struct ib_grh *in_grh,
505 struct ib_mad *in_mad, struct ib_mad *out_mad);
506struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
507 struct ib_ucontext *context,
508 struct ib_udata *udata);
509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
510int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
511int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
512int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
513int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
514 struct ib_port_attr *props);
515int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
516void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
517void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
518 int *ncont, int *order);
519void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
520 int page_shift, __be64 *pas, int umr);
521void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
522int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
523int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
524int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
525int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
526void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
527
528static inline void init_query_mad(struct ib_smp *mad)
529{
530 mad->base_version = 1;
531 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
532 mad->class_version = 1;
533 mad->method = IB_MGMT_METHOD_GET;
534}
535
536static inline u8 convert_access(int acc)
537{
538 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
539 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
540 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
541 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
542 MLX5_PERM_LOCAL_READ;
543}
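As a hedged illustration of the mapping above (the flag combination is arbitrary):

	/* convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ)
	 *	== MLX5_PERM_LOCAL_WRITE | MLX5_PERM_REMOTE_READ | MLX5_PERM_LOCAL_READ
	 * Local read permission is always granted, even when not requested.
	 */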
544
545#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644
index 000000000000..bd41df95b6f0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -0,0 +1,1007 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/kref.h>
35#include <linux/random.h>
36#include <linux/debugfs.h>
37#include <linux/export.h>
38#include <rdma/ib_umem.h>
39#include "mlx5_ib.h"
40
41enum {
42 DEF_CACHE_SIZE = 10,
43};
44
45static __be64 *mr_align(__be64 *ptr, int align)
46{
47 unsigned long mask = align - 1;
48
49 return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
50}
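For example, mr_align(mr->pas, 0x40) rounds the kmalloc'ed translation array up to the next 64-byte boundary; this is why add_keys() below over-allocates by 0x3f bytes and hands the aligned pointer, rather than mr->pas itself, to dma_map_single().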
51
52static int order2idx(struct mlx5_ib_dev *dev, int order)
53{
54 struct mlx5_mr_cache *cache = &dev->cache;
55
56 if (order < cache->ent[0].order)
57 return 0;
58 else
59 return order - cache->ent[0].order;
60}
61
62static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
63{
64 struct device *ddev = dev->ib_dev.dma_device;
65 struct mlx5_mr_cache *cache = &dev->cache;
66 struct mlx5_cache_ent *ent = &cache->ent[c];
67 struct mlx5_create_mkey_mbox_in *in;
68 struct mlx5_ib_mr *mr;
69 int npages = 1 << ent->order;
70 int size = sizeof(u64) * npages;
71 int err = 0;
72 int i;
73
74 in = kzalloc(sizeof(*in), GFP_KERNEL);
75 if (!in)
76 return -ENOMEM;
77
78 for (i = 0; i < num; i++) {
79 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
80 if (!mr) {
81 err = -ENOMEM;
82 goto out;
83 }
84 mr->order = ent->order;
85 mr->umred = 1;
86 mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
87 if (!mr->pas) {
88 kfree(mr);
89 err = -ENOMEM;
90 goto out;
91 }
92 mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
93 DMA_TO_DEVICE);
94 if (dma_mapping_error(ddev, mr->dma)) {
95 kfree(mr->pas);
96 kfree(mr);
97 err = -ENOMEM;
98 goto out;
99 }
100
101		in->seg.status = 1 << 6; /* free */
102 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
103 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
104 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
105 in->seg.log2_page_size = 12;
106
107 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
108 sizeof(*in));
109 if (err) {
110 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
111 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
112 kfree(mr->pas);
113 kfree(mr);
114 goto out;
115 }
116 cache->last_add = jiffies;
117
118 spin_lock(&ent->lock);
119 list_add_tail(&mr->list, &ent->head);
120 ent->cur++;
121 ent->size++;
122 spin_unlock(&ent->lock);
123 }
124
125out:
126 kfree(in);
127 return err;
128}
129
130static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
131{
132 struct device *ddev = dev->ib_dev.dma_device;
133 struct mlx5_mr_cache *cache = &dev->cache;
134 struct mlx5_cache_ent *ent = &cache->ent[c];
135 struct mlx5_ib_mr *mr;
136 int size;
137 int err;
138 int i;
139
140 for (i = 0; i < num; i++) {
141 spin_lock(&ent->lock);
142 if (list_empty(&ent->head)) {
143 spin_unlock(&ent->lock);
144 return;
145 }
146 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
147 list_del(&mr->list);
148 ent->cur--;
149 ent->size--;
150 spin_unlock(&ent->lock);
151 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
152 if (err) {
153 mlx5_ib_warn(dev, "failed destroy mkey\n");
154 } else {
155 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
156 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
157 kfree(mr->pas);
158 kfree(mr);
159 }
160 }
161}
162
163static ssize_t size_write(struct file *filp, const char __user *buf,
164 size_t count, loff_t *pos)
165{
166 struct mlx5_cache_ent *ent = filp->private_data;
167 struct mlx5_ib_dev *dev = ent->dev;
168 char lbuf[20];
169 u32 var;
170 int err;
171 int c;
172
173 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
174 return -EFAULT;
175
176 c = order2idx(dev, ent->order);
177 lbuf[sizeof(lbuf) - 1] = 0;
178
179 if (sscanf(lbuf, "%u", &var) != 1)
180 return -EINVAL;
181
182 if (var < ent->limit)
183 return -EINVAL;
184
185 if (var > ent->size) {
186 err = add_keys(dev, c, var - ent->size);
187 if (err)
188 return err;
189 } else if (var < ent->size) {
190 remove_keys(dev, c, ent->size - var);
191 }
192
193 return count;
194}
195
196static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
197 loff_t *pos)
198{
199 struct mlx5_cache_ent *ent = filp->private_data;
200 char lbuf[20];
201 int err;
202
203 if (*pos)
204 return 0;
205
206 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
207 if (err < 0)
208 return err;
209
210 if (copy_to_user(buf, lbuf, err))
211 return -EFAULT;
212
213 *pos += err;
214
215 return err;
216}
217
218static const struct file_operations size_fops = {
219 .owner = THIS_MODULE,
220 .open = simple_open,
221 .write = size_write,
222 .read = size_read,
223};
224
225static ssize_t limit_write(struct file *filp, const char __user *buf,
226 size_t count, loff_t *pos)
227{
228 struct mlx5_cache_ent *ent = filp->private_data;
229 struct mlx5_ib_dev *dev = ent->dev;
230 char lbuf[20];
231 u32 var;
232 int err;
233 int c;
234
235 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
236 return -EFAULT;
237
238 c = order2idx(dev, ent->order);
239 lbuf[sizeof(lbuf) - 1] = 0;
240
241 if (sscanf(lbuf, "%u", &var) != 1)
242 return -EINVAL;
243
244 if (var > ent->size)
245 return -EINVAL;
246
247 ent->limit = var;
248
249 if (ent->cur < ent->limit) {
250 err = add_keys(dev, c, 2 * ent->limit - ent->cur);
251 if (err)
252 return err;
253 }
254
255 return count;
256}
257
258static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
259 loff_t *pos)
260{
261 struct mlx5_cache_ent *ent = filp->private_data;
262 char lbuf[20];
263 int err;
264
265 if (*pos)
266 return 0;
267
268 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
269 if (err < 0)
270 return err;
271
272 if (copy_to_user(buf, lbuf, err))
273 return -EFAULT;
274
275 *pos += err;
276
277 return err;
278}
279
280static const struct file_operations limit_fops = {
281 .owner = THIS_MODULE,
282 .open = simple_open,
283 .write = limit_write,
284 .read = limit_read,
285};
286
287static int someone_adding(struct mlx5_mr_cache *cache)
288{
289 int i;
290
291 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
292 if (cache->ent[i].cur < cache->ent[i].limit)
293 return 1;
294 }
295
296 return 0;
297}
298
299static void __cache_work_func(struct mlx5_cache_ent *ent)
300{
301 struct mlx5_ib_dev *dev = ent->dev;
302 struct mlx5_mr_cache *cache = &dev->cache;
303 int i = order2idx(dev, ent->order);
304
305 if (cache->stopped)
306 return;
307
308 ent = &dev->cache.ent[i];
309 if (ent->cur < 2 * ent->limit) {
310 add_keys(dev, i, 1);
311 if (ent->cur < 2 * ent->limit)
312 queue_work(cache->wq, &ent->work);
313 } else if (ent->cur > 2 * ent->limit) {
314 if (!someone_adding(cache) &&
315 time_after(jiffies, cache->last_add + 60 * HZ)) {
316 remove_keys(dev, i, 1);
317 if (ent->cur > ent->limit)
318 queue_work(cache->wq, &ent->work);
319 } else {
320 queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
321 }
322 }
323}
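A worked reading of the policy above (the limit of 16 is a hypothetical number, not taken from any profile in this patch): with limit = 16, the entry is refilled one mkey per pass until cur reaches 32 (2 * limit); if cur rises above 32, one mkey is removed per pass, but only when no entry is currently below its limit and nothing has been added to the cache for 60 seconds, otherwise the shrink attempt is retried after a 60 second delay.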
324
325static void delayed_cache_work_func(struct work_struct *work)
326{
327 struct mlx5_cache_ent *ent;
328
329 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
330 __cache_work_func(ent);
331}
332
333static void cache_work_func(struct work_struct *work)
334{
335 struct mlx5_cache_ent *ent;
336
337 ent = container_of(work, struct mlx5_cache_ent, work);
338 __cache_work_func(ent);
339}
340
341static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
342{
343 struct mlx5_mr_cache *cache = &dev->cache;
344 struct mlx5_ib_mr *mr = NULL;
345 struct mlx5_cache_ent *ent;
346 int c;
347 int i;
348
349 c = order2idx(dev, order);
350 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
351 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
352 return NULL;
353 }
354
355 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
356 ent = &cache->ent[i];
357
358 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
359
360 spin_lock(&ent->lock);
361 if (!list_empty(&ent->head)) {
362 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
363 list);
364 list_del(&mr->list);
365 ent->cur--;
366 spin_unlock(&ent->lock);
367 if (ent->cur < ent->limit)
368 queue_work(cache->wq, &ent->work);
369 break;
370 }
371 spin_unlock(&ent->lock);
372
373 queue_work(cache->wq, &ent->work);
374
375 if (mr)
376 break;
377 }
378
379 if (!mr)
380 cache->ent[c].miss++;
381
382 return mr;
383}
384
385static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
386{
387 struct mlx5_mr_cache *cache = &dev->cache;
388 struct mlx5_cache_ent *ent;
389 int shrink = 0;
390 int c;
391
392 c = order2idx(dev, mr->order);
393 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
394 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
395 return;
396 }
397 ent = &cache->ent[c];
398 spin_lock(&ent->lock);
399 list_add_tail(&mr->list, &ent->head);
400 ent->cur++;
401 if (ent->cur > 2 * ent->limit)
402 shrink = 1;
403 spin_unlock(&ent->lock);
404
405 if (shrink)
406 queue_work(cache->wq, &ent->work);
407}
408
409static void clean_keys(struct mlx5_ib_dev *dev, int c)
410{
411 struct device *ddev = dev->ib_dev.dma_device;
412 struct mlx5_mr_cache *cache = &dev->cache;
413 struct mlx5_cache_ent *ent = &cache->ent[c];
414 struct mlx5_ib_mr *mr;
415 int size;
416 int err;
417
418 while (1) {
419 spin_lock(&ent->lock);
420 if (list_empty(&ent->head)) {
421 spin_unlock(&ent->lock);
422 return;
423 }
424 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
425 list_del(&mr->list);
426 ent->cur--;
427 ent->size--;
428 spin_unlock(&ent->lock);
429 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
430 if (err) {
431 mlx5_ib_warn(dev, "failed destroy mkey\n");
432 } else {
433 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
434 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
435 kfree(mr->pas);
436 kfree(mr);
437 }
438 }
439}
440
441static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
442{
443 struct mlx5_mr_cache *cache = &dev->cache;
444 struct mlx5_cache_ent *ent;
445 int i;
446
447 if (!mlx5_debugfs_root)
448 return 0;
449
450 cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
451 if (!cache->root)
452 return -ENOMEM;
453
454 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
455 ent = &cache->ent[i];
456 sprintf(ent->name, "%d", ent->order);
457 ent->dir = debugfs_create_dir(ent->name, cache->root);
458 if (!ent->dir)
459 return -ENOMEM;
460
461 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
462 &size_fops);
463 if (!ent->fsize)
464 return -ENOMEM;
465
466 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
467 &limit_fops);
468 if (!ent->flimit)
469 return -ENOMEM;
470
471 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
472 &ent->cur);
473 if (!ent->fcur)
474 return -ENOMEM;
475
476 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
477 &ent->miss);
478 if (!ent->fmiss)
479 return -ENOMEM;
480 }
481
482 return 0;
483}
484
485static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
486{
487 if (!mlx5_debugfs_root)
488 return;
489
490 debugfs_remove_recursive(dev->cache.root);
491}
492
493int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
494{
495 struct mlx5_mr_cache *cache = &dev->cache;
496 struct mlx5_cache_ent *ent;
497 int limit;
498 int size;
499 int err;
500 int i;
501
502 cache->wq = create_singlethread_workqueue("mkey_cache");
503 if (!cache->wq) {
504 mlx5_ib_warn(dev, "failed to create work queue\n");
505 return -ENOMEM;
506 }
507
508 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
509 INIT_LIST_HEAD(&cache->ent[i].head);
510 spin_lock_init(&cache->ent[i].lock);
511
512 ent = &cache->ent[i];
513 INIT_LIST_HEAD(&ent->head);
514 spin_lock_init(&ent->lock);
515 ent->order = i + 2;
516 ent->dev = dev;
517
518 if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
519 size = dev->mdev.profile->mr_cache[i].size;
520 limit = dev->mdev.profile->mr_cache[i].limit;
521 } else {
522 size = DEF_CACHE_SIZE;
523 limit = 0;
524 }
525 INIT_WORK(&ent->work, cache_work_func);
526 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
527 ent->limit = limit;
528 queue_work(cache->wq, &ent->work);
529 }
530
531 err = mlx5_mr_cache_debugfs_init(dev);
532 if (err)
533 mlx5_ib_warn(dev, "cache debugfs failure\n");
534
535 return 0;
536}
537
538int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
539{
540 int i;
541
542 dev->cache.stopped = 1;
543 destroy_workqueue(dev->cache.wq);
544
545 mlx5_mr_cache_debugfs_cleanup(dev);
546
547 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
548 clean_keys(dev, i);
549
550 return 0;
551}
552
553struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
554{
555 struct mlx5_ib_dev *dev = to_mdev(pd->device);
556 struct mlx5_core_dev *mdev = &dev->mdev;
557 struct mlx5_create_mkey_mbox_in *in;
558 struct mlx5_mkey_seg *seg;
559 struct mlx5_ib_mr *mr;
560 int err;
561
562 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
563 if (!mr)
564 return ERR_PTR(-ENOMEM);
565
566 in = kzalloc(sizeof(*in), GFP_KERNEL);
567 if (!in) {
568 err = -ENOMEM;
569 goto err_free;
570 }
571
572 seg = &in->seg;
573 seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
574 seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
575 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
576 seg->start_addr = 0;
577
578 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
579 if (err)
580 goto err_in;
581
582 kfree(in);
583 mr->ibmr.lkey = mr->mmr.key;
584 mr->ibmr.rkey = mr->mmr.key;
585 mr->umem = NULL;
586
587 return &mr->ibmr;
588
589err_in:
590 kfree(in);
591
592err_free:
593 kfree(mr);
594
595 return ERR_PTR(err);
596}
597
598static int get_octo_len(u64 addr, u64 len, int page_size)
599{
600 u64 offset;
601 int npages;
602
603 offset = addr & (page_size - 1);
604 npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
605 return (npages + 1) / 2;
606}
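A worked example of the octoword calculation (hypothetical numbers): a 1 MB region starting 512 bytes into a 4 KB page spans ALIGN(0x100000 + 512, 4096) >> 12 = 257 pages, so the function returns (257 + 1) / 2 = 129; the translation size is counted in 16-byte octowords, with each page entry taking 8 bytes.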
607
608static int use_umr(int order)
609{
610 return order <= 17;
611}
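That is, regions whose compound-page count has order 17 or less (with 4 KB pages, roughly up to 512 MB) are registered through the UMR QP and the MR cache via reg_umr() below; larger regions, or cache misses that come back as -EAGAIN, fall back to a direct mlx5_core_create_mkey() call in reg_create().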
612
613static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
614 struct ib_sge *sg, u64 dma, int n, u32 key,
615 int page_shift, u64 virt_addr, u64 len,
616 int access_flags)
617{
618 struct mlx5_ib_dev *dev = to_mdev(pd->device);
619 struct ib_mr *mr = dev->umrc.mr;
620
621 sg->addr = dma;
622 sg->length = ALIGN(sizeof(u64) * n, 64);
623 sg->lkey = mr->lkey;
624
625 wr->next = NULL;
626 wr->send_flags = 0;
627 wr->sg_list = sg;
628 if (n)
629 wr->num_sge = 1;
630 else
631 wr->num_sge = 0;
632
633 wr->opcode = MLX5_IB_WR_UMR;
634 wr->wr.fast_reg.page_list_len = n;
635 wr->wr.fast_reg.page_shift = page_shift;
636 wr->wr.fast_reg.rkey = key;
637 wr->wr.fast_reg.iova_start = virt_addr;
638 wr->wr.fast_reg.length = len;
639 wr->wr.fast_reg.access_flags = access_flags;
640 wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
641}
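One observation on the request built above (to be read together with the UMR handling later in this patch): the generic fast_reg fields of ib_send_wr are reused to carry the UMR parameters, and the protection domain pointer is smuggled through wr->wr.fast_reg.page_list, presumably to be unpacked again when the UMR WQE segments are assembled in the send path.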
642
643static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
644 struct ib_send_wr *wr, u32 key)
645{
646 wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
647 wr->opcode = MLX5_IB_WR_UMR;
648 wr->wr.fast_reg.rkey = key;
649}
650
651void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
652{
653 struct mlx5_ib_mr *mr;
654 struct ib_wc wc;
655 int err;
656
657 while (1) {
658 err = ib_poll_cq(cq, 1, &wc);
659 if (err < 0) {
660 pr_warn("poll cq error %d\n", err);
661 return;
662 }
663 if (err == 0)
664 break;
665
666 mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
667 mr->status = wc.status;
668 complete(&mr->done);
669 }
670 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
671}
672
673static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
674 u64 virt_addr, u64 len, int npages,
675 int page_shift, int order, int access_flags)
676{
677 struct mlx5_ib_dev *dev = to_mdev(pd->device);
678 struct umr_common *umrc = &dev->umrc;
679 struct ib_send_wr wr, *bad;
680 struct mlx5_ib_mr *mr;
681 struct ib_sge sg;
682 int err;
683 int i;
684
685 for (i = 0; i < 10; i++) {
686 mr = alloc_cached_mr(dev, order);
687 if (mr)
688 break;
689
690 err = add_keys(dev, order2idx(dev, order), 1);
691 if (err) {
692 mlx5_ib_warn(dev, "add_keys failed\n");
693 break;
694 }
695 }
696
697 if (!mr)
698 return ERR_PTR(-EAGAIN);
699
700 mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
701
702 memset(&wr, 0, sizeof(wr));
703 wr.wr_id = (u64)(unsigned long)mr;
704 prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
705
706 /* We serialize polls so one process does not kidnap another's
707 * completion. This is not a problem since wr is completed in
708 * around 1 usec
709 */
710 down(&umrc->sem);
711 init_completion(&mr->done);
712 err = ib_post_send(umrc->qp, &wr, &bad);
713 if (err) {
714 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
715 up(&umrc->sem);
716 goto error;
717 }
718 wait_for_completion(&mr->done);
719 up(&umrc->sem);
720
721 if (mr->status != IB_WC_SUCCESS) {
722 mlx5_ib_warn(dev, "reg umr failed\n");
723 err = -EFAULT;
724 goto error;
725 }
726
727 return mr;
728
729error:
730 free_cached_mr(dev, mr);
731 return ERR_PTR(err);
732}
733
734static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
735 u64 length, struct ib_umem *umem,
736 int npages, int page_shift,
737 int access_flags)
738{
739 struct mlx5_ib_dev *dev = to_mdev(pd->device);
740 struct mlx5_create_mkey_mbox_in *in;
741 struct mlx5_ib_mr *mr;
742 int inlen;
743 int err;
744
745 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
746 if (!mr)
747 return ERR_PTR(-ENOMEM);
748
749 inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
750 in = mlx5_vzalloc(inlen);
751 if (!in) {
752 err = -ENOMEM;
753 goto err_1;
754 }
755 mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
756
757 in->seg.flags = convert_access(access_flags) |
758 MLX5_ACCESS_MODE_MTT;
759 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
760 in->seg.start_addr = cpu_to_be64(virt_addr);
761 in->seg.len = cpu_to_be64(length);
762 in->seg.bsfs_octo_size = 0;
763 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
764 in->seg.log2_page_size = page_shift;
765 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
766 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
767 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
768 if (err) {
769 mlx5_ib_warn(dev, "create mkey failed\n");
770 goto err_2;
771 }
772 mr->umem = umem;
773 mlx5_vfree(in);
774
775 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
776
777 return mr;
778
779err_2:
780 mlx5_vfree(in);
781
782err_1:
783 kfree(mr);
784
785 return ERR_PTR(err);
786}
787
788struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
789 u64 virt_addr, int access_flags,
790 struct ib_udata *udata)
791{
792 struct mlx5_ib_dev *dev = to_mdev(pd->device);
793 struct mlx5_ib_mr *mr = NULL;
794 struct ib_umem *umem;
795 int page_shift;
796 int npages;
797 int ncont;
798 int order;
799 int err;
800
801 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
802 start, virt_addr, length);
803 umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
804 0);
805 if (IS_ERR(umem)) {
806 mlx5_ib_dbg(dev, "umem get failed\n");
807 return (void *)umem;
808 }
809
810 mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
811 if (!npages) {
812 mlx5_ib_warn(dev, "avoid zero region\n");
813 err = -EINVAL;
814 goto error;
815 }
816
817 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
818 npages, ncont, order, page_shift);
819
820 if (use_umr(order)) {
821 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
822 order, access_flags);
823 if (PTR_ERR(mr) == -EAGAIN) {
824			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
825 mr = NULL;
826 }
827 }
828
829 if (!mr)
830 mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
831 access_flags);
832
833 if (IS_ERR(mr)) {
834 err = PTR_ERR(mr);
835 goto error;
836 }
837
838 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
839
840 mr->umem = umem;
841 mr->npages = npages;
842 spin_lock(&dev->mr_lock);
843 dev->mdev.priv.reg_pages += npages;
844 spin_unlock(&dev->mr_lock);
845 mr->ibmr.lkey = mr->mmr.key;
846 mr->ibmr.rkey = mr->mmr.key;
847
848 return &mr->ibmr;
849
850error:
851 ib_umem_release(umem);
852 return ERR_PTR(err);
853}
854
855static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
856{
857 struct umr_common *umrc = &dev->umrc;
858 struct ib_send_wr wr, *bad;
859 int err;
860
861 memset(&wr, 0, sizeof(wr));
862 wr.wr_id = (u64)(unsigned long)mr;
863 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
864
865 down(&umrc->sem);
866 init_completion(&mr->done);
867 err = ib_post_send(umrc->qp, &wr, &bad);
868 if (err) {
869 up(&umrc->sem);
870 mlx5_ib_dbg(dev, "err %d\n", err);
871 goto error;
872 }
873 wait_for_completion(&mr->done);
874 up(&umrc->sem);
875 if (mr->status != IB_WC_SUCCESS) {
876 mlx5_ib_warn(dev, "unreg umr failed\n");
877 err = -EFAULT;
878 goto error;
879 }
880 return 0;
881
882error:
883 return err;
884}
885
886int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
887{
888 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
889 struct mlx5_ib_mr *mr = to_mmr(ibmr);
890 struct ib_umem *umem = mr->umem;
891 int npages = mr->npages;
892 int umred = mr->umred;
893 int err;
894
895 if (!umred) {
896 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
897 if (err) {
898 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
899 mr->mmr.key, err);
900 return err;
901 }
902 } else {
903 err = unreg_umr(dev, mr);
904 if (err) {
905 mlx5_ib_warn(dev, "failed unregister\n");
906 return err;
907 }
908 free_cached_mr(dev, mr);
909 }
910
911 if (umem) {
912 ib_umem_release(umem);
913 spin_lock(&dev->mr_lock);
914 dev->mdev.priv.reg_pages -= npages;
915 spin_unlock(&dev->mr_lock);
916 }
917
918 if (!umred)
919 kfree(mr);
920
921 return 0;
922}
923
924struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
925 int max_page_list_len)
926{
927 struct mlx5_ib_dev *dev = to_mdev(pd->device);
928 struct mlx5_create_mkey_mbox_in *in;
929 struct mlx5_ib_mr *mr;
930 int err;
931
932 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
933 if (!mr)
934 return ERR_PTR(-ENOMEM);
935
936 in = kzalloc(sizeof(*in), GFP_KERNEL);
937 if (!in) {
938 err = -ENOMEM;
939 goto err_free;
940 }
941
942 in->seg.status = 1 << 6; /* free */
943 in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
944 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
945 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
946 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
947 /*
948 * TBD not needed - issue 197292 */
949 in->seg.log2_page_size = PAGE_SHIFT;
950
951 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
952 kfree(in);
953 if (err)
954 goto err_free;
955
956 mr->ibmr.lkey = mr->mmr.key;
957 mr->ibmr.rkey = mr->mmr.key;
958 mr->umem = NULL;
959
960 return &mr->ibmr;
961
962err_free:
963 kfree(mr);
964 return ERR_PTR(err);
965}
966
967struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
968 int page_list_len)
969{
970 struct mlx5_ib_fast_reg_page_list *mfrpl;
971 int size = page_list_len * sizeof(u64);
972
973 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
974 if (!mfrpl)
975 return ERR_PTR(-ENOMEM);
976
977 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
978 if (!mfrpl->ibfrpl.page_list)
979 goto err_free;
980
981 mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
982 size, &mfrpl->map,
983 GFP_KERNEL);
984 if (!mfrpl->mapped_page_list)
985 goto err_free;
986
987 WARN_ON(mfrpl->map & 0x3f);
988
989 return &mfrpl->ibfrpl;
990
991err_free:
992 kfree(mfrpl->ibfrpl.page_list);
993 kfree(mfrpl);
994 return ERR_PTR(-ENOMEM);
995}
996
997void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
998{
999 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1000 struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1001 int size = page_list->max_page_list_len * sizeof(u64);
1002
1003 dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
1004 mfrpl->map);
1005 kfree(mfrpl->ibfrpl.page_list);
1006 kfree(mfrpl);
1007}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644
index 000000000000..16ac54c9819f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -0,0 +1,2524 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38/* not supported currently */
39static int wq_signature;
40
41enum {
42 MLX5_IB_ACK_REQ_FREQ = 8,
43};
44
45enum {
46 MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
47 MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
48 MLX5_IB_LINK_TYPE_IB = 0,
49 MLX5_IB_LINK_TYPE_ETH = 1
50};
51
52enum {
53 MLX5_IB_SQ_STRIDE = 6,
54 MLX5_IB_CACHE_LINE_SIZE = 64,
55};
56
57static const u32 mlx5_ib_opcode[] = {
58 [IB_WR_SEND] = MLX5_OPCODE_SEND,
59 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
60 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
61 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
62 [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
63 [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
64 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
65 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
66 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
67 [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
68 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
69 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
70 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
71};
72
73struct umr_wr {
74 u64 virt_addr;
75 struct ib_pd *pd;
76 unsigned int page_shift;
77 unsigned int npages;
78 u32 length;
79 int access_flags;
80 u32 mkey;
81};
82
83static int is_qp0(enum ib_qp_type qp_type)
84{
85 return qp_type == IB_QPT_SMI;
86}
87
88static int is_qp1(enum ib_qp_type qp_type)
89{
90 return qp_type == IB_QPT_GSI;
91}
92
93static int is_sqp(enum ib_qp_type qp_type)
94{
95 return is_qp0(qp_type) || is_qp1(qp_type);
96}
97
98static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
99{
100 return mlx5_buf_offset(&qp->buf, offset);
101}
102
103static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
104{
105 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
106}
107
108void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
109{
110 return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
111}
112
113static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
114{
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116 struct ib_event event;
117
118 if (type == MLX5_EVENT_TYPE_PATH_MIG)
119 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
120
121 if (ibqp->event_handler) {
122 event.device = ibqp->device;
123 event.element.qp = ibqp;
124 switch (type) {
125 case MLX5_EVENT_TYPE_PATH_MIG:
126 event.event = IB_EVENT_PATH_MIG;
127 break;
128 case MLX5_EVENT_TYPE_COMM_EST:
129 event.event = IB_EVENT_COMM_EST;
130 break;
131 case MLX5_EVENT_TYPE_SQ_DRAINED:
132 event.event = IB_EVENT_SQ_DRAINED;
133 break;
134 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
135 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
136 break;
137 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
138 event.event = IB_EVENT_QP_FATAL;
139 break;
140 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
141 event.event = IB_EVENT_PATH_MIG_ERR;
142 break;
143 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
144 event.event = IB_EVENT_QP_REQ_ERR;
145 break;
146 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
147 event.event = IB_EVENT_QP_ACCESS_ERR;
148 break;
149 default:
150 pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
159 int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
160{
161 int wqe_size;
162 int wq_size;
163
164 /* Sanity check RQ size before proceeding */
165 if (cap->max_recv_wr > dev->mdev.caps.max_wqes)
166 return -EINVAL;
167
168 if (!has_rq) {
169 qp->rq.max_gs = 0;
170 qp->rq.wqe_cnt = 0;
171 qp->rq.wqe_shift = 0;
172 } else {
173 if (ucmd) {
174 qp->rq.wqe_cnt = ucmd->rq_wqe_count;
175 qp->rq.wqe_shift = ucmd->rq_wqe_shift;
176 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
177 qp->rq.max_post = qp->rq.wqe_cnt;
178 } else {
179 wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
180 wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
181 wqe_size = roundup_pow_of_two(wqe_size);
182 wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
183 wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
184 qp->rq.wqe_cnt = wq_size / wqe_size;
185 if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
186 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
187 wqe_size,
188 dev->mdev.caps.max_rq_desc_sz);
189 return -EINVAL;
190 }
191 qp->rq.wqe_shift = ilog2(wqe_size);
192 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
193 qp->rq.max_post = qp->rq.wqe_cnt;
194 }
195 }
196
197 return 0;
198}
199
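/*
 * Fixed per-WQE overhead, in bytes, for the given transport type: the
 * control segment plus whatever remote-address, atomic, datagram or UMR
 * segments that transport may need.  Data and inline segments are
 * accounted for separately in calc_send_wqe().
 */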
200static int sq_overhead(enum ib_qp_type qp_type)
201{
202 int size = 0;
203
204 switch (qp_type) {
205 case IB_QPT_XRC_INI:
206 size = sizeof(struct mlx5_wqe_xrc_seg);
207 /* fall through */
208 case IB_QPT_RC:
209 size += sizeof(struct mlx5_wqe_ctrl_seg) +
210 sizeof(struct mlx5_wqe_atomic_seg) +
211 sizeof(struct mlx5_wqe_raddr_seg);
212 break;
213
214 case IB_QPT_UC:
215 size = sizeof(struct mlx5_wqe_ctrl_seg) +
216 sizeof(struct mlx5_wqe_raddr_seg);
217 break;
218
219 case IB_QPT_UD:
220 case IB_QPT_SMI:
221 case IB_QPT_GSI:
222 size = sizeof(struct mlx5_wqe_ctrl_seg) +
223 sizeof(struct mlx5_wqe_datagram_seg);
224 break;
225
226 case MLX5_IB_QPT_REG_UMR:
227 size = sizeof(struct mlx5_wqe_ctrl_seg) +
228 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
229 sizeof(struct mlx5_mkey_seg);
230 break;
231
232 default:
233 return -EINVAL;
234 }
235
236 return size;
237}
238
239static int calc_send_wqe(struct ib_qp_init_attr *attr)
240{
241 int inl_size = 0;
242 int size;
243
244 size = sq_overhead(attr->qp_type);
245 if (size < 0)
246 return size;
247
248 if (attr->cap.max_inline_data) {
249 inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
250 attr->cap.max_inline_data;
251 }
252
253 size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
254
255 return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
256}
257
258static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
259 struct mlx5_ib_qp *qp)
260{
261 int wqe_size;
262 int wq_size;
263
264 if (!attr->cap.max_send_wr)
265 return 0;
266
267 wqe_size = calc_send_wqe(attr);
268 mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
269 if (wqe_size < 0)
270 return wqe_size;
271
272 if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
274 mlx5_ib_dbg(dev, "wqe_size %d > max_sq_desc_sz %d\n", wqe_size, dev->mdev.caps.max_sq_desc_sz);
274 return -EINVAL;
275 }
276
277 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
278 sizeof(struct mlx5_wqe_inline_seg);
279 attr->cap.max_inline_data = qp->max_inline_data;
280
281 wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
282 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
283 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
284 qp->sq.max_gs = attr->cap.max_send_sge;
285 qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
286
287 return wq_size;
288}
289
290static int set_user_buf_size(struct mlx5_ib_dev *dev,
291 struct mlx5_ib_qp *qp,
292 struct mlx5_ib_create_qp *ucmd)
293{
294 int desc_sz = 1 << qp->sq.wqe_shift;
295
296 if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
297 mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
298 desc_sz, dev->mdev.caps.max_sq_desc_sz);
299 return -EINVAL;
300 }
301
302 if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
303 mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
304 ucmd->sq_wqe_count);
305 return -EINVAL;
306 }
307
308 qp->sq.wqe_cnt = ucmd->sq_wqe_count;
309
310 if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
311 mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
312 qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
313 return -EINVAL;
314 }
315
316 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
317 (qp->sq.wqe_cnt << 6);
318
319 return 0;
320}
321
322static int qp_has_rq(struct ib_qp_init_attr *attr)
323{
324 if (attr->qp_type == IB_QPT_XRC_INI ||
325 attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
326 attr->qp_type == MLX5_IB_QPT_REG_UMR ||
327 !attr->cap.max_recv_wr)
328 return 0;
329
330 return 1;
331}
332
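/*
 * Micro UAR (uuar) allocation.  Each UAR page provides
 * MLX5_BF_REGS_PER_PAGE blue-flame registers.  Index 0 serves the low
 * latency class and is shared by reference count, the medium class
 * spreads QPs over the least used of the eligible shared slots, the high
 * class hands out dedicated registers from the top of the range tracked
 * in the bitmap, and the fast-path class (the UMR QP) always uses index 2.
 */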
333static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
334{
335 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
336 int start_uuar;
337 int i;
338
339 start_uuar = nuuars - uuari->num_low_latency_uuars;
340 for (i = start_uuar; i < nuuars; i++) {
341 if (!test_bit(i, uuari->bitmap)) {
342 set_bit(i, uuari->bitmap);
343 uuari->count[i]++;
344 return i;
345 }
346 }
347
348 return -ENOMEM;
349}
350
351static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
352{
353 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
354 int minidx = 1;
355 int uuarn;
356 int end;
357 int i;
358
359 end = nuuars - uuari->num_low_latency_uuars;
360
361 for (i = 1; i < end; i++) {
362 uuarn = i & 3;
363 if (uuarn == 2 || uuarn == 3)
364 continue;
365
366 if (uuari->count[i] < uuari->count[minidx])
367 minidx = i;
368 }
369
370 uuari->count[minidx]++;
371 return minidx;
372}
373
374static int alloc_uuar(struct mlx5_uuar_info *uuari,
375 enum mlx5_ib_latency_class lat)
376{
377 int uuarn = -EINVAL;
378
379 mutex_lock(&uuari->lock);
380 switch (lat) {
381 case MLX5_IB_LATENCY_CLASS_LOW:
382 uuarn = 0;
383 uuari->count[uuarn]++;
384 break;
385
386 case MLX5_IB_LATENCY_CLASS_MEDIUM:
387 uuarn = alloc_med_class_uuar(uuari);
388 break;
389
390 case MLX5_IB_LATENCY_CLASS_HIGH:
391 uuarn = alloc_high_class_uuar(uuari);
392 break;
393
394 case MLX5_IB_LATENCY_CLASS_FAST_PATH:
395 uuarn = 2;
396 break;
397 }
398 mutex_unlock(&uuari->lock);
399
400 return uuarn;
401}
402
403static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
404{
405 clear_bit(uuarn, uuari->bitmap);
406 --uuari->count[uuarn];
407}
408
409static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
410{
411 clear_bit(uuarn, uuari->bitmap);
412 --uuari->count[uuarn];
413}
414
415static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
416{
417 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
418 int high_uuar = nuuars - uuari->num_low_latency_uuars;
419
420 mutex_lock(&uuari->lock);
421 if (uuarn == 0) {
422 --uuari->count[uuarn];
423 goto out;
424 }
425
426 if (uuarn < high_uuar) {
427 free_med_class_uuar(uuari, uuarn);
428 goto out;
429 }
430
431 free_high_class_uuar(uuari, uuarn);
432
433out:
434 mutex_unlock(&uuari->lock);
435}
436
437static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
438{
439 switch (state) {
440 case IB_QPS_RESET: return MLX5_QP_STATE_RST;
441 case IB_QPS_INIT: return MLX5_QP_STATE_INIT;
442 case IB_QPS_RTR: return MLX5_QP_STATE_RTR;
443 case IB_QPS_RTS: return MLX5_QP_STATE_RTS;
444 case IB_QPS_SQD: return MLX5_QP_STATE_SQD;
445 case IB_QPS_SQE: return MLX5_QP_STATE_SQER;
446 case IB_QPS_ERR: return MLX5_QP_STATE_ERR;
447 default: return -1;
448 }
449}
450
451static int to_mlx5_st(enum ib_qp_type type)
452{
453 switch (type) {
454 case IB_QPT_RC: return MLX5_QP_ST_RC;
455 case IB_QPT_UC: return MLX5_QP_ST_UC;
456 case IB_QPT_UD: return MLX5_QP_ST_UD;
457 case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR;
458 case IB_QPT_XRC_INI:
459 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
460 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
461 case IB_QPT_GSI: return MLX5_QP_ST_QP1;
462 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
463 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
464 case IB_QPT_RAW_PACKET:
465 case IB_QPT_MAX:
466 default: return -EINVAL;
467 }
468}
469
470static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
471{
472 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
473}
474
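/*
 * Create the resources for a userspace QP: parse the create command from
 * udata, pick a uuar for the doorbell, pin the user's WQ buffer with
 * ib_umem_get(), fill the page list of the CREATE_QP mailbox and map the
 * user doorbell record.  On failure everything acquired so far is
 * released in reverse order.
 */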
475static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
476 struct mlx5_ib_qp *qp, struct ib_udata *udata,
477 struct mlx5_create_qp_mbox_in **in,
478 struct mlx5_ib_create_qp_resp *resp, int *inlen)
479{
480 struct mlx5_ib_ucontext *context;
481 struct mlx5_ib_create_qp ucmd;
482 int page_shift;
483 int uar_index;
484 int npages;
485 u32 offset;
486 int uuarn;
487 int ncont;
488 int err;
489
490 err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
491 if (err) {
492 mlx5_ib_dbg(dev, "copy failed\n");
493 return err;
494 }
495
496 context = to_mucontext(pd->uobject->context);
497 /*
498 * TBD: should come from the verbs when we have the API
499 */
500 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
501 if (uuarn < 0) {
502 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
503 mlx5_ib_dbg(dev, "reverting to high latency\n");
504 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
505 if (uuarn < 0) {
506 mlx5_ib_dbg(dev, "uuar allocation failed\n");
507 return uuarn;
508 }
509 }
510
511 uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
512 mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
513
514 err = set_user_buf_size(dev, qp, &ucmd);
515 if (err)
516 goto err_uuar;
517
518 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
519 qp->buf_size, 0, 0);
520 if (IS_ERR(qp->umem)) {
521 mlx5_ib_dbg(dev, "umem_get failed\n");
522 err = PTR_ERR(qp->umem);
523 goto err_uuar;
524 }
525
526 mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
527 &ncont, NULL);
528 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
529 if (err) {
530 mlx5_ib_warn(dev, "bad offset\n");
531 goto err_umem;
532 }
533 mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
534 ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
535
536 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
537 *in = mlx5_vzalloc(*inlen);
538 if (!*in) {
539 err = -ENOMEM;
540 goto err_umem;
541 }
542 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
543 (*in)->ctx.log_pg_sz_remote_qpn =
544 cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
545 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
546
547 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
548 resp->uuar_index = uuarn;
549 qp->uuarn = uuarn;
550
551 err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
552 if (err) {
553 mlx5_ib_dbg(dev, "map failed\n");
554 goto err_free;
555 }
556
557 err = ib_copy_to_udata(udata, resp, sizeof(*resp));
558 if (err) {
559 mlx5_ib_dbg(dev, "copy failed\n");
560 goto err_unmap;
561 }
562 qp->create_type = MLX5_QP_USER;
563
564 return 0;
565
566err_unmap:
567 mlx5_ib_db_unmap_user(context, &qp->db);
568
569err_free:
570 mlx5_vfree(*in);
571
572err_umem:
573 ib_umem_release(qp->umem);
574
575err_uuar:
576 free_uuar(&context->uuari, uuarn);
577 return err;
578}
579
580static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
581{
582 struct mlx5_ib_ucontext *context;
583
584 context = to_mucontext(pd->uobject->context);
585 mlx5_ib_db_unmap_user(context, &qp->db);
586 ib_umem_release(qp->umem);
587 free_uuar(&context->uuari, qp->uuarn);
588}
589
590static int create_kernel_qp(struct mlx5_ib_dev *dev,
591 struct ib_qp_init_attr *init_attr,
592 struct mlx5_ib_qp *qp,
593 struct mlx5_create_qp_mbox_in **in, int *inlen)
594{
595 enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
596 struct mlx5_uuar_info *uuari;
597 int uar_index;
598 int uuarn;
599 int err;
600
601 uuari = &dev->mdev.priv.uuari;
602 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
603 qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
604
605 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
606 lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
607
608 uuarn = alloc_uuar(uuari, lc);
609 if (uuarn < 0) {
610 mlx5_ib_dbg(dev, "failed to allocate uuar\n");
611 return -ENOMEM;
612 }
613
614 qp->bf = &uuari->bfs[uuarn];
615 uar_index = qp->bf->uar->index;
616
617 err = calc_sq_size(dev, init_attr, qp);
618 if (err < 0) {
619 mlx5_ib_dbg(dev, "err %d\n", err);
620 goto err_uuar;
621 }
622
623 qp->rq.offset = 0;
624 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
625 qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
626
627 err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
628 if (err) {
629 mlx5_ib_dbg(dev, "err %d\n", err);
630 goto err_uuar;
631 }
632
633 qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
634 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
635 *in = mlx5_vzalloc(*inlen);
636 if (!*in) {
637 err = -ENOMEM;
638 goto err_buf;
639 }
640 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
641 (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
642 /* Set "fast registration enabled" for all kernel QPs */
643 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
644 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
645
646 mlx5_fill_page_array(&qp->buf, (*in)->pas);
647
648 err = mlx5_db_alloc(&dev->mdev, &qp->db);
649 if (err) {
650 mlx5_ib_dbg(dev, "err %d\n", err);
651 goto err_free;
652 }
653
654 qp->db.db[0] = 0;
655 qp->db.db[1] = 0;
656
657 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
658 qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
659 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
660 qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
661 qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
662
663 if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
664 !qp->sq.w_list || !qp->sq.wqe_head) {
665 err = -ENOMEM;
666 goto err_wrid;
667 }
668 qp->create_type = MLX5_QP_KERNEL;
669
670 return 0;
671
672err_wrid:
673 mlx5_db_free(&dev->mdev, &qp->db);
674 kfree(qp->sq.wqe_head);
675 kfree(qp->sq.w_list);
676 kfree(qp->sq.wrid);
677 kfree(qp->sq.wr_data);
678 kfree(qp->rq.wrid);
679
680err_free:
681 mlx5_vfree(*in);
682
683err_buf:
684 mlx5_buf_free(&dev->mdev, &qp->buf);
685
686err_uuar:
687 free_uuar(&dev->mdev.priv.uuari, uuarn);
688 return err;
689}
690
691static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
692{
693 mlx5_db_free(&dev->mdev, &qp->db);
694 kfree(qp->sq.wqe_head);
695 kfree(qp->sq.w_list);
696 kfree(qp->sq.wrid);
697 kfree(qp->sq.wr_data);
698 kfree(qp->rq.wrid);
699 mlx5_buf_free(&dev->mdev, &qp->buf);
700 free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
701}
702
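/*
 * Receive queue type for the QP context: QPs attached to an SRQ and XRC
 * QPs report MLX5_SRQ_RQ, QPs created without a receive queue report a
 * zero-length RQ, and everything else gets a normal RQ.
 */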
703static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
704{
705 if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
706 (attr->qp_type == IB_QPT_XRC_INI))
707 return cpu_to_be32(MLX5_SRQ_RQ);
708 else if (!qp->has_rq)
709 return cpu_to_be32(MLX5_ZERO_LEN_RQ);
710 else
711 return cpu_to_be32(MLX5_NON_ZERO_RQ);
712}
713
714static int is_connected(enum ib_qp_type qp_type)
715{
716 if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
717 return 1;
718
719 return 0;
720}
721
722static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
723 struct ib_qp_init_attr *init_attr,
724 struct ib_udata *udata, struct mlx5_ib_qp *qp)
725{
726 struct mlx5_ib_resources *devr = &dev->devr;
727 struct mlx5_ib_create_qp_resp resp;
728 struct mlx5_create_qp_mbox_in *in;
729 struct mlx5_ib_create_qp ucmd;
730 int inlen = sizeof(*in);
731 int err;
732
733 mutex_init(&qp->mutex);
734 spin_lock_init(&qp->sq.lock);
735 spin_lock_init(&qp->rq.lock);
736
737 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
738 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
739
740 if (pd && pd->uobject) {
741 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
742 mlx5_ib_dbg(dev, "copy failed\n");
743 return -EFAULT;
744 }
745
746 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
747 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
748 } else {
749 qp->wq_sig = !!wq_signature;
750 }
751
752 qp->has_rq = qp_has_rq(init_attr);
753 err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
754 qp, (pd && pd->uobject) ? &ucmd : NULL);
755 if (err) {
756 mlx5_ib_dbg(dev, "err %d\n", err);
757 return err;
758 }
759
760 if (pd) {
761 if (pd->uobject) {
762 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
763 if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
764 ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
765 mlx5_ib_dbg(dev, "invalid rq params\n");
766 return -EINVAL;
767 }
768 if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
769 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
770 ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
771 return -EINVAL;
772 }
773 err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
774 if (err)
775 mlx5_ib_dbg(dev, "err %d\n", err);
776 } else {
777 err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
778 if (err)
779 mlx5_ib_dbg(dev, "err %d\n", err);
780 else
781 qp->pa_lkey = to_mpd(pd)->pa_lkey;
782 }
783
784 if (err)
785 return err;
786 } else {
787 in = mlx5_vzalloc(sizeof(*in));
788 if (!in)
789 return -ENOMEM;
790
791 qp->create_type = MLX5_QP_EMPTY;
792 }
793
794 if (is_sqp(init_attr->qp_type))
795 qp->port = init_attr->port_num;
796
797 in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
798 MLX5_QP_PM_MIGRATED << 11);
799
800 if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
801 in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
802 else
803 in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
804
805 if (qp->wq_sig)
806 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
807
808 if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
809 int rcqe_sz;
810 int scqe_sz;
811
812 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
813 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
814
815 if (rcqe_sz == 128)
816 in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
817 else
818 in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
819
820 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
821 if (scqe_sz == 128)
822 in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
823 else
824 in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
825 }
826 }
827
828 if (qp->rq.wqe_cnt) {
829 in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
830 in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
831 }
832
833 in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
834
835 if (qp->sq.wqe_cnt)
836 in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
837 else
838 in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
839
840 /* Set default resources */
841 switch (init_attr->qp_type) {
842 case IB_QPT_XRC_TGT:
843 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
844 in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
845 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
846 in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
847 break;
848 case IB_QPT_XRC_INI:
849 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
850 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
851 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
852 break;
853 default:
854 if (init_attr->srq) {
855 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
856 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
857 } else {
858 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
859 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
860 }
861 }
862
863 if (init_attr->send_cq)
864 in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
865
866 if (init_attr->recv_cq)
867 in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
868
869 in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
870
871 err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
872 if (err) {
873 mlx5_ib_dbg(dev, "create qp failed\n");
874 goto err_create;
875 }
876
877 mlx5_vfree(in);
878 /* Hardware wants QPN written in big-endian order (after
879 * shifting) for send doorbell. Precompute this value to save
880 * a little bit when posting sends.
881 */
882 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
883
884 qp->mqp.event = mlx5_ib_qp_event;
885
886 return 0;
887
888err_create:
889 if (qp->create_type == MLX5_QP_USER)
890 destroy_qp_user(pd, qp);
891 else if (qp->create_type == MLX5_QP_KERNEL)
892 destroy_qp_kernel(dev, qp);
893
894 mlx5_vfree(in);
895 return err;
896}
897
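/*
 * Lock/unlock both CQs of a QP in a deadlock-safe order: the CQ with the
 * lower CQN is always taken first (and released last), and the case of a
 * single CQ used for both send and receive is handled explicitly.
 */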
898static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
899 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
900{
901 if (send_cq) {
902 if (recv_cq) {
903 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
904 spin_lock_irq(&send_cq->lock);
905 spin_lock_nested(&recv_cq->lock,
906 SINGLE_DEPTH_NESTING);
907 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
908 spin_lock_irq(&send_cq->lock);
909 __acquire(&recv_cq->lock);
910 } else {
911 spin_lock_irq(&recv_cq->lock);
912 spin_lock_nested(&send_cq->lock,
913 SINGLE_DEPTH_NESTING);
914 }
915 } else {
916 spin_lock_irq(&send_cq->lock);
917 }
918 } else if (recv_cq) {
919 spin_lock_irq(&recv_cq->lock);
920 }
921}
922
923static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
924 __releases(&send_cq->lock) __releases(&recv_cq->lock)
925{
926 if (send_cq) {
927 if (recv_cq) {
928 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
929 spin_unlock(&recv_cq->lock);
930 spin_unlock_irq(&send_cq->lock);
931 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
932 __release(&recv_cq->lock);
933 spin_unlock_irq(&send_cq->lock);
934 } else {
935 spin_unlock(&send_cq->lock);
936 spin_unlock_irq(&recv_cq->lock);
937 }
938 } else {
939 spin_unlock_irq(&send_cq->lock);
940 }
941 } else if (recv_cq) {
942 spin_unlock_irq(&recv_cq->lock);
943 }
944}
945
946static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
947{
948 return to_mpd(qp->ibqp.pd);
949}
950
951static void get_cqs(struct mlx5_ib_qp *qp,
952 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
953{
954 switch (qp->ibqp.qp_type) {
955 case IB_QPT_XRC_TGT:
956 *send_cq = NULL;
957 *recv_cq = NULL;
958 break;
959 case MLX5_IB_QPT_REG_UMR:
960 case IB_QPT_XRC_INI:
961 *send_cq = to_mcq(qp->ibqp.send_cq);
962 *recv_cq = NULL;
963 break;
964
965 case IB_QPT_SMI:
966 case IB_QPT_GSI:
967 case IB_QPT_RC:
968 case IB_QPT_UC:
969 case IB_QPT_UD:
970 case IB_QPT_RAW_IPV6:
971 case IB_QPT_RAW_ETHERTYPE:
972 *send_cq = to_mcq(qp->ibqp.send_cq);
973 *recv_cq = to_mcq(qp->ibqp.recv_cq);
974 break;
975
976 case IB_QPT_RAW_PACKET:
977 case IB_QPT_MAX:
978 default:
979 *send_cq = NULL;
980 *recv_cq = NULL;
981 break;
982 }
983}
984
985static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
986{
987 struct mlx5_ib_cq *send_cq, *recv_cq;
988 struct mlx5_modify_qp_mbox_in *in;
989 int err;
990
991 in = kzalloc(sizeof(*in), GFP_KERNEL);
992 if (!in)
993 return;
994 if (qp->state != IB_QPS_RESET)
995 if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
996 MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
997 mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
998 qp->mqp.qpn);
999
1000 get_cqs(qp, &send_cq, &recv_cq);
1001
1002 if (qp->create_type == MLX5_QP_KERNEL) {
1003 mlx5_ib_lock_cqs(send_cq, recv_cq);
1004 __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1005 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1006 if (send_cq != recv_cq)
1007 __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1008 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1009 }
1010
1011 err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
1012 if (err)
1013 mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
1014 kfree(in);
1015
1016
1017 if (qp->create_type == MLX5_QP_KERNEL)
1018 destroy_qp_kernel(dev, qp);
1019 else if (qp->create_type == MLX5_QP_USER)
1020 destroy_qp_user(&get_pd(qp)->ibpd, qp);
1021}
1022
1023static const char *ib_qp_type_str(enum ib_qp_type type)
1024{
1025 switch (type) {
1026 case IB_QPT_SMI:
1027 return "IB_QPT_SMI";
1028 case IB_QPT_GSI:
1029 return "IB_QPT_GSI";
1030 case IB_QPT_RC:
1031 return "IB_QPT_RC";
1032 case IB_QPT_UC:
1033 return "IB_QPT_UC";
1034 case IB_QPT_UD:
1035 return "IB_QPT_UD";
1036 case IB_QPT_RAW_IPV6:
1037 return "IB_QPT_RAW_IPV6";
1038 case IB_QPT_RAW_ETHERTYPE:
1039 return "IB_QPT_RAW_ETHERTYPE";
1040 case IB_QPT_XRC_INI:
1041 return "IB_QPT_XRC_INI";
1042 case IB_QPT_XRC_TGT:
1043 return "IB_QPT_XRC_TGT";
1044 case IB_QPT_RAW_PACKET:
1045 return "IB_QPT_RAW_PACKET";
1046 case MLX5_IB_QPT_REG_UMR:
1047 return "MLX5_IB_QPT_REG_UMR";
1048 case IB_QPT_MAX:
1049 default:
1050 return "Invalid QP type";
1051 }
1052}
1053
1054struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1055 struct ib_qp_init_attr *init_attr,
1056 struct ib_udata *udata)
1057{
1058 struct mlx5_ib_dev *dev;
1059 struct mlx5_ib_qp *qp;
1060 u16 xrcdn = 0;
1061 int err;
1062
1063 if (pd) {
1064 dev = to_mdev(pd->device);
1065 } else {
1066 /* being cautious here */
1067 if (init_attr->qp_type != IB_QPT_XRC_TGT &&
1068 init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
1069 pr_warn("%s: no PD for transport %s\n", __func__,
1070 ib_qp_type_str(init_attr->qp_type));
1071 return ERR_PTR(-EINVAL);
1072 }
1073 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1074 }
1075
1076 switch (init_attr->qp_type) {
1077 case IB_QPT_XRC_TGT:
1078 case IB_QPT_XRC_INI:
1079 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
1080 mlx5_ib_dbg(dev, "XRC not supported\n");
1081 return ERR_PTR(-ENOSYS);
1082 }
1083 init_attr->recv_cq = NULL;
1084 if (init_attr->qp_type == IB_QPT_XRC_TGT) {
1085 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1086 init_attr->send_cq = NULL;
1087 }
1088
1089 /* fall through */
1090 case IB_QPT_RC:
1091 case IB_QPT_UC:
1092 case IB_QPT_UD:
1093 case IB_QPT_SMI:
1094 case IB_QPT_GSI:
1095 case MLX5_IB_QPT_REG_UMR:
1096 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1097 if (!qp)
1098 return ERR_PTR(-ENOMEM);
1099
1100 err = create_qp_common(dev, pd, init_attr, udata, qp);
1101 if (err) {
1102 mlx5_ib_dbg(dev, "create_qp_common failed\n");
1103 kfree(qp);
1104 return ERR_PTR(err);
1105 }
1106
1107 if (is_qp0(init_attr->qp_type))
1108 qp->ibqp.qp_num = 0;
1109 else if (is_qp1(init_attr->qp_type))
1110 qp->ibqp.qp_num = 1;
1111 else
1112 qp->ibqp.qp_num = qp->mqp.qpn;
1113
1114 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1115 qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
1116 to_mcq(init_attr->send_cq)->mcq.cqn);
1117
1118 qp->xrcdn = xrcdn;
1119
1120 break;
1121
1122 case IB_QPT_RAW_IPV6:
1123 case IB_QPT_RAW_ETHERTYPE:
1124 case IB_QPT_RAW_PACKET:
1125 case IB_QPT_MAX:
1126 default:
1127 mlx5_ib_dbg(dev, "unsupported qp type %d\n",
1128 init_attr->qp_type);
1129 /* Don't support raw QPs */
1130 return ERR_PTR(-EINVAL);
1131 }
1132
1133 return &qp->ibqp;
1134}
1135
1136int mlx5_ib_destroy_qp(struct ib_qp *qp)
1137{
1138 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1139 struct mlx5_ib_qp *mqp = to_mqp(qp);
1140
1141 destroy_qp_common(dev, mqp);
1142
1143 kfree(mqp);
1144
1145 return 0;
1146}
1147
1148static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
1149 int attr_mask)
1150{
1151 u32 hw_access_flags = 0;
1152 u8 dest_rd_atomic;
1153 u32 access_flags;
1154
1155 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1156 dest_rd_atomic = attr->max_dest_rd_atomic;
1157 else
1158 dest_rd_atomic = qp->resp_depth;
1159
1160 if (attr_mask & IB_QP_ACCESS_FLAGS)
1161 access_flags = attr->qp_access_flags;
1162 else
1163 access_flags = qp->atomic_rd_en;
1164
1165 if (!dest_rd_atomic)
1166 access_flags &= IB_ACCESS_REMOTE_WRITE;
1167
1168 if (access_flags & IB_ACCESS_REMOTE_READ)
1169 hw_access_flags |= MLX5_QP_BIT_RRE;
1170 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1171 hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
1172 if (access_flags & IB_ACCESS_REMOTE_WRITE)
1173 hw_access_flags |= MLX5_QP_BIT_RWE;
1174
1175 return cpu_to_be32(hw_access_flags);
1176}
1177
1178enum {
1179 MLX5_PATH_FLAG_FL = 1 << 0,
1180 MLX5_PATH_FLAG_FREE_AR = 1 << 1,
1181 MLX5_PATH_FLAG_COUNTER = 1 << 2,
1182};
1183
1184static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1185{
1186 if (rate == IB_RATE_PORT_CURRENT) {
1187 return 0;
1188 } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
1189 return -EINVAL;
1190 } else {
1191 while (rate != IB_RATE_2_5_GBPS &&
1192 !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
1193 dev->mdev.caps.stat_rate_support))
1194 --rate;
1195 }
1196
1197 return rate + MLX5_STAT_RATE_OFFSET;
1198}
1199
1200static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1201 struct mlx5_qp_path *path, u8 port, int attr_mask,
1202 u32 path_flags, const struct ib_qp_attr *attr)
1203{
1204 int err;
1205
1206 path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1207 path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
1208
1209 if (attr_mask & IB_QP_PKEY_INDEX)
1210 path->pkey_index = attr->pkey_index;
1211
1212 path->grh_mlid = ah->src_path_bits & 0x7f;
1213 path->rlid = cpu_to_be16(ah->dlid);
1214
1215 if (ah->ah_flags & IB_AH_GRH) {
1216 path->grh_mlid |= 1 << 7;
1217 path->mgid_index = ah->grh.sgid_index;
1218 path->hop_limit = ah->grh.hop_limit;
1219 path->tclass_flowlabel =
1220 cpu_to_be32((ah->grh.traffic_class << 20) |
1221 (ah->grh.flow_label));
1222 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1223 }
1224
1225 err = ib_rate_to_mlx5(dev, ah->static_rate);
1226 if (err < 0)
1227 return err;
1228 path->static_rate = err;
1229 path->port = port;
1230
1231 if (ah->ah_flags & IB_AH_GRH) {
1232 if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
1233 pr_err("sgid_index (%u) too large. max is %d\n",
1234 ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
1235 return -EINVAL;
1236 }
1237
1238 path->grh_mlid |= 1 << 7;
1239 path->mgid_index = ah->grh.sgid_index;
1240 path->hop_limit = ah->grh.hop_limit;
1241 path->tclass_flowlabel =
1242 cpu_to_be32((ah->grh.traffic_class << 20) |
1243 (ah->grh.flow_label));
1244 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1245 }
1246
1247 if (attr_mask & IB_QP_TIMEOUT)
1248 path->ackto_lt = attr->timeout << 3;
1249
1250 path->sl = ah->sl & 0xf;
1251
1252 return 0;
1253}
1254
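/*
 * Optional-parameter masks for QP state transitions, indexed by
 * [current state][next state][transport].  The mask built from the
 * caller's attr_mask by ib_mask_to_mlx5_opt() is ANDed with the entry for
 * the requested transition before being passed to the MODIFY_QP command.
 */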
1255static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
1256 [MLX5_QP_STATE_INIT] = {
1257 [MLX5_QP_STATE_INIT] = {
1258 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1259 MLX5_QP_OPTPAR_RAE |
1260 MLX5_QP_OPTPAR_RWE |
1261 MLX5_QP_OPTPAR_PKEY_INDEX |
1262 MLX5_QP_OPTPAR_PRI_PORT,
1263 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1264 MLX5_QP_OPTPAR_PKEY_INDEX |
1265 MLX5_QP_OPTPAR_PRI_PORT,
1266 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1267 MLX5_QP_OPTPAR_Q_KEY |
1268 MLX5_QP_OPTPAR_PRI_PORT,
1269 },
1270 [MLX5_QP_STATE_RTR] = {
1271 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1272 MLX5_QP_OPTPAR_RRE |
1273 MLX5_QP_OPTPAR_RAE |
1274 MLX5_QP_OPTPAR_RWE |
1275 MLX5_QP_OPTPAR_PKEY_INDEX,
1276 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1277 MLX5_QP_OPTPAR_RWE |
1278 MLX5_QP_OPTPAR_PKEY_INDEX,
1279 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1280 MLX5_QP_OPTPAR_Q_KEY,
1281 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
1282 MLX5_QP_OPTPAR_Q_KEY,
1283 },
1284 },
1285 [MLX5_QP_STATE_RTR] = {
1286 [MLX5_QP_STATE_RTS] = {
1287 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1288 MLX5_QP_OPTPAR_RRE |
1289 MLX5_QP_OPTPAR_RAE |
1290 MLX5_QP_OPTPAR_RWE |
1291 MLX5_QP_OPTPAR_PM_STATE |
1292 MLX5_QP_OPTPAR_RNR_TIMEOUT,
1293 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1294 MLX5_QP_OPTPAR_RWE |
1295 MLX5_QP_OPTPAR_PM_STATE,
1296 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1297 },
1298 },
1299 [MLX5_QP_STATE_RTS] = {
1300 [MLX5_QP_STATE_RTS] = {
1301 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1302 MLX5_QP_OPTPAR_RAE |
1303 MLX5_QP_OPTPAR_RWE |
1304 MLX5_QP_OPTPAR_RNR_TIMEOUT |
1305 MLX5_QP_OPTPAR_PM_STATE,
1306 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1307 MLX5_QP_OPTPAR_PM_STATE,
1308 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
1309 MLX5_QP_OPTPAR_SRQN |
1310 MLX5_QP_OPTPAR_CQN_RCV,
1311 },
1312 },
1313 [MLX5_QP_STATE_SQER] = {
1314 [MLX5_QP_STATE_RTS] = {
1315 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1316 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1317 },
1318 },
1319};
1320
1321static int ib_nr_to_mlx5_nr(int ib_mask)
1322{
1323 switch (ib_mask) {
1324 case IB_QP_STATE:
1325 return 0;
1326 case IB_QP_CUR_STATE:
1327 return 0;
1328 case IB_QP_EN_SQD_ASYNC_NOTIFY:
1329 return 0;
1330 case IB_QP_ACCESS_FLAGS:
1331 return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
1332 MLX5_QP_OPTPAR_RAE;
1333 case IB_QP_PKEY_INDEX:
1334 return MLX5_QP_OPTPAR_PKEY_INDEX;
1335 case IB_QP_PORT:
1336 return MLX5_QP_OPTPAR_PRI_PORT;
1337 case IB_QP_QKEY:
1338 return MLX5_QP_OPTPAR_Q_KEY;
1339 case IB_QP_AV:
1340 return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
1341 MLX5_QP_OPTPAR_PRI_PORT;
1342 case IB_QP_PATH_MTU:
1343 return 0;
1344 case IB_QP_TIMEOUT:
1345 return MLX5_QP_OPTPAR_ACK_TIMEOUT;
1346 case IB_QP_RETRY_CNT:
1347 return MLX5_QP_OPTPAR_RETRY_COUNT;
1348 case IB_QP_RNR_RETRY:
1349 return MLX5_QP_OPTPAR_RNR_RETRY;
1350 case IB_QP_RQ_PSN:
1351 return 0;
1352 case IB_QP_MAX_QP_RD_ATOMIC:
1353 return MLX5_QP_OPTPAR_SRA_MAX;
1354 case IB_QP_ALT_PATH:
1355 return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
1356 case IB_QP_MIN_RNR_TIMER:
1357 return MLX5_QP_OPTPAR_RNR_TIMEOUT;
1358 case IB_QP_SQ_PSN:
1359 return 0;
1360 case IB_QP_MAX_DEST_RD_ATOMIC:
1361 return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
1362 MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
1363 case IB_QP_PATH_MIG_STATE:
1364 return MLX5_QP_OPTPAR_PM_STATE;
1365 case IB_QP_CAP:
1366 return 0;
1367 case IB_QP_DEST_QPN:
1368 return 0;
1369 }
1370 return 0;
1371}
1372
1373static int ib_mask_to_mlx5_opt(int ib_mask)
1374{
1375 int result = 0;
1376 int i;
1377
1378 for (i = 0; i < 8 * sizeof(int); i++) {
1379 if ((1 << i) & ib_mask)
1380 result |= ib_nr_to_mlx5_nr(1 << i);
1381 }
1382
1383 return result;
1384}
1385
1386static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1387 const struct ib_qp_attr *attr, int attr_mask,
1388 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1389{
1390 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1391 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1392 struct mlx5_ib_cq *send_cq, *recv_cq;
1393 struct mlx5_qp_context *context;
1394 struct mlx5_modify_qp_mbox_in *in;
1395 struct mlx5_ib_pd *pd;
1396 enum mlx5_qp_state mlx5_cur, mlx5_new;
1397 enum mlx5_qp_optpar optpar;
1398 int sqd_event;
1399 int mlx5_st;
1400 int err;
1401
1402 in = kzalloc(sizeof(*in), GFP_KERNEL);
1403 if (!in)
1404 return -ENOMEM;
1405
1406 context = &in->ctx;
1407 err = to_mlx5_st(ibqp->qp_type);
1408 if (err < 0)
1409 goto out;
1410
1411 context->flags = cpu_to_be32(err << 16);
1412
1413 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
1414 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1415 } else {
1416 switch (attr->path_mig_state) {
1417 case IB_MIG_MIGRATED:
1418 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1419 break;
1420 case IB_MIG_REARM:
1421 context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
1422 break;
1423 case IB_MIG_ARMED:
1424 context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
1425 break;
1426 }
1427 }
1428
1429 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1430 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
1431 } else if (ibqp->qp_type == IB_QPT_UD ||
1432 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
1433 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1434 } else if (attr_mask & IB_QP_PATH_MTU) {
1435 if (attr->path_mtu < IB_MTU_256 ||
1436 attr->path_mtu > IB_MTU_4096) {
1437 mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
1438 err = -EINVAL;
1439 goto out;
1440 }
1441 context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
1442 }
1443
1444 if (attr_mask & IB_QP_DEST_QPN)
1445 context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
1446
1447 if (attr_mask & IB_QP_PKEY_INDEX)
1448 context->pri_path.pkey_index = attr->pkey_index;
1449
1450 /* todo implement counter_index functionality */
1451
1452 if (is_sqp(ibqp->qp_type))
1453 context->pri_path.port = qp->port;
1454
1455 if (attr_mask & IB_QP_PORT)
1456 context->pri_path.port = attr->port_num;
1457
1458 if (attr_mask & IB_QP_AV) {
1459 err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
1460 attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1461 attr_mask, 0, attr);
1462 if (err)
1463 goto out;
1464 }
1465
1466 if (attr_mask & IB_QP_TIMEOUT)
1467 context->pri_path.ackto_lt |= attr->timeout << 3;
1468
1469 if (attr_mask & IB_QP_ALT_PATH) {
1470 err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1471 attr->alt_port_num, attr_mask, 0, attr);
1472 if (err)
1473 goto out;
1474 }
1475
1476 pd = get_pd(qp);
1477 get_cqs(qp, &send_cq, &recv_cq);
1478
1479 context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
1480 context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
1481 context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
1482 context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
1483
1484 if (attr_mask & IB_QP_RNR_RETRY)
1485 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1486
1487 if (attr_mask & IB_QP_RETRY_CNT)
1488 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1489
1490 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1491 if (attr->max_rd_atomic)
1492 context->params1 |=
1493 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1494 }
1495
1496 if (attr_mask & IB_QP_SQ_PSN)
1497 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1498
1499 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1500 if (attr->max_dest_rd_atomic)
1501 context->params2 |=
1502 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1503 }
1504
1505 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
1506 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
1507
1508 if (attr_mask & IB_QP_MIN_RNR_TIMER)
1509 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1510
1511 if (attr_mask & IB_QP_RQ_PSN)
1512 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1513
1514 if (attr_mask & IB_QP_QKEY)
1515 context->qkey = cpu_to_be32(attr->qkey);
1516
1517 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1518 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1519
1520 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
1521 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1522 sqd_event = 1;
1523 else
1524 sqd_event = 0;
1525
1526 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1527 context->sq_crq_size |= cpu_to_be16(1 << 4);
1528
1529
1530 mlx5_cur = to_mlx5_state(cur_state);
1531 mlx5_new = to_mlx5_state(new_state);
1532 mlx5_st = to_mlx5_st(ibqp->qp_type);
1533 if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) {
1534 err = -EINVAL;
goto out;
}
1535
1536 optpar = ib_mask_to_mlx5_opt(attr_mask);
1537 optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
1538 in->optparam = cpu_to_be32(optpar);
1539 err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state),
1540 to_mlx5_state(new_state), in, sqd_event,
1541 &qp->mqp);
1542 if (err)
1543 goto out;
1544
1545 qp->state = new_state;
1546
1547 if (attr_mask & IB_QP_ACCESS_FLAGS)
1548 qp->atomic_rd_en = attr->qp_access_flags;
1549 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1550 qp->resp_depth = attr->max_dest_rd_atomic;
1551 if (attr_mask & IB_QP_PORT)
1552 qp->port = attr->port_num;
1553 if (attr_mask & IB_QP_ALT_PATH)
1554 qp->alt_port = attr->alt_port_num;
1555
1556 /*
1557 * If we moved a kernel QP to RESET, clean up all old CQ
1558 * entries and reinitialize the QP.
1559 */
1560 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1561 mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1562 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1563 if (send_cq != recv_cq)
1564 mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1565
1566 qp->rq.head = 0;
1567 qp->rq.tail = 0;
1568 qp->sq.head = 0;
1569 qp->sq.tail = 0;
1570 qp->sq.cur_post = 0;
1571 qp->sq.last_poll = 0;
1572 qp->db.db[MLX5_RCV_DBR] = 0;
1573 qp->db.db[MLX5_SND_DBR] = 0;
1574 }
1575
1576out:
1577 kfree(in);
1578 return err;
1579}
1580
1581int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1582 int attr_mask, struct ib_udata *udata)
1583{
1584 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1585 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1586 enum ib_qp_state cur_state, new_state;
1587 int err = -EINVAL;
1588 int port;
1589
1590 mutex_lock(&qp->mutex);
1591
1592 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1593 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1594
1595 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
1596 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
1597 goto out;
1598
1599 if ((attr_mask & IB_QP_PORT) &&
1600 (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports))
1601 goto out;
1602
1603 if (attr_mask & IB_QP_PKEY_INDEX) {
1604 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1605 if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
1606 goto out;
1607 }
1608
1609 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1610 attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
1611 goto out;
1612
1613 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1614 attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
1615 goto out;
1616
1617 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1618 err = 0;
1619 goto out;
1620 }
1621
1622 err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1623
1624out:
1625 mutex_unlock(&qp->mutex);
1626 return err;
1627}
1628
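/*
 * Work queue overflow check.  head and tail are free-running counters, so
 * head - tail is the number of outstanding WQEs; if the lockless check
 * fails, the count is re-read under the CQ lock so that completions being
 * processed concurrently are taken into account.
 */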
1629static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1630{
1631 struct mlx5_ib_cq *cq;
1632 unsigned cur;
1633
1634 cur = wq->head - wq->tail;
1635 if (likely(cur + nreq < wq->max_post))
1636 return 0;
1637
1638 cq = to_mcq(ib_cq);
1639 spin_lock(&cq->lock);
1640 cur = wq->head - wq->tail;
1641 spin_unlock(&cq->lock);
1642
1643 return cur + nreq >= wq->max_post;
1644}
1645
1646static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
1647 u64 remote_addr, u32 rkey)
1648{
1649 rseg->raddr = cpu_to_be64(remote_addr);
1650 rseg->rkey = cpu_to_be32(rkey);
1651 rseg->reserved = 0;
1652}
1653
1654static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
1655{
1656 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1657 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1658 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1659 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
1660 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1661 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1662 } else {
1663 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1664 aseg->compare = 0;
1665 }
1666}
1667
1668static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
1669 struct ib_send_wr *wr)
1670{
1671 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1672 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
1673 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1674 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1675}
1676
1677static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
1678 struct ib_send_wr *wr)
1679{
1680 memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
1681 dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
1682 dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1683}
1684
1685static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
1686{
1687 dseg->byte_count = cpu_to_be32(sg->length);
1688 dseg->lkey = cpu_to_be32(sg->lkey);
1689 dseg->addr = cpu_to_be64(sg->addr);
1690}
1691
1692static __be16 get_klm_octo(int npages)
1693{
1694 return cpu_to_be16(ALIGN(npages, 8) / 2);
1695}
1696
1697static __be64 frwr_mkey_mask(void)
1698{
1699 u64 result;
1700
1701 result = MLX5_MKEY_MASK_LEN |
1702 MLX5_MKEY_MASK_PAGE_SIZE |
1703 MLX5_MKEY_MASK_START_ADDR |
1704 MLX5_MKEY_MASK_EN_RINVAL |
1705 MLX5_MKEY_MASK_KEY |
1706 MLX5_MKEY_MASK_LR |
1707 MLX5_MKEY_MASK_LW |
1708 MLX5_MKEY_MASK_RR |
1709 MLX5_MKEY_MASK_RW |
1710 MLX5_MKEY_MASK_A |
1711 MLX5_MKEY_MASK_SMALL_FENCE |
1712 MLX5_MKEY_MASK_FREE;
1713
1714 return cpu_to_be64(result);
1715}
1716
1717static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1718 struct ib_send_wr *wr, int li)
1719{
1720 memset(umr, 0, sizeof(*umr));
1721
1722 if (li) {
1723 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
1724 umr->flags = 1 << 7;
1725 return;
1726 }
1727
1728 umr->flags = (1 << 5); /* fail if not free */
1729 umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
1730 umr->mkey_mask = frwr_mkey_mask();
1731}
1732
1733static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1734 struct ib_send_wr *wr)
1735{
1736 struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
1737 u64 mask;
1738
1739 memset(umr, 0, sizeof(*umr));
1740
1741 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
1742 umr->flags = 1 << 5; /* fail if not free */
1743 umr->klm_octowords = get_klm_octo(umrwr->npages);
1744 mask = MLX5_MKEY_MASK_LEN |
1745 MLX5_MKEY_MASK_PAGE_SIZE |
1746 MLX5_MKEY_MASK_START_ADDR |
1747 MLX5_MKEY_MASK_PD |
1748 MLX5_MKEY_MASK_LR |
1749 MLX5_MKEY_MASK_LW |
1750 MLX5_MKEY_MASK_RR |
1751 MLX5_MKEY_MASK_RW |
1752 MLX5_MKEY_MASK_A |
1753 MLX5_MKEY_MASK_FREE;
1754 umr->mkey_mask = cpu_to_be64(mask);
1755 } else {
1756 umr->flags = 2 << 5; /* fail if free */
1757 mask = MLX5_MKEY_MASK_FREE;
1758 umr->mkey_mask = cpu_to_be64(mask);
1759 }
1760
1761 if (!wr->num_sge)
1762 umr->flags |= (1 << 7); /* inline */
1763}
1764
1765static u8 get_umr_flags(int acc)
1766{
1767 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
1768 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
1769 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
1770 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
1771 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1772}
1773
1774static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
1775 int li, int *writ)
1776{
1777 memset(seg, 0, sizeof(*seg));
1778 if (li) {
1779 seg->status = 1 << 6;
1780 return;
1781 }
1782
1783 seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
1784 *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
1785 seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
1786 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
1787 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1788 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1789 seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
1790 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1791}
1792
1793static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
1794{
1795 memset(seg, 0, sizeof(*seg));
1796 if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
1797 seg->status = 1 << 6;
1798 return;
1799 }
1800
1801 seg->flags = convert_access(wr->wr.fast_reg.access_flags);
1802 seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
1803 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1804 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1805 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1806 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1807}
1808
1809static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
1810 struct ib_send_wr *wr,
1811 struct mlx5_core_dev *mdev,
1812 struct mlx5_ib_pd *pd,
1813 int writ)
1814{
1815 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1816 u64 *page_list = wr->wr.fast_reg.page_list->page_list;
1817 u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
1818 int i;
1819
1820 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
1821 mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
1822 dseg->addr = cpu_to_be64(mfrpl->map);
1823 dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
1824 dseg->lkey = cpu_to_be32(pd->pa_lkey);
1825}
1826
1827static __be32 send_ieth(struct ib_send_wr *wr)
1828{
1829 switch (wr->opcode) {
1830 case IB_WR_SEND_WITH_IMM:
1831 case IB_WR_RDMA_WRITE_WITH_IMM:
1832 return wr->ex.imm_data;
1833
1834 case IB_WR_SEND_WITH_INV:
1835 return cpu_to_be32(wr->ex.invalidate_rkey);
1836
1837 default:
1838 return 0;
1839 }
1840}
1841
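/*
 * Work queue entry signature: the bytewise XOR of the WQE, inverted.
 * wq_sig() takes the length from the descriptor-count byte of the control
 * segment (byte 8, low six bits, in units of 16 bytes).
 */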
1842static u8 calc_sig(void *wqe, int size)
1843{
1844 u8 *p = wqe;
1845 u8 res = 0;
1846 int i;
1847
1848 for (i = 0; i < size; i++)
1849 res ^= p[i];
1850
1851 return ~res;
1852}
1853
1854static u8 wq_sig(void *wqe)
1855{
1856 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
1857}
1858
1859static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
1860 void *wqe, int *sz)
1861{
1862 struct mlx5_wqe_inline_seg *seg;
1863 void *qend = qp->sq.qend;
1864 void *addr;
1865 int inl = 0;
1866 int copy;
1867 int len;
1868 int i;
1869
1870 seg = wqe;
1871 wqe += sizeof(*seg);
1872 for (i = 0; i < wr->num_sge; i++) {
1873 addr = (void *)(unsigned long)(wr->sg_list[i].addr);
1874 len = wr->sg_list[i].length;
1875 inl += len;
1876
1877 if (unlikely(inl > qp->max_inline_data))
1878 return -ENOMEM;
1879
1880 if (unlikely(wqe + len > qend)) {
1881 copy = qend - wqe;
1882 memcpy(wqe, addr, copy);
1883 addr += copy;
1884 len -= copy;
1885 wqe = mlx5_get_send_wqe(qp, 0);
1886 }
1887 memcpy(wqe, addr, len);
1888 wqe += len;
1889 }
1890
1891 seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
1892
1893 *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
1894
1895 return 0;
1896}
1897
1898static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
1899 struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
1900{
1901 int writ = 0;
1902 int li;
1903
1904 li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
1905 if (unlikely(wr->send_flags & IB_SEND_INLINE))
1906 return -EINVAL;
1907
1908 set_frwr_umr_segment(*seg, wr, li);
1909 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1910 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1911 if (unlikely((*seg == qp->sq.qend)))
1912 *seg = mlx5_get_send_wqe(qp, 0);
1913 set_mkey_segment(*seg, wr, li, &writ);
1914 *seg += sizeof(struct mlx5_mkey_seg);
1915 *size += sizeof(struct mlx5_mkey_seg) / 16;
1916 if (unlikely((*seg == qp->sq.qend)))
1917 *seg = mlx5_get_send_wqe(qp, 0);
1918 if (!li) {
1919 set_frwr_pages(*seg, wr, mdev, pd, writ);
1920 *seg += sizeof(struct mlx5_wqe_data_seg);
1921 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
1922 }
1923 return 0;
1924}
1925
1926static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
1927{
1928 __be32 *p = NULL;
1929 int tidx = idx;
1930 int i, j;
1931
1932 pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
1933 for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
1934 if ((i & 0xf) == 0) {
1935 void *buf = mlx5_get_send_wqe(qp, tidx);
1936 tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
1937 p = buf;
1938 j = 0;
1939 }
1940 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
1941 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
1942 be32_to_cpu(p[j + 3]));
1943 }
1944}
1945
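/*
 * Copy a WQE to the blue-flame register 64 bytes at a time, wrapping back
 * to the beginning of the send queue buffer when the end is reached.
 */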
1946static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
1947 unsigned bytecnt, struct mlx5_ib_qp *qp)
1948{
1949 while (bytecnt > 0) {
1950 __iowrite64_copy(dst++, src++, 8);
1951 __iowrite64_copy(dst++, src++, 8);
1952 __iowrite64_copy(dst++, src++, 8);
1953 __iowrite64_copy(dst++, src++, 8);
1954 __iowrite64_copy(dst++, src++, 8);
1955 __iowrite64_copy(dst++, src++, 8);
1956 __iowrite64_copy(dst++, src++, 8);
1957 __iowrite64_copy(dst++, src++, 8);
1958 bytecnt -= 64;
1959 if (unlikely(src == qp->sq.qend))
1960 src = mlx5_get_send_wqe(qp, 0);
1961 }
1962}
1963
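/*
 * Choose the fence mode for a WQE: a fenced local invalidate gets strong
 * ordering; otherwise an explicit IB_SEND_FENCE issued while a fence is
 * already pending becomes a small fence, and any pending fence is simply
 * carried forward.
 */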
1964static u8 get_fence(u8 fence, struct ib_send_wr *wr)
1965{
1966 if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
1967 wr->send_flags & IB_SEND_FENCE))
1968 return MLX5_FENCE_MODE_STRONG_ORDERING;
1969
1970 if (unlikely(fence)) {
1971 if (wr->send_flags & IB_SEND_FENCE)
1972 return MLX5_FENCE_MODE_SMALL_AND_FENCE;
1973 else
1974 return fence;
1975
1976 } else {
1977 return 0;
1978 }
1979}
1980
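/*
 * Post a chain of send work requests.  For each request a slot in the
 * send queue is chosen as cur_post & (wqe_cnt - 1), then the control
 * segment, transport-specific segments and data (or inline) segments are
 * written, wrapping at sq.qend as needed.  After the whole chain is built
 * the doorbell record is updated and the doorbell is rung, either by a
 * blue-flame copy (currently disabled) or by a 64-bit doorbell write.
 */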
1981int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1982 struct ib_send_wr **bad_wr)
1983{
1984 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
1985 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1986 struct mlx5_core_dev *mdev = &dev->mdev;
1987 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1988 struct mlx5_wqe_data_seg *dpseg;
1989 struct mlx5_wqe_xrc_seg *xrc;
1990 struct mlx5_bf *bf = qp->bf;
1991 int uninitialized_var(size);
1992 void *qend = qp->sq.qend;
1993 unsigned long flags;
1994 u32 mlx5_opcode;
1995 unsigned idx;
1996 int err = 0;
1997 int inl = 0;
1998 int num_sge;
1999 void *seg;
2000 int nreq;
2001 int i;
2002 u8 next_fence = 0;
2003 u8 opmod = 0;
2004 u8 fence;
2005
2006 spin_lock_irqsave(&qp->sq.lock, flags);
2007
2008 for (nreq = 0; wr; nreq++, wr = wr->next) {
2009 if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
2010 mlx5_ib_warn(dev, "invalid send opcode %d\n", wr->opcode);
2011 err = -EINVAL;
2012 *bad_wr = wr;
2013 goto out;
2014 }
2015
2016 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
2017 mlx5_ib_warn(dev, "send queue overflow\n");
2018 err = -ENOMEM;
2019 *bad_wr = wr;
2020 goto out;
2021 }
2022
2023 fence = qp->fm_cache;
2024 num_sge = wr->num_sge;
2025 if (unlikely(num_sge > qp->sq.max_gs)) {
2026 mlx5_ib_warn(dev, "num_sge (%d) > max_gs (%d)\n", num_sge, qp->sq.max_gs);
2027 err = -ENOMEM;
2028 *bad_wr = wr;
2029 goto out;
2030 }
2031
2032 idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
2033 seg = mlx5_get_send_wqe(qp, idx);
2034 ctrl = seg;
2035 *(uint32_t *)(seg + 8) = 0;
2036 ctrl->imm = send_ieth(wr);
2037 ctrl->fm_ce_se = qp->sq_signal_bits |
2038 (wr->send_flags & IB_SEND_SIGNALED ?
2039 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
2040 (wr->send_flags & IB_SEND_SOLICITED ?
2041 MLX5_WQE_CTRL_SOLICITED : 0);
2042
2043 seg += sizeof(*ctrl);
2044 size = sizeof(*ctrl) / 16;
2045
2046 switch (ibqp->qp_type) {
2047 case IB_QPT_XRC_INI:
2048 xrc = seg;
2049 xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
2050 seg += sizeof(*xrc);
2051 size += sizeof(*xrc) / 16;
2052 /* fall through */
2053 case IB_QPT_RC:
2054 switch (wr->opcode) {
2055 case IB_WR_RDMA_READ:
2056 case IB_WR_RDMA_WRITE:
2057 case IB_WR_RDMA_WRITE_WITH_IMM:
2058 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2059 wr->wr.rdma.rkey);
2060 seg += sizeof(struct mlx5_wqe_raddr_seg);
2061 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2062 break;
2063
2064 case IB_WR_ATOMIC_CMP_AND_SWP:
2065 case IB_WR_ATOMIC_FETCH_AND_ADD:
2066 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2067 wr->wr.atomic.rkey);
2068 seg += sizeof(struct mlx5_wqe_raddr_seg);
2069
2070 set_atomic_seg(seg, wr);
2071 seg += sizeof(struct mlx5_wqe_atomic_seg);
2072
2073 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2074 sizeof(struct mlx5_wqe_atomic_seg)) / 16;
2075 break;
2076
2077 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2078 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2079 wr->wr.atomic.rkey);
2080 seg += sizeof(struct mlx5_wqe_raddr_seg);
2081
2082 set_masked_atomic_seg(seg, wr);
2083 seg += sizeof(struct mlx5_wqe_masked_atomic_seg);
2084
2085 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2086 sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
2087 break;
2088
2089 case IB_WR_LOCAL_INV:
2090 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2091 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
2092 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2093 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2094 if (err) {
2095 mlx5_ib_warn(dev, "\n");
2096 *bad_wr = wr;
2097 goto out;
2098 }
2099 num_sge = 0;
2100 break;
2101
2102 case IB_WR_FAST_REG_MR:
2103 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2104 qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
2105 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2106 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2107 if (err) {
2108 mlx5_ib_warn(dev, "\n");
2109 *bad_wr = wr;
2110 goto out;
2111 }
2112 num_sge = 0;
2113 break;
2114
2115 default:
2116 break;
2117 }
2118 break;
2119
2120 case IB_QPT_UC:
2121 switch (wr->opcode) {
2122 case IB_WR_RDMA_WRITE:
2123 case IB_WR_RDMA_WRITE_WITH_IMM:
2124 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2125 wr->wr.rdma.rkey);
2126 seg += sizeof(struct mlx5_wqe_raddr_seg);
2127 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2128 break;
2129
2130 default:
2131 break;
2132 }
2133 break;
2134
2135 case IB_QPT_UD:
2136 case IB_QPT_SMI:
2137 case IB_QPT_GSI:
2138 set_datagram_seg(seg, wr);
2139 seg += sizeof(struct mlx5_wqe_datagram_seg);
2140 size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
2141 if (unlikely((seg == qend)))
2142 seg = mlx5_get_send_wqe(qp, 0);
2143 break;
2144
2145 case MLX5_IB_QPT_REG_UMR:
2146 if (wr->opcode != MLX5_IB_WR_UMR) {
2147 err = -EINVAL;
2148 mlx5_ib_warn(dev, "bad opcode\n");
2149 goto out;
2150 }
2151 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
2152 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2153 set_reg_umr_segment(seg, wr);
2154 seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2155 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2156 if (unlikely((seg == qend)))
2157 seg = mlx5_get_send_wqe(qp, 0);
2158 set_reg_mkey_segment(seg, wr);
2159 seg += sizeof(struct mlx5_mkey_seg);
2160 size += sizeof(struct mlx5_mkey_seg) / 16;
2161 if (unlikely((seg == qend)))
2162 seg = mlx5_get_send_wqe(qp, 0);
2163 break;
2164
2165 default:
2166 break;
2167 }
2168
2169 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
2170 int uninitialized_var(sz);
2171
2172 err = set_data_inl_seg(qp, wr, seg, &sz);
2173 if (unlikely(err)) {
2174 mlx5_ib_warn(dev, "\n");
2175 *bad_wr = wr;
2176 goto out;
2177 }
2178 inl = 1;
2179 size += sz;
2180 } else {
2181 dpseg = seg;
2182 for (i = 0; i < num_sge; i++) {
2183 if (unlikely(dpseg == qend)) {
2184 seg = mlx5_get_send_wqe(qp, 0);
2185 dpseg = seg;
2186 }
2187 if (likely(wr->sg_list[i].length)) {
2188 set_data_ptr_seg(dpseg, wr->sg_list + i);
2189 size += sizeof(struct mlx5_wqe_data_seg) / 16;
2190 dpseg++;
2191 }
2192 }
2193 }
2194
2195 mlx5_opcode = mlx5_ib_opcode[wr->opcode];
2196 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2197 mlx5_opcode |
2198 ((u32)opmod << 24));
2199 ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
2200 ctrl->fm_ce_se |= get_fence(fence, wr);
2201 qp->fm_cache = next_fence;
2202 if (unlikely(qp->wq_sig))
2203 ctrl->signature = wq_sig(ctrl);
2204
2205 qp->sq.wrid[idx] = wr->wr_id;
2206 qp->sq.w_list[idx].opcode = mlx5_opcode;
2207 qp->sq.wqe_head[idx] = qp->sq.head + nreq;
2208 qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
2209 qp->sq.w_list[idx].next = qp->sq.cur_post;
2210
2211 if (0)
2212 dump_wqe(qp, idx, size);
2213 }
2214
2215out:
2216 if (likely(nreq)) {
2217 qp->sq.head += nreq;
2218
2219 /* Make sure that descriptors are written before
2220 * updating doorbell record and ringing the doorbell
2221 */
2222 wmb();
2223
2224 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
2225
2226 if (bf->need_lock)
2227 spin_lock(&bf->lock);
2228
2229 /* TBD enable WC */
2230 if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
2231 mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
2232 /* wc_wmb(); */
2233 } else {
2234 mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
2235 MLX5_GET_DOORBELL_LOCK(&bf->lock32));
2236 /* Make sure doorbells don't leak out of SQ spinlock
2237 * and reach the HCA out of order.
2238 */
2239 mmiowb();
2240 }
2241 bf->offset ^= bf->buf_size;
2242 if (bf->need_lock)
2243 spin_unlock(&bf->lock);
2244 }
2245
2246 spin_unlock_irqrestore(&qp->sq.lock, flags);
2247
2248 return err;
2249}
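
As a usage note, the sketch below shows how an in-kernel consumer typically hands a work request to this routine through the core ib_post_send() wrapper; the helper name and the assumption that the buffer is already registered and that the peer's address/rkey were exchanged out of band are illustrative, not part of this patch. On the RC path above such a request maps to set_raddr_seg() plus one data segment.

    #include <linux/string.h>
    #include <rdma/ib_verbs.h>

    /* Hypothetical helper: "qp" is a connected RC QP, the local buffer is
     * already registered (lkey), and remote_addr/rkey are known to the caller. */
    static int post_one_rdma_write(struct ib_qp *qp, u64 local_addr, u32 lkey,
                                   u64 remote_addr, u32 rkey, u32 len, u64 wr_id)
    {
            struct ib_sge sge = {
                    .addr   = local_addr,
                    .length = len,
                    .lkey   = lkey,
            };
            struct ib_send_wr wr, *bad_wr;

            memset(&wr, 0, sizeof(wr));
            wr.wr_id      = wr_id;
            wr.sg_list    = &sge;
            wr.num_sge    = 1;
            wr.opcode     = IB_WR_RDMA_WRITE;
            wr.send_flags = IB_SEND_SIGNALED;       /* ask for a completion */
            wr.wr.rdma.remote_addr = remote_addr;
            wr.wr.rdma.rkey        = rkey;

            return ib_post_send(qp, &wr, &bad_wr);
    }
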
2250
2251static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
2252{
2253 sig->signature = calc_sig(sig, size);
2254}
2255
2256int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2257 struct ib_recv_wr **bad_wr)
2258{
2259 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2260 struct mlx5_wqe_data_seg *scat;
2261 struct mlx5_rwqe_sig *sig;
2262 unsigned long flags;
2263 int err = 0;
2264 int nreq;
2265 int ind;
2266 int i;
2267
2268 spin_lock_irqsave(&qp->rq.lock, flags);
2269
2270 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2271
2272 for (nreq = 0; wr; nreq++, wr = wr->next) {
2273 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2274 err = -ENOMEM;
2275 *bad_wr = wr;
2276 goto out;
2277 }
2278
2279 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2280 err = -EINVAL;
2281 *bad_wr = wr;
2282 goto out;
2283 }
2284
2285 scat = get_recv_wqe(qp, ind);
2286 if (qp->wq_sig)
2287 scat++;
2288
2289 for (i = 0; i < wr->num_sge; i++)
2290 set_data_ptr_seg(scat + i, wr->sg_list + i);
2291
2292 if (i < qp->rq.max_gs) {
2293 scat[i].byte_count = 0;
2294 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
2295 scat[i].addr = 0;
2296 }
2297
2298 if (qp->wq_sig) {
2299 sig = (struct mlx5_rwqe_sig *)scat;
2300 set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
2301 }
2302
2303 qp->rq.wrid[ind] = wr->wr_id;
2304
2305 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2306 }
2307
2308out:
2309 if (likely(nreq)) {
2310 qp->rq.head += nreq;
2311
2312 /* Make sure that descriptors are written before
2313 * doorbell record.
2314 */
2315 wmb();
2316
2317 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2318 }
2319
2320 spin_unlock_irqrestore(&qp->rq.lock, flags);
2321
2322 return err;
2323}
2324
2325static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
2326{
2327 switch (mlx5_state) {
2328 case MLX5_QP_STATE_RST: return IB_QPS_RESET;
2329 case MLX5_QP_STATE_INIT: return IB_QPS_INIT;
2330 case MLX5_QP_STATE_RTR: return IB_QPS_RTR;
2331 case MLX5_QP_STATE_RTS: return IB_QPS_RTS;
2332 case MLX5_QP_STATE_SQ_DRAINING:
2333 case MLX5_QP_STATE_SQD: return IB_QPS_SQD;
2334 case MLX5_QP_STATE_SQER: return IB_QPS_SQE;
2335 case MLX5_QP_STATE_ERR: return IB_QPS_ERR;
2336 default: return -1;
2337 }
2338}
2339
2340static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
2341{
2342 switch (mlx5_mig_state) {
2343 case MLX5_QP_PM_ARMED: return IB_MIG_ARMED;
2344 case MLX5_QP_PM_REARM: return IB_MIG_REARM;
2345 case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
2346 default: return -1;
2347 }
2348}
2349
2350static int to_ib_qp_access_flags(int mlx5_flags)
2351{
2352 int ib_flags = 0;
2353
2354 if (mlx5_flags & MLX5_QP_BIT_RRE)
2355 ib_flags |= IB_ACCESS_REMOTE_READ;
2356 if (mlx5_flags & MLX5_QP_BIT_RWE)
2357 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2358 if (mlx5_flags & MLX5_QP_BIT_RAE)
2359 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2360
2361 return ib_flags;
2362}
2363
2364static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2365 struct mlx5_qp_path *path)
2366{
2367 struct mlx5_core_dev *dev = &ibdev->mdev;
2368
2369 memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
2370 ib_ah_attr->port_num = path->port;
2371
2372 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
2373 return;
2374
2375 ib_ah_attr->sl = path->sl & 0xf;
2376
2377 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
2378 ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
2379 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
2380 ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
2381 if (ib_ah_attr->ah_flags) {
2382 ib_ah_attr->grh.sgid_index = path->mgid_index;
2383 ib_ah_attr->grh.hop_limit = path->hop_limit;
2384 ib_ah_attr->grh.traffic_class =
2385 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2386 ib_ah_attr->grh.flow_label =
2387 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2388 memcpy(ib_ah_attr->grh.dgid.raw,
2389 path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
2390 }
2391}
2392
2393int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2394 struct ib_qp_init_attr *qp_init_attr)
2395{
2396 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2397 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2398 struct mlx5_query_qp_mbox_out *outb;
2399 struct mlx5_qp_context *context;
2400 int mlx5_state;
2401 int err = 0;
2402
2403 mutex_lock(&qp->mutex);
2404 outb = kzalloc(sizeof(*outb), GFP_KERNEL);
2405 if (!outb) {
2406 err = -ENOMEM;
2407 goto out;
2408 }
2409 context = &outb->ctx;
2410 err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
2411 if (err)
2412 goto out_free;
2413
2414 mlx5_state = be32_to_cpu(context->flags) >> 28;
2415
2416 qp->state = to_ib_qp_state(mlx5_state);
2417 qp_attr->qp_state = qp->state;
2418 qp_attr->path_mtu = context->mtu_msgmax >> 5;
2419 qp_attr->path_mig_state =
2420 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
2421 qp_attr->qkey = be32_to_cpu(context->qkey);
2422 qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
2423 qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
2424 qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
2425 qp_attr->qp_access_flags =
2426 to_ib_qp_access_flags(be32_to_cpu(context->params2));
2427
2428 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2429 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
2430 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
2431 qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
2432 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
2433 }
2434
2435 qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
2436 qp_attr->port_num = context->pri_path.port;
2437
2438 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2439 qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
2440
2441 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
2442
2443 qp_attr->max_dest_rd_atomic =
2444 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
2445 qp_attr->min_rnr_timer =
2446 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
2447 qp_attr->timeout = context->pri_path.ackto_lt >> 3;
2448 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
2449 qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
2450 qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
2451 qp_attr->cur_qp_state = qp_attr->qp_state;
2452 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2453 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2454
2455 if (!ibqp->uobject) {
2456 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2457 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2458 } else {
2459 qp_attr->cap.max_send_wr = 0;
2460 qp_attr->cap.max_send_sge = 0;
2461 }
2462
2463 /* We don't support inline sends for kernel QPs (yet), and we
2464 * don't know what userspace's value should be.
2465 */
2466 qp_attr->cap.max_inline_data = 0;
2467
2468 qp_init_attr->cap = qp_attr->cap;
2469
2470 qp_init_attr->create_flags = 0;
2471 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2472 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2473
2474 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
2475 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2476
2477out_free:
2478 kfree(outb);
2479
2480out:
2481 mutex_unlock(&qp->mutex);
2482 return err;
2483}
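
A consumer reaches this query path through ib_query_qp(); a minimal sketch under the assumption that only the current state is of interest (the helper name is made up for illustration):

    #include <rdma/ib_verbs.h>

    /* Illustrative only: fetch just the current state of "qp". */
    static enum ib_qp_state qp_current_state(struct ib_qp *qp)
    {
            struct ib_qp_attr attr;
            struct ib_qp_init_attr init_attr;

            if (ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr))
                    return IB_QPS_ERR;      /* treat a failed query as an error state */

            return attr.qp_state;
    }
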
2484
2485struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
2486 struct ib_ucontext *context,
2487 struct ib_udata *udata)
2488{
2489 struct mlx5_ib_dev *dev = to_mdev(ibdev);
2490 struct mlx5_ib_xrcd *xrcd;
2491 int err;
2492
2493 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
2494 return ERR_PTR(-ENOSYS);
2495
2496 xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
2497 if (!xrcd)
2498 return ERR_PTR(-ENOMEM);
2499
2500 err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
2501 if (err) {
2502 kfree(xrcd);
2503 return ERR_PTR(-ENOMEM);
2504 }
2505
2506 return &xrcd->ibxrcd;
2507}
2508
2509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
2510{
2511 struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
2512 u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
2513 int err;
2514
2515 err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
2516 if (err) {
2517 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
2518 return err;
2519 }
2520
2521 kfree(xrcd);
2522
2523 return 0;
2524}
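
These two entry points are normally driven through the core ib_alloc_xrcd()/ib_dealloc_xrcd() helpers; a hedged round-trip sketch follows (the function name is invented for illustration):

    #include <linux/err.h>
    #include <rdma/ib_verbs.h>

    /* Illustrative only: allocate an XRC domain on "device" and free it again. */
    static int xrcd_roundtrip(struct ib_device *device)
    {
            struct ib_xrcd *xrcd;

            xrcd = ib_alloc_xrcd(device);
            if (IS_ERR(xrcd))
                    return PTR_ERR(xrcd);   /* -ENOSYS if the HCA lacks XRC support */

            return ib_dealloc_xrcd(xrcd);
    }
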
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644
index 000000000000..84d297afd6a9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -0,0 +1,473 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/mlx5/qp.h>
35#include <linux/mlx5/srq.h>
36#include <linux/slab.h>
37#include <rdma/ib_umem.h>
38
39#include "mlx5_ib.h"
40#include "user.h"
41
42/* not supported currently */
43static int srq_signature;
44
45static void *get_wqe(struct mlx5_ib_srq *srq, int n)
46{
47 return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
48}
49
50static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
51{
52 struct ib_event event;
53 struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
54
55 if (ibsrq->event_handler) {
56 event.device = ibsrq->device;
57 event.element.srq = ibsrq;
58 switch (type) {
59 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
60 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
61 break;
62 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
63 event.event = IB_EVENT_SRQ_ERR;
64 break;
65 default:
66 pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
67 type, srq->srqn);
68 return;
69 }
70
71 ibsrq->event_handler(&event, ibsrq->srq_context);
72 }
73}
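
On the user side these events come out of the asynchronous event queue rather than through a callback; a small sketch of draining them with libibverbs (ctx is an open ibv_context, and the printing is only a stand-in for the application's real refill or teardown logic):

    #include <stdio.h>
    #include <infiniband/verbs.h>

    void drain_async_events(struct ibv_context *ctx)
    {
            struct ibv_async_event ev;

            while (!ibv_get_async_event(ctx, &ev)) {        /* blocks until an event arrives */
                    if (ev.event_type == IBV_EVENT_SRQ_LIMIT_REACHED)
                            fprintf(stderr, "SRQ %p fell below its limit, refill it\n",
                                    (void *)ev.element.srq);
                    else if (ev.event_type == IBV_EVENT_SRQ_ERR)
                            fprintf(stderr, "SRQ %p hit a catastrophic error\n",
                                    (void *)ev.element.srq);
                    ibv_ack_async_event(&ev);
            }
    }
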
74
75static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
76 struct mlx5_create_srq_mbox_in **in,
77 struct ib_udata *udata, int buf_size, int *inlen)
78{
79 struct mlx5_ib_dev *dev = to_mdev(pd->device);
80 struct mlx5_ib_create_srq ucmd;
81 int err;
82 int npages;
83 int page_shift;
84 int ncont;
85 u32 offset;
86
87 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
88 mlx5_ib_dbg(dev, "failed copy udata\n");
89 return -EFAULT;
90 }
91 srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
92
93 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
94 0, 0);
95 if (IS_ERR(srq->umem)) {
96 mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
97 err = PTR_ERR(srq->umem);
98 return err;
99 }
100
101 mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
102 &page_shift, &ncont, NULL);
103 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
104 &offset);
105 if (err) {
106 mlx5_ib_warn(dev, "bad offset\n");
107 goto err_umem;
108 }
109
110 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
111 *in = mlx5_vzalloc(*inlen);
112 if (!(*in)) {
113 err = -ENOMEM;
114 goto err_umem;
115 }
116
117 mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
118
119 err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
120 ucmd.db_addr, &srq->db);
121 if (err) {
122 mlx5_ib_dbg(dev, "map doorbell failed\n");
123 goto err_in;
124 }
125
126 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
128
129 return 0;
130
131err_in:
132 mlx5_vfree(*in);
133
134err_umem:
135 ib_umem_release(srq->umem);
136
137 return err;
138}
139
140static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
141 struct mlx5_create_srq_mbox_in **in, int buf_size,
142 int *inlen)
143{
144 int err;
145 int i;
146 struct mlx5_wqe_srq_next_seg *next;
147 int page_shift;
148 int npages;
149
150 err = mlx5_db_alloc(&dev->mdev, &srq->db);
151 if (err) {
152 mlx5_ib_warn(dev, "alloc dbell rec failed\n");
153 return err;
154 }
155
156 *srq->db.db = 0;
157
158 if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
159 mlx5_ib_dbg(dev, "buf alloc failed\n");
160 err = -ENOMEM;
161 goto err_db;
162 }
163 page_shift = srq->buf.page_shift;
164
165 srq->head = 0;
166 srq->tail = srq->msrq.max - 1;
167 srq->wqe_ctr = 0;
168
169 for (i = 0; i < srq->msrq.max; i++) {
170 next = get_wqe(srq, i);
171 next->next_wqe_index =
172 cpu_to_be16((i + 1) & (srq->msrq.max - 1));
173 }
174
175 npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
176 mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
177 buf_size, page_shift, srq->buf.npages, npages);
178 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
179 *in = mlx5_vzalloc(*inlen);
180 if (!*in) {
181 err = -ENOMEM;
182 goto err_buf;
183 }
184 mlx5_fill_page_array(&srq->buf, (*in)->pas);
185
186 srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
187 if (!srq->wrid) {
188 mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
189 (unsigned long)(srq->msrq.max * sizeof(u64)));
190 err = -ENOMEM;
191 goto err_in;
192 }
193 srq->wq_sig = !!srq_signature;
194
195 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
196
197 return 0;
198
199err_in:
200 mlx5_vfree(*in);
201
202err_buf:
203 mlx5_buf_free(&dev->mdev, &srq->buf);
204
205err_db:
206 mlx5_db_free(&dev->mdev, &srq->db);
207 return err;
208}
209
210static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
211{
212 mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
213 ib_umem_release(srq->umem);
214}
215
216
217static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
218{
219 kfree(srq->wrid);
220 mlx5_buf_free(&dev->mdev, &srq->buf);
221 mlx5_db_free(&dev->mdev, &srq->db);
222}
223
224struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
225 struct ib_srq_init_attr *init_attr,
226 struct ib_udata *udata)
227{
228 struct mlx5_ib_dev *dev = to_mdev(pd->device);
229 struct mlx5_ib_srq *srq;
230 int desc_size;
231 int buf_size;
232 int err;
233 struct mlx5_create_srq_mbox_in *uninitialized_var(in);
234 int uninitialized_var(inlen);
235 int is_xrc;
236 u32 flgs, xrcdn;
237
238 /* Sanity check SRQ size before proceeding */
239 if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
240 mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
241 init_attr->attr.max_wr,
242 dev->mdev.caps.max_srq_wqes);
243 return ERR_PTR(-EINVAL);
244 }
245
246 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
247 if (!srq)
248 return ERR_PTR(-ENOMEM);
249
250 mutex_init(&srq->mutex);
251 spin_lock_init(&srq->lock);
252 srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
253 srq->msrq.max_gs = init_attr->attr.max_sge;
254
255 desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
256 srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
257 desc_size = roundup_pow_of_two(desc_size);
258 desc_size = max_t(int, 32, desc_size);
259 srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
260 sizeof(struct mlx5_wqe_data_seg);
261 srq->msrq.wqe_shift = ilog2(desc_size);
262 buf_size = srq->msrq.max * desc_size;
263 mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
264 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
265 srq->msrq.max_avail_gather);
266
267 if (pd->uobject)
268 err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
269 else
270 err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
271
272 if (err) {
273 mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
274 pd->uobject ? "user" : "kernel", err);
275 goto err_srq;
276 }
277
278 is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
279 in->ctx.state_log_sz = ilog2(srq->msrq.max);
280 flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
281 xrcdn = 0;
282 if (is_xrc) {
283 xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
284 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
285 } else if (init_attr->srq_type == IB_SRQT_BASIC) {
286 xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
287 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
288 }
289
290 in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
291
292 in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
293 in->ctx.db_record = cpu_to_be64(srq->db.dma);
294 err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
295 mlx5_vfree(in);
296 if (err) {
297 mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
298 goto err_srq;
299 }
300
301 mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
302
303 srq->msrq.event = mlx5_ib_srq_event;
304 srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
305
306 if (pd->uobject)
307 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
308 mlx5_ib_dbg(dev, "copy to user failed\n");
309 err = -EFAULT;
310 goto err_core;
311 }
312
313 init_attr->attr.max_wr = srq->msrq.max - 1;
314
315 return &srq->ibsrq;
316
317err_core:
318 mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
319 if (pd->uobject)
320 destroy_srq_user(pd, srq);
321 else
322 destroy_srq_kernel(dev, srq);
323
324err_srq:
325 kfree(srq);
326
327 return ERR_PTR(err);
328}
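
From userspace this create path is exercised through libibverbs; a minimal sketch assuming pd came from ibv_alloc_pd() and using placeholder sizes. The SRQ that comes back effectively holds 2^n - 1 work requests because of the power-of-two rounding done above.

    #include <infiniband/verbs.h>

    /* Illustrative only: pd comes from ibv_alloc_pd(); sizes are placeholders. */
    static struct ibv_srq *create_example_srq(struct ibv_pd *pd)
    {
            struct ibv_srq_init_attr init = {
                    .attr = {
                            .max_wr  = 1024,        /* rounded up to 2^n entries, 2^n - 1 usable */
                            .max_sge = 2,
                    },
            };

            return ibv_create_srq(pd, &init);       /* NULL on failure, errno set */
    }
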
329
330int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
331 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
332{
333 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
334 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
335 int ret;
336
337 /* We don't support resizing SRQs yet */
338 if (attr_mask & IB_SRQ_MAX_WR)
339 return -EINVAL;
340
341 if (attr_mask & IB_SRQ_LIMIT) {
342 if (attr->srq_limit >= srq->msrq.max)
343 return -EINVAL;
344
345 mutex_lock(&srq->mutex);
346 ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
347 mutex_unlock(&srq->mutex);
348
349 if (ret)
350 return ret;
351 }
352
353 return 0;
354}
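
A short userspace sketch of arming the SRQ limit that this function implements; asking for IBV_SRQ_MAX_WR (resize) would be rejected with -EINVAL as coded above, and the watermark value 16 is arbitrary.

    #include <infiniband/verbs.h>

    int arm_srq_limit(struct ibv_srq *srq)
    {
            struct ibv_srq_attr attr = { .srq_limit = 16 }; /* arbitrary watermark */

            /* once the SRQ drops below srq_limit posted WRs, an
             * IBV_EVENT_SRQ_LIMIT_REACHED async event is generated
             * (see mlx5_ib_srq_event above) */
            return ibv_modify_srq(srq, &attr, IBV_SRQ_LIMIT);
    }
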
355
356int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
357{
358 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
359 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
360 int ret;
361 struct mlx5_query_srq_mbox_out *out;
362
363 out = kzalloc(sizeof(*out), GFP_KERNEL);
364 if (!out)
365 return -ENOMEM;
366
367 ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
368 if (ret)
369 goto out_box;
370
371 srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
372 srq_attr->max_wr = srq->msrq.max - 1;
373 srq_attr->max_sge = srq->msrq.max_gs;
374
375out_box:
376 kfree(out);
377 return ret;
378}
379
380int mlx5_ib_destroy_srq(struct ib_srq *srq)
381{
382 struct mlx5_ib_dev *dev = to_mdev(srq->device);
383 struct mlx5_ib_srq *msrq = to_msrq(srq);
384
385 mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
386
387 if (srq->uobject) {
388 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
389 ib_umem_release(msrq->umem);
390 } else {
391 kfree(msrq->wrid);
392 mlx5_buf_free(&dev->mdev, &msrq->buf);
393 mlx5_db_free(&dev->mdev, &msrq->db);
394 }
395
396 kfree(srq);
397 return 0;
398}
399
400void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
401{
402 struct mlx5_wqe_srq_next_seg *next;
403
404 /* always called with interrupts disabled. */
405 spin_lock(&srq->lock);
406
407 next = get_wqe(srq, srq->tail);
408 next->next_wqe_index = cpu_to_be16(wqe_index);
409 srq->tail = wqe_index;
410
411 spin_unlock(&srq->lock);
412}
413
414int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
415 struct ib_recv_wr **bad_wr)
416{
417 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
418 struct mlx5_wqe_srq_next_seg *next;
419 struct mlx5_wqe_data_seg *scat;
420 unsigned long flags;
421 int err = 0;
422 int nreq;
423 int i;
424
425 spin_lock_irqsave(&srq->lock, flags);
426
427 for (nreq = 0; wr; nreq++, wr = wr->next) {
428 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
429 err = -EINVAL;
430 *bad_wr = wr;
431 break;
432 }
433
434 if (unlikely(srq->head == srq->tail)) {
435 err = -ENOMEM;
436 *bad_wr = wr;
437 break;
438 }
439
440 srq->wrid[srq->head] = wr->wr_id;
441
442 next = get_wqe(srq, srq->head);
443 srq->head = be16_to_cpu(next->next_wqe_index);
444 scat = (struct mlx5_wqe_data_seg *)(next + 1);
445
446 for (i = 0; i < wr->num_sge; i++) {
447 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
448 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
449 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
450 }
451
452 if (i < srq->msrq.max_avail_gather) {
453 scat[i].byte_count = 0;
454 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
455 scat[i].addr = 0;
456 }
457 }
458
459 if (likely(nreq)) {
460 srq->wqe_ctr += nreq;
461
462 /* Make sure that descriptors are written before
463 * doorbell record.
464 */
465 wmb();
466
467 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
468 }
469
470 spin_unlock_irqrestore(&srq->lock, flags);
471
472 return err;
473}
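
Kernel consumers reach this routine through ib_post_srq_recv(); a minimal sketch, assuming dma_addr/lkey come from the caller's own buffer registration (userspace SRQs post through the provider library and never enter this function):

    #include <rdma/ib_verbs.h>

    static int srq_post_one(struct ib_srq *srq, u64 dma_addr, u32 lkey,
                            u32 len, u64 wr_id)
    {
            struct ib_sge sge = {
                    .addr   = dma_addr,
                    .length = len,
                    .lkey   = lkey,
            };
            struct ib_recv_wr wr = {
                    .wr_id   = wr_id,
                    .sg_list = &sge,
                    .num_sge = 1,
            };
            struct ib_recv_wr *bad_wr;

            return ib_post_srq_recv(srq, &wr, &bad_wr);
    }
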
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644
index 000000000000..a886de3e593c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_USER_H
34#define MLX5_IB_USER_H
35
36#include <linux/types.h>
37
38enum {
39 MLX5_QP_FLAG_SIGNATURE = 1 << 0,
40 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
41};
42
43enum {
44 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
45};
46
47
48/* Increment this value if any changes that break userspace ABI
49 * compatibility are made.
50 */
51#define MLX5_IB_UVERBS_ABI_VERSION 1
52
53/* Make sure that all structs defined in this file remain laid out so
54 * that they pack the same way on 32-bit and 64-bit architectures (to
55 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
56 * In particular do not use pointer types -- pass pointers in __u64
57 * instead.
58 */
59
60struct mlx5_ib_alloc_ucontext_req {
61 __u32 total_num_uuars;
62 __u32 num_low_latency_uuars;
63};
64
65struct mlx5_ib_alloc_ucontext_resp {
66 __u32 qp_tab_size;
67 __u32 bf_reg_size;
68 __u32 tot_uuars;
69 __u32 cache_line_size;
70 __u16 max_sq_desc_sz;
71 __u16 max_rq_desc_sz;
72 __u32 max_send_wqebb;
73 __u32 max_recv_wr;
74 __u32 max_srq_recv_wr;
75 __u16 num_ports;
76 __u16 reserved;
77};
78
79struct mlx5_ib_alloc_pd_resp {
80 __u32 pdn;
81};
82
83struct mlx5_ib_create_cq {
84 __u64 buf_addr;
85 __u64 db_addr;
86 __u32 cqe_size;
87};
88
89struct mlx5_ib_create_cq_resp {
90 __u32 cqn;
91 __u32 reserved;
92};
93
94struct mlx5_ib_resize_cq {
95 __u64 buf_addr;
96};
97
98struct mlx5_ib_create_srq {
99 __u64 buf_addr;
100 __u64 db_addr;
101 __u32 flags;
102};
103
104struct mlx5_ib_create_srq_resp {
105 __u32 srqn;
106 __u32 reserved;
107};
108
109struct mlx5_ib_create_qp {
110 __u64 buf_addr;
111 __u64 db_addr;
112 __u32 sq_wqe_count;
113 __u32 rq_wqe_count;
114 __u32 rq_wqe_shift;
115 __u32 flags;
116};
117
118struct mlx5_ib_create_qp_resp {
119 __u32 uuar_index;
120};
121#endif /* MLX5_IB_USER_H */
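
Because these structs forbid pointer types, a provider library passes buffer addresses by casting through uintptr_t into the __u64 fields; a hedged sketch, assuming this header is visible to the library (the function and the cq_buf/db_page names are invented for illustration):

    #include <stdint.h>
    #include <string.h>

    static void fill_create_cq_cmd(struct mlx5_ib_create_cq *cmd,
                                   void *cq_buf, void *db_page, uint32_t cqe_size)
    {
            memset(cmd, 0, sizeof(*cmd));
            cmd->buf_addr = (uintptr_t)cq_buf;      /* address travels in a __u64 */
            cmd->db_addr  = (uintptr_t)db_page;
            cmd->cqe_size = cqe_size;               /* e.g. 64 or 128 bytes per CQE */
    }
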
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 48970af23679..d540180a8e42 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -42,8 +42,6 @@
42#define OCRDMA_ROCE_DEV_VERSION "1.0.0" 42#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
43#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" 43#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
44 44
45#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg)
46
47#define OCRDMA_MAX_AH 512 45#define OCRDMA_MAX_AH 512
48 46
49#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) 47#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
@@ -97,7 +95,6 @@ struct ocrdma_queue_info {
97 u16 id; /* qid, where to ring the doorbell. */ 95 u16 id; /* qid, where to ring the doorbell. */
98 u16 head, tail; 96 u16 head, tail;
99 bool created; 97 bool created;
100 atomic_t used; /* Number of valid elements in the queue */
101}; 98};
102 99
103struct ocrdma_eq { 100struct ocrdma_eq {
@@ -198,7 +195,6 @@ struct ocrdma_cq {
198 struct ocrdma_ucontext *ucontext; 195 struct ocrdma_ucontext *ucontext;
199 dma_addr_t pa; 196 dma_addr_t pa;
200 u32 len; 197 u32 len;
201 atomic_t use_cnt;
202 198
203 /* head of all qp's sq and rq for which cqes need to be flushed 199 /* head of all qp's sq and rq for which cqes need to be flushed
204 * by the software. 200 * by the software.
@@ -210,7 +206,6 @@ struct ocrdma_pd {
210 struct ib_pd ibpd; 206 struct ib_pd ibpd;
211 struct ocrdma_dev *dev; 207 struct ocrdma_dev *dev;
212 struct ocrdma_ucontext *uctx; 208 struct ocrdma_ucontext *uctx;
213 atomic_t use_cnt;
214 u32 id; 209 u32 id;
215 int num_dpp_qp; 210 int num_dpp_qp;
216 u32 dpp_page; 211 u32 dpp_page;
@@ -241,16 +236,16 @@ struct ocrdma_srq {
241 struct ib_srq ibsrq; 236 struct ib_srq ibsrq;
242 struct ocrdma_dev *dev; 237 struct ocrdma_dev *dev;
243 u8 __iomem *db; 238 u8 __iomem *db;
239 struct ocrdma_qp_hwq_info rq;
240 u64 *rqe_wr_id_tbl;
241 u32 *idx_bit_fields;
242 u32 bit_fields_len;
243
244 /* provide synchronization to multiple context(s) posting rqe */ 244 /* provide synchronization to multiple context(s) posting rqe */
245 spinlock_t q_lock ____cacheline_aligned; 245 spinlock_t q_lock ____cacheline_aligned;
246 246
247 struct ocrdma_qp_hwq_info rq;
248 struct ocrdma_pd *pd; 247 struct ocrdma_pd *pd;
249 atomic_t use_cnt;
250 u32 id; 248 u32 id;
251 u64 *rqe_wr_id_tbl;
252 u32 *idx_bit_fields;
253 u32 bit_fields_len;
254}; 249};
255 250
256struct ocrdma_qp { 251struct ocrdma_qp {
@@ -258,8 +253,6 @@ struct ocrdma_qp {
258 struct ocrdma_dev *dev; 253 struct ocrdma_dev *dev;
259 254
260 u8 __iomem *sq_db; 255 u8 __iomem *sq_db;
261 /* provide synchronization to multiple context(s) posting wqe, rqe */
262 spinlock_t q_lock ____cacheline_aligned;
263 struct ocrdma_qp_hwq_info sq; 256 struct ocrdma_qp_hwq_info sq;
264 struct { 257 struct {
265 uint64_t wrid; 258 uint64_t wrid;
@@ -269,6 +262,9 @@ struct ocrdma_qp {
269 uint8_t rsvd[3]; 262 uint8_t rsvd[3];
270 } *wqe_wr_id_tbl; 263 } *wqe_wr_id_tbl;
271 u32 max_inline_data; 264 u32 max_inline_data;
265
266 /* provide synchronization to multiple context(s) posting wqe, rqe */
267 spinlock_t q_lock ____cacheline_aligned;
272 struct ocrdma_cq *sq_cq; 268 struct ocrdma_cq *sq_cq;
273 /* list maintained per CQ to flush SQ errors */ 269 /* list maintained per CQ to flush SQ errors */
274 struct list_head sq_entry; 270 struct list_head sq_entry;
@@ -296,10 +292,6 @@ struct ocrdma_qp {
296 u8 *ird_q_va; 292 u8 *ird_q_va;
297}; 293};
298 294
299#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \
300 (((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \
301 (qp->id < 64)) ? 24 : 16)
302
303struct ocrdma_hw_mr { 295struct ocrdma_hw_mr {
304 struct ocrdma_dev *dev; 296 struct ocrdma_dev *dev;
305 u32 lkey; 297 u32 lkey;
@@ -390,4 +382,43 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
390 return container_of(ibsrq, struct ocrdma_srq, ibsrq); 382 return container_of(ibsrq, struct ocrdma_srq, ibsrq);
391} 383}
392 384
385
386static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
387{
388 return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
389 qp->id < 64) ? 24 : 16);
390}
391
392static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
393{
394 int cqe_valid;
395 cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
396 return ((cqe_valid == cq->phase) ? 1 : 0);
397}
398
399static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
400{
401 return (le32_to_cpu(cqe->flags_status_srcqpn) &
402 OCRDMA_CQE_QTYPE) ? 0 : 1;
403}
404
405static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe)
406{
407 return (le32_to_cpu(cqe->flags_status_srcqpn) &
408 OCRDMA_CQE_INVALIDATE) ? 1 : 0;
409}
410
411static inline int is_cqe_imm(struct ocrdma_cqe *cqe)
412{
413 return (le32_to_cpu(cqe->flags_status_srcqpn) &
414 OCRDMA_CQE_IMM) ? 1 : 0;
415}
416
417static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
418{
419 return (le32_to_cpu(cqe->flags_status_srcqpn) &
420 OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
421}
422
423
393#endif 424#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 71942af4fce9..0965278dd2ed 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
128static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) 128static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
129{ 129{
130 dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1); 130 dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
131 atomic_inc(&dev->mq.sq.used);
132} 131}
133 132
134static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) 133static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
@@ -564,32 +563,19 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
564 memset(cmd, 0, sizeof(*cmd)); 563 memset(cmd, 0, sizeof(*cmd));
565 num_pages = PAGES_4K_SPANNED(mq->va, mq->size); 564 num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
566 565
567 if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
568 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ,
569 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
570 cmd->v0.pages = num_pages;
571 cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
572 cmd->v0.async_cqid_valid = (cq->id << 1);
573 cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
574 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
575 cmd->v0.cqid_ringsize |=
576 (cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT);
577 cmd->v0.valid = OCRDMA_CREATE_MQ_VALID;
578 pa = &cmd->v0.pa[0];
579 } else {
580 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
581 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
582 cmd->req.rsvd_version = 1;
583 cmd->v1.cqid_pages = num_pages;
584 cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
585 cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
586 cmd->v1.async_event_bitmap = Bit(20);
587 cmd->v1.async_cqid_ringsize = cq->id;
588 cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
589 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
590 cmd->v1.valid = OCRDMA_CREATE_MQ_VALID;
591 pa = &cmd->v1.pa[0];
592 }
566 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
567 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
568 cmd->req.rsvd_version = 1;
569 cmd->cqid_pages = num_pages;
570 cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
571 cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
572 cmd->async_event_bitmap = Bit(20);
573 cmd->async_cqid_ringsize = cq->id;
574 cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
575 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
576 cmd->valid = OCRDMA_CREATE_MQ_VALID;
577 pa = &cmd->pa[0];
578
593 ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K); 579 ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
594 status = be_roce_mcc_cmd(dev->nic_info.netdev, 580 status = be_roce_mcc_cmd(dev->nic_info.netdev,
595 cmd, sizeof(*cmd), NULL, NULL); 581 cmd, sizeof(*cmd), NULL, NULL);
@@ -745,7 +731,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
745 qp_event = 0; 731 qp_event = 0;
746 srq_event = 0; 732 srq_event = 0;
747 dev_event = 0; 733 dev_event = 0;
748 ocrdma_err("%s() unknown type=0x%x\n", __func__, type); 734 pr_err("%s() unknown type=0x%x\n", __func__, type);
749 break; 735 break;
750 } 736 }
751 737
@@ -775,8 +761,8 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
775 if (evt_code == OCRDMA_ASYNC_EVE_CODE) 761 if (evt_code == OCRDMA_ASYNC_EVE_CODE)
776 ocrdma_dispatch_ibevent(dev, cqe); 762 ocrdma_dispatch_ibevent(dev, cqe);
777 else 763 else
778 ocrdma_err("%s(%d) invalid evt code=0x%x\n", 764 pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
779 __func__, dev->id, evt_code); 765 dev->id, evt_code);
780} 766}
781 767
782static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) 768static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -790,8 +776,8 @@ static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
790 dev->mqe_ctx.cmd_done = true; 776 dev->mqe_ctx.cmd_done = true;
791 wake_up(&dev->mqe_ctx.cmd_wait); 777 wake_up(&dev->mqe_ctx.cmd_wait);
792 } else 778 } else
793 ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", 779 pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
794 __func__, cqe->tag_lo, dev->mqe_ctx.tag); 780 __func__, cqe->tag_lo, dev->mqe_ctx.tag);
795} 781}
796 782
797static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) 783static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
@@ -810,7 +796,7 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
810 else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) 796 else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
811 ocrdma_process_mcqe(dev, cqe); 797 ocrdma_process_mcqe(dev, cqe);
812 else 798 else
813 ocrdma_err("%s() cqe->compl is not set.\n", __func__); 799 pr_err("%s() cqe->compl is not set.\n", __func__);
814 memset(cqe, 0, sizeof(struct ocrdma_mcqe)); 800 memset(cqe, 0, sizeof(struct ocrdma_mcqe));
815 ocrdma_mcq_inc_tail(dev); 801 ocrdma_mcq_inc_tail(dev);
816 } 802 }
@@ -869,7 +855,7 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
869 855
870 cq = dev->cq_tbl[cq_idx]; 856 cq = dev->cq_tbl[cq_idx];
871 if (cq == NULL) { 857 if (cq == NULL) {
872 ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); 858 pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
873 return; 859 return;
874 } 860 }
875 spin_lock_irqsave(&cq->cq_lock, flags); 861 spin_lock_irqsave(&cq->cq_lock, flags);
@@ -971,7 +957,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
971 rsp = ocrdma_get_mqe_rsp(dev); 957 rsp = ocrdma_get_mqe_rsp(dev);
972 ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); 958 ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
973 if (cqe_status || ext_status) { 959 if (cqe_status || ext_status) {
974 ocrdma_err 960 pr_err
975 ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", 961 ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
976 __func__, 962 __func__,
977 (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> 963 (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
@@ -1353,8 +1339,8 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1353 if (dpp_cq) 1339 if (dpp_cq)
1354 return -EINVAL; 1340 return -EINVAL;
1355 if (entries > dev->attr.max_cqe) { 1341 if (entries > dev->attr.max_cqe) {
1356 ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", 1342 pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
1357 __func__, dev->id, dev->attr.max_cqe, entries); 1343 __func__, dev->id, dev->attr.max_cqe, entries);
1358 return -EINVAL; 1344 return -EINVAL;
1359 } 1345 }
1360 if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) 1346 if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
@@ -1621,7 +1607,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
1621 status = ocrdma_mbx_reg_mr(dev, hwmr, pdid, 1607 status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
1622 cur_pbl_cnt, hwmr->pbe_size, last); 1608 cur_pbl_cnt, hwmr->pbe_size, last);
1623 if (status) { 1609 if (status) {
1624 ocrdma_err("%s() status=%d\n", __func__, status); 1610 pr_err("%s() status=%d\n", __func__, status);
1625 return status; 1611 return status;
1626 } 1612 }
1627 /* if there is no more pbls to register then exit. */ 1613 /* if there is no more pbls to register then exit. */
@@ -1644,7 +1630,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
1644 break; 1630 break;
1645 } 1631 }
1646 if (status) 1632 if (status)
1647 ocrdma_err("%s() err. status=%d\n", __func__, status); 1633 pr_err("%s() err. status=%d\n", __func__, status);
1648 1634
1649 return status; 1635 return status;
1650} 1636}
@@ -1841,8 +1827,8 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1841 status = ocrdma_build_q_conf(&max_wqe_allocated, 1827 status = ocrdma_build_q_conf(&max_wqe_allocated,
1842 dev->attr.wqe_size, &hw_pages, &hw_page_size); 1828 dev->attr.wqe_size, &hw_pages, &hw_page_size);
1843 if (status) { 1829 if (status) {
1844 ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__, 1830 pr_err("%s() req. max_send_wr=0x%x\n", __func__,
1845 max_wqe_allocated); 1831 max_wqe_allocated);
1846 return -EINVAL; 1832 return -EINVAL;
1847 } 1833 }
1848 qp->sq.max_cnt = max_wqe_allocated; 1834 qp->sq.max_cnt = max_wqe_allocated;
@@ -1891,8 +1877,8 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
1891 status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size, 1877 status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
1892 &hw_pages, &hw_page_size); 1878 &hw_pages, &hw_page_size);
1893 if (status) { 1879 if (status) {
1894 ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__, 1880 pr_err("%s() req. max_recv_wr=0x%x\n", __func__,
1895 attrs->cap.max_recv_wr + 1); 1881 attrs->cap.max_recv_wr + 1);
1896 return status; 1882 return status;
1897 } 1883 }
1898 qp->rq.max_cnt = max_rqe_allocated; 1884 qp->rq.max_cnt = max_rqe_allocated;
@@ -1900,7 +1886,7 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
1900 1886
1901 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); 1887 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
1902 if (!qp->rq.va) 1888 if (!qp->rq.va)
1903 return status; 1889 return -ENOMEM;
1904 memset(qp->rq.va, 0, len); 1890 memset(qp->rq.va, 0, len);
1905 qp->rq.pa = pa; 1891 qp->rq.pa = pa;
1906 qp->rq.len = len; 1892 qp->rq.len = len;
@@ -2087,10 +2073,10 @@ mbx_err:
2087 if (qp->rq.va) 2073 if (qp->rq.va)
2088 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); 2074 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
2089rq_err: 2075rq_err:
2090 ocrdma_err("%s(%d) rq_err\n", __func__, dev->id); 2076 pr_err("%s(%d) rq_err\n", __func__, dev->id);
2091 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); 2077 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
2092sq_err: 2078sq_err:
2093 ocrdma_err("%s(%d) sq_err\n", __func__, dev->id); 2079 pr_err("%s(%d) sq_err\n", __func__, dev->id);
2094 kfree(cmd); 2080 kfree(cmd);
2095 return status; 2081 return status;
2096} 2082}
@@ -2127,7 +2113,7 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
2127 else if (rdma_link_local_addr(&in6)) 2113 else if (rdma_link_local_addr(&in6))
2128 rdma_get_ll_mac(&in6, mac_addr); 2114 rdma_get_ll_mac(&in6, mac_addr);
2129 else { 2115 else {
2130 ocrdma_err("%s() fail to resolve mac_addr.\n", __func__); 2116 pr_err("%s() fail to resolve mac_addr.\n", __func__);
2131 return -EINVAL; 2117 return -EINVAL;
2132 } 2118 }
2133 return 0; 2119 return 0;
@@ -2362,8 +2348,8 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
2362 dev->attr.rqe_size, 2348 dev->attr.rqe_size,
2363 &hw_pages, &hw_page_size); 2349 &hw_pages, &hw_page_size);
2364 if (status) { 2350 if (status) {
2365 ocrdma_err("%s() req. max_wr=0x%x\n", __func__, 2351 pr_err("%s() req. max_wr=0x%x\n", __func__,
2366 srq_attr->attr.max_wr); 2352 srq_attr->attr.max_wr);
2367 status = -EINVAL; 2353 status = -EINVAL;
2368 goto ret; 2354 goto ret;
2369 } 2355 }
@@ -2614,7 +2600,7 @@ mq_err:
2614 ocrdma_destroy_qp_eqs(dev); 2600 ocrdma_destroy_qp_eqs(dev);
2615qpeq_err: 2601qpeq_err:
2616 ocrdma_destroy_eq(dev, &dev->meq); 2602 ocrdma_destroy_eq(dev, &dev->meq);
2617 ocrdma_err("%s() status=%d\n", __func__, status); 2603 pr_err("%s() status=%d\n", __func__, status);
2618 return status; 2604 return status;
2619} 2605}
2620 2606
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 48928c8e7774..ded416f1adea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -378,7 +378,7 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
378 spin_lock_init(&dev->flush_q_lock); 378 spin_lock_init(&dev->flush_q_lock);
379 return 0; 379 return 0;
380alloc_err: 380alloc_err:
381 ocrdma_err("%s(%d) error.\n", __func__, dev->id); 381 pr_err("%s(%d) error.\n", __func__, dev->id);
382 return -ENOMEM; 382 return -ENOMEM;
383} 383}
384 384
@@ -396,7 +396,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
396 396
397 dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); 397 dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
398 if (!dev) { 398 if (!dev) {
399 ocrdma_err("Unable to allocate ib device\n"); 399 pr_err("Unable to allocate ib device\n");
400 return NULL; 400 return NULL;
401 } 401 }
402 dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); 402 dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
@@ -437,7 +437,7 @@ init_err:
437idr_err: 437idr_err:
438 kfree(dev->mbx_cmd); 438 kfree(dev->mbx_cmd);
439 ib_dealloc_device(&dev->ibdev); 439 ib_dealloc_device(&dev->ibdev);
440 ocrdma_err("%s() leaving. ret=%d\n", __func__, status); 440 pr_err("%s() leaving. ret=%d\n", __func__, status);
441 return NULL; 441 return NULL;
442} 442}
443 443
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index c75cbdfa87e7..36b062da2aea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -608,16 +608,8 @@ enum {
608 OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) 608 OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
609}; 609};
610 610
611struct ocrdma_create_mq_v0 {
612 u32 pages;
613 u32 cqid_ringsize;
614 u32 valid;
615 u32 async_cqid_valid;
616 u32 rsvd;
617 struct ocrdma_pa pa[8];
618} __packed;
619
620struct ocrdma_create_mq_v1 {
611struct ocrdma_create_mq_req {
612 struct ocrdma_mbx_hdr req;
621 u32 cqid_pages; 613 u32 cqid_pages;
622 u32 async_event_bitmap; 614 u32 async_event_bitmap;
623 u32 async_cqid_ringsize; 615 u32 async_cqid_ringsize;
@@ -627,14 +619,6 @@ struct ocrdma_create_mq_v1 {
627 struct ocrdma_pa pa[8]; 619 struct ocrdma_pa pa[8];
628} __packed; 620} __packed;
629 621
630struct ocrdma_create_mq_req {
631 struct ocrdma_mbx_hdr req;
632 union {
633 struct ocrdma_create_mq_v0 v0;
634 struct ocrdma_create_mq_v1 v1;
635 };
636} __packed;
637
638struct ocrdma_create_mq_rsp { 622struct ocrdma_create_mq_rsp {
639 struct ocrdma_mbx_rsp rsp; 623 struct ocrdma_mbx_rsp rsp;
640 u32 id; 624 u32 id;
@@ -1550,21 +1534,6 @@ struct ocrdma_cqe {
1550 u32 flags_status_srcqpn; /* w3 */ 1534 u32 flags_status_srcqpn; /* w3 */
1551} __packed; 1535} __packed;
1552 1536
1553#define is_cqe_valid(cq, cqe) \
1554 (((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\
1555 == cq->phase) ? 1 : 0)
1556#define is_cqe_for_sq(cqe) \
1557 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1)
1558#define is_cqe_for_rq(cqe) \
1559 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0)
1560#define is_cqe_invalidated(cqe) \
1561 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \
1562 1 : 0)
1563#define is_cqe_imm(cqe) \
1564 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0)
1565#define is_cqe_wr_imm(cqe) \
1566 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0)
1567
1568struct ocrdma_sge { 1537struct ocrdma_sge {
1569 u32 addr_hi; 1538 u32 addr_hi;
1570 u32 addr_lo; 1539 u32 addr_lo;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b29a4246ef41..dcfbab177faa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -114,8 +114,8 @@ int ocrdma_query_port(struct ib_device *ibdev,
114 114
115 dev = get_ocrdma_dev(ibdev); 115 dev = get_ocrdma_dev(ibdev);
116 if (port > 1) { 116 if (port > 1) {
117 ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, 117 pr_err("%s(%d) invalid_port=0x%x\n", __func__,
118 dev->id, port); 118 dev->id, port);
119 return -EINVAL; 119 return -EINVAL;
120 } 120 }
121 netdev = dev->nic_info.netdev; 121 netdev = dev->nic_info.netdev;
@@ -155,8 +155,7 @@ int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
155 155
156 dev = get_ocrdma_dev(ibdev); 156 dev = get_ocrdma_dev(ibdev);
157 if (port > 1) { 157 if (port > 1) {
158 ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, 158 pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
159 dev->id, port);
160 return -EINVAL; 159 return -EINVAL;
161 } 160 }
162 return 0; 161 return 0;
@@ -398,7 +397,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
398 kfree(pd); 397 kfree(pd);
399 return ERR_PTR(status); 398 return ERR_PTR(status);
400 } 399 }
401 atomic_set(&pd->use_cnt, 0);
402 400
403 if (udata && context) { 401 if (udata && context) {
404 status = ocrdma_copy_pd_uresp(pd, context, udata); 402 status = ocrdma_copy_pd_uresp(pd, context, udata);
@@ -419,12 +417,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
419 int status; 417 int status;
420 u64 usr_db; 418 u64 usr_db;
421 419
422 if (atomic_read(&pd->use_cnt)) {
423 ocrdma_err("%s(%d) pd=0x%x is in use.\n",
424 __func__, dev->id, pd->id);
425 status = -EFAULT;
426 goto dealloc_err;
427 }
428 status = ocrdma_mbx_dealloc_pd(dev, pd); 420 status = ocrdma_mbx_dealloc_pd(dev, pd);
429 if (pd->uctx) { 421 if (pd->uctx) {
430 u64 dpp_db = dev->nic_info.dpp_unmapped_addr + 422 u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
@@ -436,7 +428,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
436 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); 428 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
437 } 429 }
438 kfree(pd); 430 kfree(pd);
439dealloc_err:
440 return status; 431 return status;
441} 432}
442 433
@@ -450,8 +441,8 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
450 struct ocrdma_dev *dev = pd->dev; 441 struct ocrdma_dev *dev = pd->dev;
451 442
452 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { 443 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
453 ocrdma_err("%s(%d) leaving err, invalid access rights\n", 444 pr_err("%s(%d) leaving err, invalid access rights\n",
454 __func__, dev->id); 445 __func__, dev->id);
455 return ERR_PTR(-EINVAL); 446 return ERR_PTR(-EINVAL);
456 } 447 }
457 448
@@ -474,7 +465,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
474 return ERR_PTR(-ENOMEM); 465 return ERR_PTR(-ENOMEM);
475 } 466 }
476 mr->pd = pd; 467 mr->pd = pd;
477 atomic_inc(&pd->use_cnt);
478 mr->ibmr.lkey = mr->hwmr.lkey; 468 mr->ibmr.lkey = mr->hwmr.lkey;
479 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) 469 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
480 mr->ibmr.rkey = mr->hwmr.lkey; 470 mr->ibmr.rkey = mr->hwmr.lkey;
@@ -664,7 +654,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
664 if (status) 654 if (status)
665 goto mbx_err; 655 goto mbx_err;
666 mr->pd = pd; 656 mr->pd = pd;
667 atomic_inc(&pd->use_cnt);
668 mr->ibmr.lkey = mr->hwmr.lkey; 657 mr->ibmr.lkey = mr->hwmr.lkey;
669 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) 658 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
670 mr->ibmr.rkey = mr->hwmr.lkey; 659 mr->ibmr.rkey = mr->hwmr.lkey;
@@ -689,7 +678,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
689 if (mr->hwmr.fr_mr == 0) 678 if (mr->hwmr.fr_mr == 0)
690 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 679 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
691 680
692 atomic_dec(&mr->pd->use_cnt);
693 /* it could be user registered memory. */ 681 /* it could be user registered memory. */
694 if (mr->umem) 682 if (mr->umem)
695 ib_umem_release(mr->umem); 683 ib_umem_release(mr->umem);
@@ -714,8 +702,8 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
714 uresp.phase_change = cq->phase_change ? 1 : 0; 702 uresp.phase_change = cq->phase_change ? 1 : 0;
715 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 703 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
716 if (status) { 704 if (status) {
717 ocrdma_err("%s(%d) copy error cqid=0x%x.\n", 705 pr_err("%s(%d) copy error cqid=0x%x.\n",
718 __func__, cq->dev->id, cq->id); 706 __func__, cq->dev->id, cq->id);
719 goto err; 707 goto err;
720 } 708 }
721 uctx = get_ocrdma_ucontext(ib_ctx); 709 uctx = get_ocrdma_ucontext(ib_ctx);
@@ -752,7 +740,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
752 740
753 spin_lock_init(&cq->cq_lock); 741 spin_lock_init(&cq->cq_lock);
754 spin_lock_init(&cq->comp_handler_lock); 742 spin_lock_init(&cq->comp_handler_lock);
755 atomic_set(&cq->use_cnt, 0);
756 INIT_LIST_HEAD(&cq->sq_head); 743 INIT_LIST_HEAD(&cq->sq_head);
757 INIT_LIST_HEAD(&cq->rq_head); 744 INIT_LIST_HEAD(&cq->rq_head);
758 cq->dev = dev; 745 cq->dev = dev;
@@ -799,9 +786,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
799 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); 786 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
800 struct ocrdma_dev *dev = cq->dev; 787 struct ocrdma_dev *dev = cq->dev;
801 788
802 if (atomic_read(&cq->use_cnt))
803 return -EINVAL;
804
805 status = ocrdma_mbx_destroy_cq(dev, cq); 789 status = ocrdma_mbx_destroy_cq(dev, cq);
806 790
807 if (cq->ucontext) { 791 if (cq->ucontext) {
@@ -837,57 +821,56 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
837 if (attrs->qp_type != IB_QPT_GSI && 821 if (attrs->qp_type != IB_QPT_GSI &&
838 attrs->qp_type != IB_QPT_RC && 822 attrs->qp_type != IB_QPT_RC &&
839 attrs->qp_type != IB_QPT_UD) { 823 attrs->qp_type != IB_QPT_UD) {
840 ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n", 824 pr_err("%s(%d) unsupported qp type=0x%x requested\n",
841 __func__, dev->id, attrs->qp_type); 825 __func__, dev->id, attrs->qp_type);
842 return -EINVAL; 826 return -EINVAL;
843 } 827 }
844 if (attrs->cap.max_send_wr > dev->attr.max_wqe) { 828 if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
845 ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n", 829 pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
846 __func__, dev->id, attrs->cap.max_send_wr); 830 __func__, dev->id, attrs->cap.max_send_wr);
847 ocrdma_err("%s(%d) supported send_wr=0x%x\n", 831 pr_err("%s(%d) supported send_wr=0x%x\n",
848 __func__, dev->id, dev->attr.max_wqe); 832 __func__, dev->id, dev->attr.max_wqe);
849 return -EINVAL; 833 return -EINVAL;
850 } 834 }
851 if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { 835 if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
852 ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n", 836 pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
853 __func__, dev->id, attrs->cap.max_recv_wr); 837 __func__, dev->id, attrs->cap.max_recv_wr);
854 ocrdma_err("%s(%d) supported recv_wr=0x%x\n", 838 pr_err("%s(%d) supported recv_wr=0x%x\n",
855 __func__, dev->id, dev->attr.max_rqe); 839 __func__, dev->id, dev->attr.max_rqe);
856 return -EINVAL; 840 return -EINVAL;
857 } 841 }
858 if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { 842 if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
859 ocrdma_err("%s(%d) unsupported inline data size=0x%x"
860 " requested\n", __func__, dev->id,
861 attrs->cap.max_inline_data);
862 ocrdma_err("%s(%d) supported inline data size=0x%x\n",
863 __func__, dev->id, dev->attr.max_inline_data);
843 pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
844 __func__, dev->id, attrs->cap.max_inline_data);
845 pr_err("%s(%d) supported inline data size=0x%x\n",
846 __func__, dev->id, dev->attr.max_inline_data);
864 return -EINVAL; 847 return -EINVAL;
865 } 848 }
866 if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { 849 if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
867 ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n", 850 pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
868 __func__, dev->id, attrs->cap.max_send_sge); 851 __func__, dev->id, attrs->cap.max_send_sge);
869 ocrdma_err("%s(%d) supported send_sge=0x%x\n", 852 pr_err("%s(%d) supported send_sge=0x%x\n",
870 __func__, dev->id, dev->attr.max_send_sge); 853 __func__, dev->id, dev->attr.max_send_sge);
871 return -EINVAL; 854 return -EINVAL;
872 } 855 }
873 if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { 856 if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
874 ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n", 857 pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
875 __func__, dev->id, attrs->cap.max_recv_sge); 858 __func__, dev->id, attrs->cap.max_recv_sge);
876 ocrdma_err("%s(%d) supported recv_sge=0x%x\n", 859 pr_err("%s(%d) supported recv_sge=0x%x\n",
877 __func__, dev->id, dev->attr.max_recv_sge); 860 __func__, dev->id, dev->attr.max_recv_sge);
878 return -EINVAL; 861 return -EINVAL;
879 } 862 }
880 /* unprivileged user space cannot create special QP */ 863 /* unprivileged user space cannot create special QP */
881 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { 864 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
882 ocrdma_err 865 pr_err
883 ("%s(%d) Userspace can't create special QPs of type=0x%x\n", 866 ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
884 __func__, dev->id, attrs->qp_type); 867 __func__, dev->id, attrs->qp_type);
885 return -EINVAL; 868 return -EINVAL;
886 } 869 }
887 /* allow creating only one GSI type of QP */ 870 /* allow creating only one GSI type of QP */
888 if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { 871 if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
889 ocrdma_err("%s(%d) GSI special QPs already created.\n", 872 pr_err("%s(%d) GSI special QPs already created.\n",
890 __func__, dev->id); 873 __func__, dev->id);
891 return -EINVAL; 874 return -EINVAL;
892 } 875 }
893 /* verify consumer QPs are not trying to use GSI QP's CQ */ 876 /* verify consumer QPs are not trying to use GSI QP's CQ */
@@ -896,8 +879,8 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
896 (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || 879 (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
897 (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || 880 (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
898 (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { 881 (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
899 ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", 882 pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
900 __func__, dev->id); 883 __func__, dev->id);
901 return -EINVAL; 884 return -EINVAL;
902 } 885 }
903 } 886 }
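The ocrdma_err() wrapper being removed throughout these hunks was the driver's thin veneer over printk(KERN_ERR ...), so switching the call sites to pr_err() loses nothing. If a per-driver prefix is still wanted after such a conversion, the usual companion (illustrative, not part of this diff) is a pr_fmt() definition ahead of the includes:

/* at the top of ocrdma_verbs.c, before any #include (illustrative) */
#define pr_fmt(fmt) "ocrdma: " fmt

/*
 * Every pr_err()/pr_warn() in the file is then prefixed automatically, e.g.
 *   pr_err("%s(%d) user copy error.\n", __func__, dev->id);
 * logs as "ocrdma: ocrdma_copy_qp_uresp(0) user copy error."
 */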
@@ -949,7 +932,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
949 } 932 }
950 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 933 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
951 if (status) { 934 if (status) {
952 ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id); 935 pr_err("%s(%d) user copy error.\n", __func__, dev->id);
953 goto err; 936 goto err;
954 } 937 }
955 status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], 938 status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
@@ -1023,15 +1006,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1023 qp->state = OCRDMA_QPS_RST; 1006 qp->state = OCRDMA_QPS_RST;
1024} 1007}
1025 1008
1026static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd)
1027{
1028 atomic_inc(&pd->use_cnt);
1029 atomic_inc(&qp->sq_cq->use_cnt);
1030 atomic_inc(&qp->rq_cq->use_cnt);
1031 if (qp->srq)
1032 atomic_inc(&qp->srq->use_cnt);
1033 qp->ibqp.qp_num = qp->id;
1034}
1035 1009
1036static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, 1010static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1037 struct ib_qp_init_attr *attrs) 1011 struct ib_qp_init_attr *attrs)
@@ -1099,7 +1073,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1099 goto cpy_err; 1073 goto cpy_err;
1100 } 1074 }
1101 ocrdma_store_gsi_qp_cq(dev, attrs); 1075 ocrdma_store_gsi_qp_cq(dev, attrs);
1102 ocrdma_set_qp_use_cnt(qp, pd); 1076 qp->ibqp.qp_num = qp->id;
1103 mutex_unlock(&dev->dev_lock); 1077 mutex_unlock(&dev->dev_lock);
1104 return &qp->ibqp; 1078 return &qp->ibqp;
1105 1079
@@ -1112,7 +1086,7 @@ mbx_err:
1112 kfree(qp->wqe_wr_id_tbl); 1086 kfree(qp->wqe_wr_id_tbl);
1113 kfree(qp->rqe_wr_id_tbl); 1087 kfree(qp->rqe_wr_id_tbl);
1114 kfree(qp); 1088 kfree(qp);
1115 ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status); 1089 pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1116gen_err: 1090gen_err:
1117 return ERR_PTR(status); 1091 return ERR_PTR(status);
1118} 1092}
@@ -1162,10 +1136,10 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1162 spin_unlock_irqrestore(&qp->q_lock, flags); 1136 spin_unlock_irqrestore(&qp->q_lock, flags);
1163 1137
1164 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { 1138 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1165 ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for " 1139 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1166 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", 1140 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1167 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, 1141 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1168 old_qps, new_qps); 1142 old_qps, new_qps);
1169 goto param_err; 1143 goto param_err;
1170 } 1144 }
1171 1145
@@ -1475,11 +1449,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
1475 1449
1476 ocrdma_del_flush_qp(qp); 1450 ocrdma_del_flush_qp(qp);
1477 1451
1478 atomic_dec(&qp->pd->use_cnt);
1479 atomic_dec(&qp->sq_cq->use_cnt);
1480 atomic_dec(&qp->rq_cq->use_cnt);
1481 if (qp->srq)
1482 atomic_dec(&qp->srq->use_cnt);
1483 kfree(qp->wqe_wr_id_tbl); 1452 kfree(qp->wqe_wr_id_tbl);
1484 kfree(qp->rqe_wr_id_tbl); 1453 kfree(qp->rqe_wr_id_tbl);
1485 kfree(qp); 1454 kfree(qp);
@@ -1565,14 +1534,12 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1565 goto arm_err; 1534 goto arm_err;
1566 } 1535 }
1567 1536
1568 atomic_set(&srq->use_cnt, 0);
1569 if (udata) { 1537 if (udata) {
1570 status = ocrdma_copy_srq_uresp(srq, udata); 1538 status = ocrdma_copy_srq_uresp(srq, udata);
1571 if (status) 1539 if (status)
1572 goto arm_err; 1540 goto arm_err;
1573 } 1541 }
1574 1542
1575 atomic_inc(&pd->use_cnt);
1576 return &srq->ibsrq; 1543 return &srq->ibsrq;
1577 1544
1578arm_err: 1545arm_err:
@@ -1618,18 +1585,12 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1618 1585
1619 srq = get_ocrdma_srq(ibsrq); 1586 srq = get_ocrdma_srq(ibsrq);
1620 dev = srq->dev; 1587 dev = srq->dev;
1621 if (atomic_read(&srq->use_cnt)) {
1622 ocrdma_err("%s(%d) err, srq=0x%x in use\n",
1623 __func__, dev->id, srq->id);
1624 return -EAGAIN;
1625 }
1626 1588
1627 status = ocrdma_mbx_destroy_srq(dev, srq); 1589 status = ocrdma_mbx_destroy_srq(dev, srq);
1628 1590
1629 if (srq->pd->uctx) 1591 if (srq->pd->uctx)
1630 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); 1592 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
1631 1593
1632 atomic_dec(&srq->pd->use_cnt);
1633 kfree(srq->idx_bit_fields); 1594 kfree(srq->idx_bit_fields);
1634 kfree(srq->rqe_wr_id_tbl); 1595 kfree(srq->rqe_wr_id_tbl);
1635 kfree(srq); 1596 kfree(srq);
@@ -1677,9 +1638,9 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1677{ 1638{
1678 if (wr->send_flags & IB_SEND_INLINE) { 1639 if (wr->send_flags & IB_SEND_INLINE) {
1679 if (wr->sg_list[0].length > qp->max_inline_data) { 1640 if (wr->sg_list[0].length > qp->max_inline_data) {
1680 ocrdma_err("%s() supported_len=0x%x," 1641 pr_err("%s() supported_len=0x%x,\n"
1681 " unspported len req=0x%x\n", __func__, 1642 " unspported len req=0x%x\n", __func__,
1682 qp->max_inline_data, wr->sg_list[0].length); 1643 qp->max_inline_data, wr->sg_list[0].length);
1683 return -EINVAL; 1644 return -EINVAL;
1684 } 1645 }
1685 memcpy(sge, 1646 memcpy(sge,
@@ -1773,12 +1734,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1773 spin_lock_irqsave(&qp->q_lock, flags); 1734 spin_lock_irqsave(&qp->q_lock, flags);
1774 if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { 1735 if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
1775 spin_unlock_irqrestore(&qp->q_lock, flags); 1736 spin_unlock_irqrestore(&qp->q_lock, flags);
1737 *bad_wr = wr;
1776 return -EINVAL; 1738 return -EINVAL;
1777 } 1739 }
1778 1740
1779 while (wr) { 1741 while (wr) {
1780 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || 1742 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
1781 wr->num_sge > qp->sq.max_sges) { 1743 wr->num_sge > qp->sq.max_sges) {
1744 *bad_wr = wr;
1782 status = -ENOMEM; 1745 status = -ENOMEM;
1783 break; 1746 break;
1784 } 1747 }
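The two *bad_wr = wr assignments added above matter because the verbs API reports partial failure through that pointer: on a non-zero return from ib_post_send(), the caller may inspect *bad_wr to learn which work request in the chain was rejected, everything ahead of it having already been queued. A minimal consumer-side sketch of that contract (hypothetical caller, not from this diff):

struct ib_send_wr *bad_wr;
int ret;

ret = ib_post_send(qp, wr_list, &bad_wr);
if (ret) {
        /* requests before bad_wr were posted; bad_wr and later were not */
        pr_err("post_send failed (%d) at wr_id %llu\n",
               ret, (unsigned long long)bad_wr->wr_id);
        /* resubmit_from() is a hypothetical caller-side helper */
        resubmit_from(bad_wr);
}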
@@ -1856,7 +1819,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1856 1819
1857static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) 1820static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
1858{ 1821{
1859 u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp)); 1822 u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
1860 1823
1861 iowrite32(val, qp->rq_db); 1824 iowrite32(val, qp->rq_db);
1862} 1825}
@@ -2094,8 +2057,8 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2094 break; 2057 break;
2095 default: 2058 default:
2096 ibwc->status = IB_WC_GENERAL_ERR; 2059 ibwc->status = IB_WC_GENERAL_ERR;
2097 ocrdma_err("%s() invalid opcode received = 0x%x\n", 2060 pr_err("%s() invalid opcode received = 0x%x\n",
2098 __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); 2061 __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
2099 break; 2062 break;
2100 }; 2063 };
2101} 2064}
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 1e603a375069..d03ca4c1ff25 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -5,3 +5,11 @@ config INFINIBAND_QIB
5 This is a low-level driver for Intel PCIe QLE InfiniBand host 5 This is a low-level driver for Intel PCIe QLE InfiniBand host
6 channel adapters. This driver does not support the Intel 6 channel adapters. This driver does not support the Intel
7 HyperTransport card (model QHT7140). 7 HyperTransport card (model QHT7140).
8
9config INFINIBAND_QIB_DCA
10 bool "QIB DCA support"
11 depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m)
12 default y
13 ---help---
14 Setting this enables DCA support on some Intel chip sets
15 with the iba7322 HCA.
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index f12d7bb8b39f..57f8103e51f8 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
13 13
14ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o 14ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
15ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o 15ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
16ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4d11575c2010..4a9af795b88f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
1#ifndef _QIB_KERNEL_H 1#ifndef _QIB_KERNEL_H
2#define _QIB_KERNEL_H 2#define _QIB_KERNEL_H
3/* 3/*
4 * Copyright (c) 2012 Intel Corporation. All rights reserved. 4 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
5 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 5 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
7 * 7 *
@@ -51,6 +51,7 @@
51#include <linux/completion.h> 51#include <linux/completion.h>
52#include <linux/kref.h> 52#include <linux/kref.h>
53#include <linux/sched.h> 53#include <linux/sched.h>
54#include <linux/kthread.h>
54 55
55#include "qib_common.h" 56#include "qib_common.h"
56#include "qib_verbs.h" 57#include "qib_verbs.h"
@@ -114,6 +115,11 @@ struct qib_eep_log_mask {
114/* 115/*
115 * Below contains all data related to a single context (formerly called port). 116 * Below contains all data related to a single context (formerly called port).
116 */ 117 */
118
119#ifdef CONFIG_DEBUG_FS
120struct qib_opcode_stats_perctx;
121#endif
122
117struct qib_ctxtdata { 123struct qib_ctxtdata {
118 void **rcvegrbuf; 124 void **rcvegrbuf;
119 dma_addr_t *rcvegrbuf_phys; 125 dma_addr_t *rcvegrbuf_phys;
@@ -154,6 +160,8 @@ struct qib_ctxtdata {
154 */ 160 */
155 /* instead of calculating it */ 161 /* instead of calculating it */
156 unsigned ctxt; 162 unsigned ctxt;
163 /* local node of context */
164 int node_id;
157 /* non-zero if ctxt is being shared. */ 165 /* non-zero if ctxt is being shared. */
158 u16 subctxt_cnt; 166 u16 subctxt_cnt;
159 /* non-zero if ctxt is being shared. */ 167 /* non-zero if ctxt is being shared. */
@@ -222,12 +230,15 @@ struct qib_ctxtdata {
222 u8 redirect_seq_cnt; 230 u8 redirect_seq_cnt;
223 /* ctxt rcvhdrq head offset */ 231 /* ctxt rcvhdrq head offset */
224 u32 head; 232 u32 head;
225 u32 pkt_count;
226 /* lookaside fields */ 233 /* lookaside fields */
227 struct qib_qp *lookaside_qp; 234 struct qib_qp *lookaside_qp;
228 u32 lookaside_qpn; 235 u32 lookaside_qpn;
229 /* QPs waiting for context processing */ 236 /* QPs waiting for context processing */
230 struct list_head qp_wait_list; 237 struct list_head qp_wait_list;
238#ifdef CONFIG_DEBUG_FS
239 /* verbs stats per CTX */
240 struct qib_opcode_stats_perctx *opstats;
241#endif
231}; 242};
232 243
233struct qib_sge_state; 244struct qib_sge_state;
@@ -428,9 +439,19 @@ struct qib_verbs_txreq {
428#define ACTIVITY_TIMER 5 439#define ACTIVITY_TIMER 5
429 440
430#define MAX_NAME_SIZE 64 441#define MAX_NAME_SIZE 64
442
443#ifdef CONFIG_INFINIBAND_QIB_DCA
444struct qib_irq_notify;
445#endif
446
431struct qib_msix_entry { 447struct qib_msix_entry {
432 struct msix_entry msix; 448 struct msix_entry msix;
433 void *arg; 449 void *arg;
450#ifdef CONFIG_INFINIBAND_QIB_DCA
451 int dca;
452 int rcv;
453 struct qib_irq_notify *notifier;
454#endif
434 char name[MAX_NAME_SIZE]; 455 char name[MAX_NAME_SIZE];
435 cpumask_var_t mask; 456 cpumask_var_t mask;
436}; 457};
@@ -828,6 +849,9 @@ struct qib_devdata {
828 struct qib_ctxtdata *); 849 struct qib_ctxtdata *);
829 void (*f_writescratch)(struct qib_devdata *, u32); 850 void (*f_writescratch)(struct qib_devdata *, u32);
830 int (*f_tempsense_rd)(struct qib_devdata *, int regnum); 851 int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
852#ifdef CONFIG_INFINIBAND_QIB_DCA
853 int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
854#endif
831 855
832 char *boardname; /* human readable board info */ 856 char *boardname; /* human readable board info */
833 857
@@ -1075,6 +1099,10 @@ struct qib_devdata {
1075 u16 psxmitwait_check_rate; 1099 u16 psxmitwait_check_rate;
1076 /* high volume overflow errors defered to tasklet */ 1100 /* high volume overflow errors defered to tasklet */
1077 struct tasklet_struct error_tasklet; 1101 struct tasklet_struct error_tasklet;
1102 /* per device cq worker */
1103 struct kthread_worker *worker;
1104
1105 int assigned_node_id; /* NUMA node closest to HCA */
1078}; 1106};
1079 1107
1080/* hol_state values */ 1108/* hol_state values */
@@ -1154,7 +1182,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
1154int qib_setup_eagerbufs(struct qib_ctxtdata *); 1182int qib_setup_eagerbufs(struct qib_ctxtdata *);
1155void qib_set_ctxtcnt(struct qib_devdata *); 1183void qib_set_ctxtcnt(struct qib_devdata *);
1156int qib_create_ctxts(struct qib_devdata *dd); 1184int qib_create_ctxts(struct qib_devdata *dd);
1157struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); 1185struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
1158void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); 1186void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
1159void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); 1187void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
1160 1188
@@ -1320,7 +1348,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
1320 return ppd->sdma_state.current_state == qib_sdma_state_s99_running; 1348 return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
1321} 1349}
1322int qib_sdma_running(struct qib_pportdata *); 1350int qib_sdma_running(struct qib_pportdata *);
1323 1351void dump_sdma_state(struct qib_pportdata *ppd);
1324void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); 1352void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
1325void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); 1353void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
1326 1354
@@ -1445,6 +1473,7 @@ extern unsigned qib_n_krcv_queues;
1445extern unsigned qib_sdma_fetch_arb; 1473extern unsigned qib_sdma_fetch_arb;
1446extern unsigned qib_compat_ddr_negotiate; 1474extern unsigned qib_compat_ddr_negotiate;
1447extern int qib_special_trigger; 1475extern int qib_special_trigger;
1476extern unsigned qib_numa_aware;
1448 1477
1449extern struct mutex qib_mutex; 1478extern struct mutex qib_mutex;
1450 1479
@@ -1474,27 +1503,23 @@ extern struct mutex qib_mutex;
1474 * first to avoid possible serial port delays from printk. 1503 * first to avoid possible serial port delays from printk.
1475 */ 1504 */
1476#define qib_early_err(dev, fmt, ...) \ 1505#define qib_early_err(dev, fmt, ...) \
1477 do { \ 1506 dev_err(dev, fmt, ##__VA_ARGS__)
1478 dev_err(dev, fmt, ##__VA_ARGS__); \
1479 } while (0)
1480 1507
1481#define qib_dev_err(dd, fmt, ...) \ 1508#define qib_dev_err(dd, fmt, ...) \
1482 do { \ 1509 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
1483 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ 1510 qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
1484 qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ 1511
1485 } while (0) 1512#define qib_dev_warn(dd, fmt, ...) \
1513 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
1514 qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
1486 1515
1487#define qib_dev_porterr(dd, port, fmt, ...) \ 1516#define qib_dev_porterr(dd, port, fmt, ...) \
1488 do { \ 1517 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
1489 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ 1518 qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
1490 qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ 1519 ##__VA_ARGS__)
1491 ##__VA_ARGS__); \
1492 } while (0)
1493 1520
1494#define qib_devinfo(pcidev, fmt, ...) \ 1521#define qib_devinfo(pcidev, fmt, ...) \
1495 do { \ 1522 dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
1496 dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \
1497 } while (0)
1498 1523
1499/* 1524/*
1500 * this is used for formatting hw error messages... 1525 * this is used for formatting hw error messages...
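Collapsing qib_early_err(), qib_dev_err(), qib_dev_porterr() and qib_devinfo() from do { ... } while (0) wrappers to bare dev_err()/dev_info() expressions is safe because a single expression already behaves like a function call at the use site; the wrapper is only needed when a macro expands to multiple statements. The new qib_dev_warn() follows the same shape, so all of them compose cleanly in an if/else chain, e.g. (illustrative use, not from this diff):

if (ret == -EBUSY)
        qib_dev_warn(dd, "ctxt %u busy, retrying\n", ctxt);
else if (ret)
        qib_dev_err(dd, "ctxt %u setup failed: %d\n", ctxt, ret);
else
        qib_devinfo(dd->pcidev, "ctxt %u ready\n", ctxt);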
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index d39e0183ff82..4f255b723ffd 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
279 * may not be implemented; the user code must deal with this if it 279 * may not be implemented; the user code must deal with this if it
280 * cares, or it must abort after initialization reports the difference. 280 * cares, or it must abort after initialization reports the difference.
281 */ 281 */
282#define QIB_USER_SWMINOR 11 282#define QIB_USER_SWMINOR 12
283 283
284#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) 284#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
285 285
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index 5246aa486bbe..ab4e11cfab15 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2013 Intel Corporation. All rights reserved.
2 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 5 *
@@ -34,8 +35,10 @@
34#include <linux/err.h> 35#include <linux/err.h>
35#include <linux/slab.h> 36#include <linux/slab.h>
36#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
38#include <linux/kthread.h>
37 39
38#include "qib_verbs.h" 40#include "qib_verbs.h"
41#include "qib.h"
39 42
40/** 43/**
41 * qib_cq_enter - add a new entry to the completion queue 44 * qib_cq_enter - add a new entry to the completion queue
@@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
102 if (cq->notify == IB_CQ_NEXT_COMP || 105 if (cq->notify == IB_CQ_NEXT_COMP ||
103 (cq->notify == IB_CQ_SOLICITED && 106 (cq->notify == IB_CQ_SOLICITED &&
104 (solicited || entry->status != IB_WC_SUCCESS))) { 107 (solicited || entry->status != IB_WC_SUCCESS))) {
105 cq->notify = IB_CQ_NONE; 108 struct kthread_worker *worker;
106 cq->triggered++;
107 /* 109 /*
108 * This will cause send_complete() to be called in 110 * This will cause send_complete() to be called in
109 * another thread. 111 * another thread.
110 */ 112 */
111 queue_work(qib_cq_wq, &cq->comptask); 113 smp_rmb();
114 worker = cq->dd->worker;
115 if (likely(worker)) {
116 cq->notify = IB_CQ_NONE;
117 cq->triggered++;
118 queue_kthread_work(worker, &cq->comptask);
119 }
112 } 120 }
113 121
114 spin_unlock_irqrestore(&cq->lock, flags); 122 spin_unlock_irqrestore(&cq->lock, flags);
@@ -163,7 +171,7 @@ bail:
163 return npolled; 171 return npolled;
164} 172}
165 173
166static void send_complete(struct work_struct *work) 174static void send_complete(struct kthread_work *work)
167{ 175{
168 struct qib_cq *cq = container_of(work, struct qib_cq, comptask); 176 struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
169 177
@@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
287 * The number of entries should be >= the number requested or return 295 * The number of entries should be >= the number requested or return
288 * an error. 296 * an error.
289 */ 297 */
298 cq->dd = dd_from_dev(dev);
290 cq->ibcq.cqe = entries; 299 cq->ibcq.cqe = entries;
291 cq->notify = IB_CQ_NONE; 300 cq->notify = IB_CQ_NONE;
292 cq->triggered = 0; 301 cq->triggered = 0;
293 spin_lock_init(&cq->lock); 302 spin_lock_init(&cq->lock);
294 INIT_WORK(&cq->comptask, send_complete); 303 init_kthread_work(&cq->comptask, send_complete);
295 wc->head = 0; 304 wc->head = 0;
296 wc->tail = 0; 305 wc->tail = 0;
297 cq->queue = wc; 306 cq->queue = wc;
@@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq)
323 struct qib_ibdev *dev = to_idev(ibcq->device); 332 struct qib_ibdev *dev = to_idev(ibcq->device);
324 struct qib_cq *cq = to_icq(ibcq); 333 struct qib_cq *cq = to_icq(ibcq);
325 334
326 flush_work(&cq->comptask); 335 flush_kthread_work(&cq->comptask);
327 spin_lock(&dev->n_cqs_lock); 336 spin_lock(&dev->n_cqs_lock);
328 dev->n_cqs_allocated--; 337 dev->n_cqs_allocated--;
329 spin_unlock(&dev->n_cqs_lock); 338 spin_unlock(&dev->n_cqs_lock);
@@ -483,3 +492,49 @@ bail_free:
483bail: 492bail:
484 return ret; 493 return ret;
485} 494}
495
496int qib_cq_init(struct qib_devdata *dd)
497{
498 int ret = 0;
499 int cpu;
500 struct task_struct *task;
501
502 if (dd->worker)
503 return 0;
504 dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
505 if (!dd->worker)
506 return -ENOMEM;
507 init_kthread_worker(dd->worker);
508 task = kthread_create_on_node(
509 kthread_worker_fn,
510 dd->worker,
511 dd->assigned_node_id,
512 "qib_cq%d", dd->unit);
513 if (IS_ERR(task))
514 goto task_fail;
515 cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
516 kthread_bind(task, cpu);
517 wake_up_process(task);
518out:
519 return ret;
520task_fail:
521 ret = PTR_ERR(task);
522 kfree(dd->worker);
523 dd->worker = NULL;
524 goto out;
525}
526
527void qib_cq_exit(struct qib_devdata *dd)
528{
529 struct kthread_worker *worker;
530
531 worker = dd->worker;
532 if (!worker)
533 return;
534 /* blocks future queuing from send_complete() */
535 dd->worker = NULL;
536 smp_wmb();
537 flush_kthread_worker(worker);
538 kthread_stop(worker->task);
539 kfree(worker);
540}
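qib_cq_init()/qib_cq_exit() above replace the old global completion workqueue with one kthread_worker per device, created on the device's NUMA node and bound to a CPU there; the smp_wmb() in qib_cq_exit() pairs with the smp_rmb() in qib_cq_enter(), so once dd->worker is cleared a late completion is dropped rather than queued to a dying thread. Distilled to the worker lifecycle alone, the pattern looks roughly like this (sketch using the pre-4.9 kthread API names this driver uses; error handling trimmed):

/* needs <linux/kthread.h>, <linux/slab.h>, <linux/cpumask.h> */
static struct kthread_worker *make_node_worker(int node, int unit)
{
        struct kthread_worker *w = kzalloc(sizeof(*w), GFP_KERNEL);
        struct task_struct *task;

        if (!w)
                return NULL;
        init_kthread_worker(w);
        task = kthread_create_on_node(kthread_worker_fn, w, node,
                                      "qib_cq%d", unit);
        if (IS_ERR(task)) {
                kfree(w);
                return NULL;
        }
        kthread_bind(task, cpumask_first(cpumask_of_node(node)));
        wake_up_process(task);
        return w;   /* queue work with queue_kthread_work(w, &some_kthread_work) */
}

Teardown mirrors qib_cq_exit(): publish the "no worker" state first, then flush_kthread_worker(), kthread_stop(w->task) and kfree(w).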
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 000000000000..799a0c3bffc4
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
1#ifdef CONFIG_DEBUG_FS
2/*
3 * Copyright (c) 2013 Intel Corporation. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/debugfs.h>
34#include <linux/seq_file.h>
35#include <linux/kernel.h>
36#include <linux/export.h>
37
38#include "qib.h"
39#include "qib_verbs.h"
40#include "qib_debugfs.h"
41
42static struct dentry *qib_dbg_root;
43
44#define DEBUGFS_FILE(name) \
45static const struct seq_operations _##name##_seq_ops = { \
46 .start = _##name##_seq_start, \
47 .next = _##name##_seq_next, \
48 .stop = _##name##_seq_stop, \
49 .show = _##name##_seq_show \
50}; \
51static int _##name##_open(struct inode *inode, struct file *s) \
52{ \
53 struct seq_file *seq; \
54 int ret; \
55 ret = seq_open(s, &_##name##_seq_ops); \
56 if (ret) \
57 return ret; \
58 seq = s->private_data; \
59 seq->private = inode->i_private; \
60 return 0; \
61} \
62static const struct file_operations _##name##_file_ops = { \
63 .owner = THIS_MODULE, \
64 .open = _##name##_open, \
65 .read = seq_read, \
66 .llseek = seq_lseek, \
67 .release = seq_release \
68};
69
70#define DEBUGFS_FILE_CREATE(name) \
71do { \
72 struct dentry *ent; \
73 ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
74 ibd, &_##name##_file_ops); \
75 if (!ent) \
76 pr_warn("create of " #name " failed\n"); \
77} while (0)
78
79static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
80{
81 struct qib_opcode_stats_perctx *opstats;
82
83 if (*pos >= ARRAY_SIZE(opstats->stats))
84 return NULL;
85 return pos;
86}
87
88static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
89{
90 struct qib_opcode_stats_perctx *opstats;
91
92 ++*pos;
93 if (*pos >= ARRAY_SIZE(opstats->stats))
94 return NULL;
95 return pos;
96}
97
98
99static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
100{
101 /* nothing allocated */
102}
103
104static int _opcode_stats_seq_show(struct seq_file *s, void *v)
105{
106 loff_t *spos = v;
107 loff_t i = *spos, j;
108 u64 n_packets = 0, n_bytes = 0;
109 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
110 struct qib_devdata *dd = dd_from_dev(ibd);
111
112 for (j = 0; j < dd->first_user_ctxt; j++) {
113 if (!dd->rcd[j])
114 continue;
115 n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
116 n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
117 }
118 if (!n_packets && !n_bytes)
119 return SEQ_SKIP;
120 seq_printf(s, "%02llx %llu/%llu\n", i,
121 (unsigned long long) n_packets,
122 (unsigned long long) n_bytes);
123
124 return 0;
125}
126
127DEBUGFS_FILE(opcode_stats)
128
129static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
130{
131 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
132 struct qib_devdata *dd = dd_from_dev(ibd);
133
134 if (!*pos)
135 return SEQ_START_TOKEN;
136 if (*pos >= dd->first_user_ctxt)
137 return NULL;
138 return pos;
139}
140
141static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
142{
143 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
144 struct qib_devdata *dd = dd_from_dev(ibd);
145
146 if (v == SEQ_START_TOKEN)
147 return pos;
148
149 ++*pos;
150 if (*pos >= dd->first_user_ctxt)
151 return NULL;
152 return pos;
153}
154
155static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
156{
157 /* nothing allocated */
158}
159
160static int _ctx_stats_seq_show(struct seq_file *s, void *v)
161{
162 loff_t *spos;
163 loff_t i, j;
164 u64 n_packets = 0;
165 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
166 struct qib_devdata *dd = dd_from_dev(ibd);
167
168 if (v == SEQ_START_TOKEN) {
169 seq_puts(s, "Ctx:npkts\n");
170 return 0;
171 }
172
173 spos = v;
174 i = *spos;
175
176 if (!dd->rcd[i])
177 return SEQ_SKIP;
178
179 for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
180 n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
181
182 if (!n_packets)
183 return SEQ_SKIP;
184
185 seq_printf(s, " %llu:%llu\n", i, n_packets);
186 return 0;
187}
188
189DEBUGFS_FILE(ctx_stats)
190
191static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
192{
193 struct qib_qp_iter *iter;
194 loff_t n = *pos;
195
196 iter = qib_qp_iter_init(s->private);
197 if (!iter)
198 return NULL;
199
200 while (n--) {
201 if (qib_qp_iter_next(iter)) {
202 kfree(iter);
203 return NULL;
204 }
205 }
206
207 return iter;
208}
209
210static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
211 loff_t *pos)
212{
213 struct qib_qp_iter *iter = iter_ptr;
214
215 (*pos)++;
216
217 if (qib_qp_iter_next(iter)) {
218 kfree(iter);
219 return NULL;
220 }
221
222 return iter;
223}
224
225static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
226{
227 /* nothing for now */
228}
229
230static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
231{
232 struct qib_qp_iter *iter = iter_ptr;
233
234 if (!iter)
235 return 0;
236
237 qib_qp_iter_print(s, iter);
238
239 return 0;
240}
241
242DEBUGFS_FILE(qp_stats)
243
244void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
245{
246 char name[10];
247
248 snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
249 ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
250 if (!ibd->qib_ibdev_dbg) {
251 pr_warn("create of %s failed\n", name);
252 return;
253 }
254 DEBUGFS_FILE_CREATE(opcode_stats);
255 DEBUGFS_FILE_CREATE(ctx_stats);
256 DEBUGFS_FILE_CREATE(qp_stats);
257 return;
258}
259
260void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
261{
262 if (!qib_dbg_root)
263 goto out;
264 debugfs_remove_recursive(ibd->qib_ibdev_dbg);
265out:
266 ibd->qib_ibdev_dbg = NULL;
267}
268
269void qib_dbg_init(void)
270{
271 qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
272 if (!qib_dbg_root)
273 pr_warn("init of debugfs failed\n");
274}
275
276void qib_dbg_exit(void)
277{
278 debugfs_remove_recursive(qib_dbg_root);
279 qib_dbg_root = NULL;
280}
281
282#endif
283
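DEBUGFS_FILE() above stamps out the seq_file boilerplate (seq_operations, open handler, file_operations) once per statistics file; DEBUGFS_FILE(opcode_stats) expands to roughly the following, wiring up the _opcode_stats_seq_* callbacks defined just before it:

static const struct seq_operations _opcode_stats_seq_ops = {
        .start = _opcode_stats_seq_start,
        .next  = _opcode_stats_seq_next,
        .stop  = _opcode_stats_seq_stop,
        .show  = _opcode_stats_seq_show
};
static int _opcode_stats_open(struct inode *inode, struct file *s)
{
        struct seq_file *seq;
        int ret;

        ret = seq_open(s, &_opcode_stats_seq_ops);
        if (ret)
                return ret;
        seq = s->private_data;
        seq->private = inode->i_private;        /* the owning qib_ibdev */
        return 0;
}
static const struct file_operations _opcode_stats_file_ops = {
        .owner   = THIS_MODULE,
        .open    = _opcode_stats_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release
};

With debugfs mounted, the per-device files then appear under /sys/kernel/debug/<QIB_DRV_NAME>/qib<unit>/ as opcode_stats, ctx_stats and qp_stats.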
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 000000000000..7ae983a91b8b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
1#ifndef _QIB_DEBUGFS_H
2#define _QIB_DEBUGFS_H
3
4#ifdef CONFIG_DEBUG_FS
5/*
6 * Copyright (c) 2013 Intel Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37struct qib_ibdev;
38void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
39void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
40void qib_dbg_init(void);
41void qib_dbg_exit(void);
42
43#endif
44
45#endif /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 216092477dfc..5bee08f16d74 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -558,7 +558,6 @@ move_along:
558 } 558 }
559 559
560 rcd->head = l; 560 rcd->head = l;
561 rcd->pkt_count += i;
562 561
563 /* 562 /*
564 * Iterate over all QPs waiting to respond. 563 * Iterate over all QPs waiting to respond.
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 9dd0bc89c3aa..b51a51486cb8 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
1155 return pollflag; 1155 return pollflag;
1156} 1156}
1157 1157
1158static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
1159{
1160 struct qib_filedata *fd = fp->private_data;
1161 const unsigned int weight = cpumask_weight(&current->cpus_allowed);
1162 const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
1163 int local_cpu;
1164
1165 /*
1166 * If process has NOT already set it's affinity, select and
1167 * reserve a processor for it on the local NUMA node.
1168 */
1169 if ((weight >= qib_cpulist_count) &&
1170 (cpumask_weight(local_mask) <= qib_cpulist_count)) {
1171 for_each_cpu(local_cpu, local_mask)
1172 if (!test_and_set_bit(local_cpu, qib_cpulist)) {
1173 fd->rec_cpu_num = local_cpu;
1174 return;
1175 }
1176 }
1177
1178 /*
1179 * If process has NOT already set it's affinity, select and
1180 * reserve a processor for it, as a rendevous for all
1181 * users of the driver. If they don't actually later
1182 * set affinity to this cpu, or set it to some other cpu,
1183 * it just means that sooner or later we don't recommend
1184 * a cpu, and let the scheduler do it's best.
1185 */
1186 if (weight >= qib_cpulist_count) {
1187 int cpu;
1188 cpu = find_first_zero_bit(qib_cpulist,
1189 qib_cpulist_count);
1190 if (cpu == qib_cpulist_count)
1191 qib_dev_err(dd,
1192 "no cpus avail for affinity PID %u\n",
1193 current->pid);
1194 else {
1195 __set_bit(cpu, qib_cpulist);
1196 fd->rec_cpu_num = cpu;
1197 }
1198 }
1199}
1200
1158/* 1201/*
1159 * Check that userland and driver are compatible for subcontexts. 1202 * Check that userland and driver are compatible for subcontexts.
1160 */ 1203 */
@@ -1259,12 +1302,20 @@ bail:
1259static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, 1302static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1260 struct file *fp, const struct qib_user_info *uinfo) 1303 struct file *fp, const struct qib_user_info *uinfo)
1261{ 1304{
1305 struct qib_filedata *fd = fp->private_data;
1262 struct qib_devdata *dd = ppd->dd; 1306 struct qib_devdata *dd = ppd->dd;
1263 struct qib_ctxtdata *rcd; 1307 struct qib_ctxtdata *rcd;
1264 void *ptmp = NULL; 1308 void *ptmp = NULL;
1265 int ret; 1309 int ret;
1310 int numa_id;
1311
1312 assign_ctxt_affinity(fp, dd);
1266 1313
1267 rcd = qib_create_ctxtdata(ppd, ctxt); 1314 numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
1315 cpu_to_node(fd->rec_cpu_num) :
1316 numa_node_id()) : dd->assigned_node_id;
1317
1318 rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
1268 1319
1269 /* 1320 /*
1270 * Allocate memory for use in qib_tid_update() at open to 1321 * Allocate memory for use in qib_tid_update() at open to
@@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1296 goto bail; 1347 goto bail;
1297 1348
1298bailerr: 1349bailerr:
1350 if (fd->rec_cpu_num != -1)
1351 __clear_bit(fd->rec_cpu_num, qib_cpulist);
1352
1299 dd->rcd[ctxt] = NULL; 1353 dd->rcd[ctxt] = NULL;
1300 kfree(rcd); 1354 kfree(rcd);
1301 kfree(ptmp); 1355 kfree(ptmp);
@@ -1485,6 +1539,57 @@ static int qib_open(struct inode *in, struct file *fp)
1485 return fp->private_data ? 0 : -ENOMEM; 1539 return fp->private_data ? 0 : -ENOMEM;
1486} 1540}
1487 1541
1542static int find_hca(unsigned int cpu, int *unit)
1543{
1544 int ret = 0, devmax, npresent, nup, ndev;
1545
1546 *unit = -1;
1547
1548 devmax = qib_count_units(&npresent, &nup);
1549 if (!npresent) {
1550 ret = -ENXIO;
1551 goto done;
1552 }
1553 if (!nup) {
1554 ret = -ENETDOWN;
1555 goto done;
1556 }
1557 for (ndev = 0; ndev < devmax; ndev++) {
1558 struct qib_devdata *dd = qib_lookup(ndev);
1559 if (dd) {
1560 if (pcibus_to_node(dd->pcidev->bus) < 0) {
1561 ret = -EINVAL;
1562 goto done;
1563 }
1564 if (cpu_to_node(cpu) ==
1565 pcibus_to_node(dd->pcidev->bus)) {
1566 *unit = ndev;
1567 goto done;
1568 }
1569 }
1570 }
1571done:
1572 return ret;
1573}
1574
1575static int do_qib_user_sdma_queue_create(struct file *fp)
1576{
1577 struct qib_filedata *fd = fp->private_data;
1578 struct qib_ctxtdata *rcd = fd->rcd;
1579 struct qib_devdata *dd = rcd->dd;
1580
1581 if (dd->flags & QIB_HAS_SEND_DMA)
1582
1583 fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
1584 dd->unit,
1585 rcd->ctxt,
1586 fd->subctxt);
1587 if (!fd->pq)
1588 return -ENOMEM;
1589
1590 return 0;
1591}
1592
1488/* 1593/*
1489 * Get ctxt early, so can set affinity prior to memory allocation. 1594 * Get ctxt early, so can set affinity prior to memory allocation.
1490 */ 1595 */
@@ -1517,61 +1622,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
1517 if (qib_compatible_subctxts(swmajor, swminor) && 1622 if (qib_compatible_subctxts(swmajor, swminor) &&
1518 uinfo->spu_subctxt_cnt) { 1623 uinfo->spu_subctxt_cnt) {
1519 ret = find_shared_ctxt(fp, uinfo); 1624 ret = find_shared_ctxt(fp, uinfo);
1520 if (ret) { 1625 if (ret > 0) {
1521 if (ret > 0) 1626 ret = do_qib_user_sdma_queue_create(fp);
1522 ret = 0; 1627 if (!ret)
1523 goto done_chk_sdma; 1628 assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
1629 goto done_ok;
1524 } 1630 }
1525 } 1631 }
1526 1632
1527 i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; 1633 i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
1528 if (i_minor) 1634 if (i_minor)
1529 ret = find_free_ctxt(i_minor - 1, fp, uinfo); 1635 ret = find_free_ctxt(i_minor - 1, fp, uinfo);
1530 else 1636 else {
1637 int unit;
1638 const unsigned int cpu = cpumask_first(&current->cpus_allowed);
1639 const unsigned int weight =
1640 cpumask_weight(&current->cpus_allowed);
1641
1642 if (weight == 1 && !test_bit(cpu, qib_cpulist))
1643 if (!find_hca(cpu, &unit) && unit >= 0)
1644 if (!find_free_ctxt(unit, fp, uinfo)) {
1645 ret = 0;
1646 goto done_chk_sdma;
1647 }
1531 ret = get_a_ctxt(fp, uinfo, alg); 1648 ret = get_a_ctxt(fp, uinfo, alg);
1532
1533done_chk_sdma:
1534 if (!ret) {
1535 struct qib_filedata *fd = fp->private_data;
1536 const struct qib_ctxtdata *rcd = fd->rcd;
1537 const struct qib_devdata *dd = rcd->dd;
1538 unsigned int weight;
1539
1540 if (dd->flags & QIB_HAS_SEND_DMA) {
1541 fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
1542 dd->unit,
1543 rcd->ctxt,
1544 fd->subctxt);
1545 if (!fd->pq)
1546 ret = -ENOMEM;
1547 }
1548
1549 /*
1550 * If process has NOT already set it's affinity, select and
1551 * reserve a processor for it, as a rendezvous for all
1552 * users of the driver. If they don't actually later
1553 * set affinity to this cpu, or set it to some other cpu,
1554 * it just means that sooner or later we don't recommend
1555 * a cpu, and let the scheduler do it's best.
1556 */
1557 weight = cpumask_weight(tsk_cpus_allowed(current));
1558 if (!ret && weight >= qib_cpulist_count) {
1559 int cpu;
1560 cpu = find_first_zero_bit(qib_cpulist,
1561 qib_cpulist_count);
1562 if (cpu != qib_cpulist_count) {
1563 __set_bit(cpu, qib_cpulist);
1564 fd->rec_cpu_num = cpu;
1565 }
1566 } else if (weight == 1 &&
1567 test_bit(cpumask_first(tsk_cpus_allowed(current)),
1568 qib_cpulist))
1569 qib_devinfo(dd->pcidev,
1570 "%s PID %u affinity set to cpu %d; already allocated\n",
1571 current->comm, current->pid,
1572 cpumask_first(tsk_cpus_allowed(current)));
1573 } 1649 }
1574 1650
1651done_chk_sdma:
1652 if (!ret)
1653 ret = do_qib_user_sdma_queue_create(fp);
1654done_ok:
1575 mutex_unlock(&qib_mutex); 1655 mutex_unlock(&qib_mutex);
1576 1656
1577done: 1657done:
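Both assign_ctxt_affinity() and the new fast path in qib_assign_ctxt() lean on a single module-wide bitmap, qib_cpulist: a CPU is "recommended" to a process by atomically claiming its bit, and the bailerr/close paths hand it back by clearing the bit again. Reduced to the bitmap mechanics (sketch, reusing the driver's own names):

/* claim: pick one unclaimed CPU on the HCA's NUMA node */
int cpu;

for_each_cpu(cpu, cpumask_of_pcibus(dd->pcidev->bus))
        if (!test_and_set_bit(cpu, qib_cpulist)) {
                fd->rec_cpu_num = cpu;
                break;
        }

/* release (error and close paths): make the CPU available again */
if (fd->rec_cpu_num != -1)
        __clear_bit(fd->rec_cpu_num, qib_cpulist);

The NUMA choice itself is made in setup_ctxt(): when qib_numa_aware is set, the context's memory follows the claimed CPU's node via cpu_to_node(); otherwise it falls back to dd->assigned_node_id, the node closest to the HCA.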
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 0232ae56b1fa..84e593d6007b 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3464,6 +3464,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
3464 return -ENXIO; 3464 return -ENXIO;
3465} 3465}
3466 3466
3467#ifdef CONFIG_INFINIBAND_QIB_DCA
3468static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
3469{
3470 return 0;
3471}
3472#endif
3473
3467/* Dummy function, as 6120 boards never disable EEPROM Write */ 3474/* Dummy function, as 6120 boards never disable EEPROM Write */
3468static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) 3475static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
3469{ 3476{
@@ -3539,6 +3546,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
3539 dd->f_xgxs_reset = qib_6120_xgxs_reset; 3546 dd->f_xgxs_reset = qib_6120_xgxs_reset;
3540 dd->f_writescratch = writescratch; 3547 dd->f_writescratch = writescratch;
3541 dd->f_tempsense_rd = qib_6120_tempsense_rd; 3548 dd->f_tempsense_rd = qib_6120_tempsense_rd;
3549#ifdef CONFIG_INFINIBAND_QIB_DCA
3550 dd->f_notify_dca = qib_6120_notify_dca;
3551#endif
3542 /* 3552 /*
3543 * Do remaining pcie setup and save pcie values in dd. 3553 * Do remaining pcie setup and save pcie values in dd.
3544 * Any error printing is already done by the init code. 3554 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 64d0ecb90cdc..454c2e7668fe 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4513,6 +4513,13 @@ bail:
4513 return ret; 4513 return ret;
4514} 4514}
4515 4515
4516#ifdef CONFIG_INFINIBAND_QIB_DCA
4517static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
4518{
4519 return 0;
4520}
4521#endif
4522
4516/* Dummy function, as 7220 boards never disable EEPROM Write */ 4523/* Dummy function, as 7220 boards never disable EEPROM Write */
4517static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) 4524static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
4518{ 4525{
@@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
4587 dd->f_xgxs_reset = qib_7220_xgxs_reset; 4594 dd->f_xgxs_reset = qib_7220_xgxs_reset;
4588 dd->f_writescratch = writescratch; 4595 dd->f_writescratch = writescratch;
4589 dd->f_tempsense_rd = qib_7220_tempsense_rd; 4596 dd->f_tempsense_rd = qib_7220_tempsense_rd;
4597#ifdef CONFIG_INFINIBAND_QIB_DCA
4598 dd->f_notify_dca = qib_7220_notify_dca;
4599#endif
4590 /* 4600 /*
4591 * Do remaining pcie setup and save pcie values in dd. 4601 * Do remaining pcie setup and save pcie values in dd.
4592 * Any error printing is already done by the init code. 4602 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 3f6b21e9dc11..21e8b09d4bf8 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -44,6 +44,9 @@
44#include <linux/module.h> 44#include <linux/module.h>
45#include <rdma/ib_verbs.h> 45#include <rdma/ib_verbs.h>
46#include <rdma/ib_smi.h> 46#include <rdma/ib_smi.h>
47#ifdef CONFIG_INFINIBAND_QIB_DCA
48#include <linux/dca.h>
49#endif
47 50
48#include "qib.h" 51#include "qib.h"
49#include "qib_7322_regs.h" 52#include "qib_7322_regs.h"
@@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
80static void serdes_7322_los_enable(struct qib_pportdata *, int); 83static void serdes_7322_los_enable(struct qib_pportdata *, int);
81static int serdes_7322_init_old(struct qib_pportdata *); 84static int serdes_7322_init_old(struct qib_pportdata *);
82static int serdes_7322_init_new(struct qib_pportdata *); 85static int serdes_7322_init_new(struct qib_pportdata *);
86static void dump_sdma_7322_state(struct qib_pportdata *);
83 87
84#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) 88#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
85 89
@@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = {
519 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN 523 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
520}; 524};
521 525
526#ifdef CONFIG_INFINIBAND_QIB_DCA
527struct qib_irq_notify {
528 int rcv;
529 void *arg;
530 struct irq_affinity_notify notify;
531};
532#endif
533
522struct qib_chip_specific { 534struct qib_chip_specific {
523 u64 __iomem *cregbase; 535 u64 __iomem *cregbase;
524 u64 *cntrs; 536 u64 *cntrs;
@@ -546,6 +558,12 @@ struct qib_chip_specific {
546 u32 lastbuf_for_pio; 558 u32 lastbuf_for_pio;
547 u32 stay_in_freeze; 559 u32 stay_in_freeze;
548 u32 recovery_ports_initted; 560 u32 recovery_ports_initted;
561#ifdef CONFIG_INFINIBAND_QIB_DCA
562 u32 dca_ctrl;
563 int rhdr_cpu[18];
564 int sdma_cpu[2];
565 u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
566#endif
549 struct qib_msix_entry *msix_entries; 567 struct qib_msix_entry *msix_entries;
550 unsigned long *sendchkenable; 568 unsigned long *sendchkenable;
551 unsigned long *sendgrhchk; 569 unsigned long *sendgrhchk;
@@ -573,7 +591,7 @@ struct vendor_txdds_ent {
573static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); 591static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
574 592
575#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ 593#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
576#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ 594#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
577#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ 595#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
578#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ 596#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
579 597
@@ -635,6 +653,7 @@ struct qib_chippport_specific {
635 u8 ibmalfusesnap; 653 u8 ibmalfusesnap;
636 struct qib_qsfp_data qsfp_data; 654 struct qib_qsfp_data qsfp_data;
637 char epmsgbuf[192]; /* for port error interrupt msg buffer */ 655 char epmsgbuf[192]; /* for port error interrupt msg buffer */
656 char sdmamsgbuf[192]; /* for per-port sdma error messages */
638}; 657};
639 658
640static struct { 659static struct {
@@ -642,28 +661,76 @@ static struct {
642 irq_handler_t handler; 661 irq_handler_t handler;
643 int lsb; 662 int lsb;
644 int port; /* 0 if not port-specific, else port # */ 663 int port; /* 0 if not port-specific, else port # */
664 int dca;
645} irq_table[] = { 665} irq_table[] = {
646 { "", qib_7322intr, -1, 0 }, 666 { "", qib_7322intr, -1, 0, 0 },
647 { " (buf avail)", qib_7322bufavail, 667 { " (buf avail)", qib_7322bufavail,
648 SYM_LSB(IntStatus, SendBufAvail), 0 }, 668 SYM_LSB(IntStatus, SendBufAvail), 0, 0},
649 { " (sdma 0)", sdma_intr, 669 { " (sdma 0)", sdma_intr,
650 SYM_LSB(IntStatus, SDmaInt_0), 1 }, 670 SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
651 { " (sdma 1)", sdma_intr, 671 { " (sdma 1)", sdma_intr,
652 SYM_LSB(IntStatus, SDmaInt_1), 2 }, 672 SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
653 { " (sdmaI 0)", sdma_idle_intr, 673 { " (sdmaI 0)", sdma_idle_intr,
654 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, 674 SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
655 { " (sdmaI 1)", sdma_idle_intr, 675 { " (sdmaI 1)", sdma_idle_intr,
656 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, 676 SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
657 { " (sdmaP 0)", sdma_progress_intr, 677 { " (sdmaP 0)", sdma_progress_intr,
658 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, 678 SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
659 { " (sdmaP 1)", sdma_progress_intr, 679 { " (sdmaP 1)", sdma_progress_intr,
660 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, 680 SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
661 { " (sdmaC 0)", sdma_cleanup_intr, 681 { " (sdmaC 0)", sdma_cleanup_intr,
662 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, 682 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
663 { " (sdmaC 1)", sdma_cleanup_intr, 683 { " (sdmaC 1)", sdma_cleanup_intr,
664 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, 684 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
665}; 685};
666 686
687#ifdef CONFIG_INFINIBAND_QIB_DCA
688
689static const struct dca_reg_map {
690 int shadow_inx;
691 int lsb;
692 u64 mask;
693 u16 regno;
694} dca_rcvhdr_reg_map[] = {
695 { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
696 ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
697 { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
698 ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
699 { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
700 ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
701 { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
702 ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
703 { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
704 ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
705 { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
706 ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
707 { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
708 ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
709 { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
710 ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
711 { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
712 ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
713 { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
714 ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
715 { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
716 ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
717 { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
718 ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
719 { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
720 ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
721 { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
722 ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
723 { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
724 ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
725 { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
726 ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
727 { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
728 ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
729 { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
730 ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
731};
732#endif
733
667/* ibcctrl bits */ 734/* ibcctrl bits */
668#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 735#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
669/* cycle through TS1/TS2 till OK */ 736/* cycle through TS1/TS2 till OK */
@@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *);
686static void setup_7322_link_recovery(struct qib_pportdata *, u32); 753static void setup_7322_link_recovery(struct qib_pportdata *, u32);
687static void check_7322_rxe_status(struct qib_pportdata *); 754static void check_7322_rxe_status(struct qib_pportdata *);
688static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); 755static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
756#ifdef CONFIG_INFINIBAND_QIB_DCA
757static void qib_setup_dca(struct qib_devdata *dd);
758static void setup_dca_notifier(struct qib_devdata *dd,
759 struct qib_msix_entry *m);
760static void reset_dca_notifier(struct qib_devdata *dd,
761 struct qib_msix_entry *m);
762#endif
689 763
690/** 764/**
691 * qib_read_ureg32 - read 32-bit virtualized per-context register 765 * qib_read_ureg32 - read 32-bit virtualized per-context register
@@ -1529,6 +1603,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
1529 1603
1530 spin_lock_irqsave(&ppd->sdma_lock, flags); 1604 spin_lock_irqsave(&ppd->sdma_lock, flags);
1531 1605
1606 if (errs != QIB_E_P_SDMAHALT) {
1607 /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
1608 qib_dev_porterr(dd, ppd->port,
1609 "SDMA %s 0x%016llx %s\n",
1610 qib_sdma_state_names[ppd->sdma_state.current_state],
1611 errs, ppd->cpspec->sdmamsgbuf);
1612 dump_sdma_7322_state(ppd);
1613 }
1614
1532 switch (ppd->sdma_state.current_state) { 1615 switch (ppd->sdma_state.current_state) {
1533 case qib_sdma_state_s00_hw_down: 1616 case qib_sdma_state_s00_hw_down:
1534 break; 1617 break;
@@ -2084,6 +2167,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2084 2167
2085 qib_dev_err(dd, "%s hardware error\n", msg); 2168 qib_dev_err(dd, "%s hardware error\n", msg);
2086 2169
2170 if (hwerrs &
2171 (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
2172 SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
2173 int pidx = 0;
2174 int err;
2175 unsigned long flags;
2176 struct qib_pportdata *ppd = dd->pport;
2177 for (; pidx < dd->num_pports; ++pidx, ppd++) {
2178 err = 0;
2179 if (pidx == 0 && (hwerrs &
2180 SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
2181 err++;
2182 if (pidx == 1 && (hwerrs &
2183 SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
2184 err++;
2185 if (err) {
2186 spin_lock_irqsave(&ppd->sdma_lock, flags);
2187 dump_sdma_7322_state(ppd);
2188 spin_unlock_irqrestore(&ppd->sdma_lock, flags);
2189 }
2190 }
2191 }
2192
2087 if (isfatal && !dd->diag_client) { 2193 if (isfatal && !dd->diag_client) {
2088 qib_dev_err(dd, 2194 qib_dev_err(dd,
2089 "Fatal Hardware Error, no longer usable, SN %.16s\n", 2195 "Fatal Hardware Error, no longer usable, SN %.16s\n",
@@ -2558,6 +2664,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
2558 qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); 2664 qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
2559} 2665}
2560 2666
2667#ifdef CONFIG_INFINIBAND_QIB_DCA
2668
2669static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
2670{
2671 switch (event) {
2672 case DCA_PROVIDER_ADD:
2673 if (dd->flags & QIB_DCA_ENABLED)
2674 break;
2675 if (!dca_add_requester(&dd->pcidev->dev)) {
2676 qib_devinfo(dd->pcidev, "DCA enabled\n");
2677 dd->flags |= QIB_DCA_ENABLED;
2678 qib_setup_dca(dd);
2679 }
2680 break;
2681 case DCA_PROVIDER_REMOVE:
2682 if (dd->flags & QIB_DCA_ENABLED) {
2683 dca_remove_requester(&dd->pcidev->dev);
2684 dd->flags &= ~QIB_DCA_ENABLED;
2685 dd->cspec->dca_ctrl = 0;
2686 qib_write_kreg(dd, KREG_IDX(DCACtrlA),
2687 dd->cspec->dca_ctrl);
2688 }
2689 break;
2690 }
2691 return 0;
2692}
2693
2694static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
2695{
2696 struct qib_devdata *dd = rcd->dd;
2697 struct qib_chip_specific *cspec = dd->cspec;
2698
2699 if (!(dd->flags & QIB_DCA_ENABLED))
2700 return;
2701 if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
2702 const struct dca_reg_map *rmp;
2703
2704 cspec->rhdr_cpu[rcd->ctxt] = cpu;
2705 rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
2706 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
2707 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
2708 (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
2709 qib_devinfo(dd->pcidev,
2710 "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
2711 (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
2712 qib_write_kreg(dd, rmp->regno,
2713 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
2714 cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
2715 qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
2716 }
2717}
2718
2719static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
2720{
2721 struct qib_devdata *dd = ppd->dd;
2722 struct qib_chip_specific *cspec = dd->cspec;
2723 unsigned pidx = ppd->port - 1;
2724
2725 if (!(dd->flags & QIB_DCA_ENABLED))
2726 return;
2727 if (cspec->sdma_cpu[pidx] != cpu) {
2728 cspec->sdma_cpu[pidx] = cpu;
2729 cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
2730 SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
2731 SYM_MASK(DCACtrlF, SendDma0DCAOPH));
2732 cspec->dca_rcvhdr_ctrl[4] |=
2733 (u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
2734 (ppd->hw_pidx ?
2735 SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
2736 SYM_LSB(DCACtrlF, SendDma0DCAOPH));
2737 qib_devinfo(dd->pcidev,
2738 "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
2739 (long long) cspec->dca_rcvhdr_ctrl[4]);
2740 qib_write_kreg(dd, KREG_IDX(DCACtrlF),
2741 cspec->dca_rcvhdr_ctrl[4]);
2742 cspec->dca_ctrl |= ppd->hw_pidx ?
2743 SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
2744 SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
2745 qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
2746 }
2747}
2748
2749static void qib_setup_dca(struct qib_devdata *dd)
2750{
2751 struct qib_chip_specific *cspec = dd->cspec;
2752 int i;
2753
2754 for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
2755 cspec->rhdr_cpu[i] = -1;
2756 for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
2757 cspec->sdma_cpu[i] = -1;
2758 cspec->dca_rcvhdr_ctrl[0] =
2759 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
2760 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
2761 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
2762 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
2763 cspec->dca_rcvhdr_ctrl[1] =
2764 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
2765 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
2766 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
2767 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
2768 cspec->dca_rcvhdr_ctrl[2] =
2769 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
2770 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
2771 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
2772 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
2773 cspec->dca_rcvhdr_ctrl[3] =
2774 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
2775 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
2776 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
2777 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
2778 cspec->dca_rcvhdr_ctrl[4] =
2779 (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
2780 (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
2781 for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
2782 qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
2783 cspec->dca_rcvhdr_ctrl[i]);
2784 for (i = 0; i < cspec->num_msix_entries; i++)
2785 setup_dca_notifier(dd, &cspec->msix_entries[i]);
2786}
2787
2788static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
2789 const cpumask_t *mask)
2790{
2791 struct qib_irq_notify *n =
2792 container_of(notify, struct qib_irq_notify, notify);
2793 int cpu = cpumask_first(mask);
2794
2795 if (n->rcv) {
2796 struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
2797 qib_update_rhdrq_dca(rcd, cpu);
2798 } else {
2799 struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
2800 qib_update_sdma_dca(ppd, cpu);
2801 }
2802}
2803
2804static void qib_irq_notifier_release(struct kref *ref)
2805{
2806 struct qib_irq_notify *n =
2807 container_of(ref, struct qib_irq_notify, notify.kref);
2808 struct qib_devdata *dd;
2809
2810 if (n->rcv) {
2811 struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
2812 dd = rcd->dd;
2813 } else {
2814 struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
2815 dd = ppd->dd;
2816 }
2817 qib_devinfo(dd->pcidev,
2818 "release on HCA notify 0x%p n 0x%p\n", ref, n);
2819 kfree(n);
2820}
2821#endif
2822
2561/* 2823/*
2562 * Disable MSIx interrupt if enabled, call generic MSIx code 2824 * Disable MSIx interrupt if enabled, call generic MSIx code
2563 * to cleanup, and clear pending MSIx interrupts. 2825 * to cleanup, and clear pending MSIx interrupts.
@@ -2575,6 +2837,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
2575 2837
2576 dd->cspec->num_msix_entries = 0; 2838 dd->cspec->num_msix_entries = 0;
2577 for (i = 0; i < n; i++) { 2839 for (i = 0; i < n; i++) {
2840#ifdef CONFIG_INFINIBAND_QIB_DCA
2841 reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
2842#endif
2578 irq_set_affinity_hint( 2843 irq_set_affinity_hint(
2579 dd->cspec->msix_entries[i].msix.vector, NULL); 2844 dd->cspec->msix_entries[i].msix.vector, NULL);
2580 free_cpumask_var(dd->cspec->msix_entries[i].mask); 2845 free_cpumask_var(dd->cspec->msix_entries[i].mask);
@@ -2602,6 +2867,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
2602{ 2867{
2603 int i; 2868 int i;
2604 2869
2870#ifdef CONFIG_INFINIBAND_QIB_DCA
2871 if (dd->flags & QIB_DCA_ENABLED) {
2872 dca_remove_requester(&dd->pcidev->dev);
2873 dd->flags &= ~QIB_DCA_ENABLED;
2874 dd->cspec->dca_ctrl = 0;
2875 qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
2876 }
2877#endif
2878
2605 qib_7322_free_irq(dd); 2879 qib_7322_free_irq(dd);
2606 kfree(dd->cspec->cntrs); 2880 kfree(dd->cspec->cntrs);
2607 kfree(dd->cspec->sendchkenable); 2881 kfree(dd->cspec->sendchkenable);
@@ -3068,6 +3342,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
3068 return IRQ_HANDLED; 3342 return IRQ_HANDLED;
3069} 3343}
3070 3344
3345#ifdef CONFIG_INFINIBAND_QIB_DCA
3346
3347static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
3348{
3349 if (!m->dca)
3350 return;
3351 qib_devinfo(dd->pcidev,
3352 "Disabling notifier on HCA %d irq %d\n",
3353 dd->unit,
3354 m->msix.vector);
3355 irq_set_affinity_notifier(
3356 m->msix.vector,
3357 NULL);
3358 m->notifier = NULL;
3359}
3360
3361static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
3362{
3363 struct qib_irq_notify *n;
3364
3365 if (!m->dca)
3366 return;
3367 n = kzalloc(sizeof(*n), GFP_KERNEL);
3368 if (n) {
3369 int ret;
3370
3371 m->notifier = n;
3372 n->notify.irq = m->msix.vector;
3373 n->notify.notify = qib_irq_notifier_notify;
3374 n->notify.release = qib_irq_notifier_release;
3375 n->arg = m->arg;
3376 n->rcv = m->rcv;
3377 qib_devinfo(dd->pcidev,
3378 "set notifier irq %d rcv %d notify %p\n",
3379 n->notify.irq, n->rcv, &n->notify);
3380 ret = irq_set_affinity_notifier(
3381 n->notify.irq,
3382 &n->notify);
3383 if (ret) {
3384 m->notifier = NULL;
3385 kfree(n);
3386 }
3387 }
3388}
3389
3390#endif
3391
3071/* 3392/*
3072 * Set up our chip-specific interrupt handler. 3393 * Set up our chip-specific interrupt handler.
3073 * The interrupt type has already been setup, so 3394 * The interrupt type has already been setup, so
@@ -3149,6 +3470,9 @@ try_intx:
3149 void *arg; 3470 void *arg;
3150 u64 val; 3471 u64 val;
3151 int lsb, reg, sh; 3472 int lsb, reg, sh;
3473#ifdef CONFIG_INFINIBAND_QIB_DCA
3474 int dca = 0;
3475#endif
3152 3476
3153 dd->cspec->msix_entries[msixnum]. 3477 dd->cspec->msix_entries[msixnum].
3154 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] 3478 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
@@ -3161,6 +3485,9 @@ try_intx:
3161 arg = dd->pport + irq_table[i].port - 1; 3485 arg = dd->pport + irq_table[i].port - 1;
3162 } else 3486 } else
3163 arg = dd; 3487 arg = dd;
3488#ifdef CONFIG_INFINIBAND_QIB_DCA
3489 dca = irq_table[i].dca;
3490#endif
3164 lsb = irq_table[i].lsb; 3491 lsb = irq_table[i].lsb;
3165 handler = irq_table[i].handler; 3492 handler = irq_table[i].handler;
3166 snprintf(dd->cspec->msix_entries[msixnum].name, 3493 snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3178,6 +3505,9 @@ try_intx:
3178 continue; 3505 continue;
3179 if (qib_krcvq01_no_msi && ctxt < 2) 3506 if (qib_krcvq01_no_msi && ctxt < 2)
3180 continue; 3507 continue;
3508#ifdef CONFIG_INFINIBAND_QIB_DCA
3509 dca = 1;
3510#endif
3181 lsb = QIB_I_RCVAVAIL_LSB + ctxt; 3511 lsb = QIB_I_RCVAVAIL_LSB + ctxt;
3182 handler = qib_7322pintr; 3512 handler = qib_7322pintr;
3183 snprintf(dd->cspec->msix_entries[msixnum].name, 3513 snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3203,6 +3533,11 @@ try_intx:
3203 goto try_intx; 3533 goto try_intx;
3204 } 3534 }
3205 dd->cspec->msix_entries[msixnum].arg = arg; 3535 dd->cspec->msix_entries[msixnum].arg = arg;
3536#ifdef CONFIG_INFINIBAND_QIB_DCA
3537 dd->cspec->msix_entries[msixnum].dca = dca;
3538 dd->cspec->msix_entries[msixnum].rcv =
3539 handler == qib_7322pintr;
3540#endif
3206 if (lsb >= 0) { 3541 if (lsb >= 0) {
3207 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; 3542 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
3208 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * 3543 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -6452,6 +6787,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
6452 qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); 6787 qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
6453} 6788}
6454 6789
6790/*
6791 * sdma_lock should be acquired before calling this routine
6792 */
6793static void dump_sdma_7322_state(struct qib_pportdata *ppd)
6794{
6795 u64 reg, reg1, reg2;
6796
6797 reg = qib_read_kreg_port(ppd, krp_senddmastatus);
6798 qib_dev_porterr(ppd->dd, ppd->port,
6799 "SDMA senddmastatus: 0x%016llx\n", reg);
6800
6801 reg = qib_read_kreg_port(ppd, krp_sendctrl);
6802 qib_dev_porterr(ppd->dd, ppd->port,
6803 "SDMA sendctrl: 0x%016llx\n", reg);
6804
6805 reg = qib_read_kreg_port(ppd, krp_senddmabase);
6806 qib_dev_porterr(ppd->dd, ppd->port,
6807 "SDMA senddmabase: 0x%016llx\n", reg);
6808
6809 reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
6810 reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
6811 reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
6812 qib_dev_porterr(ppd->dd, ppd->port,
6813 "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
6814 reg, reg1, reg2);
6815
6816 /* get bufuse bits, clear them, and print them again if non-zero */
6817 reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
6818 qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
6819 reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
6820 qib_write_kreg_port(ppd, krp_senddmabuf_use1, reg1);
6821 reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
6822 qib_write_kreg_port(ppd, krp_senddmabuf_use2, reg2);
6823 /* 0 and 1 should always be zero, so print as short form */
6824 qib_dev_porterr(ppd->dd, ppd->port,
6825 "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
6826 reg, reg1, reg2);
6827 reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
6828 reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
6829 reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
6830 /* 0 and 1 should always be zero, so print as short form */
6831 qib_dev_porterr(ppd->dd, ppd->port,
6832 "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
6833 reg, reg1, reg2);
6834
6835 reg = qib_read_kreg_port(ppd, krp_senddmatail);
6836 qib_dev_porterr(ppd->dd, ppd->port,
6837 "SDMA senddmatail: 0x%016llx\n", reg);
6838
6839 reg = qib_read_kreg_port(ppd, krp_senddmahead);
6840 qib_dev_porterr(ppd->dd, ppd->port,
6841 "SDMA senddmahead: 0x%016llx\n", reg);
6842
6843 reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
6844 qib_dev_porterr(ppd->dd, ppd->port,
6845 "SDMA senddmaheadaddr: 0x%016llx\n", reg);
6846
6847 reg = qib_read_kreg_port(ppd, krp_senddmalengen);
6848 qib_dev_porterr(ppd->dd, ppd->port,
6849 "SDMA senddmalengen: 0x%016llx\n", reg);
6850
6851 reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
6852 qib_dev_porterr(ppd->dd, ppd->port,
6853 "SDMA senddmadesccnt: 0x%016llx\n", reg);
6854
6855 reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
6856 qib_dev_porterr(ppd->dd, ppd->port,
6857 "SDMA senddmaidlecnt: 0x%016llx\n", reg);
6858
6859 reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
6860 qib_dev_porterr(ppd->dd, ppd->port,
6861 "SDMA senddmapriorityhld: 0x%016llx\n", reg);
6862
6863 reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
6864 qib_dev_porterr(ppd->dd, ppd->port,
6865 "SDMA senddmareloadcnt: 0x%016llx\n", reg);
6866
6867 dump_sdma_state(ppd);
6868}
6869
6455static struct sdma_set_state_action sdma_7322_action_table[] = { 6870static struct sdma_set_state_action sdma_7322_action_table[] = {
6456 [qib_sdma_state_s00_hw_down] = { 6871 [qib_sdma_state_s00_hw_down] = {
6457 .go_s99_running_tofalse = 1, 6872 .go_s99_running_tofalse = 1,
@@ -6885,6 +7300,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6885 dd->f_sdma_init_early = qib_7322_sdma_init_early; 7300 dd->f_sdma_init_early = qib_7322_sdma_init_early;
6886 dd->f_writescratch = writescratch; 7301 dd->f_writescratch = writescratch;
6887 dd->f_tempsense_rd = qib_7322_tempsense_rd; 7302 dd->f_tempsense_rd = qib_7322_tempsense_rd;
7303#ifdef CONFIG_INFINIBAND_QIB_DCA
7304 dd->f_notify_dca = qib_7322_notify_dca;
7305#endif
6888 /* 7306 /*
6889 * Do remaining PCIe setup and save PCIe values in dd. 7307 * Do remaining PCIe setup and save PCIe values in dd.
6890 * Any error printing is already done by the init code. 7308 * Any error printing is already done by the init code.
@@ -6921,7 +7339,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6921 actual_cnt -= dd->num_pports; 7339 actual_cnt -= dd->num_pports;
6922 7340
6923 tabsize = actual_cnt; 7341 tabsize = actual_cnt;
6924 dd->cspec->msix_entries = kmalloc(tabsize * 7342 dd->cspec->msix_entries = kzalloc(tabsize *
6925 sizeof(struct qib_msix_entry), GFP_KERNEL); 7343 sizeof(struct qib_msix_entry), GFP_KERNEL);
6926 if (!dd->cspec->msix_entries) { 7344 if (!dd->cspec->msix_entries) {
6927 qib_dev_err(dd, "No memory for MSIx table\n"); 7345 qib_dev_err(dd, "No memory for MSIx table\n");
@@ -6941,7 +7359,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6941 7359
6942 /* clear diagctrl register, in case diags were running and crashed */ 7360 /* clear diagctrl register, in case diags were running and crashed */
6943 qib_write_kreg(dd, kr_hwdiagctrl, 0); 7361 qib_write_kreg(dd, kr_hwdiagctrl, 0);
6944 7362#ifdef CONFIG_INFINIBAND_QIB_DCA
7363 if (!dca_add_requester(&pdev->dev)) {
7364 qib_devinfo(dd->pcidev, "DCA enabled\n");
7365 dd->flags |= QIB_DCA_ENABLED;
7366 qib_setup_dca(dd);
7367 }
7368#endif
6945 goto bail; 7369 goto bail;
6946 7370
6947bail_cleanup: 7371bail_cleanup:
@@ -7156,15 +7580,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
7156 { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ 7580 { 0, 0, 0, 1 }, /* QMH7342 backplane settings */
7157 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ 7581 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
7158 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ 7582 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
7159 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7160 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7161 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7162 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7163 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7164 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7165 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7166 { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ 7583 { 0, 0, 0, 3 }, /* QMH7342 backplane settings */
7167 { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ 7584 { 0, 0, 0, 4 }, /* QMH7342 backplane settings */
7585 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7586 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7587 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7588 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7589 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7590 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7591 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7592 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7593 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7594 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7595 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7596 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7168}; 7597};
7169 7598
7170static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { 7599static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
@@ -7173,15 +7602,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
7173 { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ 7602 { 0, 0, 0, 7 }, /* QMH7342 backplane settings */
7174 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ 7603 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
7175 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ 7604 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
7176 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7177 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7178 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7179 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7180 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7181 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7182 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7183 { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ 7605 { 0, 0, 0, 9 }, /* QMH7342 backplane settings */
7184 { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ 7606 { 0, 0, 0, 10 }, /* QMH7342 backplane settings */
7607 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7608 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7609 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7610 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7611 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7612 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7613 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7614 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7615 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7616 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7617 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7618 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7185}; 7619};
7186 7620
7187static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { 7621static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
@@ -7190,15 +7624,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
7190 { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ 7624 { 0, 1, 0, 5 }, /* QMH7342 backplane settings */
7191 { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ 7625 { 0, 1, 0, 6 }, /* QMH7342 backplane settings */
7192 { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ 7626 { 0, 1, 0, 8 }, /* QMH7342 backplane settings */
7193 { 0, 1, 12, 10 }, /* QME7342 backplane setting */
7194 { 0, 1, 12, 11 }, /* QME7342 backplane setting */
7195 { 0, 1, 12, 12 }, /* QME7342 backplane setting */
7196 { 0, 1, 12, 14 }, /* QME7342 backplane setting */
7197 { 0, 1, 12, 6 }, /* QME7342 backplane setting */
7198 { 0, 1, 12, 7 }, /* QME7342 backplane setting */
7199 { 0, 1, 12, 8 }, /* QME7342 backplane setting */
7200 { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ 7627 { 0, 1, 0, 10 }, /* QMH7342 backplane settings */
7201 { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ 7628 { 0, 1, 0, 12 }, /* QMH7342 backplane settings */
7629 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7630 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7631 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7632 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7633 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7634 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7635 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7636 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7637 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7638 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7639 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7640 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7202}; 7641};
7203 7642
7204static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { 7643static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
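
A note on the DCA hunks in qib_iba7322.c above: setup_dca_notifier() registers an
irq_affinity_notify per MSI-X vector, and when the vector is retargeted the notify
callback looks up a fresh tag with dca3_get_tag() for the new CPU and rewrites the
DCACtrl shadow registers. Below is a minimal, generic sketch of that notifier
pattern; the mydca_* names and struct mydca_dev are placeholders, not driver code.

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/dca.h>

struct mydca_dev {
        struct device *dma_dev;         /* device registered with the DCA core */
        /* hardware-specific tag/steering registers would live here */
};

struct mydca_notify {
        struct irq_affinity_notify notify;
        struct mydca_dev *dev;
};

/* placeholder: write the new tag into the device's steering register */
static void mydca_program_tag(struct mydca_dev *dev, u8 tag)
{
}

static void mydca_affinity_notify(struct irq_affinity_notify *notify,
                                  const cpumask_t *mask)
{
        struct mydca_notify *n =
                container_of(notify, struct mydca_notify, notify);
        int cpu = cpumask_first(mask);

        /* affinity moved: recompute the DCA tag for the new CPU */
        mydca_program_tag(n->dev, dca3_get_tag(n->dev->dma_dev, cpu));
}

static void mydca_affinity_release(struct kref *ref)
{
        kfree(container_of(ref, struct mydca_notify, notify.kref));
}

static int mydca_setup_notifier(struct mydca_dev *dev, unsigned int irq)
{
        struct mydca_notify *n = kzalloc(sizeof(*n), GFP_KERNEL);
        int ret;

        if (!n)
                return -ENOMEM;
        n->dev = dev;
        n->notify.irq = irq;
        n->notify.notify = mydca_affinity_notify;
        n->notify.release = mydca_affinity_release;
        ret = irq_set_affinity_notifier(irq, &n->notify);
        if (ret)
                kfree(n);
        return ret;
}

The release() callback runs when the IRQ core drops the old notifier's kref, which
is why reset_dca_notifier() above only needs to pass NULL to
irq_set_affinity_notifier() to tear a notifier down.
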
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 173f805790da..36e048e0e1d9 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -39,10 +39,17 @@
39#include <linux/idr.h> 39#include <linux/idr.h>
40#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/printk.h> 41#include <linux/printk.h>
42#ifdef CONFIG_INFINIBAND_QIB_DCA
43#include <linux/dca.h>
44#endif
42 45
43#include "qib.h" 46#include "qib.h"
44#include "qib_common.h" 47#include "qib_common.h"
45#include "qib_mad.h" 48#include "qib_mad.h"
49#ifdef CONFIG_DEBUG_FS
50#include "qib_debugfs.h"
51#include "qib_verbs.h"
52#endif
46 53
47#undef pr_fmt 54#undef pr_fmt
48#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt 55#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
@@ -64,6 +71,11 @@ ushort qib_cfgctxts;
64module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); 71module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
65MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); 72MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
66 73
74unsigned qib_numa_aware;
75module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
76MODULE_PARM_DESC(numa_aware,
77 "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
78
67/* 79/*
68 * If set, do not write to any regs if avoidable, hack to allow 80 * If set, do not write to any regs if avoidable, hack to allow
69 * check for deranged default register values. 81 * check for deranged default register values.
@@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
89module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); 101module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
90MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); 102MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
91 103
92struct workqueue_struct *qib_cq_wq;
93
94static void verify_interrupt(unsigned long); 104static void verify_interrupt(unsigned long);
95 105
96static struct idr qib_unit_table; 106static struct idr qib_unit_table;
@@ -121,6 +131,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
121{ 131{
122 unsigned i; 132 unsigned i;
123 int ret; 133 int ret;
134 int local_node_id = pcibus_to_node(dd->pcidev->bus);
135
136 if (local_node_id < 0)
137 local_node_id = numa_node_id();
138 dd->assigned_node_id = local_node_id;
124 139
125 /* 140 /*
126 * Allocate full ctxtcnt array, rather than just cfgctxts, because 141 * Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -143,7 +158,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
143 continue; 158 continue;
144 159
145 ppd = dd->pport + (i % dd->num_pports); 160 ppd = dd->pport + (i % dd->num_pports);
146 rcd = qib_create_ctxtdata(ppd, i); 161
162 rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
147 if (!rcd) { 163 if (!rcd) {
148 qib_dev_err(dd, 164 qib_dev_err(dd,
149 "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); 165 "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
@@ -161,20 +177,33 @@ done:
161/* 177/*
162 * Common code for user and kernel context setup. 178 * Common code for user and kernel context setup.
163 */ 179 */
164struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) 180struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
181 int node_id)
165{ 182{
166 struct qib_devdata *dd = ppd->dd; 183 struct qib_devdata *dd = ppd->dd;
167 struct qib_ctxtdata *rcd; 184 struct qib_ctxtdata *rcd;
168 185
169 rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); 186 rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
170 if (rcd) { 187 if (rcd) {
171 INIT_LIST_HEAD(&rcd->qp_wait_list); 188 INIT_LIST_HEAD(&rcd->qp_wait_list);
189 rcd->node_id = node_id;
172 rcd->ppd = ppd; 190 rcd->ppd = ppd;
173 rcd->dd = dd; 191 rcd->dd = dd;
174 rcd->cnt = 1; 192 rcd->cnt = 1;
175 rcd->ctxt = ctxt; 193 rcd->ctxt = ctxt;
176 dd->rcd[ctxt] = rcd; 194 dd->rcd[ctxt] = rcd;
177 195#ifdef CONFIG_DEBUG_FS
196 if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
197 rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
198 GFP_KERNEL, node_id);
199 if (!rcd->opstats) {
200 kfree(rcd);
201 qib_dev_err(dd,
202 "Unable to allocate per ctxt stats buffer\n");
203 return NULL;
204 }
205 }
206#endif
178 dd->f_init_ctxt(rcd); 207 dd->f_init_ctxt(rcd);
179 208
180 /* 209 /*
@@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd)
429 dd->intrchk_timer.function = verify_interrupt; 458 dd->intrchk_timer.function = verify_interrupt;
430 dd->intrchk_timer.data = (unsigned long) dd; 459 dd->intrchk_timer.data = (unsigned long) dd;
431 460
461 ret = qib_cq_init(dd);
432done: 462done:
433 return ret; 463 return ret;
434} 464}
@@ -944,6 +974,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
944 vfree(rcd->subctxt_uregbase); 974 vfree(rcd->subctxt_uregbase);
945 vfree(rcd->subctxt_rcvegrbuf); 975 vfree(rcd->subctxt_rcvegrbuf);
946 vfree(rcd->subctxt_rcvhdr_base); 976 vfree(rcd->subctxt_rcvhdr_base);
977#ifdef CONFIG_DEBUG_FS
978 kfree(rcd->opstats);
979 rcd->opstats = NULL;
980#endif
947 kfree(rcd); 981 kfree(rcd);
948} 982}
949 983
@@ -1033,7 +1067,6 @@ done:
1033 dd->f_set_armlaunch(dd, 1); 1067 dd->f_set_armlaunch(dd, 1);
1034} 1068}
1035 1069
1036
1037void qib_free_devdata(struct qib_devdata *dd) 1070void qib_free_devdata(struct qib_devdata *dd)
1038{ 1071{
1039 unsigned long flags; 1072 unsigned long flags;
@@ -1043,6 +1076,9 @@ void qib_free_devdata(struct qib_devdata *dd)
1043 list_del(&dd->list); 1076 list_del(&dd->list);
1044 spin_unlock_irqrestore(&qib_devs_lock, flags); 1077 spin_unlock_irqrestore(&qib_devs_lock, flags);
1045 1078
1079#ifdef CONFIG_DEBUG_FS
1080 qib_dbg_ibdev_exit(&dd->verbs_dev);
1081#endif
1046 ib_dealloc_device(&dd->verbs_dev.ibdev); 1082 ib_dealloc_device(&dd->verbs_dev.ibdev);
1047} 1083}
1048 1084
@@ -1066,6 +1102,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1066 goto bail; 1102 goto bail;
1067 } 1103 }
1068 1104
1105#ifdef CONFIG_DEBUG_FS
1106 qib_dbg_ibdev_init(&dd->verbs_dev);
1107#endif
1108
1069 idr_preload(GFP_KERNEL); 1109 idr_preload(GFP_KERNEL);
1070 spin_lock_irqsave(&qib_devs_lock, flags); 1110 spin_lock_irqsave(&qib_devs_lock, flags);
1071 1111
@@ -1081,6 +1121,9 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1081 if (ret < 0) { 1121 if (ret < 0) {
1082 qib_early_err(&pdev->dev, 1122 qib_early_err(&pdev->dev,
1083 "Could not allocate unit ID: error %d\n", -ret); 1123 "Could not allocate unit ID: error %d\n", -ret);
1124#ifdef CONFIG_DEBUG_FS
1125 qib_dbg_ibdev_exit(&dd->verbs_dev);
1126#endif
1084 ib_dealloc_device(&dd->verbs_dev.ibdev); 1127 ib_dealloc_device(&dd->verbs_dev.ibdev);
1085 dd = ERR_PTR(ret); 1128 dd = ERR_PTR(ret);
1086 goto bail; 1129 goto bail;
@@ -1158,6 +1201,35 @@ struct pci_driver qib_driver = {
1158 .err_handler = &qib_pci_err_handler, 1201 .err_handler = &qib_pci_err_handler,
1159}; 1202};
1160 1203
1204#ifdef CONFIG_INFINIBAND_QIB_DCA
1205
1206static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
1207static struct notifier_block dca_notifier = {
1208 .notifier_call = qib_notify_dca,
1209 .next = NULL,
1210 .priority = 0
1211};
1212
1213static int qib_notify_dca_device(struct device *device, void *data)
1214{
1215 struct qib_devdata *dd = dev_get_drvdata(device);
1216 unsigned long event = *(unsigned long *)data;
1217
1218 return dd->f_notify_dca(dd, event);
1219}
1220
1221static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
1222 void *p)
1223{
1224 int rval;
1225
1226 rval = driver_for_each_device(&qib_driver.driver, NULL,
1227 &event, qib_notify_dca_device);
1228 return rval ? NOTIFY_BAD : NOTIFY_DONE;
1229}
1230
1231#endif
1232
1161/* 1233/*
1162 * Do all the generic driver unit- and chip-independent memory 1234 * Do all the generic driver unit- and chip-independent memory
1163 * allocation and initialization. 1235 * allocation and initialization.
@@ -1170,22 +1242,22 @@ static int __init qlogic_ib_init(void)
1170 if (ret) 1242 if (ret)
1171 goto bail; 1243 goto bail;
1172 1244
1173 qib_cq_wq = create_singlethread_workqueue("qib_cq");
1174 if (!qib_cq_wq) {
1175 ret = -ENOMEM;
1176 goto bail_dev;
1177 }
1178
1179 /* 1245 /*
1180 * These must be called before the driver is registered with 1246 * These must be called before the driver is registered with
1181 * the PCI subsystem. 1247 * the PCI subsystem.
1182 */ 1248 */
1183 idr_init(&qib_unit_table); 1249 idr_init(&qib_unit_table);
1184 1250
1251#ifdef CONFIG_INFINIBAND_QIB_DCA
1252 dca_register_notify(&dca_notifier);
1253#endif
1254#ifdef CONFIG_DEBUG_FS
1255 qib_dbg_init();
1256#endif
1185 ret = pci_register_driver(&qib_driver); 1257 ret = pci_register_driver(&qib_driver);
1186 if (ret < 0) { 1258 if (ret < 0) {
1187 pr_err("Unable to register driver: error %d\n", -ret); 1259 pr_err("Unable to register driver: error %d\n", -ret);
1188 goto bail_unit; 1260 goto bail_dev;
1189 } 1261 }
1190 1262
1191 /* not fatal if it doesn't work */ 1263 /* not fatal if it doesn't work */
@@ -1193,10 +1265,14 @@ static int __init qlogic_ib_init(void)
1193 pr_err("Unable to register ipathfs\n"); 1265 pr_err("Unable to register ipathfs\n");
1194 goto bail; /* all OK */ 1266 goto bail; /* all OK */
1195 1267
1196bail_unit:
1197 idr_destroy(&qib_unit_table);
1198 destroy_workqueue(qib_cq_wq);
1199bail_dev: 1268bail_dev:
1269#ifdef CONFIG_INFINIBAND_QIB_DCA
1270 dca_unregister_notify(&dca_notifier);
1271#endif
1272#ifdef CONFIG_DEBUG_FS
1273 qib_dbg_exit();
1274#endif
1275 idr_destroy(&qib_unit_table);
1200 qib_dev_cleanup(); 1276 qib_dev_cleanup();
1201bail: 1277bail:
1202 return ret; 1278 return ret;
@@ -1217,9 +1293,13 @@ static void __exit qlogic_ib_cleanup(void)
1217 "Unable to cleanup counter filesystem: error %d\n", 1293 "Unable to cleanup counter filesystem: error %d\n",
1218 -ret); 1294 -ret);
1219 1295
1296#ifdef CONFIG_INFINIBAND_QIB_DCA
1297 dca_unregister_notify(&dca_notifier);
1298#endif
1220 pci_unregister_driver(&qib_driver); 1299 pci_unregister_driver(&qib_driver);
1221 1300#ifdef CONFIG_DEBUG_FS
1222 destroy_workqueue(qib_cq_wq); 1301 qib_dbg_exit();
1302#endif
1223 1303
1224 qib_cpulist_count = 0; 1304 qib_cpulist_count = 0;
1225 kfree(qib_cpulist); 1305 kfree(qib_cpulist);
@@ -1270,7 +1350,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
1270 if (dd->pageshadow) { 1350 if (dd->pageshadow) {
1271 struct page **tmpp = dd->pageshadow; 1351 struct page **tmpp = dd->pageshadow;
1272 dma_addr_t *tmpd = dd->physshadow; 1352 dma_addr_t *tmpd = dd->physshadow;
1273 int i, cnt = 0; 1353 int i;
1274 1354
1275 for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { 1355 for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
1276 int ctxt_tidbase = ctxt * dd->rcvtidcnt; 1356 int ctxt_tidbase = ctxt * dd->rcvtidcnt;
@@ -1283,13 +1363,13 @@ static void cleanup_device_data(struct qib_devdata *dd)
1283 PAGE_SIZE, PCI_DMA_FROMDEVICE); 1363 PAGE_SIZE, PCI_DMA_FROMDEVICE);
1284 qib_release_user_pages(&tmpp[i], 1); 1364 qib_release_user_pages(&tmpp[i], 1);
1285 tmpp[i] = NULL; 1365 tmpp[i] = NULL;
1286 cnt++;
1287 } 1366 }
1288 } 1367 }
1289 1368
1290 tmpp = dd->pageshadow;
1291 dd->pageshadow = NULL; 1369 dd->pageshadow = NULL;
1292 vfree(tmpp); 1370 vfree(tmpp);
1371 dd->physshadow = NULL;
1372 vfree(tmpd);
1293 } 1373 }
1294 1374
1295 /* 1375 /*
@@ -1311,6 +1391,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
1311 } 1391 }
1312 kfree(tmp); 1392 kfree(tmp);
1313 kfree(dd->boardname); 1393 kfree(dd->boardname);
1394 qib_cq_exit(dd);
1314} 1395}
1315 1396
1316/* 1397/*
@@ -1483,6 +1564,7 @@ static void qib_remove_one(struct pci_dev *pdev)
1483int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) 1564int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1484{ 1565{
1485 unsigned amt; 1566 unsigned amt;
1567 int old_node_id;
1486 1568
1487 if (!rcd->rcvhdrq) { 1569 if (!rcd->rcvhdrq) {
1488 dma_addr_t phys_hdrqtail; 1570 dma_addr_t phys_hdrqtail;
@@ -1492,9 +1574,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1492 sizeof(u32), PAGE_SIZE); 1574 sizeof(u32), PAGE_SIZE);
1493 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 1575 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
1494 GFP_USER : GFP_KERNEL; 1576 GFP_USER : GFP_KERNEL;
1577
1578 old_node_id = dev_to_node(&dd->pcidev->dev);
1579 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1495 rcd->rcvhdrq = dma_alloc_coherent( 1580 rcd->rcvhdrq = dma_alloc_coherent(
1496 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, 1581 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
1497 gfp_flags | __GFP_COMP); 1582 gfp_flags | __GFP_COMP);
1583 set_dev_node(&dd->pcidev->dev, old_node_id);
1498 1584
1499 if (!rcd->rcvhdrq) { 1585 if (!rcd->rcvhdrq) {
1500 qib_dev_err(dd, 1586 qib_dev_err(dd,
@@ -1510,9 +1596,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1510 } 1596 }
1511 1597
1512 if (!(dd->flags & QIB_NODMA_RTAIL)) { 1598 if (!(dd->flags & QIB_NODMA_RTAIL)) {
1599 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1513 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( 1600 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
1514 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1601 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
1515 gfp_flags); 1602 gfp_flags);
1603 set_dev_node(&dd->pcidev->dev, old_node_id);
1516 if (!rcd->rcvhdrtail_kvaddr) 1604 if (!rcd->rcvhdrtail_kvaddr)
1517 goto bail_free; 1605 goto bail_free;
1518 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; 1606 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1556,6 +1644,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1556 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; 1644 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
1557 size_t size; 1645 size_t size;
1558 gfp_t gfp_flags; 1646 gfp_t gfp_flags;
1647 int old_node_id;
1559 1648
1560 /* 1649 /*
1561 * GFP_USER, but without GFP_FS, so buffer cache can be 1650 * GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1574,25 +1663,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1574 size = rcd->rcvegrbuf_size; 1663 size = rcd->rcvegrbuf_size;
1575 if (!rcd->rcvegrbuf) { 1664 if (!rcd->rcvegrbuf) {
1576 rcd->rcvegrbuf = 1665 rcd->rcvegrbuf =
1577 kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), 1666 kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
1578 GFP_KERNEL); 1667 GFP_KERNEL, rcd->node_id);
1579 if (!rcd->rcvegrbuf) 1668 if (!rcd->rcvegrbuf)
1580 goto bail; 1669 goto bail;
1581 } 1670 }
1582 if (!rcd->rcvegrbuf_phys) { 1671 if (!rcd->rcvegrbuf_phys) {
1583 rcd->rcvegrbuf_phys = 1672 rcd->rcvegrbuf_phys =
1584 kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), 1673 kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
1585 GFP_KERNEL); 1674 GFP_KERNEL, rcd->node_id);
1586 if (!rcd->rcvegrbuf_phys) 1675 if (!rcd->rcvegrbuf_phys)
1587 goto bail_rcvegrbuf; 1676 goto bail_rcvegrbuf;
1588 } 1677 }
1589 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { 1678 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
1590 if (rcd->rcvegrbuf[e]) 1679 if (rcd->rcvegrbuf[e])
1591 continue; 1680 continue;
1681
1682 old_node_id = dev_to_node(&dd->pcidev->dev);
1683 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1592 rcd->rcvegrbuf[e] = 1684 rcd->rcvegrbuf[e] =
1593 dma_alloc_coherent(&dd->pcidev->dev, size, 1685 dma_alloc_coherent(&dd->pcidev->dev, size,
1594 &rcd->rcvegrbuf_phys[e], 1686 &rcd->rcvegrbuf_phys[e],
1595 gfp_flags); 1687 gfp_flags);
1688 set_dev_node(&dd->pcidev->dev, old_node_id);
1596 if (!rcd->rcvegrbuf[e]) 1689 if (!rcd->rcvegrbuf[e])
1597 goto bail_rcvegrbuf_phys; 1690 goto bail_rcvegrbuf_phys;
1598 } 1691 }
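
A note on the allocation changes in qib_init.c above: receive-context memory is now
placed on the NUMA node nearest the HCA. kzalloc_node() covers the plain kernel
allocations directly, while DMA-coherent buffers are steered by temporarily pointing
the PCI device at the desired node with set_dev_node() and restoring it afterwards,
since dma_alloc_coherent() takes no node argument. A minimal sketch of that idiom,
with a hypothetical helper name:

#include <linux/device.h>
#include <linux/dma-mapping.h>

static void *dma_alloc_coherent_node(struct device *dev, size_t size,
                                     dma_addr_t *handle, gfp_t gfp, int node)
{
        int orig_node = dev_to_node(dev);
        void *buf;

        set_dev_node(dev, node);        /* steer the backing pages to 'node' */
        buf = dma_alloc_coherent(dev, size, handle, gfp);
        set_dev_node(dev, orig_node);   /* restore the device's real node */
        return buf;
}

The override is visible to any other allocator using the same struct device, so the
idiom relies on the setup paths not racing with other allocations against the device.
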
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index a6a2cc2ba260..3cca55b51e54 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -35,6 +35,9 @@
35#include <linux/err.h> 35#include <linux/err.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#ifdef CONFIG_DEBUG_FS
39#include <linux/seq_file.h>
40#endif
38 41
39#include "qib.h" 42#include "qib.h"
40 43
@@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
222 unsigned long flags; 225 unsigned long flags;
223 unsigned n = qpn_hash(dev, qp->ibqp.qp_num); 226 unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
224 227
225 spin_lock_irqsave(&dev->qpt_lock, flags);
226 atomic_inc(&qp->refcount); 228 atomic_inc(&qp->refcount);
229 spin_lock_irqsave(&dev->qpt_lock, flags);
227 230
228 if (qp->ibqp.qp_num == 0) 231 if (qp->ibqp.qp_num == 0)
229 rcu_assign_pointer(ibp->qp0, qp); 232 rcu_assign_pointer(ibp->qp0, qp);
@@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
235 } 238 }
236 239
237 spin_unlock_irqrestore(&dev->qpt_lock, flags); 240 spin_unlock_irqrestore(&dev->qpt_lock, flags);
238 synchronize_rcu();
239} 241}
240 242
241/* 243/*
@@ -247,36 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
247 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 249 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
248 unsigned n = qpn_hash(dev, qp->ibqp.qp_num); 250 unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
249 unsigned long flags; 251 unsigned long flags;
252 int removed = 1;
250 253
251 spin_lock_irqsave(&dev->qpt_lock, flags); 254 spin_lock_irqsave(&dev->qpt_lock, flags);
252 255
253 if (rcu_dereference_protected(ibp->qp0, 256 if (rcu_dereference_protected(ibp->qp0,
254 lockdep_is_held(&dev->qpt_lock)) == qp) { 257 lockdep_is_held(&dev->qpt_lock)) == qp) {
255 atomic_dec(&qp->refcount);
256 rcu_assign_pointer(ibp->qp0, NULL); 258 rcu_assign_pointer(ibp->qp0, NULL);
257 } else if (rcu_dereference_protected(ibp->qp1, 259 } else if (rcu_dereference_protected(ibp->qp1,
258 lockdep_is_held(&dev->qpt_lock)) == qp) { 260 lockdep_is_held(&dev->qpt_lock)) == qp) {
259 atomic_dec(&qp->refcount);
260 rcu_assign_pointer(ibp->qp1, NULL); 261 rcu_assign_pointer(ibp->qp1, NULL);
261 } else { 262 } else {
262 struct qib_qp *q; 263 struct qib_qp *q;
263 struct qib_qp __rcu **qpp; 264 struct qib_qp __rcu **qpp;
264 265
266 removed = 0;
265 qpp = &dev->qp_table[n]; 267 qpp = &dev->qp_table[n];
266 for (; (q = rcu_dereference_protected(*qpp, 268 for (; (q = rcu_dereference_protected(*qpp,
267 lockdep_is_held(&dev->qpt_lock))) != NULL; 269 lockdep_is_held(&dev->qpt_lock))) != NULL;
268 qpp = &q->next) 270 qpp = &q->next)
269 if (q == qp) { 271 if (q == qp) {
270 atomic_dec(&qp->refcount);
271 rcu_assign_pointer(*qpp, 272 rcu_assign_pointer(*qpp,
272 rcu_dereference_protected(qp->next, 273 rcu_dereference_protected(qp->next,
273 lockdep_is_held(&dev->qpt_lock))); 274 lockdep_is_held(&dev->qpt_lock)));
275 removed = 1;
274 break; 276 break;
275 } 277 }
276 } 278 }
277 279
278 spin_unlock_irqrestore(&dev->qpt_lock, flags); 280 spin_unlock_irqrestore(&dev->qpt_lock, flags);
279 synchronize_rcu(); 281 if (removed) {
282 synchronize_rcu();
283 atomic_dec(&qp->refcount);
284 }
280} 285}
281 286
282/** 287/**
@@ -334,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
334{ 339{
335 struct qib_qp *qp = NULL; 340 struct qib_qp *qp = NULL;
336 341
342 rcu_read_lock();
337 if (unlikely(qpn <= 1)) { 343 if (unlikely(qpn <= 1)) {
338 rcu_read_lock();
339 if (qpn == 0) 344 if (qpn == 0)
340 qp = rcu_dereference(ibp->qp0); 345 qp = rcu_dereference(ibp->qp0);
341 else 346 else
342 qp = rcu_dereference(ibp->qp1); 347 qp = rcu_dereference(ibp->qp1);
348 if (qp)
349 atomic_inc(&qp->refcount);
343 } else { 350 } else {
344 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; 351 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
345 unsigned n = qpn_hash(dev, qpn); 352 unsigned n = qpn_hash(dev, qpn);
346 353
347 rcu_read_lock();
348 for (qp = rcu_dereference(dev->qp_table[n]); qp; 354 for (qp = rcu_dereference(dev->qp_table[n]); qp;
349 qp = rcu_dereference(qp->next)) 355 qp = rcu_dereference(qp->next))
350 if (qp->ibqp.qp_num == qpn) 356 if (qp->ibqp.qp_num == qpn) {
357 atomic_inc(&qp->refcount);
351 break; 358 break;
359 }
352 } 360 }
353 if (qp)
354 if (unlikely(!atomic_inc_not_zero(&qp->refcount)))
355 qp = NULL;
356
357 rcu_read_unlock(); 361 rcu_read_unlock();
358 return qp; 362 return qp;
359} 363}
@@ -1286,3 +1290,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
1286 } 1290 }
1287 } 1291 }
1288} 1292}
1293
1294#ifdef CONFIG_DEBUG_FS
1295
1296struct qib_qp_iter {
1297 struct qib_ibdev *dev;
1298 struct qib_qp *qp;
1299 int n;
1300};
1301
1302struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
1303{
1304 struct qib_qp_iter *iter;
1305
1306 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1307 if (!iter)
1308 return NULL;
1309
1310 iter->dev = dev;
1311 if (qib_qp_iter_next(iter)) {
1312 kfree(iter);
1313 return NULL;
1314 }
1315
1316 return iter;
1317}
1318
1319int qib_qp_iter_next(struct qib_qp_iter *iter)
1320{
1321 struct qib_ibdev *dev = iter->dev;
1322 int n = iter->n;
1323 int ret = 1;
1324 struct qib_qp *pqp = iter->qp;
1325 struct qib_qp *qp;
1326
1327 rcu_read_lock();
1328 for (; n < dev->qp_table_size; n++) {
1329 if (pqp)
1330 qp = rcu_dereference(pqp->next);
1331 else
1332 qp = rcu_dereference(dev->qp_table[n]);
1333 pqp = qp;
1334 if (qp) {
1335 if (iter->qp)
1336 atomic_dec(&iter->qp->refcount);
1337 atomic_inc(&qp->refcount);
1338 rcu_read_unlock();
1339 iter->qp = qp;
1340 iter->n = n;
1341 return 0;
1342 }
1343 }
1344 rcu_read_unlock();
1345 if (iter->qp)
1346 atomic_dec(&iter->qp->refcount);
1347 return ret;
1348}
1349
1350static const char * const qp_type_str[] = {
1351 "SMI", "GSI", "RC", "UC", "UD",
1352};
1353
1354void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
1355{
1356 struct qib_swqe *wqe;
1357 struct qib_qp *qp = iter->qp;
1358
1359 wqe = get_swqe_ptr(qp, qp->s_last);
1360 seq_printf(s,
1361 "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
1362 iter->n,
1363 qp->ibqp.qp_num,
1364 qp_type_str[qp->ibqp.qp_type],
1365 qp->state,
1366 wqe->wr.opcode,
1367 qp->s_hdrwords,
1368 qp->s_flags,
1369 atomic_read(&qp->s_dma_busy),
1370 !list_empty(&qp->iowait),
1371 qp->timeout,
1372 wqe->ssn,
1373 qp->s_lsn,
1374 qp->s_last_psn,
1375 qp->s_psn, qp->s_next_psn,
1376 qp->s_sending_psn, qp->s_sending_hpsn,
1377 qp->s_last, qp->s_acked, qp->s_cur,
1378 qp->s_tail, qp->s_head, qp->s_size,
1379 qp->remote_qpn,
1380 qp->remote_ah_attr.dlid);
1381}
1382
1383#endif
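
A note on the qib_qp.c hunks above: they change when a QP's reference count is taken
and dropped relative to RCU. qib_lookup_qpn() now takes its reference while still
inside rcu_read_lock(), insert_qp() takes the reference before publishing the pointer
and no longer calls synchronize_rcu(), and remove_qp() waits for a grace period
before dropping the table's reference, so a concurrent reader can never observe the
count reaching zero while it still holds an RCU-protected pointer. A generic sketch
of that ordering, with a hypothetical struct obj standing in for the QP:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

struct obj {
        struct obj __rcu *next;
        atomic_t refcount;
        u32 key;
};

static struct obj *obj_lookup(struct obj __rcu **head, u32 key)
{
        struct obj *o;

        rcu_read_lock();
        for (o = rcu_dereference(*head); o; o = rcu_dereference(o->next))
                if (o->key == key) {
                        atomic_inc(&o->refcount);  /* ref taken under RCU */
                        break;
                }
        rcu_read_unlock();
        return o;
}

static void obj_unlink(spinlock_t *lock, struct obj __rcu **prevp,
                       struct obj *o)
{
        spin_lock(lock);
        rcu_assign_pointer(*prevp,
                           rcu_dereference_protected(o->next,
                                                     lockdep_is_held(lock)));
        spin_unlock(lock);
        synchronize_rcu();              /* may sleep; lock already dropped */
        atomic_dec(&o->refcount);       /* now drop the list's reference */
}
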
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc514431212..32162d355370 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -708,6 +708,62 @@ unlock:
708 return ret; 708 return ret;
709} 709}
710 710
711/*
712 * sdma_lock should be acquired before calling this routine
713 */
714void dump_sdma_state(struct qib_pportdata *ppd)
715{
716 struct qib_sdma_desc *descq;
717 struct qib_sdma_txreq *txp, *txpnext;
718 __le64 *descqp;
719 u64 desc[2];
720 dma_addr_t addr;
721 u16 gen, dwlen, dwoffset;
722 u16 head, tail, cnt;
723
724 head = ppd->sdma_descq_head;
725 tail = ppd->sdma_descq_tail;
726 cnt = qib_sdma_descq_freecnt(ppd);
727 descq = ppd->sdma_descq;
728
729 qib_dev_porterr(ppd->dd, ppd->port,
730 "SDMA ppd->sdma_descq_head: %u\n", head);
731 qib_dev_porterr(ppd->dd, ppd->port,
732 "SDMA ppd->sdma_descq_tail: %u\n", tail);
733 qib_dev_porterr(ppd->dd, ppd->port,
734 "SDMA sdma_descq_freecnt: %u\n", cnt);
735
736 /* print info for each entry in the descriptor queue */
737 while (head != tail) {
738 char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
739
740 descqp = &descq[head].qw[0];
741 desc[0] = le64_to_cpu(descqp[0]);
742 desc[1] = le64_to_cpu(descqp[1]);
743 flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
744 flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
745 flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
746 flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
747 flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
748 addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
749 gen = (desc[0] >> 30) & 3ULL;
750 dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
751 dwoffset = (desc[0] & 0x7ffULL) << 2;
752 qib_dev_porterr(ppd->dd, ppd->port,
753 "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
754 head, flags, addr, gen, dwlen, dwoffset);
755 if (++head == ppd->sdma_descq_cnt)
756 head = 0;
757 }
758
759 /* print dma descriptor indices from the TX requests */
760 list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
761 list)
762 qib_dev_porterr(ppd->dd, ppd->port,
763 "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
764 txp->start_idx, txp->next_descq_idx);
765}
766
711void qib_sdma_process_event(struct qib_pportdata *ppd, 767void qib_sdma_process_event(struct qib_pportdata *ppd,
712 enum qib_sdma_events event) 768 enum qib_sdma_events event)
713{ 769{
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 904c384aa361..092b0bb1bb78 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
645 } else 645 } else
646 goto drop; 646 goto drop;
647 647
648 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 648 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
649 ibp->opstats[opcode & 0x7f].n_bytes += tlen; 649#ifdef CONFIG_DEBUG_FS
650 ibp->opstats[opcode & 0x7f].n_packets++; 650 rcd->opstats->stats[opcode].n_bytes += tlen;
651 rcd->opstats->stats[opcode].n_packets++;
652#endif
651 653
652 /* Get the destination QP number. */ 654 /* Get the destination QP number. */
653 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; 655 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index aff8b2c17886..012e2c7575ad 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -41,6 +41,7 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/kref.h> 42#include <linux/kref.h>
43#include <linux/workqueue.h> 43#include <linux/workqueue.h>
44#include <linux/kthread.h>
44#include <linux/completion.h> 45#include <linux/completion.h>
45#include <rdma/ib_pack.h> 46#include <rdma/ib_pack.h>
46#include <rdma/ib_user_verbs.h> 47#include <rdma/ib_user_verbs.h>
@@ -267,7 +268,8 @@ struct qib_cq_wc {
267 */ 268 */
268struct qib_cq { 269struct qib_cq {
269 struct ib_cq ibcq; 270 struct ib_cq ibcq;
270 struct work_struct comptask; 271 struct kthread_work comptask;
272 struct qib_devdata *dd;
271 spinlock_t lock; /* protect changes in this struct */ 273 spinlock_t lock; /* protect changes in this struct */
272 u8 notify; 274 u8 notify;
273 u8 triggered; 275 u8 triggered;
@@ -658,6 +660,10 @@ struct qib_opcode_stats {
658 u64 n_bytes; /* total number of bytes */ 660 u64 n_bytes; /* total number of bytes */
659}; 661};
660 662
663struct qib_opcode_stats_perctx {
664 struct qib_opcode_stats stats[128];
665};
666
661struct qib_ibport { 667struct qib_ibport {
662 struct qib_qp __rcu *qp0; 668 struct qib_qp __rcu *qp0;
663 struct qib_qp __rcu *qp1; 669 struct qib_qp __rcu *qp1;
@@ -724,7 +730,6 @@ struct qib_ibport {
724 u8 vl_high_limit; 730 u8 vl_high_limit;
725 u8 sl_to_vl[16]; 731 u8 sl_to_vl[16];
726 732
727 struct qib_opcode_stats opstats[128];
728}; 733};
729 734
730 735
@@ -768,6 +773,10 @@ struct qib_ibdev {
768 spinlock_t n_srqs_lock; 773 spinlock_t n_srqs_lock;
769 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ 774 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
770 spinlock_t n_mcast_grps_lock; 775 spinlock_t n_mcast_grps_lock;
776#ifdef CONFIG_DEBUG_FS
777 /* per HCA debugfs */
778 struct dentry *qib_ibdev_dbg;
779#endif
771}; 780};
772 781
773struct qib_verbs_counters { 782struct qib_verbs_counters {
@@ -832,8 +841,6 @@ static inline int qib_send_ok(struct qib_qp *qp)
832 !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); 841 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
833} 842}
834 843
835extern struct workqueue_struct *qib_cq_wq;
836
837/* 844/*
838 * This must be called with s_lock held. 845 * This must be called with s_lock held.
839 */ 846 */
@@ -910,6 +917,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
910 917
911void qib_free_qpn_table(struct qib_qpn_table *qpt); 918void qib_free_qpn_table(struct qib_qpn_table *qpt);
912 919
920#ifdef CONFIG_DEBUG_FS
921
922struct qib_qp_iter;
923
924struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
925
926int qib_qp_iter_next(struct qib_qp_iter *iter);
927
928void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
929
930#endif
931
913void qib_get_credit(struct qib_qp *qp, u32 aeth); 932void qib_get_credit(struct qib_qp *qp, u32 aeth);
914 933
915unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); 934unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -972,6 +991,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
972 991
973int qib_destroy_srq(struct ib_srq *ibsrq); 992int qib_destroy_srq(struct ib_srq *ibsrq);
974 993
994int qib_cq_init(struct qib_devdata *dd);
995
996void qib_cq_exit(struct qib_devdata *dd);
997
975void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); 998void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
976 999
977int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); 1000int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
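
A note on the completion-queue changes visible in qib_verbs.h above: the driver-wide
qib_cq_wq workqueue is gone and each CQ's completion callback is now a kthread_work,
dispatched by a dedicated worker thread that qib_cq_init() presumably creates (only
the declarations are visible in these hunks). A minimal sketch of the kthread_worker
pattern with placeholder my_cq_* names; it uses the current API spellings
(kthread_create_worker() and friends), whereas the patch-era kernel spelled these
init_kthread_worker()/queue_kthread_work().

#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/printk.h>

struct my_cq {
        struct kthread_work comptask;   /* queued when new CQEs arrive */
        /* ... completion queue state ... */
};

static void my_cq_complete(struct kthread_work *work)
{
        struct my_cq *cq = container_of(work, struct my_cq, comptask);

        /* invoke the consumer's completion handler for 'cq' here */
        pr_debug("my_cq: completing %p\n", cq);
}

/* one thread serializes all CQ callbacks for a device; caller checks IS_ERR() */
static struct kthread_worker *my_cq_worker_create(void)
{
        return kthread_create_worker(0, "my_cq_wrk");
}

static void my_cq_init(struct my_cq *cq)
{
        kthread_init_work(&cq->comptask, my_cq_complete);
}

static void my_cq_trigger(struct kthread_worker *worker, struct my_cq *cq)
{
        kthread_queue_work(worker, &cq->comptask);
}
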
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7ccf3284dda3..f93baf8254c4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -53,8 +53,8 @@
53 53
54#define DRV_NAME "ib_srp" 54#define DRV_NAME "ib_srp"
55#define PFX DRV_NAME ": " 55#define PFX DRV_NAME ": "
56#define DRV_VERSION "0.2" 56#define DRV_VERSION "1.0"
57#define DRV_RELDATE "November 1, 2005" 57#define DRV_RELDATE "July 1, 2013"
58 58
59MODULE_AUTHOR("Roland Dreier"); 59MODULE_AUTHOR("Roland Dreier");
60MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " 60MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
@@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
231 return -ENOMEM; 231 return -ENOMEM;
232 232
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); 234 srp_recv_completion, NULL, target, SRP_RQ_SIZE,
235 target->comp_vector);
235 if (IS_ERR(recv_cq)) { 236 if (IS_ERR(recv_cq)) {
236 ret = PTR_ERR(recv_cq); 237 ret = PTR_ERR(recv_cq);
237 goto err; 238 goto err;
238 } 239 }
239 240
240 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 241 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
241 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); 242 srp_send_completion, NULL, target, SRP_SQ_SIZE,
243 target->comp_vector);
242 if (IS_ERR(send_cq)) { 244 if (IS_ERR(send_cq)) {
243 ret = PTR_ERR(send_cq); 245 ret = PTR_ERR(send_cq);
244 goto err_recv_cq; 246 goto err_recv_cq;
@@ -542,11 +544,11 @@ static void srp_remove_work(struct work_struct *work)
542 544
543 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 545 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
544 546
547 srp_remove_target(target);
548
545 spin_lock(&target->srp_host->target_lock); 549 spin_lock(&target->srp_host->target_lock);
546 list_del(&target->list); 550 list_del(&target->list);
547 spin_unlock(&target->srp_host->target_lock); 551 spin_unlock(&target->srp_host->target_lock);
548
549 srp_remove_target(target);
550} 552}
551 553
552static void srp_rport_delete(struct srp_rport *rport) 554static void srp_rport_delete(struct srp_rport *rport)
@@ -1744,18 +1746,24 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1744{ 1746{
1745 struct srp_target_port *target = host_to_target(scmnd->device->host); 1747 struct srp_target_port *target = host_to_target(scmnd->device->host);
1746 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 1748 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
1749 int ret;
1747 1750
1748 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1751 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1749 1752
1750 if (!req || !srp_claim_req(target, req, scmnd)) 1753 if (!req || !srp_claim_req(target, req, scmnd))
1751 return FAILED; 1754 return FAILED;
1752 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, 1755 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1753 SRP_TSK_ABORT_TASK); 1756 SRP_TSK_ABORT_TASK) == 0)
1757 ret = SUCCESS;
1758 else if (target->transport_offline)
1759 ret = FAST_IO_FAIL;
1760 else
1761 ret = FAILED;
1754 srp_free_req(target, req, scmnd, 0); 1762 srp_free_req(target, req, scmnd, 0);
1755 scmnd->result = DID_ABORT << 16; 1763 scmnd->result = DID_ABORT << 16;
1756 scmnd->scsi_done(scmnd); 1764 scmnd->scsi_done(scmnd);
1757 1765
1758 return SUCCESS; 1766 return ret;
1759} 1767}
1760 1768
1761static int srp_reset_device(struct scsi_cmnd *scmnd) 1769static int srp_reset_device(struct scsi_cmnd *scmnd)
@@ -1891,6 +1899,14 @@ static ssize_t show_local_ib_device(struct device *dev,
1891 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 1899 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
1892} 1900}
1893 1901
1902static ssize_t show_comp_vector(struct device *dev,
1903 struct device_attribute *attr, char *buf)
1904{
1905 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1906
1907 return sprintf(buf, "%d\n", target->comp_vector);
1908}
1909
1894static ssize_t show_cmd_sg_entries(struct device *dev, 1910static ssize_t show_cmd_sg_entries(struct device *dev,
1895 struct device_attribute *attr, char *buf) 1911 struct device_attribute *attr, char *buf)
1896{ 1912{
@@ -1917,6 +1933,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
1917static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 1933static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1918static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 1934static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
1919static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 1935static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
1936static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
1920static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 1937static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
1921static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 1938static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
1922 1939
@@ -1931,6 +1948,7 @@ static struct device_attribute *srp_host_attrs[] = {
 	&dev_attr_zero_req_lim,
 	&dev_attr_local_ib_port,
 	&dev_attr_local_ib_device,
+	&dev_attr_comp_vector,
 	&dev_attr_cmd_sg_entries,
 	&dev_attr_allow_ext_sg,
 	NULL
@@ -1946,6 +1964,7 @@ static struct scsi_host_template srp_template = {
 	.eh_abort_handler = srp_abort,
 	.eh_device_reset_handler = srp_reset_device,
 	.eh_host_reset_handler = srp_reset_host,
+	.skip_settle_delay = true,
 	.sg_tablesize = SRP_DEF_SG_TABLESIZE,
 	.can_queue = SRP_CMD_SQ_SIZE,
 	.this_id = -1,
@@ -2001,6 +2020,36 @@ static struct class srp_class = {
 	.dev_release = srp_release_dev
 };
 
+/**
+ * srp_conn_unique() - check whether the connection to a target is unique
+ */
+static bool srp_conn_unique(struct srp_host *host,
+			    struct srp_target_port *target)
+{
+	struct srp_target_port *t;
+	bool ret = false;
+
+	if (target->state == SRP_TARGET_REMOVED)
+		goto out;
+
+	ret = true;
+
+	spin_lock(&host->target_lock);
+	list_for_each_entry(t, &host->target_list, list) {
+		if (t != target &&
+		    target->id_ext == t->id_ext &&
+		    target->ioc_guid == t->ioc_guid &&
+		    target->initiator_ext == t->initiator_ext) {
+			ret = false;
+			break;
+		}
+	}
+	spin_unlock(&host->target_lock);
+
+out:
+	return ret;
+}
+
 /*
  * Target ports are added by writing
  *
@@ -2023,6 +2072,7 @@ enum {
 	SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
 	SRP_OPT_ALLOW_EXT_SG = 1 << 10,
 	SRP_OPT_SG_TABLESIZE = 1 << 11,
+	SRP_OPT_COMP_VECTOR = 1 << 12,
 	SRP_OPT_ALL = (SRP_OPT_ID_EXT |
 		       SRP_OPT_IOC_GUID |
 		       SRP_OPT_DGID |
@@ -2043,6 +2093,7 @@ static const match_table_t srp_opt_tokens = {
 	{ SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
 	{ SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
 	{ SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
+	{ SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
 	{ SRP_OPT_ERR, NULL }
 };
 
@@ -2198,6 +2249,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
 		target->sg_tablesize = token;
 		break;
 
+	case SRP_OPT_COMP_VECTOR:
+		if (match_int(args, &token) || token < 0) {
+			pr_warn("bad comp_vector parameter '%s'\n", p);
+			goto out;
+		}
+		target->comp_vector = token;
+		break;
+
 	default:
 		pr_warn("unknown parameter or missing value '%s' in target creation request\n",
 			p);
@@ -2257,6 +2316,16 @@ static ssize_t srp_create_target(struct device *dev,
 	if (ret)
 		goto err;
 
+	if (!srp_conn_unique(target->srp_host, target)) {
+		shost_printk(KERN_INFO, target->scsi_host,
+			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
+			     be64_to_cpu(target->id_ext),
+			     be64_to_cpu(target->ioc_guid),
+			     be64_to_cpu(target->initiator_ext));
+		ret = -EEXIST;
+		goto err;
+	}
+
 	if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
 	    target->cmd_sg_cnt < target->sg_tablesize) {
 		pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
@@ -2507,6 +2576,8 @@ static void srp_remove_one(struct ib_device *device)
 	struct srp_target_port *target;
 
 	srp_dev = ib_get_client_data(device, &srp_client);
+	if (!srp_dev)
+		return;
 
 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
 		device_unregister(&host->dev);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 66fbedda4571..e641088c14dc 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -156,6 +156,7 @@ struct srp_target_port {
 	char target_name[32];
 	unsigned int scsi_id;
 	unsigned int sg_tablesize;
+	int comp_vector;
 
 	struct ib_sa_path_rec path;
 	__be16 orig_dgid[8];
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig
index bcdbc14aeff0..8cf7563a8d92 100644
--- a/drivers/net/ethernet/mellanox/Kconfig
+++ b/drivers/net/ethernet/mellanox/Kconfig
@@ -19,5 +19,6 @@ config NET_VENDOR_MELLANOX
 if NET_VENDOR_MELLANOX
 
 source "drivers/net/ethernet/mellanox/mlx4/Kconfig"
+source "drivers/net/ethernet/mellanox/mlx5/core/Kconfig"
 
 endif # NET_VENDOR_MELLANOX
diff --git a/drivers/net/ethernet/mellanox/Makefile b/drivers/net/ethernet/mellanox/Makefile
index 37afb9683372..38fe32ef5e5f 100644
--- a/drivers/net/ethernet/mellanox/Makefile
+++ b/drivers/net/ethernet/mellanox/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_MLX4_CORE) += mlx4/
+obj-$(CONFIG_MLX5_CORE) += mlx5/core/
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
new file mode 100644
index 000000000000..21962828925a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -0,0 +1,18 @@
1#
2# Mellanox driver configuration
3#
4
5config MLX5_CORE
6 tristate
7 depends on PCI && X86
8 default n
9
10config MLX5_DEBUG
11 bool "Verbose debugging output" if (MLX5_CORE && EXPERT)
12 depends on MLX5_CORE
13 default y
14 ---help---
15 This option causes debugging code to be compiled into the
16 mlx5_core driver. The output can be turned on via the
17 debug_mask module parameter (which can also be set after
18 the driver is loaded through sysfs).
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
new file mode 100644
index 000000000000..105780bb980b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -0,0 +1,5 @@
1obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
2
3mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
4 health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
5 mad.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
new file mode 100644
index 000000000000..b215742b842f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -0,0 +1,238 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/errno.h>
34#include <linux/slab.h>
35#include <linux/mm.h>
36#include <linux/export.h>
37#include <linux/bitmap.h>
38#include <linux/dma-mapping.h>
39#include <linux/vmalloc.h>
40#include <linux/mlx5/driver.h>
41
42#include "mlx5_core.h"
43
44/* Handling for queue buffers -- we allocate a bunch of memory and
45 * register it in a memory region at HCA virtual address 0. If the
46 * requested size is > max_direct, we split the allocation into
47 * multiple pages, so we don't require too much contiguous memory.
48 */
49
50int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
51 struct mlx5_buf *buf)
52{
53 dma_addr_t t;
54
55 buf->size = size;
56 if (size <= max_direct) {
57 buf->nbufs = 1;
58 buf->npages = 1;
59 buf->page_shift = get_order(size) + PAGE_SHIFT;
60 buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev,
61 size, &t, GFP_KERNEL);
62 if (!buf->direct.buf)
63 return -ENOMEM;
64
65 buf->direct.map = t;
66
67 while (t & ((1 << buf->page_shift) - 1)) {
68 --buf->page_shift;
69 buf->npages *= 2;
70 }
71 } else {
72 int i;
73
74 buf->direct.buf = NULL;
75 buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
76 buf->npages = buf->nbufs;
77 buf->page_shift = PAGE_SHIFT;
78 buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
79 GFP_KERNEL);
80 if (!buf->page_list)
81 return -ENOMEM;
82
83 for (i = 0; i < buf->nbufs; i++) {
84 buf->page_list[i].buf =
85 dma_zalloc_coherent(&dev->pdev->dev, PAGE_SIZE,
86 &t, GFP_KERNEL);
87 if (!buf->page_list[i].buf)
88 goto err_free;
89
90 buf->page_list[i].map = t;
91 }
92
93 if (BITS_PER_LONG == 64) {
94 struct page **pages;
95 pages = kmalloc(sizeof(*pages) * buf->nbufs, GFP_KERNEL);
96 if (!pages)
97 goto err_free;
98 for (i = 0; i < buf->nbufs; i++)
99 pages[i] = virt_to_page(buf->page_list[i].buf);
100 buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL);
101 kfree(pages);
102 if (!buf->direct.buf)
103 goto err_free;
104 }
105 }
106
107 return 0;
108
109err_free:
110 mlx5_buf_free(dev, buf);
111
112 return -ENOMEM;
113}
114EXPORT_SYMBOL_GPL(mlx5_buf_alloc);
115
116void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
117{
118 int i;
119
120 if (buf->nbufs == 1)
121 dma_free_coherent(&dev->pdev->dev, buf->size, buf->direct.buf,
122 buf->direct.map);
123 else {
124 if (BITS_PER_LONG == 64 && buf->direct.buf)
125 vunmap(buf->direct.buf);
126
127 for (i = 0; i < buf->nbufs; i++)
128 if (buf->page_list[i].buf)
129 dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
130 buf->page_list[i].buf,
131 buf->page_list[i].map);
132 kfree(buf->page_list);
133 }
134}
135EXPORT_SYMBOL_GPL(mlx5_buf_free);
136
137static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device)
138{
139 struct mlx5_db_pgdir *pgdir;
140
141 pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
142 if (!pgdir)
143 return NULL;
144
145 bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE);
146 pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
147 &pgdir->db_dma, GFP_KERNEL);
148 if (!pgdir->db_page) {
149 kfree(pgdir);
150 return NULL;
151 }
152
153 return pgdir;
154}
155
156static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
157 struct mlx5_db *db)
158{
159 int offset;
160 int i;
161
162 i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE);
163 if (i >= MLX5_DB_PER_PAGE)
164 return -ENOMEM;
165
166 __clear_bit(i, pgdir->bitmap);
167
168 db->u.pgdir = pgdir;
169 db->index = i;
170 offset = db->index * L1_CACHE_BYTES;
171 db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page);
172 db->dma = pgdir->db_dma + offset;
173
174 return 0;
175}
176
177int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db)
178{
179 struct mlx5_db_pgdir *pgdir;
180 int ret = 0;
181
182 mutex_lock(&dev->priv.pgdir_mutex);
183
184 list_for_each_entry(pgdir, &dev->priv.pgdir_list, list)
185 if (!mlx5_alloc_db_from_pgdir(pgdir, db))
186 goto out;
187
188 pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev));
189 if (!pgdir) {
190 ret = -ENOMEM;
191 goto out;
192 }
193
194 list_add(&pgdir->list, &dev->priv.pgdir_list);
195
196 /* This should never fail -- we just allocated an empty page: */
197 WARN_ON(mlx5_alloc_db_from_pgdir(pgdir, db));
198
199out:
200 mutex_unlock(&dev->priv.pgdir_mutex);
201
202 return ret;
203}
204EXPORT_SYMBOL_GPL(mlx5_db_alloc);
205
206void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
207{
208 mutex_lock(&dev->priv.pgdir_mutex);
209
210 __set_bit(db->index, db->u.pgdir->bitmap);
211
212 if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) {
213 dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
214 db->u.pgdir->db_page, db->u.pgdir->db_dma);
215 list_del(&db->u.pgdir->list);
216 kfree(db->u.pgdir);
217 }
218
219 mutex_unlock(&dev->priv.pgdir_mutex);
220}
221EXPORT_SYMBOL_GPL(mlx5_db_free);
222
223
224void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
225{
226 u64 addr;
227 int i;
228
229 for (i = 0; i < buf->npages; i++) {
230 if (buf->nbufs == 1)
231 addr = buf->direct.map + (i << buf->page_shift);
232 else
233 addr = buf->page_list[i].map;
234
235 pas[i] = cpu_to_be64(addr);
236 }
237}
238EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
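The exported helpers above make up the low-level buffer and doorbell allocation API that later patches in this series consume. As a rough sketch only (the wrapper function, its name, and the max_direct value of 2 * PAGE_SIZE are illustrative assumptions, not part of the patch), a caller would pair mlx5_buf_alloc() with mlx5_fill_page_array() and release the buffer with mlx5_buf_free():

#include <linux/slab.h>
#include <linux/mlx5/driver.h>

/* Hypothetical consumer: allocate a queue buffer, build the big-endian
 * page array that a create-resource mailbox command expects, then free it.
 */
static int example_alloc_queue_buf(struct mlx5_core_dev *dev, int size)
{
	struct mlx5_buf buf;
	__be64 *pas;
	int err;

	/* Fall back to a page list when more than 2 * PAGE_SIZE is needed */
	err = mlx5_buf_alloc(dev, size, 2 * PAGE_SIZE, &buf);
	if (err)
		return err;

	pas = kcalloc(buf.npages, sizeof(*pas), GFP_KERNEL);
	if (!pas) {
		err = -ENOMEM;
		goto out_buf;
	}

	/* One big-endian DMA address per page, as firmware expects */
	mlx5_fill_page_array(&buf, pas);

	/* ... copy 'pas' into the command mailbox here ... */

	kfree(pas);
out_buf:
	mlx5_buf_free(dev, &buf);
	return err;
}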
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
new file mode 100644
index 000000000000..205753a04cfc
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -0,0 +1,1515 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/pci.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <linux/delay.h>
41#include <linux/random.h>
42#include <linux/io-mapping.h>
43#include <linux/mlx5/driver.h>
44#include <linux/debugfs.h>
45
46#include "mlx5_core.h"
47
48enum {
49 CMD_IF_REV = 3,
50};
51
52enum {
53 CMD_MODE_POLLING,
54 CMD_MODE_EVENTS
55};
56
57enum {
58 NUM_LONG_LISTS = 2,
59 NUM_MED_LISTS = 64,
60 LONG_LIST_SIZE = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
61 MLX5_CMD_DATA_BLOCK_SIZE,
62 MED_LIST_SIZE = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
63};
64
65enum {
66 MLX5_CMD_DELIVERY_STAT_OK = 0x0,
67 MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR = 0x1,
68 MLX5_CMD_DELIVERY_STAT_TOK_ERR = 0x2,
69 MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR = 0x3,
70 MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR = 0x4,
71 MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR = 0x5,
72 MLX5_CMD_DELIVERY_STAT_FW_ERR = 0x6,
73 MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR = 0x7,
74 MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR = 0x8,
75 MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR = 0x9,
76 MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10,
77};
78
79enum {
80 MLX5_CMD_STAT_OK = 0x0,
81 MLX5_CMD_STAT_INT_ERR = 0x1,
82 MLX5_CMD_STAT_BAD_OP_ERR = 0x2,
83 MLX5_CMD_STAT_BAD_PARAM_ERR = 0x3,
84 MLX5_CMD_STAT_BAD_SYS_STATE_ERR = 0x4,
85 MLX5_CMD_STAT_BAD_RES_ERR = 0x5,
86 MLX5_CMD_STAT_RES_BUSY = 0x6,
87 MLX5_CMD_STAT_LIM_ERR = 0x8,
88 MLX5_CMD_STAT_BAD_RES_STATE_ERR = 0x9,
89 MLX5_CMD_STAT_IX_ERR = 0xa,
90 MLX5_CMD_STAT_NO_RES_ERR = 0xf,
91 MLX5_CMD_STAT_BAD_INP_LEN_ERR = 0x50,
92 MLX5_CMD_STAT_BAD_OUTP_LEN_ERR = 0x51,
93 MLX5_CMD_STAT_BAD_QP_STATE_ERR = 0x10,
94 MLX5_CMD_STAT_BAD_PKT_ERR = 0x30,
95 MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40,
96};
97
98static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
99 struct mlx5_cmd_msg *in,
100 struct mlx5_cmd_msg *out,
101 mlx5_cmd_cbk_t cbk,
102 void *context, int page_queue)
103{
104 gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
105 struct mlx5_cmd_work_ent *ent;
106
107 ent = kzalloc(sizeof(*ent), alloc_flags);
108 if (!ent)
109 return ERR_PTR(-ENOMEM);
110
111 ent->in = in;
112 ent->out = out;
113 ent->callback = cbk;
114 ent->context = context;
115 ent->cmd = cmd;
116 ent->page_queue = page_queue;
117
118 return ent;
119}
120
121static u8 alloc_token(struct mlx5_cmd *cmd)
122{
123 u8 token;
124
125 spin_lock(&cmd->token_lock);
126 token = cmd->token++ % 255 + 1;
127 spin_unlock(&cmd->token_lock);
128
129 return token;
130}
131
132static int alloc_ent(struct mlx5_cmd *cmd)
133{
134 unsigned long flags;
135 int ret;
136
137 spin_lock_irqsave(&cmd->alloc_lock, flags);
138 ret = find_first_bit(&cmd->bitmask, cmd->max_reg_cmds);
139 if (ret < cmd->max_reg_cmds)
140 clear_bit(ret, &cmd->bitmask);
141 spin_unlock_irqrestore(&cmd->alloc_lock, flags);
142
143 return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
144}
145
146static void free_ent(struct mlx5_cmd *cmd, int idx)
147{
148 unsigned long flags;
149
150 spin_lock_irqsave(&cmd->alloc_lock, flags);
151 set_bit(idx, &cmd->bitmask);
152 spin_unlock_irqrestore(&cmd->alloc_lock, flags);
153}
154
155static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
156{
157 return cmd->cmd_buf + (idx << cmd->log_stride);
158}
159
160static u8 xor8_buf(void *buf, int len)
161{
162 u8 *ptr = buf;
163 u8 sum = 0;
164 int i;
165
166 for (i = 0; i < len; i++)
167 sum ^= ptr[i];
168
169 return sum;
170}
171
172static int verify_block_sig(struct mlx5_cmd_prot_block *block)
173{
174 if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
175 return -EINVAL;
176
177 if (xor8_buf(block, sizeof(*block)) != 0xff)
178 return -EINVAL;
179
180 return 0;
181}
182
183static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token)
184{
185 block->token = token;
186 block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 2);
187 block->sig = ~xor8_buf(block, sizeof(*block) - 1);
188}
189
190static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token)
191{
192 struct mlx5_cmd_mailbox *next = msg->next;
193
194 while (next) {
195 calc_block_sig(next->buf, token);
196 next = next->next;
197 }
198}
199
200static void set_signature(struct mlx5_cmd_work_ent *ent)
201{
202 ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
203 calc_chain_sig(ent->in, ent->token);
204 calc_chain_sig(ent->out, ent->token);
205}
206
207static void poll_timeout(struct mlx5_cmd_work_ent *ent)
208{
209 unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
210 u8 own;
211
212 do {
213 own = ent->lay->status_own;
214 if (!(own & CMD_OWNER_HW)) {
215 ent->ret = 0;
216 return;
217 }
218 usleep_range(5000, 10000);
219 } while (time_before(jiffies, poll_end));
220
221 ent->ret = -ETIMEDOUT;
222}
223
224static void free_cmd(struct mlx5_cmd_work_ent *ent)
225{
226 kfree(ent);
227}
228
229
230static int verify_signature(struct mlx5_cmd_work_ent *ent)
231{
232 struct mlx5_cmd_mailbox *next = ent->out->next;
233 int err;
234 u8 sig;
235
236 sig = xor8_buf(ent->lay, sizeof(*ent->lay));
237 if (sig != 0xff)
238 return -EINVAL;
239
240 while (next) {
241 err = verify_block_sig(next->buf);
242 if (err)
243 return err;
244
245 next = next->next;
246 }
247
248 return 0;
249}
250
251static void dump_buf(void *buf, int size, int data_only, int offset)
252{
253 __be32 *p = buf;
254 int i;
255
256 for (i = 0; i < size; i += 16) {
257 pr_debug("%03x: %08x %08x %08x %08x\n", offset, be32_to_cpu(p[0]),
258 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
259 be32_to_cpu(p[3]));
260 p += 4;
261 offset += 16;
262 }
263 if (!data_only)
264 pr_debug("\n");
265}
266
267const char *mlx5_command_str(int command)
268{
269 switch (command) {
270 case MLX5_CMD_OP_QUERY_HCA_CAP:
271 return "QUERY_HCA_CAP";
272
273 case MLX5_CMD_OP_SET_HCA_CAP:
274 return "SET_HCA_CAP";
275
276 case MLX5_CMD_OP_QUERY_ADAPTER:
277 return "QUERY_ADAPTER";
278
279 case MLX5_CMD_OP_INIT_HCA:
280 return "INIT_HCA";
281
282 case MLX5_CMD_OP_TEARDOWN_HCA:
283 return "TEARDOWN_HCA";
284
285 case MLX5_CMD_OP_QUERY_PAGES:
286 return "QUERY_PAGES";
287
288 case MLX5_CMD_OP_MANAGE_PAGES:
289 return "MANAGE_PAGES";
290
291 case MLX5_CMD_OP_CREATE_MKEY:
292 return "CREATE_MKEY";
293
294 case MLX5_CMD_OP_QUERY_MKEY:
295 return "QUERY_MKEY";
296
297 case MLX5_CMD_OP_DESTROY_MKEY:
298 return "DESTROY_MKEY";
299
300 case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS:
301 return "QUERY_SPECIAL_CONTEXTS";
302
303 case MLX5_CMD_OP_CREATE_EQ:
304 return "CREATE_EQ";
305
306 case MLX5_CMD_OP_DESTROY_EQ:
307 return "DESTROY_EQ";
308
309 case MLX5_CMD_OP_QUERY_EQ:
310 return "QUERY_EQ";
311
312 case MLX5_CMD_OP_CREATE_CQ:
313 return "CREATE_CQ";
314
315 case MLX5_CMD_OP_DESTROY_CQ:
316 return "DESTROY_CQ";
317
318 case MLX5_CMD_OP_QUERY_CQ:
319 return "QUERY_CQ";
320
321 case MLX5_CMD_OP_MODIFY_CQ:
322 return "MODIFY_CQ";
323
324 case MLX5_CMD_OP_CREATE_QP:
325 return "CREATE_QP";
326
327 case MLX5_CMD_OP_DESTROY_QP:
328 return "DESTROY_QP";
329
330 case MLX5_CMD_OP_RST2INIT_QP:
331 return "RST2INIT_QP";
332
333 case MLX5_CMD_OP_INIT2RTR_QP:
334 return "INIT2RTR_QP";
335
336 case MLX5_CMD_OP_RTR2RTS_QP:
337 return "RTR2RTS_QP";
338
339 case MLX5_CMD_OP_RTS2RTS_QP:
340 return "RTS2RTS_QP";
341
342 case MLX5_CMD_OP_SQERR2RTS_QP:
343 return "SQERR2RTS_QP";
344
345 case MLX5_CMD_OP_2ERR_QP:
346 return "2ERR_QP";
347
348 case MLX5_CMD_OP_RTS2SQD_QP:
349 return "RTS2SQD_QP";
350
351 case MLX5_CMD_OP_SQD2RTS_QP:
352 return "SQD2RTS_QP";
353
354 case MLX5_CMD_OP_2RST_QP:
355 return "2RST_QP";
356
357 case MLX5_CMD_OP_QUERY_QP:
358 return "QUERY_QP";
359
360 case MLX5_CMD_OP_CONF_SQP:
361 return "CONF_SQP";
362
363 case MLX5_CMD_OP_MAD_IFC:
364 return "MAD_IFC";
365
366 case MLX5_CMD_OP_INIT2INIT_QP:
367 return "INIT2INIT_QP";
368
369 case MLX5_CMD_OP_SUSPEND_QP:
370 return "SUSPEND_QP";
371
372 case MLX5_CMD_OP_UNSUSPEND_QP:
373 return "UNSUSPEND_QP";
374
375 case MLX5_CMD_OP_SQD2SQD_QP:
376 return "SQD2SQD_QP";
377
378 case MLX5_CMD_OP_ALLOC_QP_COUNTER_SET:
379 return "ALLOC_QP_COUNTER_SET";
380
381 case MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET:
382 return "DEALLOC_QP_COUNTER_SET";
383
384 case MLX5_CMD_OP_QUERY_QP_COUNTER_SET:
385 return "QUERY_QP_COUNTER_SET";
386
387 case MLX5_CMD_OP_CREATE_PSV:
388 return "CREATE_PSV";
389
390 case MLX5_CMD_OP_DESTROY_PSV:
391 return "DESTROY_PSV";
392
393 case MLX5_CMD_OP_QUERY_PSV:
394 return "QUERY_PSV";
395
396 case MLX5_CMD_OP_QUERY_SIG_RULE_TABLE:
397 return "QUERY_SIG_RULE_TABLE";
398
399 case MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE:
400 return "QUERY_BLOCK_SIZE_TABLE";
401
402 case MLX5_CMD_OP_CREATE_SRQ:
403 return "CREATE_SRQ";
404
405 case MLX5_CMD_OP_DESTROY_SRQ:
406 return "DESTROY_SRQ";
407
408 case MLX5_CMD_OP_QUERY_SRQ:
409 return "QUERY_SRQ";
410
411 case MLX5_CMD_OP_ARM_RQ:
412 return "ARM_RQ";
413
414 case MLX5_CMD_OP_RESIZE_SRQ:
415 return "RESIZE_SRQ";
416
417 case MLX5_CMD_OP_ALLOC_PD:
418 return "ALLOC_PD";
419
420 case MLX5_CMD_OP_DEALLOC_PD:
421 return "DEALLOC_PD";
422
423 case MLX5_CMD_OP_ALLOC_UAR:
424 return "ALLOC_UAR";
425
426 case MLX5_CMD_OP_DEALLOC_UAR:
427 return "DEALLOC_UAR";
428
429 case MLX5_CMD_OP_ATTACH_TO_MCG:
430 return "ATTACH_TO_MCG";
431
432 case MLX5_CMD_OP_DETACH_FROM_MCG:
433 return "DETACH_FROM_MCG";
434
435 case MLX5_CMD_OP_ALLOC_XRCD:
436 return "ALLOC_XRCD";
437
438 case MLX5_CMD_OP_DEALLOC_XRCD:
439 return "DEALLOC_XRCD";
440
441 case MLX5_CMD_OP_ACCESS_REG:
442 return "MLX5_CMD_OP_ACCESS_REG";
443
444 default: return "unknown command opcode";
445 }
446}
447
448static void dump_command(struct mlx5_core_dev *dev,
449 struct mlx5_cmd_work_ent *ent, int input)
450{
451 u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode);
452 struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
453 struct mlx5_cmd_mailbox *next = msg->next;
454 int data_only;
455 int offset = 0;
456 int dump_len;
457
458 data_only = !!(mlx5_core_debug_mask & (1 << MLX5_CMD_DATA));
459
460 if (data_only)
461 mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_DATA,
462 "dump command data %s(0x%x) %s\n",
463 mlx5_command_str(op), op,
464 input ? "INPUT" : "OUTPUT");
465 else
466 mlx5_core_dbg(dev, "dump command %s(0x%x) %s\n",
467 mlx5_command_str(op), op,
468 input ? "INPUT" : "OUTPUT");
469
470 if (data_only) {
471 if (input) {
472 dump_buf(ent->lay->in, sizeof(ent->lay->in), 1, offset);
473 offset += sizeof(ent->lay->in);
474 } else {
475 dump_buf(ent->lay->out, sizeof(ent->lay->out), 1, offset);
476 offset += sizeof(ent->lay->out);
477 }
478 } else {
479 dump_buf(ent->lay, sizeof(*ent->lay), 0, offset);
480 offset += sizeof(*ent->lay);
481 }
482
483 while (next && offset < msg->len) {
484 if (data_only) {
485 dump_len = min_t(int, MLX5_CMD_DATA_BLOCK_SIZE, msg->len - offset);
486 dump_buf(next->buf, dump_len, 1, offset);
487 offset += MLX5_CMD_DATA_BLOCK_SIZE;
488 } else {
489 mlx5_core_dbg(dev, "command block:\n");
490 dump_buf(next->buf, sizeof(struct mlx5_cmd_prot_block), 0, offset);
491 offset += sizeof(struct mlx5_cmd_prot_block);
492 }
493 next = next->next;
494 }
495
496 if (data_only)
497 pr_debug("\n");
498}
499
500static void cmd_work_handler(struct work_struct *work)
501{
502 struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
503 struct mlx5_cmd *cmd = ent->cmd;
504 struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
505 struct mlx5_cmd_layout *lay;
506 struct semaphore *sem;
507
508 sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
509 down(sem);
510 if (!ent->page_queue) {
511 ent->idx = alloc_ent(cmd);
512 if (ent->idx < 0) {
513 mlx5_core_err(dev, "failed to allocate command entry\n");
514 up(sem);
515 return;
516 }
517 } else {
518 ent->idx = cmd->max_reg_cmds;
519 }
520
521 ent->token = alloc_token(cmd);
522 cmd->ent_arr[ent->idx] = ent;
523 lay = get_inst(cmd, ent->idx);
524 ent->lay = lay;
525 memset(lay, 0, sizeof(*lay));
526 memcpy(lay->in, ent->in->first.data, sizeof(lay->in));
527 if (ent->in->next)
528 lay->in_ptr = cpu_to_be64(ent->in->next->dma);
529 lay->inlen = cpu_to_be32(ent->in->len);
530 if (ent->out->next)
531 lay->out_ptr = cpu_to_be64(ent->out->next->dma);
532 lay->outlen = cpu_to_be32(ent->out->len);
533 lay->type = MLX5_PCI_CMD_XPORT;
534 lay->token = ent->token;
535 lay->status_own = CMD_OWNER_HW;
536 if (!cmd->checksum_disabled)
537 set_signature(ent);
538 dump_command(dev, ent, 1);
539 ktime_get_ts(&ent->ts1);
540
541 /* ring doorbell after the descriptor is valid */
542 wmb();
543 iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
544 mlx5_core_dbg(dev, "write 0x%x to command doorbell\n", 1 << ent->idx);
545 mmiowb();
546 if (cmd->mode == CMD_MODE_POLLING) {
547 poll_timeout(ent);
548 /* make sure we read the descriptor after ownership is SW */
549 rmb();
550 mlx5_cmd_comp_handler(dev, 1UL << ent->idx);
551 }
552}
553
554static const char *deliv_status_to_str(u8 status)
555{
556 switch (status) {
557 case MLX5_CMD_DELIVERY_STAT_OK:
558 return "no errors";
559 case MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR:
560 return "signature error";
561 case MLX5_CMD_DELIVERY_STAT_TOK_ERR:
562 return "token error";
563 case MLX5_CMD_DELIVERY_STAT_BAD_BLK_NUM_ERR:
564 return "bad block number";
565 case MLX5_CMD_DELIVERY_STAT_OUT_PTR_ALIGN_ERR:
566 return "output pointer not aligned to block size";
567 case MLX5_CMD_DELIVERY_STAT_IN_PTR_ALIGN_ERR:
568 return "input pointer not aligned to block size";
569 case MLX5_CMD_DELIVERY_STAT_FW_ERR:
570 return "firmware internal error";
571 case MLX5_CMD_DELIVERY_STAT_IN_LENGTH_ERR:
572 return "command input length error";
573 case MLX5_CMD_DELIVERY_STAT_OUT_LENGTH_ERR:
 574 return "command output length error";
575 case MLX5_CMD_DELIVERY_STAT_RES_FLD_NOT_CLR_ERR:
576 return "reserved fields not cleared";
577 case MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR:
578 return "bad command descriptor type";
579 default:
580 return "unknown status code";
581 }
582}
583
584static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
585{
586 struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
587
588 return be16_to_cpu(hdr->opcode);
589}
590
591static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
592{
593 unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
594 struct mlx5_cmd *cmd = &dev->cmd;
595 int err;
596
597 if (cmd->mode == CMD_MODE_POLLING) {
598 wait_for_completion(&ent->done);
599 err = ent->ret;
600 } else {
601 if (!wait_for_completion_timeout(&ent->done, timeout))
602 err = -ETIMEDOUT;
603 else
604 err = 0;
605 }
606 if (err == -ETIMEDOUT) {
607 mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
608 mlx5_command_str(msg_to_opcode(ent->in)),
609 msg_to_opcode(ent->in));
610 }
611 mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err,
612 deliv_status_to_str(ent->status), ent->status);
613
614 return err;
615}
616
617/* Notes:
618 * 1. Callback functions may not sleep
 619 * 2. page queue commands do not support asynchronous completion
620 */
621static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
622 struct mlx5_cmd_msg *out, mlx5_cmd_cbk_t callback,
623 void *context, int page_queue, u8 *status)
624{
625 struct mlx5_cmd *cmd = &dev->cmd;
626 struct mlx5_cmd_work_ent *ent;
627 ktime_t t1, t2, delta;
628 struct mlx5_cmd_stats *stats;
629 int err = 0;
630 s64 ds;
631 u16 op;
632
633 if (callback && page_queue)
634 return -EINVAL;
635
636 ent = alloc_cmd(cmd, in, out, callback, context, page_queue);
637 if (IS_ERR(ent))
638 return PTR_ERR(ent);
639
640 if (!callback)
641 init_completion(&ent->done);
642
643 INIT_WORK(&ent->work, cmd_work_handler);
644 if (page_queue) {
645 cmd_work_handler(&ent->work);
646 } else if (!queue_work(cmd->wq, &ent->work)) {
647 mlx5_core_warn(dev, "failed to queue work\n");
648 err = -ENOMEM;
649 goto out_free;
650 }
651
652 if (!callback) {
653 err = wait_func(dev, ent);
654 if (err == -ETIMEDOUT)
655 goto out;
656
657 t1 = timespec_to_ktime(ent->ts1);
658 t2 = timespec_to_ktime(ent->ts2);
659 delta = ktime_sub(t2, t1);
660 ds = ktime_to_ns(delta);
661 op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
662 if (op < ARRAY_SIZE(cmd->stats)) {
663 stats = &cmd->stats[op];
664 spin_lock(&stats->lock);
665 stats->sum += ds;
666 ++stats->n;
667 spin_unlock(&stats->lock);
668 }
669 mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME,
670 "fw exec time for %s is %lld nsec\n",
671 mlx5_command_str(op), ds);
672 *status = ent->status;
673 free_cmd(ent);
674 }
675
676 return err;
677
678out_free:
679 free_cmd(ent);
680out:
681 return err;
682}
683
684static ssize_t dbg_write(struct file *filp, const char __user *buf,
685 size_t count, loff_t *pos)
686{
687 struct mlx5_core_dev *dev = filp->private_data;
688 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
689 char lbuf[3];
690 int err;
691
692 if (!dbg->in_msg || !dbg->out_msg)
693 return -ENOMEM;
694
695 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
696 return -EFAULT;
697
698 lbuf[sizeof(lbuf) - 1] = 0;
699
700 if (strcmp(lbuf, "go"))
701 return -EINVAL;
702
703 err = mlx5_cmd_exec(dev, dbg->in_msg, dbg->inlen, dbg->out_msg, dbg->outlen);
704
705 return err ? err : count;
706}
707
708
709static const struct file_operations fops = {
710 .owner = THIS_MODULE,
711 .open = simple_open,
712 .write = dbg_write,
713};
714
715static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
716{
717 struct mlx5_cmd_prot_block *block;
718 struct mlx5_cmd_mailbox *next;
719 int copy;
720
721 if (!to || !from)
722 return -ENOMEM;
723
724 copy = min_t(int, size, sizeof(to->first.data));
725 memcpy(to->first.data, from, copy);
726 size -= copy;
727 from += copy;
728
729 next = to->next;
730 while (size) {
731 if (!next) {
732 /* this is a BUG */
733 return -ENOMEM;
734 }
735
736 copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
737 block = next->buf;
738 memcpy(block->data, from, copy);
739 from += copy;
740 size -= copy;
741 next = next->next;
742 }
743
744 return 0;
745}
746
747static int mlx5_copy_from_msg(void *to, struct mlx5_cmd_msg *from, int size)
748{
749 struct mlx5_cmd_prot_block *block;
750 struct mlx5_cmd_mailbox *next;
751 int copy;
752
753 if (!to || !from)
754 return -ENOMEM;
755
756 copy = min_t(int, size, sizeof(from->first.data));
757 memcpy(to, from->first.data, copy);
758 size -= copy;
759 to += copy;
760
761 next = from->next;
762 while (size) {
763 if (!next) {
764 /* this is a BUG */
765 return -ENOMEM;
766 }
767
768 copy = min_t(int, size, MLX5_CMD_DATA_BLOCK_SIZE);
769 block = next->buf;
770 if (xor8_buf(block, sizeof(*block)) != 0xff)
771 return -EINVAL;
772
773 memcpy(to, block->data, copy);
774 to += copy;
775 size -= copy;
776 next = next->next;
777 }
778
779 return 0;
780}
781
782static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
783 gfp_t flags)
784{
785 struct mlx5_cmd_mailbox *mailbox;
786
787 mailbox = kmalloc(sizeof(*mailbox), flags);
788 if (!mailbox)
789 return ERR_PTR(-ENOMEM);
790
791 mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags,
792 &mailbox->dma);
793 if (!mailbox->buf) {
794 mlx5_core_dbg(dev, "failed allocation\n");
795 kfree(mailbox);
796 return ERR_PTR(-ENOMEM);
797 }
798 memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block));
799 mailbox->next = NULL;
800
801 return mailbox;
802}
803
804static void free_cmd_box(struct mlx5_core_dev *dev,
805 struct mlx5_cmd_mailbox *mailbox)
806{
807 pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma);
808 kfree(mailbox);
809}
810
811static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
812 gfp_t flags, int size)
813{
814 struct mlx5_cmd_mailbox *tmp, *head = NULL;
815 struct mlx5_cmd_prot_block *block;
816 struct mlx5_cmd_msg *msg;
817 int blen;
818 int err;
819 int n;
820 int i;
821
822 msg = kzalloc(sizeof(*msg), GFP_KERNEL);
823 if (!msg)
824 return ERR_PTR(-ENOMEM);
825
826 blen = size - min_t(int, sizeof(msg->first.data), size);
827 n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1) / MLX5_CMD_DATA_BLOCK_SIZE;
828
829 for (i = 0; i < n; i++) {
830 tmp = alloc_cmd_box(dev, flags);
831 if (IS_ERR(tmp)) {
832 mlx5_core_warn(dev, "failed allocating block\n");
833 err = PTR_ERR(tmp);
834 goto err_alloc;
835 }
836
837 block = tmp->buf;
838 tmp->next = head;
839 block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
840 block->block_num = cpu_to_be32(n - i - 1);
841 head = tmp;
842 }
843 msg->next = head;
844 msg->len = size;
845 return msg;
846
847err_alloc:
848 while (head) {
849 tmp = head->next;
850 free_cmd_box(dev, head);
851 head = tmp;
852 }
853 kfree(msg);
854
855 return ERR_PTR(err);
856}
857
858static void mlx5_free_cmd_msg(struct mlx5_core_dev *dev,
859 struct mlx5_cmd_msg *msg)
860{
861 struct mlx5_cmd_mailbox *head = msg->next;
862 struct mlx5_cmd_mailbox *next;
863
864 while (head) {
865 next = head->next;
866 free_cmd_box(dev, head);
867 head = next;
868 }
869 kfree(msg);
870}
871
872static ssize_t data_write(struct file *filp, const char __user *buf,
873 size_t count, loff_t *pos)
874{
875 struct mlx5_core_dev *dev = filp->private_data;
876 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
877 void *ptr;
878 int err;
879
880 if (*pos != 0)
881 return -EINVAL;
882
883 kfree(dbg->in_msg);
884 dbg->in_msg = NULL;
885 dbg->inlen = 0;
886
887 ptr = kzalloc(count, GFP_KERNEL);
888 if (!ptr)
889 return -ENOMEM;
890
891 if (copy_from_user(ptr, buf, count)) {
892 err = -EFAULT;
893 goto out;
894 }
895 dbg->in_msg = ptr;
896 dbg->inlen = count;
897
898 *pos = count;
899
900 return count;
901
902out:
903 kfree(ptr);
904 return err;
905}
906
907static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
908 loff_t *pos)
909{
910 struct mlx5_core_dev *dev = filp->private_data;
911 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
912 int copy;
913
914 if (*pos)
915 return 0;
916
917 if (!dbg->out_msg)
918 return -ENOMEM;
919
920 copy = min_t(int, count, dbg->outlen);
921 if (copy_to_user(buf, dbg->out_msg, copy))
922 return -EFAULT;
923
924 *pos += copy;
925
926 return copy;
927}
928
929static const struct file_operations dfops = {
930 .owner = THIS_MODULE,
931 .open = simple_open,
932 .write = data_write,
933 .read = data_read,
934};
935
936static ssize_t outlen_read(struct file *filp, char __user *buf, size_t count,
937 loff_t *pos)
938{
939 struct mlx5_core_dev *dev = filp->private_data;
940 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
941 char outlen[8];
942 int err;
943
944 if (*pos)
945 return 0;
946
947 err = snprintf(outlen, sizeof(outlen), "%d", dbg->outlen);
948 if (err < 0)
949 return err;
950
951 if (copy_to_user(buf, &outlen, err))
952 return -EFAULT;
953
954 *pos += err;
955
956 return err;
957}
958
959static ssize_t outlen_write(struct file *filp, const char __user *buf,
960 size_t count, loff_t *pos)
961{
962 struct mlx5_core_dev *dev = filp->private_data;
963 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
964 char outlen_str[8];
965 int outlen;
966 void *ptr;
967 int err;
968
969 if (*pos != 0 || count > 6)
970 return -EINVAL;
971
972 kfree(dbg->out_msg);
973 dbg->out_msg = NULL;
974 dbg->outlen = 0;
975
976 if (copy_from_user(outlen_str, buf, count))
977 return -EFAULT;
978
979 outlen_str[7] = 0;
980
981 err = sscanf(outlen_str, "%d", &outlen);
982 if (err < 0)
983 return err;
984
985 ptr = kzalloc(outlen, GFP_KERNEL);
986 if (!ptr)
987 return -ENOMEM;
988
989 dbg->out_msg = ptr;
990 dbg->outlen = outlen;
991
992 *pos = count;
993
994 return count;
995}
996
997static const struct file_operations olfops = {
998 .owner = THIS_MODULE,
999 .open = simple_open,
1000 .write = outlen_write,
1001 .read = outlen_read,
1002};
1003
1004static void set_wqname(struct mlx5_core_dev *dev)
1005{
1006 struct mlx5_cmd *cmd = &dev->cmd;
1007
1008 snprintf(cmd->wq_name, sizeof(cmd->wq_name), "mlx5_cmd_%s",
1009 dev_name(&dev->pdev->dev));
1010}
1011
1012static void clean_debug_files(struct mlx5_core_dev *dev)
1013{
1014 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
1015
1016 if (!mlx5_debugfs_root)
1017 return;
1018
1019 mlx5_cmdif_debugfs_cleanup(dev);
1020 debugfs_remove_recursive(dbg->dbg_root);
1021}
1022
1023static int create_debugfs_files(struct mlx5_core_dev *dev)
1024{
1025 struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
1026 int err = -ENOMEM;
1027
1028 if (!mlx5_debugfs_root)
1029 return 0;
1030
1031 dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
1032 if (!dbg->dbg_root)
1033 return err;
1034
1035 dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
1036 dev, &dfops);
1037 if (!dbg->dbg_in)
1038 goto err_dbg;
1039
1040 dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
1041 dev, &dfops);
1042 if (!dbg->dbg_out)
1043 goto err_dbg;
1044
1045 dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
1046 dev, &olfops);
1047 if (!dbg->dbg_outlen)
1048 goto err_dbg;
1049
1050 dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
1051 &dbg->status);
1052 if (!dbg->dbg_status)
1053 goto err_dbg;
1054
1055 dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
1056 if (!dbg->dbg_run)
1057 goto err_dbg;
1058
1059 mlx5_cmdif_debugfs_init(dev);
1060
1061 return 0;
1062
1063err_dbg:
1064 clean_debug_files(dev);
1065 return err;
1066}
1067
1068void mlx5_cmd_use_events(struct mlx5_core_dev *dev)
1069{
1070 struct mlx5_cmd *cmd = &dev->cmd;
1071 int i;
1072
1073 for (i = 0; i < cmd->max_reg_cmds; i++)
1074 down(&cmd->sem);
1075
1076 down(&cmd->pages_sem);
1077
1078 flush_workqueue(cmd->wq);
1079
1080 cmd->mode = CMD_MODE_EVENTS;
1081
1082 up(&cmd->pages_sem);
1083 for (i = 0; i < cmd->max_reg_cmds; i++)
1084 up(&cmd->sem);
1085}
1086
1087void mlx5_cmd_use_polling(struct mlx5_core_dev *dev)
1088{
1089 struct mlx5_cmd *cmd = &dev->cmd;
1090 int i;
1091
1092 for (i = 0; i < cmd->max_reg_cmds; i++)
1093 down(&cmd->sem);
1094
1095 down(&cmd->pages_sem);
1096
1097 flush_workqueue(cmd->wq);
1098 cmd->mode = CMD_MODE_POLLING;
1099
1100 up(&cmd->pages_sem);
1101 for (i = 0; i < cmd->max_reg_cmds; i++)
1102 up(&cmd->sem);
1103}
1104
1105void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector)
1106{
1107 struct mlx5_cmd *cmd = &dev->cmd;
1108 struct mlx5_cmd_work_ent *ent;
1109 mlx5_cmd_cbk_t callback;
1110 void *context;
1111 int err;
1112 int i;
1113
1114 for (i = 0; i < (1 << cmd->log_sz); i++) {
1115 if (test_bit(i, &vector)) {
1116 ent = cmd->ent_arr[i];
1117 ktime_get_ts(&ent->ts2);
1118 memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
1119 dump_command(dev, ent, 0);
1120 if (!ent->ret) {
1121 if (!cmd->checksum_disabled)
1122 ent->ret = verify_signature(ent);
1123 else
1124 ent->ret = 0;
1125 ent->status = ent->lay->status_own >> 1;
1126 mlx5_core_dbg(dev, "command completed. ret 0x%x, delivery status %s(0x%x)\n",
1127 ent->ret, deliv_status_to_str(ent->status), ent->status);
1128 }
1129 free_ent(cmd, ent->idx);
1130 if (ent->callback) {
1131 callback = ent->callback;
1132 context = ent->context;
1133 err = ent->ret;
1134 free_cmd(ent);
1135 callback(err, context);
1136 } else {
1137 complete(&ent->done);
1138 }
1139 if (ent->page_queue)
1140 up(&cmd->pages_sem);
1141 else
1142 up(&cmd->sem);
1143 }
1144 }
1145}
1146EXPORT_SYMBOL(mlx5_cmd_comp_handler);
1147
1148static int status_to_err(u8 status)
1149{
1150 return status ? -1 : 0; /* TBD more meaningful codes */
1151}
1152
1153static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size)
1154{
1155 struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
1156 struct mlx5_cmd *cmd = &dev->cmd;
1157 struct cache_ent *ent = NULL;
1158
1159 if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
1160 ent = &cmd->cache.large;
1161 else if (in_size > 16 && in_size <= MED_LIST_SIZE)
1162 ent = &cmd->cache.med;
1163
1164 if (ent) {
1165 spin_lock(&ent->lock);
1166 if (!list_empty(&ent->head)) {
1167 msg = list_entry(ent->head.next, typeof(*msg), list);
 1168 /* For cached lists, we must explicitly state the
 1169 * real size of the message
 1170 */
1171 msg->len = in_size;
1172 list_del(&msg->list);
1173 }
1174 spin_unlock(&ent->lock);
1175 }
1176
1177 if (IS_ERR(msg))
1178 msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, in_size);
1179
1180 return msg;
1181}
1182
1183static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
1184{
1185 if (msg->cache) {
1186 spin_lock(&msg->cache->lock);
1187 list_add_tail(&msg->list, &msg->cache->head);
1188 spin_unlock(&msg->cache->lock);
1189 } else {
1190 mlx5_free_cmd_msg(dev, msg);
1191 }
1192}
1193
1194static int is_manage_pages(struct mlx5_inbox_hdr *in)
1195{
1196 return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
1197}
1198
1199int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
1200 int out_size)
1201{
1202 struct mlx5_cmd_msg *inb;
1203 struct mlx5_cmd_msg *outb;
1204 int pages_queue;
1205 int err;
1206 u8 status = 0;
1207
1208 pages_queue = is_manage_pages(in);
1209
1210 inb = alloc_msg(dev, in_size);
1211 if (IS_ERR(inb)) {
1212 err = PTR_ERR(inb);
1213 return err;
1214 }
1215
1216 err = mlx5_copy_to_msg(inb, in, in_size);
1217 if (err) {
1218 mlx5_core_warn(dev, "err %d\n", err);
1219 goto out_in;
1220 }
1221
1222 outb = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, out_size);
1223 if (IS_ERR(outb)) {
1224 err = PTR_ERR(outb);
1225 goto out_in;
1226 }
1227
1228 err = mlx5_cmd_invoke(dev, inb, outb, NULL, NULL, pages_queue, &status);
1229 if (err)
1230 goto out_out;
1231
1232 mlx5_core_dbg(dev, "err %d, status %d\n", err, status);
1233 if (status) {
1234 err = status_to_err(status);
1235 goto out_out;
1236 }
1237
1238 err = mlx5_copy_from_msg(out, outb, out_size);
1239
1240out_out:
1241 mlx5_free_cmd_msg(dev, outb);
1242
1243out_in:
1244 free_msg(dev, inb);
1245 return err;
1246}
1247EXPORT_SYMBOL(mlx5_cmd_exec);
1248
1249static void destroy_msg_cache(struct mlx5_core_dev *dev)
1250{
1251 struct mlx5_cmd *cmd = &dev->cmd;
1252 struct mlx5_cmd_msg *msg;
1253 struct mlx5_cmd_msg *n;
1254
1255 list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) {
1256 list_del(&msg->list);
1257 mlx5_free_cmd_msg(dev, msg);
1258 }
1259
1260 list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) {
1261 list_del(&msg->list);
1262 mlx5_free_cmd_msg(dev, msg);
1263 }
1264}
1265
1266static int create_msg_cache(struct mlx5_core_dev *dev)
1267{
1268 struct mlx5_cmd *cmd = &dev->cmd;
1269 struct mlx5_cmd_msg *msg;
1270 int err;
1271 int i;
1272
1273 spin_lock_init(&cmd->cache.large.lock);
1274 INIT_LIST_HEAD(&cmd->cache.large.head);
1275 spin_lock_init(&cmd->cache.med.lock);
1276 INIT_LIST_HEAD(&cmd->cache.med.head);
1277
1278 for (i = 0; i < NUM_LONG_LISTS; i++) {
1279 msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
1280 if (IS_ERR(msg)) {
1281 err = PTR_ERR(msg);
1282 goto ex_err;
1283 }
1284 msg->cache = &cmd->cache.large;
1285 list_add_tail(&msg->list, &cmd->cache.large.head);
1286 }
1287
1288 for (i = 0; i < NUM_MED_LISTS; i++) {
1289 msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
1290 if (IS_ERR(msg)) {
1291 err = PTR_ERR(msg);
1292 goto ex_err;
1293 }
1294 msg->cache = &cmd->cache.med;
1295 list_add_tail(&msg->list, &cmd->cache.med.head);
1296 }
1297
1298 return 0;
1299
1300ex_err:
1301 destroy_msg_cache(dev);
1302 return err;
1303}
1304
1305int mlx5_cmd_init(struct mlx5_core_dev *dev)
1306{
1307 int size = sizeof(struct mlx5_cmd_prot_block);
1308 int align = roundup_pow_of_two(size);
1309 struct mlx5_cmd *cmd = &dev->cmd;
1310 u32 cmd_h, cmd_l;
1311 u16 cmd_if_rev;
1312 int err;
1313 int i;
1314
1315 cmd_if_rev = cmdif_rev(dev);
1316 if (cmd_if_rev != CMD_IF_REV) {
1317 dev_err(&dev->pdev->dev,
1318 "Driver cmdif rev(%d) differs from firmware's(%d)\n",
1319 CMD_IF_REV, cmd_if_rev);
1320 return -EINVAL;
1321 }
1322
1323 cmd->pool = pci_pool_create("mlx5_cmd", dev->pdev, size, align, 0);
1324 if (!cmd->pool)
1325 return -ENOMEM;
1326
1327 cmd->cmd_buf = (void *)__get_free_pages(GFP_ATOMIC, 0);
1328 if (!cmd->cmd_buf) {
1329 err = -ENOMEM;
1330 goto err_free_pool;
1331 }
1332 cmd->dma = dma_map_single(&dev->pdev->dev, cmd->cmd_buf, PAGE_SIZE,
1333 DMA_BIDIRECTIONAL);
1334 if (dma_mapping_error(&dev->pdev->dev, cmd->dma)) {
1335 err = -ENOMEM;
1336 goto err_free;
1337 }
1338
1339 cmd_l = ioread32be(&dev->iseg->cmdq_addr_l_sz) & 0xff;
1340 cmd->log_sz = cmd_l >> 4 & 0xf;
1341 cmd->log_stride = cmd_l & 0xf;
1342 if (1 << cmd->log_sz > MLX5_MAX_COMMANDS) {
1343 dev_err(&dev->pdev->dev, "firmware reports too many outstanding commands %d\n",
1344 1 << cmd->log_sz);
1345 err = -EINVAL;
1346 goto err_map;
1347 }
1348
1349 if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) {
1350 dev_err(&dev->pdev->dev, "command queue size overflow\n");
1351 err = -EINVAL;
1352 goto err_map;
1353 }
1354
1355 cmd->max_reg_cmds = (1 << cmd->log_sz) - 1;
1356 cmd->bitmask = (1 << cmd->max_reg_cmds) - 1;
1357
1358 cmd->cmdif_rev = ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
1359 if (cmd->cmdif_rev > CMD_IF_REV) {
1360 dev_err(&dev->pdev->dev, "driver does not support command interface version. driver %d, firmware %d\n",
1361 CMD_IF_REV, cmd->cmdif_rev);
1362 err = -ENOTSUPP;
1363 goto err_map;
1364 }
1365
1366 spin_lock_init(&cmd->alloc_lock);
1367 spin_lock_init(&cmd->token_lock);
1368 for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
1369 spin_lock_init(&cmd->stats[i].lock);
1370
1371 sema_init(&cmd->sem, cmd->max_reg_cmds);
1372 sema_init(&cmd->pages_sem, 1);
1373
1374 cmd_h = (u32)((u64)(cmd->dma) >> 32);
1375 cmd_l = (u32)(cmd->dma);
1376 if (cmd_l & 0xfff) {
1377 dev_err(&dev->pdev->dev, "invalid command queue address\n");
1378 err = -ENOMEM;
1379 goto err_map;
1380 }
1381
1382 iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h);
1383 iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz);
1384
1385 /* Make sure firmware sees the complete address before we proceed */
1386 wmb();
1387
1388 mlx5_core_dbg(dev, "descriptor at dma 0x%llx\n", (unsigned long long)(cmd->dma));
1389
1390 cmd->mode = CMD_MODE_POLLING;
1391
1392 err = create_msg_cache(dev);
1393 if (err) {
1394 dev_err(&dev->pdev->dev, "failed to create command cache\n");
1395 goto err_map;
1396 }
1397
1398 set_wqname(dev);
1399 cmd->wq = create_singlethread_workqueue(cmd->wq_name);
1400 if (!cmd->wq) {
1401 dev_err(&dev->pdev->dev, "failed to create command workqueue\n");
1402 err = -ENOMEM;
1403 goto err_cache;
1404 }
1405
1406 err = create_debugfs_files(dev);
1407 if (err) {
1408 err = -ENOMEM;
1409 goto err_wq;
1410 }
1411
1412 return 0;
1413
1414err_wq:
1415 destroy_workqueue(cmd->wq);
1416
1417err_cache:
1418 destroy_msg_cache(dev);
1419
1420err_map:
1421 dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
1422 DMA_BIDIRECTIONAL);
1423err_free:
1424 free_pages((unsigned long)cmd->cmd_buf, 0);
1425
1426err_free_pool:
1427 pci_pool_destroy(cmd->pool);
1428
1429 return err;
1430}
1431EXPORT_SYMBOL(mlx5_cmd_init);
1432
1433void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
1434{
1435 struct mlx5_cmd *cmd = &dev->cmd;
1436
1437 clean_debug_files(dev);
1438 destroy_workqueue(cmd->wq);
1439 destroy_msg_cache(dev);
1440 dma_unmap_single(&dev->pdev->dev, cmd->dma, PAGE_SIZE,
1441 DMA_BIDIRECTIONAL);
1442 free_pages((unsigned long)cmd->cmd_buf, 0);
1443 pci_pool_destroy(cmd->pool);
1444}
1445EXPORT_SYMBOL(mlx5_cmd_cleanup);
1446
1447static const char *cmd_status_str(u8 status)
1448{
1449 switch (status) {
1450 case MLX5_CMD_STAT_OK:
1451 return "OK";
1452 case MLX5_CMD_STAT_INT_ERR:
1453 return "internal error";
1454 case MLX5_CMD_STAT_BAD_OP_ERR:
1455 return "bad operation";
1456 case MLX5_CMD_STAT_BAD_PARAM_ERR:
1457 return "bad parameter";
1458 case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
1459 return "bad system state";
1460 case MLX5_CMD_STAT_BAD_RES_ERR:
1461 return "bad resource";
1462 case MLX5_CMD_STAT_RES_BUSY:
1463 return "resource busy";
1464 case MLX5_CMD_STAT_LIM_ERR:
1465 return "limits exceeded";
1466 case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
1467 return "bad resource state";
1468 case MLX5_CMD_STAT_IX_ERR:
1469 return "bad index";
1470 case MLX5_CMD_STAT_NO_RES_ERR:
1471 return "no resources";
1472 case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
1473 return "bad input length";
1474 case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
1475 return "bad output length";
1476 case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
1477 return "bad QP state";
1478 case MLX5_CMD_STAT_BAD_PKT_ERR:
1479 return "bad packet (discarded)";
1480 case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
1481 return "bad size too many outstanding CQEs";
1482 default:
1483 return "unknown status";
1484 }
1485}
1486
1487int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
1488{
1489 if (!hdr->status)
1490 return 0;
1491
1492 pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
1493 cmd_status_str(hdr->status), hdr->status,
1494 be32_to_cpu(hdr->syndrome));
1495
1496 switch (hdr->status) {
1497 case MLX5_CMD_STAT_OK: return 0;
1498 case MLX5_CMD_STAT_INT_ERR: return -EIO;
1499 case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL;
1500 case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL;
1501 case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO;
1502 case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL;
1503 case MLX5_CMD_STAT_RES_BUSY: return -EBUSY;
1504 case MLX5_CMD_STAT_LIM_ERR: return -EINVAL;
1505 case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL;
1506 case MLX5_CMD_STAT_IX_ERR: return -EINVAL;
1507 case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN;
1508 case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO;
1509 case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO;
1510 case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL;
1511 case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL;
1512 case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL;
1513 default: return -EIO;
1514 }
1515}
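mlx5_cmd_exec() and mlx5_cmd_status_to_err() together define the calling convention every firmware command in this driver follows: zero the inbox and outbox, set the big-endian opcode in the mlx5_inbox_hdr, execute, then translate a non-zero outbox status into an errno. A minimal sketch of that pattern follows; the my_cmd_mbox_in/out layouts and example_issue_cmd() are placeholders for illustration, not definitions from this patch.

#include <linux/string.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>

/* Placeholder mailbox layouts -- real commands define these per opcode */
struct my_cmd_mbox_in {
	struct mlx5_inbox_hdr	hdr;
	u8			rsvd[8];
};

struct my_cmd_mbox_out {
	struct mlx5_outbox_hdr	hdr;
	u8			rsvd[8];
};

static int example_issue_cmd(struct mlx5_core_dev *dev, u16 opcode)
{
	struct my_cmd_mbox_in in;
	struct my_cmd_mbox_out out;
	int err;

	memset(&in, 0, sizeof(in));
	memset(&out, 0, sizeof(out));
	in.hdr.opcode = cpu_to_be16(opcode);	/* e.g. MLX5_CMD_OP_QUERY_ADAPTER */

	/* Blocks in polling or event mode; page-queue commands bypass the
	 * workqueue inside mlx5_cmd_invoke().
	 */
	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
	if (err)
		return err;

	/* Firmware-level status is carried in the outbox header */
	if (out.hdr.status)
		return mlx5_cmd_status_to_err(&out.hdr);

	return 0;
}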
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
new file mode 100644
index 000000000000..c2d660be6f76
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -0,0 +1,224 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/hardirq.h>
36#include <linux/mlx5/driver.h>
37#include <linux/mlx5/cmd.h>
38#include <rdma/ib_verbs.h>
39#include <linux/mlx5/cq.h>
40#include "mlx5_core.h"
41
42void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
43{
44 struct mlx5_core_cq *cq;
45 struct mlx5_cq_table *table = &dev->priv.cq_table;
46
47 spin_lock(&table->lock);
48 cq = radix_tree_lookup(&table->tree, cqn);
49 if (likely(cq))
50 atomic_inc(&cq->refcount);
51 spin_unlock(&table->lock);
52
53 if (!cq) {
54 mlx5_core_warn(dev, "Completion event for bogus CQ 0x%x\n", cqn);
55 return;
56 }
57
58 ++cq->arm_sn;
59
60 cq->comp(cq);
61
62 if (atomic_dec_and_test(&cq->refcount))
63 complete(&cq->free);
64}
65
66void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
67{
68 struct mlx5_cq_table *table = &dev->priv.cq_table;
69 struct mlx5_core_cq *cq;
70
71 spin_lock(&table->lock);
72
73 cq = radix_tree_lookup(&table->tree, cqn);
74 if (cq)
75 atomic_inc(&cq->refcount);
76
77 spin_unlock(&table->lock);
78
79 if (!cq) {
80 mlx5_core_warn(dev, "Async event for bogus CQ 0x%x\n", cqn);
81 return;
82 }
83
84 cq->event(cq, event_type);
85
86 if (atomic_dec_and_test(&cq->refcount))
87 complete(&cq->free);
88}
89
90
91int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
92 struct mlx5_create_cq_mbox_in *in, int inlen)
93{
94 int err;
95 struct mlx5_cq_table *table = &dev->priv.cq_table;
96 struct mlx5_create_cq_mbox_out out;
97 struct mlx5_destroy_cq_mbox_in din;
98 struct mlx5_destroy_cq_mbox_out dout;
99
100 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
101 memset(&out, 0, sizeof(out));
102 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
103 if (err)
104 return err;
105
106 if (out.hdr.status)
107 return mlx5_cmd_status_to_err(&out.hdr);
108
109 cq->cqn = be32_to_cpu(out.cqn) & 0xffffff;
110 cq->cons_index = 0;
111 cq->arm_sn = 0;
112 atomic_set(&cq->refcount, 1);
113 init_completion(&cq->free);
114
115 spin_lock_irq(&table->lock);
116 err = radix_tree_insert(&table->tree, cq->cqn, cq);
117 spin_unlock_irq(&table->lock);
118 if (err)
119 goto err_cmd;
120
121 cq->pid = current->pid;
122 err = mlx5_debug_cq_add(dev, cq);
123 if (err)
124		mlx5_core_dbg(dev, "failed adding CQ 0x%x to debug file system\n",
125 cq->cqn);
126
127 return 0;
128
129err_cmd:
130 memset(&din, 0, sizeof(din));
131 memset(&dout, 0, sizeof(dout));
132 din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
133 mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
134 return err;
135}
136EXPORT_SYMBOL(mlx5_core_create_cq);
137
138int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
139{
140 struct mlx5_cq_table *table = &dev->priv.cq_table;
141 struct mlx5_destroy_cq_mbox_in in;
142 struct mlx5_destroy_cq_mbox_out out;
143 struct mlx5_core_cq *tmp;
144 int err;
145
146 spin_lock_irq(&table->lock);
147 tmp = radix_tree_delete(&table->tree, cq->cqn);
148 spin_unlock_irq(&table->lock);
149 if (!tmp) {
150 mlx5_core_warn(dev, "cq 0x%x not found in tree\n", cq->cqn);
151 return -EINVAL;
152 }
153 if (tmp != cq) {
154 mlx5_core_warn(dev, "corruption on srqn 0x%x\n", cq->cqn);
155 return -EINVAL;
156 }
157
158 memset(&in, 0, sizeof(in));
159 memset(&out, 0, sizeof(out));
160 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
161 in.cqn = cpu_to_be32(cq->cqn);
162 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
163 if (err)
164 return err;
165
166 if (out.hdr.status)
167 return mlx5_cmd_status_to_err(&out.hdr);
168
169 synchronize_irq(cq->irqn);
170
171 mlx5_debug_cq_remove(dev, cq);
172 if (atomic_dec_and_test(&cq->refcount))
173 complete(&cq->free);
174 wait_for_completion(&cq->free);
175
176 return 0;
177}
178EXPORT_SYMBOL(mlx5_core_destroy_cq);
179
180int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
181 struct mlx5_query_cq_mbox_out *out)
182{
183 struct mlx5_query_cq_mbox_in in;
184 int err;
185
186 memset(&in, 0, sizeof(in));
187 memset(out, 0, sizeof(*out));
188
189 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ);
190 in.cqn = cpu_to_be32(cq->cqn);
191 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
192 if (err)
193 return err;
194
195 if (out->hdr.status)
196 return mlx5_cmd_status_to_err(&out->hdr);
197
198 return err;
199}
200EXPORT_SYMBOL(mlx5_core_query_cq);
201
202
203int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
204 int type, struct mlx5_cq_modify_params *params)
205{
206 return -ENOSYS;
207}
208
209int mlx5_init_cq_table(struct mlx5_core_dev *dev)
210{
211 struct mlx5_cq_table *table = &dev->priv.cq_table;
212 int err;
213
214 spin_lock_init(&table->lock);
215 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
216 err = mlx5_cq_debugfs_init(dev);
217
218 return err;
219}
220
221void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev)
222{
223 mlx5_cq_debugfs_cleanup(dev);
224}
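
The CQ table above pairs a radix-tree lookup with a reference count: both event paths (mlx5_cq_completion() and mlx5_cq_event()) take a reference under the table lock before calling the handler, and mlx5_core_destroy_cq() drops the initial reference and then sleeps on cq->free until the last in-flight handler releases its own. The userspace sketch below mirrors that lifetime rule with C11 atomics and a condition variable standing in for the kernel's atomic_t and struct completion; all names in it are illustrative, not the driver's.

/*
 * Userspace sketch (not driver code) of the lookup-with-refcount pattern:
 * event paths take a reference and drop it afterwards; destroy drops the
 * initial owner reference and then waits until the last user signals "free".
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct sketch_cq {
	atomic_int refcount;		/* starts at 1 (owner reference) */
	pthread_mutex_t free_lock;
	pthread_cond_t free_cond;
	bool freed;
	void (*comp)(struct sketch_cq *cq);
};

static void cq_put(struct sketch_cq *cq)
{
	if (atomic_fetch_sub(&cq->refcount, 1) == 1) {	/* dropped the last reference */
		pthread_mutex_lock(&cq->free_lock);
		cq->freed = true;
		pthread_cond_signal(&cq->free_cond);
		pthread_mutex_unlock(&cq->free_lock);
	}
}

/* Event path: take a reference, run the handler, drop the reference. */
static void handle_completion(struct sketch_cq *cq)
{
	atomic_fetch_add(&cq->refcount, 1);
	cq->comp(cq);
	cq_put(cq);
}

/* Destroy path: drop the owner reference, then wait for in-flight handlers. */
static void destroy_cq(struct sketch_cq *cq)
{
	cq_put(cq);
	pthread_mutex_lock(&cq->free_lock);
	while (!cq->freed)
		pthread_cond_wait(&cq->free_cond, &cq->free_lock);
	pthread_mutex_unlock(&cq->free_lock);
	printf("cq destroyed after last user dropped its reference\n");
}

static void my_comp(struct sketch_cq *cq)
{
	(void)cq;
	printf("completion handled\n");
}

int main(void)
{
	struct sketch_cq cq = {
		.refcount = 1, .freed = false, .comp = my_comp,
		.free_lock = PTHREAD_MUTEX_INITIALIZER,
		.free_cond = PTHREAD_COND_INITIALIZER,
	};

	handle_completion(&cq);
	destroy_cq(&cq);
	return 0;
}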
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
new file mode 100644
index 000000000000..4273c06e2e96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -0,0 +1,583 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/debugfs.h>
35#include <linux/mlx5/qp.h>
36#include <linux/mlx5/cq.h>
37#include <linux/mlx5/driver.h>
38#include "mlx5_core.h"
39
40enum {
41 QP_PID,
42 QP_STATE,
43 QP_XPORT,
44 QP_MTU,
45 QP_N_RECV,
46 QP_RECV_SZ,
47 QP_N_SEND,
48 QP_LOG_PG_SZ,
49 QP_RQPN,
50};
51
52static char *qp_fields[] = {
53 [QP_PID] = "pid",
54 [QP_STATE] = "state",
55 [QP_XPORT] = "transport",
56 [QP_MTU] = "mtu",
57 [QP_N_RECV] = "num_recv",
58 [QP_RECV_SZ] = "rcv_wqe_sz",
59 [QP_N_SEND] = "num_send",
60 [QP_LOG_PG_SZ] = "log2_page_sz",
61 [QP_RQPN] = "remote_qpn",
62};
63
64enum {
65 EQ_NUM_EQES,
66 EQ_INTR,
67 EQ_LOG_PG_SZ,
68};
69
70static char *eq_fields[] = {
71 [EQ_NUM_EQES] = "num_eqes",
72 [EQ_INTR] = "intr",
73 [EQ_LOG_PG_SZ] = "log_page_size",
74};
75
76enum {
77 CQ_PID,
78 CQ_NUM_CQES,
79 CQ_LOG_PG_SZ,
80};
81
82static char *cq_fields[] = {
83 [CQ_PID] = "pid",
84 [CQ_NUM_CQES] = "num_cqes",
85 [CQ_LOG_PG_SZ] = "log_page_size",
86};
87
88struct dentry *mlx5_debugfs_root;
89EXPORT_SYMBOL(mlx5_debugfs_root);
90
91void mlx5_register_debugfs(void)
92{
93 mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
94 if (IS_ERR_OR_NULL(mlx5_debugfs_root))
95 mlx5_debugfs_root = NULL;
96}
97
98void mlx5_unregister_debugfs(void)
99{
100 debugfs_remove(mlx5_debugfs_root);
101}
102
103int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
104{
105 if (!mlx5_debugfs_root)
106 return 0;
107
108 atomic_set(&dev->num_qps, 0);
109
110 dev->priv.qp_debugfs = debugfs_create_dir("QPs", dev->priv.dbg_root);
111 if (!dev->priv.qp_debugfs)
112 return -ENOMEM;
113
114 return 0;
115}
116
117void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
118{
119 if (!mlx5_debugfs_root)
120 return;
121
122 debugfs_remove_recursive(dev->priv.qp_debugfs);
123}
124
125int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
126{
127 if (!mlx5_debugfs_root)
128 return 0;
129
130 dev->priv.eq_debugfs = debugfs_create_dir("EQs", dev->priv.dbg_root);
131 if (!dev->priv.eq_debugfs)
132 return -ENOMEM;
133
134 return 0;
135}
136
137void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
138{
139 if (!mlx5_debugfs_root)
140 return;
141
142 debugfs_remove_recursive(dev->priv.eq_debugfs);
143}
144
145static ssize_t average_read(struct file *filp, char __user *buf, size_t count,
146 loff_t *pos)
147{
148 struct mlx5_cmd_stats *stats;
149 u64 field = 0;
150 int ret;
151 char tbuf[22];
152
153 if (*pos)
154 return 0;
155
156 stats = filp->private_data;
157 spin_lock(&stats->lock);
158 if (stats->n)
159 field = stats->sum / stats->n;
160 spin_unlock(&stats->lock);
161 ret = snprintf(tbuf, sizeof(tbuf), "%llu\n", field);
162 if (ret > 0) {
163 if (copy_to_user(buf, tbuf, ret))
164 return -EFAULT;
165 }
166
167 *pos += ret;
168 return ret;
169}
170
171
172static ssize_t average_write(struct file *filp, const char __user *buf,
173 size_t count, loff_t *pos)
174{
175 struct mlx5_cmd_stats *stats;
176
177 stats = filp->private_data;
178 spin_lock(&stats->lock);
179 stats->sum = 0;
180 stats->n = 0;
181 spin_unlock(&stats->lock);
182
183 *pos += count;
184
185 return count;
186}
187
188static const struct file_operations stats_fops = {
189 .owner = THIS_MODULE,
190 .open = simple_open,
191 .read = average_read,
192 .write = average_write,
193};
194
195int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
196{
197 struct mlx5_cmd_stats *stats;
198 struct dentry **cmd;
199 const char *namep;
200 int err;
201 int i;
202
203 if (!mlx5_debugfs_root)
204 return 0;
205
206 cmd = &dev->priv.cmdif_debugfs;
207 *cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
208 if (!*cmd)
209 return -ENOMEM;
210
211 for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
212 stats = &dev->cmd.stats[i];
213 namep = mlx5_command_str(i);
214 if (strcmp(namep, "unknown command opcode")) {
215 stats->root = debugfs_create_dir(namep, *cmd);
216 if (!stats->root) {
217 mlx5_core_warn(dev, "failed adding command %d\n",
218 i);
219 err = -ENOMEM;
220 goto out;
221 }
222
223 stats->avg = debugfs_create_file("average", 0400,
224 stats->root, stats,
225 &stats_fops);
226 if (!stats->avg) {
227 mlx5_core_warn(dev, "failed creating debugfs file\n");
228 err = -ENOMEM;
229 goto out;
230 }
231
232 stats->count = debugfs_create_u64("n", 0400,
233 stats->root,
234 &stats->n);
235 if (!stats->count) {
236 mlx5_core_warn(dev, "failed creating debugfs file\n");
237 err = -ENOMEM;
238 goto out;
239 }
240 }
241 }
242
243 return 0;
244out:
245 debugfs_remove_recursive(dev->priv.cmdif_debugfs);
246 return err;
247}
248
249void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
250{
251 if (!mlx5_debugfs_root)
252 return;
253
254 debugfs_remove_recursive(dev->priv.cmdif_debugfs);
255}
256
257int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
258{
259 if (!mlx5_debugfs_root)
260 return 0;
261
262 dev->priv.cq_debugfs = debugfs_create_dir("CQs", dev->priv.dbg_root);
263 if (!dev->priv.cq_debugfs)
264 return -ENOMEM;
265
266 return 0;
267}
268
269void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
270{
271 if (!mlx5_debugfs_root)
272 return;
273
274 debugfs_remove_recursive(dev->priv.cq_debugfs);
275}
276
277static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
278 int index)
279{
280 struct mlx5_query_qp_mbox_out *out;
281 struct mlx5_qp_context *ctx;
282 u64 param = 0;
283 int err;
284 int no_sq;
285
286 out = kzalloc(sizeof(*out), GFP_KERNEL);
287 if (!out)
288 return param;
289
290 err = mlx5_core_qp_query(dev, qp, out, sizeof(*out));
291 if (err) {
292 mlx5_core_warn(dev, "failed to query qp\n");
293 goto out;
294 }
295
296 ctx = &out->ctx;
297 switch (index) {
298 case QP_PID:
299 param = qp->pid;
300 break;
301 case QP_STATE:
302 param = be32_to_cpu(ctx->flags) >> 28;
303 break;
304 case QP_XPORT:
305 param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
306 break;
307 case QP_MTU:
308 param = ctx->mtu_msgmax >> 5;
309 break;
310 case QP_N_RECV:
311 param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
312 break;
313 case QP_RECV_SZ:
314 param = 1 << ((ctx->rq_size_stride & 7) + 4);
315 break;
316 case QP_N_SEND:
317 no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
318 if (!no_sq)
319 param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
320 else
321 param = 0;
322 break;
323 case QP_LOG_PG_SZ:
324 param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
325 param += 12;
326 break;
327 case QP_RQPN:
328 param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
329 break;
330 }
331
332out:
333 kfree(out);
334 return param;
335}
336
337static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
338 int index)
339{
340 struct mlx5_query_eq_mbox_out *out;
341 struct mlx5_eq_context *ctx;
342 u64 param = 0;
343 int err;
344
345 out = kzalloc(sizeof(*out), GFP_KERNEL);
346 if (!out)
347 return param;
348
349 ctx = &out->ctx;
350
351 err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
352 if (err) {
353 mlx5_core_warn(dev, "failed to query eq\n");
354 goto out;
355 }
356
357 switch (index) {
358 case EQ_NUM_EQES:
359 param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
360 break;
361 case EQ_INTR:
362 param = ctx->intr;
363 break;
364 case EQ_LOG_PG_SZ:
365 param = (ctx->log_page_size & 0x1f) + 12;
366 break;
367 }
368
369out:
370 kfree(out);
371 return param;
372}
373
374static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
375 int index)
376{
377 struct mlx5_query_cq_mbox_out *out;
378 struct mlx5_cq_context *ctx;
379 u64 param = 0;
380 int err;
381
382 out = kzalloc(sizeof(*out), GFP_KERNEL);
383 if (!out)
384 return param;
385
386 ctx = &out->ctx;
387
388 err = mlx5_core_query_cq(dev, cq, out);
389 if (err) {
390 mlx5_core_warn(dev, "failed to query cq\n");
391 goto out;
392 }
393
394 switch (index) {
395 case CQ_PID:
396 param = cq->pid;
397 break;
398 case CQ_NUM_CQES:
399 param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
400 break;
401 case CQ_LOG_PG_SZ:
402 param = (ctx->log_pg_sz & 0x1f) + 12;
403 break;
404 }
405
406out:
407 kfree(out);
408 return param;
409}
410
411static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
412 loff_t *pos)
413{
414 struct mlx5_field_desc *desc;
415 struct mlx5_rsc_debug *d;
416 char tbuf[18];
417 u64 field;
418 int ret;
419
420 if (*pos)
421 return 0;
422
423 desc = filp->private_data;
424 d = (void *)(desc - desc->i) - sizeof(*d);
425 switch (d->type) {
426 case MLX5_DBG_RSC_QP:
427 field = qp_read_field(d->dev, d->object, desc->i);
428 break;
429
430 case MLX5_DBG_RSC_EQ:
431 field = eq_read_field(d->dev, d->object, desc->i);
432 break;
433
434 case MLX5_DBG_RSC_CQ:
435 field = cq_read_field(d->dev, d->object, desc->i);
436 break;
437
438 default:
439 mlx5_core_warn(d->dev, "invalid resource type %d\n", d->type);
440 return -EINVAL;
441 }
442
443 ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
444 if (ret > 0) {
445 if (copy_to_user(buf, tbuf, ret))
446 return -EFAULT;
447 }
448
449 *pos += ret;
450 return ret;
451}
452
453static const struct file_operations fops = {
454 .owner = THIS_MODULE,
455 .open = simple_open,
456 .read = dbg_read,
457};
458
459static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
460 struct dentry *root, struct mlx5_rsc_debug **dbg,
461 int rsn, char **field, int nfile, void *data)
462{
463 struct mlx5_rsc_debug *d;
464 char resn[32];
465 int err;
466 int i;
467
468 d = kzalloc(sizeof(*d) + nfile * sizeof(d->fields[0]), GFP_KERNEL);
469 if (!d)
470 return -ENOMEM;
471
472 d->dev = dev;
473 d->object = data;
474 d->type = type;
475 sprintf(resn, "0x%x", rsn);
476 d->root = debugfs_create_dir(resn, root);
477 if (!d->root) {
478 err = -ENOMEM;
479 goto out_free;
480 }
481
482 for (i = 0; i < nfile; i++) {
483 d->fields[i].i = i;
484 d->fields[i].dent = debugfs_create_file(field[i], 0400,
485 d->root, &d->fields[i],
486 &fops);
487 if (!d->fields[i].dent) {
488 err = -ENOMEM;
489 goto out_rem;
490 }
491 }
492 *dbg = d;
493
494 return 0;
495out_rem:
496 debugfs_remove_recursive(d->root);
497
498out_free:
499 kfree(d);
500 return err;
501}
502
503static void rem_res_tree(struct mlx5_rsc_debug *d)
504{
505 debugfs_remove_recursive(d->root);
506 kfree(d);
507}
508
509int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
510{
511 int err;
512
513 if (!mlx5_debugfs_root)
514 return 0;
515
516 err = add_res_tree(dev, MLX5_DBG_RSC_QP, dev->priv.qp_debugfs,
517 &qp->dbg, qp->qpn, qp_fields,
518 ARRAY_SIZE(qp_fields), qp);
519 if (err)
520 qp->dbg = NULL;
521
522 return err;
523}
524
525void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
526{
527 if (!mlx5_debugfs_root)
528 return;
529
530 if (qp->dbg)
531 rem_res_tree(qp->dbg);
532}
533
534
535int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
536{
537 int err;
538
539 if (!mlx5_debugfs_root)
540 return 0;
541
542 err = add_res_tree(dev, MLX5_DBG_RSC_EQ, dev->priv.eq_debugfs,
543 &eq->dbg, eq->eqn, eq_fields,
544 ARRAY_SIZE(eq_fields), eq);
545 if (err)
546 eq->dbg = NULL;
547
548 return err;
549}
550
551void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
552{
553 if (!mlx5_debugfs_root)
554 return;
555
556 if (eq->dbg)
557 rem_res_tree(eq->dbg);
558}
559
560int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
561{
562 int err;
563
564 if (!mlx5_debugfs_root)
565 return 0;
566
567 err = add_res_tree(dev, MLX5_DBG_RSC_CQ, dev->priv.cq_debugfs,
568 &cq->dbg, cq->cqn, cq_fields,
569 ARRAY_SIZE(cq_fields), cq);
570 if (err)
571 cq->dbg = NULL;
572
573 return err;
574}
575
576void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
577{
578 if (!mlx5_debugfs_root)
579 return;
580
581 if (cq->dbg)
582 rem_res_tree(cq->dbg);
583}
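
A detail worth calling out in dbg_read() above: every per-field debugfs file shares one file_operations, and the handler recovers the owning mlx5_rsc_debug purely by pointer arithmetic, because the field descriptors sit in a flexible array at the end of the structure and each descriptor records its own index (desc - desc->i lands on fields[0], and stepping back sizeof(*d) lands on the container). The userspace sketch below, with illustrative type names rather than the driver's, shows the same arithmetic and checks it with an assert.

/*
 * Userspace sketch of the back-pointer arithmetic in dbg_read(): each field
 * descriptor records its own index, so the container can be recovered without
 * storing a parent pointer per field.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct field_desc {
	int i;				/* index of this descriptor in fields[] */
};

struct rsc_debug {
	const char *name;
	struct field_desc fields[];	/* flexible array, directly after the header */
};

static struct rsc_debug *desc_to_container(struct field_desc *desc)
{
	/* fields[0] sits right after the fixed part of struct rsc_debug */
	return (struct rsc_debug *)((char *)(desc - desc->i) - sizeof(struct rsc_debug));
}

int main(void)
{
	int nfields = 3;
	struct rsc_debug *d = malloc(sizeof(*d) + nfields * sizeof(d->fields[0]));

	d->name = "QP 0x17";
	for (int i = 0; i < nfields; i++)
		d->fields[i].i = i;

	/* From any field descriptor we can get back to its container. */
	assert(desc_to_container(&d->fields[2]) == d);
	printf("field 2 belongs to %s\n", desc_to_container(&d->fields[2])->name);
	free(d);
	return 0;
}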
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
new file mode 100644
index 000000000000..c02cbcfd0fb8
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -0,0 +1,521 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/interrupt.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h"
38
39enum {
40 MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
41 MLX5_EQE_OWNER_INIT_VAL = 0x1,
42};
43
44enum {
45 MLX5_EQ_STATE_ARMED = 0x9,
46 MLX5_EQ_STATE_FIRED = 0xa,
47 MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
48};
49
50enum {
51 MLX5_NUM_SPARE_EQE = 0x80,
52 MLX5_NUM_ASYNC_EQE = 0x100,
53 MLX5_NUM_CMD_EQE = 32,
54};
55
56enum {
57 MLX5_EQ_DOORBEL_OFFSET = 0x40,
58};
59
60#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
61 (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
62 (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
63 (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
64 (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
65 (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
66 (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
67 (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
68 (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
69 (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
70 (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
71 (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))
72
73struct map_eq_in {
74 u64 mask;
75 u32 reserved;
76 u32 unmap_eqn;
77};
78
79struct cre_des_eq {
80 u8 reserved[15];
81 u8 eqn;
82};
83
84static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
85{
86 struct mlx5_destroy_eq_mbox_in in;
87 struct mlx5_destroy_eq_mbox_out out;
88 int err;
89
90 memset(&in, 0, sizeof(in));
91 memset(&out, 0, sizeof(out));
92 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
93 in.eqn = eqn;
94 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
95	if (err)
96 goto ex;
97
98 if (out.hdr.status)
99 err = mlx5_cmd_status_to_err(&out.hdr);
100
101ex:
102 return err;
103}
104
105static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
106{
107 return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
108}
109
110static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
111{
112 struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));
113
114 return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
115}
116
117static const char *eqe_type_str(u8 type)
118{
119 switch (type) {
120 case MLX5_EVENT_TYPE_COMP:
121 return "MLX5_EVENT_TYPE_COMP";
122 case MLX5_EVENT_TYPE_PATH_MIG:
123 return "MLX5_EVENT_TYPE_PATH_MIG";
124 case MLX5_EVENT_TYPE_COMM_EST:
125 return "MLX5_EVENT_TYPE_COMM_EST";
126 case MLX5_EVENT_TYPE_SQ_DRAINED:
127 return "MLX5_EVENT_TYPE_SQ_DRAINED";
128 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
129 return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
130 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
131 return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
132 case MLX5_EVENT_TYPE_CQ_ERROR:
133 return "MLX5_EVENT_TYPE_CQ_ERROR";
134 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
135 return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
136 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
137 return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
138 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
139 return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
140 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
141 return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
142 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
143 return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
144 case MLX5_EVENT_TYPE_INTERNAL_ERROR:
145 return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
146 case MLX5_EVENT_TYPE_PORT_CHANGE:
147 return "MLX5_EVENT_TYPE_PORT_CHANGE";
148 case MLX5_EVENT_TYPE_GPIO_EVENT:
149 return "MLX5_EVENT_TYPE_GPIO_EVENT";
150 case MLX5_EVENT_TYPE_REMOTE_CONFIG:
151 return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
152 case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
153 return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
154 case MLX5_EVENT_TYPE_STALL_EVENT:
155 return "MLX5_EVENT_TYPE_STALL_EVENT";
156 case MLX5_EVENT_TYPE_CMD:
157 return "MLX5_EVENT_TYPE_CMD";
158 case MLX5_EVENT_TYPE_PAGE_REQUEST:
159 return "MLX5_EVENT_TYPE_PAGE_REQUEST";
160 default:
161 return "Unrecognized event";
162 }
163}
164
165static enum mlx5_dev_event port_subtype_event(u8 subtype)
166{
167 switch (subtype) {
168 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
169 return MLX5_DEV_EVENT_PORT_DOWN;
170 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
171 return MLX5_DEV_EVENT_PORT_UP;
172 case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
173 return MLX5_DEV_EVENT_PORT_INITIALIZED;
174 case MLX5_PORT_CHANGE_SUBTYPE_LID:
175 return MLX5_DEV_EVENT_LID_CHANGE;
176 case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
177 return MLX5_DEV_EVENT_PKEY_CHANGE;
178 case MLX5_PORT_CHANGE_SUBTYPE_GUID:
179 return MLX5_DEV_EVENT_GUID_CHANGE;
180 case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
181 return MLX5_DEV_EVENT_CLIENT_REREG;
182 }
183 return -1;
184}
185
186static void eq_update_ci(struct mlx5_eq *eq, int arm)
187{
188 __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
189 u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);
190 __raw_writel((__force u32) cpu_to_be32(val), addr);
191 /* We still want ordering, just not swabbing, so add a barrier */
192 mb();
193}
194
195static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
196{
197 struct mlx5_eqe *eqe;
198 int eqes_found = 0;
199 int set_ci = 0;
200 u32 cqn;
201 u32 srqn;
202 u8 port;
203
204 while ((eqe = next_eqe_sw(eq))) {
205 /*
206 * Make sure we read EQ entry contents after we've
207 * checked the ownership bit.
208 */
209 rmb();
210
211 mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n", eq->eqn, eqe_type_str(eqe->type));
212 switch (eqe->type) {
213 case MLX5_EVENT_TYPE_COMP:
214 cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
215 mlx5_cq_completion(dev, cqn);
216 break;
217
218 case MLX5_EVENT_TYPE_PATH_MIG:
219 case MLX5_EVENT_TYPE_COMM_EST:
220 case MLX5_EVENT_TYPE_SQ_DRAINED:
221 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
222 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
223 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
224 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
225 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
226 mlx5_core_dbg(dev, "event %s(%d) arrived\n",
227 eqe_type_str(eqe->type), eqe->type);
228 mlx5_qp_event(dev, be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff,
229 eqe->type);
230 break;
231
232 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
233 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
234 srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
235 mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
236 eqe_type_str(eqe->type), eqe->type, srqn);
237 mlx5_srq_event(dev, srqn, eqe->type);
238 break;
239
240 case MLX5_EVENT_TYPE_CMD:
241 mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector));
242 break;
243
244 case MLX5_EVENT_TYPE_PORT_CHANGE:
245 port = (eqe->data.port.port >> 4) & 0xf;
246 switch (eqe->sub_type) {
247 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
248 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
249 case MLX5_PORT_CHANGE_SUBTYPE_LID:
250 case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
251 case MLX5_PORT_CHANGE_SUBTYPE_GUID:
252 case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
253 case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
254 dev->event(dev, port_subtype_event(eqe->sub_type), &port);
255 break;
256 default:
257 mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
258 port, eqe->sub_type);
259 }
260 break;
261 case MLX5_EVENT_TYPE_CQ_ERROR:
262 cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
263			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
264 cqn, eqe->data.cq_err.syndrome);
265 mlx5_cq_event(dev, cqn, eqe->type);
266 break;
267
268 case MLX5_EVENT_TYPE_PAGE_REQUEST:
269 {
270 u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
271 s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
272
273			mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n", func_id, npages);
274 mlx5_core_req_pages_handler(dev, func_id, npages);
275 }
276 break;
277
278
279 default:
280 mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn);
281 break;
282 }
283
284 ++eq->cons_index;
285 eqes_found = 1;
286 ++set_ci;
287
288 /* The HCA will think the queue has overflowed if we
289 * don't tell it we've been processing events. We
290 * create our EQs with MLX5_NUM_SPARE_EQE extra
291 * entries, so we must update our consumer index at
292 * least that often.
293 */
294 if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
295 eq_update_ci(eq, 0);
296 set_ci = 0;
297 }
298 }
299
300 eq_update_ci(eq, 1);
301
302 return eqes_found;
303}
304
305static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
306{
307 struct mlx5_eq *eq = eq_ptr;
308 struct mlx5_core_dev *dev = eq->dev;
309
310 mlx5_eq_int(dev, eq);
311
312 /* MSI-X vectors always belong to us */
313 return IRQ_HANDLED;
314}
315
316static void init_eq_buf(struct mlx5_eq *eq)
317{
318 struct mlx5_eqe *eqe;
319 int i;
320
321 for (i = 0; i < eq->nent; i++) {
322 eqe = get_eqe(eq, i);
323 eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
324 }
325}
326
327int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
328 int nent, u64 mask, const char *name, struct mlx5_uar *uar)
329{
330 struct mlx5_eq_table *table = &dev->priv.eq_table;
331 struct mlx5_create_eq_mbox_in *in;
332 struct mlx5_create_eq_mbox_out out;
333 int err;
334 int inlen;
335
336 eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
337 err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, 2 * PAGE_SIZE,
338 &eq->buf);
339 if (err)
340 return err;
341
342 init_eq_buf(eq);
343
344 inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
345 in = mlx5_vzalloc(inlen);
346 if (!in) {
347 err = -ENOMEM;
348 goto err_buf;
349 }
350 memset(&out, 0, sizeof(out));
351
352 mlx5_fill_page_array(&eq->buf, in->pas);
353
354 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
355 in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
356 in->ctx.intr = vecidx;
357 in->ctx.log_page_size = PAGE_SHIFT - 12;
358 in->events_mask = cpu_to_be64(mask);
359
360 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
361 if (err)
362 goto err_in;
363
364 if (out.hdr.status) {
365 err = mlx5_cmd_status_to_err(&out.hdr);
366 goto err_in;
367 }
368
369 eq->eqn = out.eq_number;
370 err = request_irq(table->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
371 name, eq);
372 if (err)
373 goto err_eq;
374
375 eq->irqn = vecidx;
376 eq->dev = dev;
377 eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
378
379 err = mlx5_debug_eq_add(dev, eq);
380 if (err)
381 goto err_irq;
382
383 /* EQs are created in ARMED state
384 */
385 eq_update_ci(eq, 1);
386
387 mlx5_vfree(in);
388 return 0;
389
390err_irq:
391 free_irq(table->msix_arr[vecidx].vector, eq);
392
393err_eq:
394 mlx5_cmd_destroy_eq(dev, eq->eqn);
395
396err_in:
397 mlx5_vfree(in);
398
399err_buf:
400 mlx5_buf_free(dev, &eq->buf);
401 return err;
402}
403EXPORT_SYMBOL_GPL(mlx5_create_map_eq);
404
405int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
406{
407 struct mlx5_eq_table *table = &dev->priv.eq_table;
408 int err;
409
410 mlx5_debug_eq_remove(dev, eq);
411 free_irq(table->msix_arr[eq->irqn].vector, eq);
412 err = mlx5_cmd_destroy_eq(dev, eq->eqn);
413 if (err)
414 mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
415 eq->eqn);
416 mlx5_buf_free(dev, &eq->buf);
417
418 return err;
419}
420EXPORT_SYMBOL_GPL(mlx5_destroy_unmap_eq);
421
422int mlx5_eq_init(struct mlx5_core_dev *dev)
423{
424 int err;
425
426 spin_lock_init(&dev->priv.eq_table.lock);
427
428 err = mlx5_eq_debugfs_init(dev);
429
430 return err;
431}
432
433
434void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
435{
436 mlx5_eq_debugfs_cleanup(dev);
437}
438
439int mlx5_start_eqs(struct mlx5_core_dev *dev)
440{
441 struct mlx5_eq_table *table = &dev->priv.eq_table;
442 int err;
443
444 err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
445 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
446 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
447 if (err) {
448 mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
449 return err;
450 }
451
452 mlx5_cmd_use_events(dev);
453
454 err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
455 MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
456 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
457 if (err) {
458 mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
459 goto err1;
460 }
461
462 err = mlx5_create_map_eq(dev, &table->pages_eq,
463 MLX5_EQ_VEC_PAGES,
464 dev->caps.max_vf + 1,
465 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
466 &dev->priv.uuari.uars[0]);
467 if (err) {
468 mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
469 goto err2;
470 }
471
472 return err;
473
474err2:
475 mlx5_destroy_unmap_eq(dev, &table->async_eq);
476
477err1:
478 mlx5_cmd_use_polling(dev);
479 mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
480 return err;
481}
482
483int mlx5_stop_eqs(struct mlx5_core_dev *dev)
484{
485 struct mlx5_eq_table *table = &dev->priv.eq_table;
486 int err;
487
488 err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
489 if (err)
490 return err;
491
492 mlx5_destroy_unmap_eq(dev, &table->async_eq);
493 mlx5_cmd_use_polling(dev);
494
495 err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
496 if (err)
497 mlx5_cmd_use_events(dev);
498
499 return err;
500}
501
502int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
503 struct mlx5_query_eq_mbox_out *out, int outlen)
504{
505 struct mlx5_query_eq_mbox_in in;
506 int err;
507
508 memset(&in, 0, sizeof(in));
509 memset(out, 0, outlen);
510 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
511 in.eqn = eq->eqn;
512 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
513 if (err)
514 return err;
515
516 if (out->hdr.status)
517 err = mlx5_cmd_status_to_err(&out->hdr);
518
519 return err;
520}
521EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
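
The event queue above exposes no producer index to software; next_eqe_sw() decides ownership from a single bit. Entries are initialized with the owner bit set (MLX5_EQE_OWNER_INIT_VAL), and an entry counts as software-owned only when its owner bit matches the wrap parity of the consumer index (cons_index & nent), so the polarity that means "valid" flips on every pass over the ring and stale entries from the previous pass are never consumed twice. The short userspace sketch below simulates that test; the ring, the fake producer, and all names are illustrative, not the driver's.

/*
 * Userspace sketch of the EQ ownership test in next_eqe_sw(): an entry is
 * software-owned when its owner bit equals the wrap parity of the consumer
 * index, so the "valid" polarity flips on every pass over the ring.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NENT 8		/* must be a power of two, as in the driver */

struct sketch_eqe {
	uint8_t owner;
};

static struct sketch_eqe ring[NENT];
static uint32_t cons_index;

static bool eqe_is_sw_owned(const struct sketch_eqe *eqe)
{
	/* Same test as next_eqe_sw(): owner bit XOR wrap parity must be 0. */
	return !((eqe->owner & 1) ^ !!(cons_index & NENT));
}

/* Simulated producer: write the owner bit with the parity of its own pass. */
static void hw_post_event(uint32_t prod_index)
{
	ring[prod_index & (NENT - 1)].owner = !!(prod_index & NENT);
}

int main(void)
{
	for (int i = 0; i < NENT; i++)
		ring[i].owner = 1;	/* init value: nothing is valid yet */

	printf("before posting: sw owned = %d\n",
	       eqe_is_sw_owned(&ring[cons_index & (NENT - 1)]));

	hw_post_event(0);		/* producer writes entry 0 on the first pass */
	printf("after posting:  sw owned = %d\n",
	       eqe_is_sw_owned(&ring[cons_index & (NENT - 1)]));

	cons_index++;			/* consume it, as mlx5_eq_int() does */
	return 0;
}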
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
new file mode 100644
index 000000000000..72a5222447f5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -0,0 +1,185 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx5/driver.h>
34#include <linux/mlx5/cmd.h>
35#include <linux/module.h>
36#include "mlx5_core.h"
37
38int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev)
39{
40 struct mlx5_cmd_query_adapter_mbox_out *out;
41 struct mlx5_cmd_query_adapter_mbox_in in;
42 int err;
43
44 out = kzalloc(sizeof(*out), GFP_KERNEL);
45 if (!out)
46 return -ENOMEM;
47
48 memset(&in, 0, sizeof(in));
49 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_ADAPTER);
50 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
51 if (err)
52 goto out_out;
53
54 if (out->hdr.status) {
55 err = mlx5_cmd_status_to_err(&out->hdr);
56 goto out_out;
57 }
58
59 memcpy(dev->board_id, out->vsd_psid, sizeof(out->vsd_psid));
60
61out_out:
62 kfree(out);
63
64 return err;
65}
66
67int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
68 struct mlx5_caps *caps)
69{
70 struct mlx5_cmd_query_hca_cap_mbox_out *out;
71 struct mlx5_cmd_query_hca_cap_mbox_in in;
72 struct mlx5_query_special_ctxs_mbox_out ctx_out;
73 struct mlx5_query_special_ctxs_mbox_in ctx_in;
74 int err;
75 u16 t16;
76
77 out = kzalloc(sizeof(*out), GFP_KERNEL);
78 if (!out)
79 return -ENOMEM;
80
81 memset(&in, 0, sizeof(in));
82 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
83 in.hdr.opmod = cpu_to_be16(0x1);
84 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
85 if (err)
86 goto out_out;
87
88 if (out->hdr.status) {
89 err = mlx5_cmd_status_to_err(&out->hdr);
90 goto out_out;
91 }
92
93
94 caps->log_max_eq = out->hca_cap.log_max_eq & 0xf;
95 caps->max_cqes = 1 << out->hca_cap.log_max_cq_sz;
96 caps->max_wqes = 1 << out->hca_cap.log_max_qp_sz;
97 caps->max_sq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_sq);
98 caps->max_rq_desc_sz = be16_to_cpu(out->hca_cap.max_desc_sz_rq);
99 caps->flags = be64_to_cpu(out->hca_cap.flags);
100 caps->stat_rate_support = be16_to_cpu(out->hca_cap.stat_rate_support);
101 caps->log_max_msg = out->hca_cap.log_max_msg & 0x1f;
102 caps->num_ports = out->hca_cap.num_ports & 0xf;
103 caps->log_max_cq = out->hca_cap.log_max_cq & 0x1f;
104 if (caps->num_ports > MLX5_MAX_PORTS) {
105 mlx5_core_err(dev, "device has %d ports while the driver supports max %d ports\n",
106 caps->num_ports, MLX5_MAX_PORTS);
107 err = -EINVAL;
108 goto out_out;
109 }
110 caps->log_max_qp = out->hca_cap.log_max_qp & 0x1f;
111 caps->log_max_mkey = out->hca_cap.log_max_mkey & 0x3f;
112 caps->log_max_pd = out->hca_cap.log_max_pd & 0x1f;
113 caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
114 caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
115 caps->log_max_mcg = out->hca_cap.log_max_mcg;
116 caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
117 caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
118 caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
119 caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
120 t16 = be16_to_cpu(out->hca_cap.bf_log_bf_reg_size);
121 if (t16 & 0x8000) {
122 caps->bf_reg_size = 1 << (t16 & 0x1f);
123 caps->bf_regs_per_page = MLX5_BF_REGS_PER_PAGE;
124 } else {
125 caps->bf_reg_size = 0;
126 caps->bf_regs_per_page = 0;
127 }
128 caps->min_page_sz = ~(u32)((1 << out->hca_cap.log_pg_sz) - 1);
129
130 memset(&ctx_in, 0, sizeof(ctx_in));
131 memset(&ctx_out, 0, sizeof(ctx_out));
132 ctx_in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
133 err = mlx5_cmd_exec(dev, &ctx_in, sizeof(ctx_in),
134 &ctx_out, sizeof(ctx_out));
135 if (err)
136 goto out_out;
137
138 if (ctx_out.hdr.status)
139 err = mlx5_cmd_status_to_err(&ctx_out.hdr);
140
141 caps->reserved_lkey = be32_to_cpu(ctx_out.reserved_lkey);
142
143out_out:
144 kfree(out);
145
146 return err;
147}
148
149int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
150{
151 struct mlx5_cmd_init_hca_mbox_in in;
152 struct mlx5_cmd_init_hca_mbox_out out;
153 int err;
154
155 memset(&in, 0, sizeof(in));
156 memset(&out, 0, sizeof(out));
157 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
158 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
159 if (err)
160 return err;
161
162 if (out.hdr.status)
163 err = mlx5_cmd_status_to_err(&out.hdr);
164
165 return err;
166}
167
168int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
169{
170 struct mlx5_cmd_teardown_hca_mbox_in in;
171 struct mlx5_cmd_teardown_hca_mbox_out out;
172 int err;
173
174 memset(&in, 0, sizeof(in));
175 memset(&out, 0, sizeof(out));
176 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
177 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
178 if (err)
179 return err;
180
181 if (out.hdr.status)
182 err = mlx5_cmd_status_to_err(&out.hdr);
183
184 return err;
185}
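
Every firmware command in this file follows the same mailbox convention: zero an input and an output box, write the opcode (and, for QUERY_HCA_CAP, an opmod) into the big-endian header, run mlx5_cmd_exec(), and then treat a nonzero out.hdr.status as a firmware-reported failure, a separate failure channel from a negative return value of the exec call itself. The sketch below restates that convention in plain userspace C; the box layouts, the opcode value, and cmd_exec() are stand-ins, not the driver's definitions.

/*
 * Userspace sketch of the mailbox calling convention used throughout fw.c:
 * transport errors come back as a negative return value from the exec step,
 * while firmware-level failures are reported in the output header's status.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct inbox_hdr  { uint16_t opcode; uint16_t opmod; };
struct outbox_hdr { uint8_t status; uint8_t syndrome[3]; };

struct init_hca_in  { struct inbox_hdr hdr; uint8_t rsvd[8]; };
struct init_hca_out { struct outbox_hdr hdr; uint8_t rsvd[8]; };

#define CMD_OP_INIT_HCA 0x102	/* illustrative opcode value */

/* Stand-in for mlx5_cmd_exec(): pretend the command always reaches firmware. */
static int cmd_exec(const void *in, size_t inlen, void *out, size_t outlen)
{
	(void)in; (void)inlen;
	memset(out, 0, outlen);		/* firmware reports status 0: success */
	return 0;
}

static int status_to_err(const struct outbox_hdr *hdr)
{
	return hdr->status ? -EIO : 0;
}

static int init_hca(void)
{
	struct init_hca_in in;
	struct init_hca_out out;
	int err;

	memset(&in, 0, sizeof(in));
	memset(&out, 0, sizeof(out));
	in.hdr.opcode = CMD_OP_INIT_HCA;	/* the driver byte-swaps with cpu_to_be16() */

	err = cmd_exec(&in, sizeof(in), &out, sizeof(out));
	if (err)
		return err;			/* transport/queueing failure */

	return status_to_err(&out.hdr);		/* firmware-reported failure */
}

int main(void)
{
	printf("init_hca -> %d\n", init_hca());
	return 0;
}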
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
new file mode 100644
index 000000000000..748f10a155c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -0,0 +1,227 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/random.h>
36#include <linux/vmalloc.h>
37#include <linux/mlx5/driver.h>
38#include <linux/mlx5/cmd.h>
39#include "mlx5_core.h"
40
41enum {
42 MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
43 MAX_MISSES = 3,
44};
45
46enum {
47 MLX5_HEALTH_SYNDR_FW_ERR = 0x1,
48 MLX5_HEALTH_SYNDR_IRISC_ERR = 0x7,
49 MLX5_HEALTH_SYNDR_CRC_ERR = 0x9,
50 MLX5_HEALTH_SYNDR_FETCH_PCI_ERR = 0xa,
51 MLX5_HEALTH_SYNDR_HW_FTL_ERR = 0xb,
52 MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR = 0xc,
53 MLX5_HEALTH_SYNDR_EQ_ERR = 0xd,
54 MLX5_HEALTH_SYNDR_FFSER_ERR = 0xf,
55};
56
57static DEFINE_SPINLOCK(health_lock);
58
59static LIST_HEAD(health_list);
60static struct work_struct health_work;
61
62static health_handler_t reg_handler;
63int mlx5_register_health_report_handler(health_handler_t handler)
64{
65 spin_lock_irq(&health_lock);
66 if (reg_handler) {
67 spin_unlock_irq(&health_lock);
68 return -EEXIST;
69 }
70 reg_handler = handler;
71 spin_unlock_irq(&health_lock);
72
73 return 0;
74}
75EXPORT_SYMBOL(mlx5_register_health_report_handler);
76
77void mlx5_unregister_health_report_handler(void)
78{
79 spin_lock_irq(&health_lock);
80 reg_handler = NULL;
81 spin_unlock_irq(&health_lock);
82}
83EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
84
85static void health_care(struct work_struct *work)
86{
87 struct mlx5_core_health *health, *n;
88 struct mlx5_core_dev *dev;
89 struct mlx5_priv *priv;
90 LIST_HEAD(tlist);
91
92 spin_lock_irq(&health_lock);
93 list_splice_init(&health_list, &tlist);
94
95 spin_unlock_irq(&health_lock);
96
97 list_for_each_entry_safe(health, n, &tlist, list) {
98 priv = container_of(health, struct mlx5_priv, health);
99 dev = container_of(priv, struct mlx5_core_dev, priv);
100 mlx5_core_warn(dev, "handling bad device here\n");
101 spin_lock_irq(&health_lock);
102 if (reg_handler)
103 reg_handler(dev->pdev, health->health,
104 sizeof(health->health));
105
106 list_del_init(&health->list);
107 spin_unlock_irq(&health_lock);
108 }
109}
110
111static const char *hsynd_str(u8 synd)
112{
113 switch (synd) {
114 case MLX5_HEALTH_SYNDR_FW_ERR:
115 return "firmware internal error";
116 case MLX5_HEALTH_SYNDR_IRISC_ERR:
117 return "irisc not responding";
118 case MLX5_HEALTH_SYNDR_CRC_ERR:
119 return "firmware CRC error";
120 case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
121 return "ICM fetch PCI error";
122 case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
123		return "HW fatal error";
124 case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
125 return "async EQ buffer overrun";
126 case MLX5_HEALTH_SYNDR_EQ_ERR:
127 return "EQ error";
128 case MLX5_HEALTH_SYNDR_FFSER_ERR:
129 return "FFSER error";
130 default:
131 return "unrecognized error";
132 }
133}
134
135static u16 read_be16(__be16 __iomem *p)
136{
137 return swab16(readl((__force u16 __iomem *) p));
138}
139
140static u32 read_be32(__be32 __iomem *p)
141{
142 return swab32(readl((__force u32 __iomem *) p));
143}
144
145static void print_health_info(struct mlx5_core_dev *dev)
146{
147 struct mlx5_core_health *health = &dev->priv.health;
148 struct health_buffer __iomem *h = health->health;
149 int i;
150
151 for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
152 pr_info("assert_var[%d] 0x%08x\n", i, read_be32(h->assert_var + i));
153
154 pr_info("assert_exit_ptr 0x%08x\n", read_be32(&h->assert_exit_ptr));
155 pr_info("assert_callra 0x%08x\n", read_be32(&h->assert_callra));
156 pr_info("fw_ver 0x%08x\n", read_be32(&h->fw_ver));
157 pr_info("hw_id 0x%08x\n", read_be32(&h->hw_id));
158 pr_info("irisc_index %d\n", readb(&h->irisc_index));
159 pr_info("synd 0x%x: %s\n", readb(&h->synd), hsynd_str(readb(&h->synd)));
160 pr_info("ext_sync 0x%04x\n", read_be16(&h->ext_sync));
161}
162
163static void poll_health(unsigned long data)
164{
165 struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
166 struct mlx5_core_health *health = &dev->priv.health;
167 unsigned long next;
168 u32 count;
169
170 count = ioread32be(health->health_counter);
171 if (count == health->prev)
172 ++health->miss_counter;
173 else
174 health->miss_counter = 0;
175
176 health->prev = count;
177 if (health->miss_counter == MAX_MISSES) {
178 mlx5_core_err(dev, "device's health compromised\n");
179 print_health_info(dev);
180 spin_lock_irq(&health_lock);
181 list_add_tail(&health->list, &health_list);
182 spin_unlock_irq(&health_lock);
183
184 queue_work(mlx5_core_wq, &health_work);
185 } else {
186 get_random_bytes(&next, sizeof(next));
187 next %= HZ;
188 next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
189 mod_timer(&health->timer, next);
190 }
191}
192
193void mlx5_start_health_poll(struct mlx5_core_dev *dev)
194{
195 struct mlx5_core_health *health = &dev->priv.health;
196
197 INIT_LIST_HEAD(&health->list);
198 init_timer(&health->timer);
199 health->health = &dev->iseg->health;
200 health->health_counter = &dev->iseg->health_counter;
201
202 health->timer.data = (unsigned long)dev;
203 health->timer.function = poll_health;
204 health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
205 add_timer(&health->timer);
206}
207
208void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
209{
210 struct mlx5_core_health *health = &dev->priv.health;
211
212 del_timer_sync(&health->timer);
213
214 spin_lock_irq(&health_lock);
215 if (!list_empty(&health->list))
216 list_del_init(&health->list);
217 spin_unlock_irq(&health_lock);
218}
219
220void mlx5_health_cleanup(void)
221{
222}
223
224void __init mlx5_health_init(void)
225{
226 INIT_WORK(&health_work, health_care);
227}
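
The health poller above is a watchdog on a counter the firmware increments: each timer tick reads health_counter, and only when the value fails to advance for MAX_MISSES consecutive polls is the device declared compromised and queued for the health_care() work item; otherwise the next poll is rescheduled roughly MLX5_HEALTH_POLL_INTERVAL ahead with a random sub-second offset so multiple devices don't poll in lockstep. The userspace sketch below isolates just the miss-counter decision; the counter samples and names are illustrative.

/*
 * Userspace sketch of the miss-counter logic in poll_health(): the health
 * counter must keep advancing between polls; MAX_MISSES consecutive polls
 * without progress trigger the error path.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_MISSES 3

struct health_state {
	uint32_t prev;
	int miss_counter;
};

/* Returns 1 if the device should be reported as compromised. */
static int poll_once(struct health_state *h, uint32_t counter_now)
{
	if (counter_now == h->prev)
		++h->miss_counter;
	else
		h->miss_counter = 0;

	h->prev = counter_now;
	return h->miss_counter == MAX_MISSES;
}

int main(void)
{
	struct health_state h = { .prev = 0, .miss_counter = 0 };
	/* Counter advances twice, then stalls: the third stalled poll reports it. */
	uint32_t samples[] = { 1, 2, 2, 2, 2 };

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		if (poll_once(&h, samples[i]))
			printf("poll %zu: device's health compromised\n", i);
	return 0;
}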
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
new file mode 100644
index 000000000000..18d6fd5dd90b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -0,0 +1,78 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h"
38
39int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
40 u16 opmod, int port)
41{
42 struct mlx5_mad_ifc_mbox_in *in = NULL;
43 struct mlx5_mad_ifc_mbox_out *out = NULL;
44 int err;
45
46 in = kzalloc(sizeof(*in), GFP_KERNEL);
47 if (!in)
48 return -ENOMEM;
49
50 out = kzalloc(sizeof(*out), GFP_KERNEL);
51 if (!out) {
52 err = -ENOMEM;
53 goto out;
54 }
55
56 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
57 in->hdr.opmod = cpu_to_be16(opmod);
58 in->port = port;
59
60 memcpy(in->data, inb, sizeof(in->data));
61
62 err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
63 if (err)
64 goto out;
65
66 if (out->hdr.status) {
67 err = mlx5_cmd_status_to_err(&out->hdr);
68 goto out;
69 }
70
71 memcpy(outb, out->data, sizeof(out->data));
72
73out:
74 kfree(out);
75 kfree(in);
76 return err;
77}
78EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
new file mode 100644
index 000000000000..12242de2b0e3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -0,0 +1,475 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/pci.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <linux/io-mapping.h>
41#include <linux/mlx5/driver.h>
42#include <linux/mlx5/cq.h>
43#include <linux/mlx5/qp.h>
44#include <linux/mlx5/srq.h>
45#include <linux/debugfs.h>
46#include "mlx5_core.h"
47
48#define DRIVER_NAME "mlx5_core"
49#define DRIVER_VERSION "1.0"
50#define DRIVER_RELDATE "June 2013"
51
52MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
53MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library");
54MODULE_LICENSE("Dual BSD/GPL");
55MODULE_VERSION(DRIVER_VERSION);
56
57int mlx5_core_debug_mask;
58module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
59MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
60
61struct workqueue_struct *mlx5_core_wq;
62
63static int set_dma_caps(struct pci_dev *pdev)
64{
65 int err;
66
67 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
68 if (err) {
69 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
70 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
71 if (err) {
72 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
73 return err;
74 }
75 }
76
77 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
78 if (err) {
79 dev_warn(&pdev->dev,
80 "Warning: couldn't set 64-bit consistent PCI DMA mask.\n");
81 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
82 if (err) {
83 dev_err(&pdev->dev,
84 "Can't set consistent PCI DMA mask, aborting.\n");
85 return err;
86 }
87 }
88
89 dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
90 return err;
91}
92
93static int request_bar(struct pci_dev *pdev)
94{
95 int err = 0;
96
97 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
98 dev_err(&pdev->dev, "Missing registers BAR, aborting.\n");
99 return -ENODEV;
100 }
101
102 err = pci_request_regions(pdev, DRIVER_NAME);
103 if (err)
104 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n");
105
106 return err;
107}
108
109static void release_bar(struct pci_dev *pdev)
110{
111 pci_release_regions(pdev);
112}
113
114static int mlx5_enable_msix(struct mlx5_core_dev *dev)
115{
116 struct mlx5_eq_table *table = &dev->priv.eq_table;
117 int num_eqs = 1 << dev->caps.log_max_eq;
118 int nvec;
119 int err;
120 int i;
121
122 nvec = dev->caps.num_ports * num_online_cpus() + MLX5_EQ_VEC_COMP_BASE;
123 nvec = min_t(int, nvec, num_eqs);
124 if (nvec <= MLX5_EQ_VEC_COMP_BASE)
125 return -ENOMEM;
126
127 table->msix_arr = kzalloc(nvec * sizeof(*table->msix_arr), GFP_KERNEL);
128 if (!table->msix_arr)
129 return -ENOMEM;
130
131 for (i = 0; i < nvec; i++)
132 table->msix_arr[i].entry = i;
133
134retry:
135 table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
136 err = pci_enable_msix(dev->pdev, table->msix_arr, nvec);
137 if (err <= 0) {
138 return err;
139 } else if (err > 2) {
140 nvec = err;
141 goto retry;
142 }
143
144 mlx5_core_dbg(dev, "received %d MSI vectors out of %d requested\n", err, nvec);
145
146 return 0;
147}
148
149static void mlx5_disable_msix(struct mlx5_core_dev *dev)
150{
151 struct mlx5_eq_table *table = &dev->priv.eq_table;
152
153 pci_disable_msix(dev->pdev);
154 kfree(table->msix_arr);
155}
156
157struct mlx5_reg_host_endianess {
158 u8 he;
159 u8 rsvd[15];
160};
161
162static int handle_hca_cap(struct mlx5_core_dev *dev)
163{
164 struct mlx5_cmd_query_hca_cap_mbox_out *query_out = NULL;
165 struct mlx5_cmd_set_hca_cap_mbox_in *set_ctx = NULL;
166 struct mlx5_cmd_query_hca_cap_mbox_in query_ctx;
167 struct mlx5_cmd_set_hca_cap_mbox_out set_out;
168 struct mlx5_profile *prof = dev->profile;
169 u64 flags;
170 int csum = 1;
171 int err;
172
173 memset(&query_ctx, 0, sizeof(query_ctx));
174 query_out = kzalloc(sizeof(*query_out), GFP_KERNEL);
175 if (!query_out)
176 return -ENOMEM;
177
178 set_ctx = kzalloc(sizeof(*set_ctx), GFP_KERNEL);
179 if (!set_ctx) {
180 err = -ENOMEM;
181 goto query_ex;
182 }
183
184 query_ctx.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);
185 query_ctx.hdr.opmod = cpu_to_be16(0x1);
186 err = mlx5_cmd_exec(dev, &query_ctx, sizeof(query_ctx),
187 query_out, sizeof(*query_out));
188 if (err)
189 goto query_ex;
190
191 err = mlx5_cmd_status_to_err(&query_out->hdr);
192 if (err) {
193 mlx5_core_warn(dev, "query hca cap failed, %d\n", err);
194 goto query_ex;
195 }
196
197 memcpy(&set_ctx->hca_cap, &query_out->hca_cap,
198 sizeof(set_ctx->hca_cap));
199
200 if (prof->mask & MLX5_PROF_MASK_CMDIF_CSUM) {
201 csum = !!prof->cmdif_csum;
202 flags = be64_to_cpu(set_ctx->hca_cap.flags);
203 if (csum)
204 flags |= MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
205 else
206 flags &= ~MLX5_DEV_CAP_FLAG_CMDIF_CSUM;
207
208 set_ctx->hca_cap.flags = cpu_to_be64(flags);
209 }
210
211 if (dev->profile->mask & MLX5_PROF_MASK_QP_SIZE)
212 set_ctx->hca_cap.log_max_qp = dev->profile->log_max_qp;
213
214 memset(&set_out, 0, sizeof(set_out));
215 set_ctx->hca_cap.log_uar_page_sz = cpu_to_be16(PAGE_SHIFT - 12);
216 set_ctx->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_SET_HCA_CAP);
217 err = mlx5_cmd_exec(dev, set_ctx, sizeof(*set_ctx),
218 &set_out, sizeof(set_out));
219 if (err) {
220 mlx5_core_warn(dev, "set hca cap failed, %d\n", err);
221 goto query_ex;
222 }
223
224 err = mlx5_cmd_status_to_err(&set_out.hdr);
225 if (err)
226 goto query_ex;
227
228 if (!csum)
229 dev->cmd.checksum_disabled = 1;
230
231query_ex:
232 kfree(query_out);
233 kfree(set_ctx);
234
235 return err;
236}
237
238static int set_hca_ctrl(struct mlx5_core_dev *dev)
239{
240 struct mlx5_reg_host_endianess he_in;
241 struct mlx5_reg_host_endianess he_out;
242 int err;
243
244 memset(&he_in, 0, sizeof(he_in));
245 he_in.he = MLX5_SET_HOST_ENDIANNESS;
246 err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
247 &he_out, sizeof(he_out),
248 MLX5_REG_HOST_ENDIANNESS, 0, 1);
249 return err;
250}
251
252int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
253{
254 struct mlx5_priv *priv = &dev->priv;
255 int err;
256
257 dev->pdev = pdev;
258 pci_set_drvdata(dev->pdev, dev);
259 strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
260 priv->name[MLX5_MAX_NAME_LEN - 1] = 0;
261
262 mutex_init(&priv->pgdir_mutex);
263 INIT_LIST_HEAD(&priv->pgdir_list);
264 spin_lock_init(&priv->mkey_lock);
265
266 priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root);
267 if (!priv->dbg_root)
268 return -ENOMEM;
269
270 err = pci_enable_device(pdev);
271 if (err) {
272 dev_err(&pdev->dev, "Cannot enable PCI device, aborting.\n");
273 goto err_dbg;
274 }
275
276 err = request_bar(pdev);
277 if (err) {
278 dev_err(&pdev->dev, "error requesting BARs, aborting.\n");
279 goto err_disable;
280 }
281
282 pci_set_master(pdev);
283
284 err = set_dma_caps(pdev);
285 if (err) {
286 dev_err(&pdev->dev, "Failed setting DMA capabilities mask, aborting\n");
287 goto err_clr_master;
288 }
289
290 dev->iseg_base = pci_resource_start(dev->pdev, 0);
291 dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
292 if (!dev->iseg) {
293 err = -ENOMEM;
294 dev_err(&pdev->dev, "Failed mapping initialization segment, aborting\n");
295 goto err_clr_master;
296 }
297 dev_info(&pdev->dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
298 fw_rev_min(dev), fw_rev_sub(dev));
299
300 err = mlx5_cmd_init(dev);
301 if (err) {
302 dev_err(&pdev->dev, "Failed initializing command interface, aborting\n");
303 goto err_unmap;
304 }
305
306 mlx5_pagealloc_init(dev);
307 err = set_hca_ctrl(dev);
308 if (err) {
309 dev_err(&pdev->dev, "set_hca_ctrl failed\n");
310 goto err_pagealloc_cleanup;
311 }
312
313 err = handle_hca_cap(dev);
314 if (err) {
315 dev_err(&pdev->dev, "handle_hca_cap failed\n");
316 goto err_pagealloc_cleanup;
317 }
318
319 err = mlx5_satisfy_startup_pages(dev);
320 if (err) {
321 dev_err(&pdev->dev, "failed to allocate startup pages\n");
322 goto err_pagealloc_cleanup;
323 }
324
325 err = mlx5_pagealloc_start(dev);
326 if (err) {
327 dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
328 goto err_reclaim_pages;
329 }
330
331 err = mlx5_cmd_init_hca(dev);
332 if (err) {
333 dev_err(&pdev->dev, "init hca failed\n");
334 goto err_pagealloc_stop;
335 }
336
337 mlx5_start_health_poll(dev);
338
339 err = mlx5_cmd_query_hca_cap(dev, &dev->caps);
340 if (err) {
341 dev_err(&pdev->dev, "query hca failed\n");
342 goto err_stop_poll;
343 }
344
345 err = mlx5_cmd_query_adapter(dev);
346 if (err) {
347 dev_err(&pdev->dev, "query adapter failed\n");
348 goto err_stop_poll;
349 }
350
351 err = mlx5_enable_msix(dev);
352 if (err) {
353 dev_err(&pdev->dev, "enable msix failed\n");
354 goto err_stop_poll;
355 }
356
357 err = mlx5_eq_init(dev);
358 if (err) {
359 dev_err(&pdev->dev, "failed to initialize eq\n");
360 goto disable_msix;
361 }
362
363 err = mlx5_alloc_uuars(dev, &priv->uuari);
364 if (err) {
365 dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
366 goto err_eq_cleanup;
367 }
368
369 err = mlx5_start_eqs(dev);
370 if (err) {
371 dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
372 goto err_free_uar;
373 }
374
375 MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
376
377 mlx5_init_cq_table(dev);
378 mlx5_init_qp_table(dev);
379 mlx5_init_srq_table(dev);
380
381 return 0;
382
383err_free_uar:
384 mlx5_free_uuars(dev, &priv->uuari);
385
386err_eq_cleanup:
387 mlx5_eq_cleanup(dev);
388
389disable_msix:
390 mlx5_disable_msix(dev);
391
392err_stop_poll:
393 mlx5_stop_health_poll(dev);
394 mlx5_cmd_teardown_hca(dev);
395
396err_pagealloc_stop:
397 mlx5_pagealloc_stop(dev);
398
399err_reclaim_pages:
400 mlx5_reclaim_startup_pages(dev);
401
402err_pagealloc_cleanup:
403 mlx5_pagealloc_cleanup(dev);
404 mlx5_cmd_cleanup(dev);
405
406err_unmap:
407 iounmap(dev->iseg);
408
409err_clr_master:
410 pci_clear_master(dev->pdev);
411 release_bar(dev->pdev);
412
413err_disable:
414 pci_disable_device(dev->pdev);
415
416err_dbg:
417 debugfs_remove(priv->dbg_root);
418 return err;
419}
420EXPORT_SYMBOL(mlx5_dev_init);
421
422void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
423{
424 struct mlx5_priv *priv = &dev->priv;
425
426 mlx5_cleanup_srq_table(dev);
427 mlx5_cleanup_qp_table(dev);
428 mlx5_cleanup_cq_table(dev);
429 mlx5_stop_eqs(dev);
430 mlx5_free_uuars(dev, &priv->uuari);
431 mlx5_eq_cleanup(dev);
432 mlx5_disable_msix(dev);
433 mlx5_stop_health_poll(dev);
434 mlx5_cmd_teardown_hca(dev);
435 mlx5_pagealloc_stop(dev);
436 mlx5_reclaim_startup_pages(dev);
437 mlx5_pagealloc_cleanup(dev);
438 mlx5_cmd_cleanup(dev);
439 iounmap(dev->iseg);
440 pci_clear_master(dev->pdev);
441 release_bar(dev->pdev);
442 pci_disable_device(dev->pdev);
443 debugfs_remove(priv->dbg_root);
444}
445EXPORT_SYMBOL(mlx5_dev_cleanup);
446
447static int __init init(void)
448{
449 int err;
450
451 mlx5_register_debugfs();
452 mlx5_core_wq = create_singlethread_workqueue("mlx5_core_wq");
453 if (!mlx5_core_wq) {
454 err = -ENOMEM;
455 goto err_debug;
456 }
457 mlx5_health_init();
458
459 return 0;
460
461 mlx5_health_cleanup();
462err_debug:
463 mlx5_unregister_debugfs();
464 return err;
465}
466
467static void __exit cleanup(void)
468{
469 mlx5_health_cleanup();
470 destroy_workqueue(mlx5_core_wq);
471 mlx5_unregister_debugfs();
472}
473
474module_init(init);
475module_exit(cleanup);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
new file mode 100644
index 000000000000..44837640bd7c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -0,0 +1,106 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include <rdma/ib_verbs.h>
38#include "mlx5_core.h"
39
40struct mlx5_attach_mcg_mbox_in {
41 struct mlx5_inbox_hdr hdr;
42 __be32 qpn;
43 __be32 rsvd;
44 u8 gid[16];
45};
46
47struct mlx5_attach_mcg_mbox_out {
48 struct mlx5_outbox_hdr hdr;
49 u8 rsvf[8];
50};
51
52struct mlx5_detach_mcg_mbox_in {
53 struct mlx5_inbox_hdr hdr;
54 __be32 qpn;
55 __be32 rsvd;
56 u8 gid[16];
57};
58
59struct mlx5_detach_mcg_mbox_out {
60 struct mlx5_outbox_hdr hdr;
61 u8 rsvf[8];
62};
63
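/* Attach the given QP to the multicast group addressed by mgid */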
64int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
65{
66 struct mlx5_attach_mcg_mbox_in in;
67 struct mlx5_attach_mcg_mbox_out out;
68 int err;
69
70 memset(&in, 0, sizeof(in));
71 memset(&out, 0, sizeof(out));
72 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
73 memcpy(in.gid, mgid, sizeof(*mgid));
74 in.qpn = cpu_to_be32(qpn);
75 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
76 if (err)
77 return err;
78
79 if (out.hdr.status)
80 err = mlx5_cmd_status_to_err(&out.hdr);
81
82 return err;
83}
84EXPORT_SYMBOL(mlx5_core_attach_mcg);
85
86int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
87{
88 struct mlx5_detach_mcg_mbox_in in;
89 struct mlx5_detach_mcg_mbox_out out;
90 int err;
91
92 memset(&in, 0, sizeof(in));
93 memset(&out, 0, sizeof(out));
94 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETACH_FROM_MCG);
95 memcpy(in.gid, mgid, sizeof(*mgid));
96 in.qpn = cpu_to_be32(qpn);
97 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
98 if (err)
99 return err;
100
101 if (out.hdr.status)
102 err = mlx5_cmd_status_to_err(&out.hdr);
103
104 return err;
105}
106EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
new file mode 100644
index 000000000000..68b74e1ae1b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -0,0 +1,73 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef __MLX5_CORE_H__
34#define __MLX5_CORE_H__
35
36#include <linux/types.h>
37#include <linux/kernel.h>
38#include <linux/sched.h>
39
40extern int mlx5_core_debug_mask;
41
42#define mlx5_core_dbg(dev, format, arg...) \
43pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \
44 current->pid, ##arg)
45
46#define mlx5_core_dbg_mask(dev, mask, format, arg...) \
47do { \
48 if ((mask) & mlx5_core_debug_mask) \
49 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->priv.name, \
50 __func__, __LINE__, current->pid, ##arg); \
51} while (0)
52
53#define mlx5_core_err(dev, format, arg...) \
54pr_err("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \
55 current->pid, ##arg)
56
57#define mlx5_core_warn(dev, format, arg...) \
58pr_warn("%s:%s:%d:(pid %d): " format, (dev)->priv.name, __func__, __LINE__, \
59 current->pid, ##arg)
60
61enum {
62 MLX5_CMD_DATA, /* print command payload only */
63 MLX5_CMD_TIME, /* print command execution time */
64};
65
66
67int mlx5_cmd_query_hca_cap(struct mlx5_core_dev *dev,
68 struct mlx5_caps *caps);
69int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev);
70int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
71int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
72
73#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
new file mode 100644
index 000000000000..5b44e2e46daf
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -0,0 +1,136 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h"
38
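/*
 * Create a memory key: the driver contributes a rolling 8-bit key byte that
 * is combined with the mkey index returned by the firmware.
 */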
39int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
40 struct mlx5_create_mkey_mbox_in *in, int inlen)
41{
42 struct mlx5_create_mkey_mbox_out out;
43 int err;
44 u8 key;
45
46 memset(&out, 0, sizeof(out));
47 spin_lock(&dev->priv.mkey_lock);
48 key = dev->priv.mkey_key++;
49 spin_unlock(&dev->priv.mkey_lock);
50 in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
51 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
52 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
53 if (err) {
 54		mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
55 return err;
56 }
57
58 if (out.hdr.status) {
59 mlx5_core_dbg(dev, "status %d\n", out.hdr.status);
60 return mlx5_cmd_status_to_err(&out.hdr);
61 }
62
63 mr->key = mlx5_idx_to_mkey(be32_to_cpu(out.mkey) & 0xffffff) | key;
64 mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", be32_to_cpu(out.mkey), key, mr->key);
65
66 return err;
67}
68EXPORT_SYMBOL(mlx5_core_create_mkey);
69
70int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr)
71{
72 struct mlx5_destroy_mkey_mbox_in in;
73 struct mlx5_destroy_mkey_mbox_out out;
74 int err;
75
76 memset(&in, 0, sizeof(in));
77 memset(&out, 0, sizeof(out));
78
79 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
80 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
81 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
82 if (err)
83 return err;
84
85 if (out.hdr.status)
86 return mlx5_cmd_status_to_err(&out.hdr);
87
88 return err;
89}
90EXPORT_SYMBOL(mlx5_core_destroy_mkey);
91
92int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
93 struct mlx5_query_mkey_mbox_out *out, int outlen)
94{
95 struct mlx5_destroy_mkey_mbox_in in;
96 int err;
97
98 memset(&in, 0, sizeof(in));
99 memset(out, 0, outlen);
100
101 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
102 in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mr->key));
103 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
104 if (err)
105 return err;
106
107 if (out->hdr.status)
108 return mlx5_cmd_status_to_err(&out->hdr);
109
110 return err;
111}
112EXPORT_SYMBOL(mlx5_core_query_mkey);
113
114int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
115 u32 *mkey)
116{
117 struct mlx5_query_special_ctxs_mbox_in in;
118 struct mlx5_query_special_ctxs_mbox_out out;
119 int err;
120
121 memset(&in, 0, sizeof(in));
122 memset(&out, 0, sizeof(out));
123
124 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
125 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
126 if (err)
127 return err;
128
129 if (out.hdr.status)
130 return mlx5_cmd_status_to_err(&out.hdr);
131
132 *mkey = be32_to_cpu(out.dump_fill_mkey);
133
134 return err;
135}
136EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
new file mode 100644
index 000000000000..f0bf46339b28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -0,0 +1,435 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/mlx5/driver.h>
37#include <linux/mlx5/cmd.h>
38#include "mlx5_core.h"
39
40enum {
41 MLX5_PAGES_CANT_GIVE = 0,
42 MLX5_PAGES_GIVE = 1,
43 MLX5_PAGES_TAKE = 2
44};
45
46struct mlx5_pages_req {
47 struct mlx5_core_dev *dev;
48 u32 func_id;
49 s16 npages;
50 struct work_struct work;
51};
52
53struct fw_page {
54 struct rb_node rb_node;
55 u64 addr;
56 struct page *page;
57 u16 func_id;
58};
59
60struct mlx5_query_pages_inbox {
61 struct mlx5_inbox_hdr hdr;
62 u8 rsvd[8];
63};
64
65struct mlx5_query_pages_outbox {
66 struct mlx5_outbox_hdr hdr;
67 u8 reserved[2];
68 __be16 func_id;
69 __be16 init_pages;
70 __be16 num_pages;
71};
72
73struct mlx5_manage_pages_inbox {
74 struct mlx5_inbox_hdr hdr;
75 __be16 rsvd0;
76 __be16 func_id;
77 __be16 rsvd1;
78 __be16 num_entries;
79 u8 rsvd2[16];
80 __be64 pas[0];
81};
82
83struct mlx5_manage_pages_outbox {
84 struct mlx5_outbox_hdr hdr;
85 u8 rsvd0[2];
86 __be16 num_entries;
87 u8 rsvd1[20];
88 __be64 pas[0];
89};
90
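/*
 * Pages handed to the firmware are tracked in an rb-tree keyed by their DMA
 * address so they can be looked up, unmapped and freed when reclaimed.
 */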
91static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
92{
93 struct rb_root *root = &dev->priv.page_root;
94 struct rb_node **new = &root->rb_node;
95 struct rb_node *parent = NULL;
96 struct fw_page *nfp;
97 struct fw_page *tfp;
98
99 while (*new) {
100 parent = *new;
101 tfp = rb_entry(parent, struct fw_page, rb_node);
102 if (tfp->addr < addr)
103 new = &parent->rb_left;
104 else if (tfp->addr > addr)
105 new = &parent->rb_right;
106 else
107 return -EEXIST;
108 }
109
110 nfp = kmalloc(sizeof(*nfp), GFP_KERNEL);
111 if (!nfp)
112 return -ENOMEM;
113
114 nfp->addr = addr;
115 nfp->page = page;
116 nfp->func_id = func_id;
117
118 rb_link_node(&nfp->rb_node, parent, new);
119 rb_insert_color(&nfp->rb_node, root);
120
121 return 0;
122}
123
124static struct page *remove_page(struct mlx5_core_dev *dev, u64 addr)
125{
126 struct rb_root *root = &dev->priv.page_root;
127 struct rb_node *tmp = root->rb_node;
128 struct page *result = NULL;
129 struct fw_page *tfp;
130
131 while (tmp) {
132 tfp = rb_entry(tmp, struct fw_page, rb_node);
133 if (tfp->addr < addr) {
134 tmp = tmp->rb_left;
135 } else if (tfp->addr > addr) {
136 tmp = tmp->rb_right;
137 } else {
138 rb_erase(&tfp->rb_node, root);
139 result = tfp->page;
140 kfree(tfp);
141 break;
142 }
143 }
144
145 return result;
146}
147
148static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
149 s16 *pages, s16 *init_pages)
150{
151 struct mlx5_query_pages_inbox in;
152 struct mlx5_query_pages_outbox out;
153 int err;
154
155 memset(&in, 0, sizeof(in));
156 memset(&out, 0, sizeof(out));
157 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
158 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
159 if (err)
160 return err;
161
162 if (out.hdr.status)
163 return mlx5_cmd_status_to_err(&out.hdr);
164
165 if (pages)
166 *pages = be16_to_cpu(out.num_pages);
167 if (init_pages)
168 *init_pages = be16_to_cpu(out.init_pages);
169 *func_id = be16_to_cpu(out.func_id);
170
171 return err;
172}
173
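/*
 * Allocate and DMA-map npages pages and post them to the firmware with a
 * MANAGE_PAGES(GIVE) command; on failure the pages are unwound and the
 * firmware is optionally told that no pages can be supplied.
 */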
174static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
175 int notify_fail)
176{
177 struct mlx5_manage_pages_inbox *in;
178 struct mlx5_manage_pages_outbox out;
179 struct page *page;
180 int inlen;
181 u64 addr;
182 int err;
183 int i;
184
185 inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
186 in = mlx5_vzalloc(inlen);
187 if (!in) {
188 mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
189 return -ENOMEM;
190 }
191 memset(&out, 0, sizeof(out));
192
193 for (i = 0; i < npages; i++) {
194 page = alloc_page(GFP_HIGHUSER);
195 if (!page) {
196 err = -ENOMEM;
197 mlx5_core_warn(dev, "failed to allocate page\n");
198 goto out_alloc;
199 }
200 addr = dma_map_page(&dev->pdev->dev, page, 0,
201 PAGE_SIZE, DMA_BIDIRECTIONAL);
202 if (dma_mapping_error(&dev->pdev->dev, addr)) {
203 mlx5_core_warn(dev, "failed dma mapping page\n");
204 __free_page(page);
205 err = -ENOMEM;
206 goto out_alloc;
207 }
208 err = insert_page(dev, addr, page, func_id);
209 if (err) {
210 mlx5_core_err(dev, "failed to track allocated page\n");
211 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
212 __free_page(page);
213 err = -ENOMEM;
214 goto out_alloc;
215 }
216 in->pas[i] = cpu_to_be64(addr);
217 }
218
219 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
220 in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
221 in->func_id = cpu_to_be16(func_id);
222 in->num_entries = cpu_to_be16(npages);
223 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
224 mlx5_core_dbg(dev, "err %d\n", err);
225 if (err) {
226 mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err);
227 goto out_alloc;
228 }
229 dev->priv.fw_pages += npages;
230
231 if (out.hdr.status) {
232 err = mlx5_cmd_status_to_err(&out.hdr);
233 if (err) {
234 mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", func_id, npages, out.hdr.status);
235 goto out_alloc;
236 }
237 }
238
239 mlx5_core_dbg(dev, "err %d\n", err);
240
241 goto out_free;
242
243out_alloc:
244 if (notify_fail) {
245 memset(in, 0, inlen);
246 memset(&out, 0, sizeof(out));
247 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
248 in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
249 if (mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)))
 250			mlx5_core_warn(dev, "page notify failed\n");
251 }
252 for (i--; i >= 0; i--) {
253 addr = be64_to_cpu(in->pas[i]);
254 page = remove_page(dev, addr);
255 if (!page) {
256 mlx5_core_err(dev, "BUG: can't remove page at addr 0x%llx\n",
257 addr);
258 continue;
259 }
260 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
261 __free_page(page);
262 }
263
264out_free:
265 mlx5_vfree(in);
266 return err;
267}
268
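/*
 * Ask the firmware to return up to npages pages with MANAGE_PAGES(TAKE) and
 * release every page it actually handed back.
 */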
269static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
270 int *nclaimed)
271{
272 struct mlx5_manage_pages_inbox in;
273 struct mlx5_manage_pages_outbox *out;
274 struct page *page;
275 int num_claimed;
276 int outlen;
277 u64 addr;
278 int err;
279 int i;
280
281 memset(&in, 0, sizeof(in));
282 outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
283 out = mlx5_vzalloc(outlen);
284 if (!out)
285 return -ENOMEM;
286
287 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
288 in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
289 in.func_id = cpu_to_be16(func_id);
290 in.num_entries = cpu_to_be16(npages);
291 mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
292 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
293 if (err) {
 294		mlx5_core_err(dev, "failed reclaiming pages\n");
295 goto out_free;
296 }
297 dev->priv.fw_pages -= npages;
298
299 if (out->hdr.status) {
300 err = mlx5_cmd_status_to_err(&out->hdr);
301 goto out_free;
302 }
303
304 num_claimed = be16_to_cpu(out->num_entries);
305 if (nclaimed)
306 *nclaimed = num_claimed;
307
308 for (i = 0; i < num_claimed; i++) {
309 addr = be64_to_cpu(out->pas[i]);
310 page = remove_page(dev, addr);
311 if (!page) {
312 mlx5_core_warn(dev, "FW reported unknown DMA address 0x%llx\n", addr);
313 } else {
314 dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
315 __free_page(page);
316 }
317 }
318
319out_free:
320 mlx5_vfree(out);
321 return err;
322}
323
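/* Page request work handler: negative npages means reclaim, positive means give */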
324static void pages_work_handler(struct work_struct *work)
325{
326 struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
327 struct mlx5_core_dev *dev = req->dev;
328 int err = 0;
329
330 if (req->npages < 0)
331 err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
332 else if (req->npages > 0)
333 err = give_pages(dev, req->func_id, req->npages, 1);
334
335 if (err)
336 mlx5_core_warn(dev, "%s fail %d\n", req->npages < 0 ?
337 "reclaim" : "give", err);
338
339 kfree(req);
340}
341
342void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
343 s16 npages)
344{
345 struct mlx5_pages_req *req;
346
347 req = kzalloc(sizeof(*req), GFP_ATOMIC);
348 if (!req) {
349 mlx5_core_warn(dev, "failed to allocate pages request\n");
350 return;
351 }
352
353 req->dev = dev;
354 req->func_id = func_id;
355 req->npages = npages;
356 INIT_WORK(&req->work, pages_work_handler);
357 queue_work(dev->priv.pg_wq, &req->work);
358}
359
360int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev)
361{
362 s16 uninitialized_var(init_pages);
363 u16 uninitialized_var(func_id);
364 int err;
365
366 err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages);
367 if (err)
368 return err;
369
370 mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id);
371
372 return give_pages(dev, func_id, init_pages, 0);
373}
374
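/* Roughly how many 8-byte page addresses fit in a single reclaim command */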
375static int optimal_reclaimed_pages(void)
376{
377 struct mlx5_cmd_prot_block *block;
378 struct mlx5_cmd_layout *lay;
379 int ret;
380
381 ret = (sizeof(lay->in) + sizeof(block->data) -
382 sizeof(struct mlx5_manage_pages_outbox)) / 8;
383
384 return ret;
385}
386
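/* Reclaim all pages still held by the firmware, in chunks, giving up after five seconds */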
387int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
388{
389 unsigned long end = jiffies + msecs_to_jiffies(5000);
390 struct fw_page *fwp;
391 struct rb_node *p;
392 int err;
393
394 do {
395 p = rb_first(&dev->priv.page_root);
396 if (p) {
397 fwp = rb_entry(p, struct fw_page, rb_node);
398 err = reclaim_pages(dev, fwp->func_id, optimal_reclaimed_pages(), NULL);
399 if (err) {
400 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err);
401 return err;
402 }
403 }
404 if (time_after(jiffies, end)) {
405 mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
406 break;
407 }
408 } while (p);
409
410 return 0;
411}
412
413void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
414{
415 dev->priv.page_root = RB_ROOT;
416}
417
418void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
419{
420 /* nothing */
421}
422
423int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
424{
425 dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
426 if (!dev->priv.pg_wq)
427 return -ENOMEM;
428
429 return 0;
430}
431
432void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
433{
434 destroy_workqueue(dev->priv.pg_wq);
435}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
new file mode 100644
index 000000000000..790da5c4ca4f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -0,0 +1,101 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h"
38
39struct mlx5_alloc_pd_mbox_in {
40 struct mlx5_inbox_hdr hdr;
41 u8 rsvd[8];
42};
43
44struct mlx5_alloc_pd_mbox_out {
45 struct mlx5_outbox_hdr hdr;
46 __be32 pdn;
47 u8 rsvd[4];
48};
49
50struct mlx5_dealloc_pd_mbox_in {
51 struct mlx5_inbox_hdr hdr;
52 __be32 pdn;
53 u8 rsvd[4];
54};
55
56struct mlx5_dealloc_pd_mbox_out {
57 struct mlx5_outbox_hdr hdr;
58 u8 rsvd[8];
59};
60
61int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
62{
63 struct mlx5_alloc_pd_mbox_in in;
64 struct mlx5_alloc_pd_mbox_out out;
65 int err;
66
67 memset(&in, 0, sizeof(in));
68 memset(&out, 0, sizeof(out));
69 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
70 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
71 if (err)
72 return err;
73
74 if (out.hdr.status)
75 return mlx5_cmd_status_to_err(&out.hdr);
76
77 *pdn = be32_to_cpu(out.pdn) & 0xffffff;
78 return err;
79}
80EXPORT_SYMBOL(mlx5_core_alloc_pd);
81
82int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
83{
84 struct mlx5_dealloc_pd_mbox_in in;
85 struct mlx5_dealloc_pd_mbox_out out;
86 int err;
87
88 memset(&in, 0, sizeof(in));
89 memset(&out, 0, sizeof(out));
90 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
91 in.pdn = cpu_to_be32(pdn);
92 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
93 if (err)
94 return err;
95
96 if (out.hdr.status)
97 return mlx5_cmd_status_to_err(&out.hdr);
98
99 return err;
100}
101EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
new file mode 100644
index 000000000000..f6afe7b5a675
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -0,0 +1,104 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/mlx5/driver.h>
35#include <linux/mlx5/cmd.h>
36#include "mlx5_core.h"
37
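/*
 * Generic ACCESS_REG command: opmod 0 performs a write of the register,
 * opmod 1 performs a read.
 */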
38int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
39 int size_in, void *data_out, int size_out,
40 u16 reg_num, int arg, int write)
41{
42 struct mlx5_access_reg_mbox_in *in = NULL;
43 struct mlx5_access_reg_mbox_out *out = NULL;
44 int err = -ENOMEM;
45
46 in = mlx5_vzalloc(sizeof(*in) + size_in);
47 if (!in)
48 return -ENOMEM;
49
50 out = mlx5_vzalloc(sizeof(*out) + size_out);
51 if (!out)
52 goto ex1;
53
54 memcpy(in->data, data_in, size_in);
55 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
56 in->hdr.opmod = cpu_to_be16(!write);
57 in->arg = cpu_to_be32(arg);
58 in->register_id = cpu_to_be16(reg_num);
59 err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
 60			    sizeof(*out) + size_out);
61 if (err)
62 goto ex2;
63
64 if (out->hdr.status)
65 err = mlx5_cmd_status_to_err(&out->hdr);
66
67 if (!err)
68 memcpy(data_out, out->data, size_out);
69
70ex2:
71 mlx5_vfree(out);
72ex1:
73 mlx5_vfree(in);
74 return err;
75}
76EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
77
78
79struct mlx5_reg_pcap {
80 u8 rsvd0;
81 u8 port_num;
82 u8 rsvd1[2];
83 __be32 caps_127_96;
84 __be32 caps_95_64;
85 __be32 caps_63_32;
86 __be32 caps_31_0;
87};
88
89int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps)
90{
91 struct mlx5_reg_pcap in;
92 struct mlx5_reg_pcap out;
93 int err;
94
95 memset(&in, 0, sizeof(in));
96 in.caps_127_96 = cpu_to_be32(caps);
97 in.port_num = port_num;
98
99 err = mlx5_core_access_reg(dev, &in, sizeof(in), &out,
100 sizeof(out), MLX5_REG_PCAP, 0, 1);
101
102 return err;
103}
104EXPORT_SYMBOL_GPL(mlx5_set_port_caps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
new file mode 100644
index 000000000000..54faf8bfcaf4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -0,0 +1,301 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/gfp.h>
35#include <linux/export.h>
36#include <linux/mlx5/cmd.h>
37#include <linux/mlx5/qp.h>
38#include <linux/mlx5/driver.h>
39
40#include "mlx5_core.h"
41
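/*
 * Dispatch an asynchronous event to the QP it refers to, holding a reference
 * so the QP cannot be freed while its event handler runs.
 */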
42void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type)
43{
44 struct mlx5_qp_table *table = &dev->priv.qp_table;
45 struct mlx5_core_qp *qp;
46
47 spin_lock(&table->lock);
48
49 qp = radix_tree_lookup(&table->tree, qpn);
50 if (qp)
51 atomic_inc(&qp->refcount);
52
53 spin_unlock(&table->lock);
54
55 if (!qp) {
56 mlx5_core_warn(dev, "Async event for bogus QP 0x%x\n", qpn);
57 return;
58 }
59
60 qp->event(qp, event_type);
61
62 if (atomic_dec_and_test(&qp->refcount))
63 complete(&qp->free);
64}
65
66int mlx5_core_create_qp(struct mlx5_core_dev *dev,
67 struct mlx5_core_qp *qp,
68 struct mlx5_create_qp_mbox_in *in,
69 int inlen)
70{
71 struct mlx5_qp_table *table = &dev->priv.qp_table;
72 struct mlx5_create_qp_mbox_out out;
73 struct mlx5_destroy_qp_mbox_in din;
74 struct mlx5_destroy_qp_mbox_out dout;
75 int err;
76
77 memset(&dout, 0, sizeof(dout));
78 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
79
80 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
81 if (err) {
 82		mlx5_core_warn(dev, "ret %d\n", err);
83 return err;
84 }
85
86 if (out.hdr.status) {
87 pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
88 return mlx5_cmd_status_to_err(&out.hdr);
89 }
90
91 qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
92 mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
93
94 spin_lock_irq(&table->lock);
95 err = radix_tree_insert(&table->tree, qp->qpn, qp);
96 spin_unlock_irq(&table->lock);
97 if (err) {
 98		mlx5_core_warn(dev, "err %d\n", err);
99 goto err_cmd;
100 }
101
102 err = mlx5_debug_qp_add(dev, qp);
103 if (err)
104 mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
105 qp->qpn);
106
107 qp->pid = current->pid;
108 atomic_set(&qp->refcount, 1);
109 atomic_inc(&dev->num_qps);
110 init_completion(&qp->free);
111
112 return 0;
113
114err_cmd:
115 memset(&din, 0, sizeof(din));
116 memset(&dout, 0, sizeof(dout));
117 din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
118 din.qpn = cpu_to_be32(qp->qpn);
 119	mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
120
121 return err;
122}
123EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
124
125int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
126 struct mlx5_core_qp *qp)
127{
128 struct mlx5_destroy_qp_mbox_in in;
129 struct mlx5_destroy_qp_mbox_out out;
130 struct mlx5_qp_table *table = &dev->priv.qp_table;
131 unsigned long flags;
132 int err;
133
134 mlx5_debug_qp_remove(dev, qp);
135
136 spin_lock_irqsave(&table->lock, flags);
137 radix_tree_delete(&table->tree, qp->qpn);
138 spin_unlock_irqrestore(&table->lock, flags);
139
140 if (atomic_dec_and_test(&qp->refcount))
141 complete(&qp->free);
142 wait_for_completion(&qp->free);
143
144 memset(&in, 0, sizeof(in));
145 memset(&out, 0, sizeof(out));
146 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
147 in.qpn = cpu_to_be32(qp->qpn);
148 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
149 if (err)
150 return err;
151
152 if (out.hdr.status)
153 return mlx5_cmd_status_to_err(&out.hdr);
154
155 atomic_dec(&dev->num_qps);
156 return 0;
157}
158EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
159
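/*
 * Modify a QP: the table below maps (current state, new state) pairs to the
 * corresponding firmware transition command; empty entries are invalid
 * transitions.
 */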
160int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
161 enum mlx5_qp_state new_state,
162 struct mlx5_modify_qp_mbox_in *in, int sqd_event,
163 struct mlx5_core_qp *qp)
164{
165 static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
166 [MLX5_QP_STATE_RST] = {
167 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
168 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
169 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
170 },
171 [MLX5_QP_STATE_INIT] = {
172 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
173 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
174 [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
175 [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
176 },
177 [MLX5_QP_STATE_RTR] = {
178 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
179 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
180 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
181 },
182 [MLX5_QP_STATE_RTS] = {
183 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
184 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
185 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
186 [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_RTS2SQD_QP,
187 },
188 [MLX5_QP_STATE_SQD] = {
189 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
190 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
191 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQD2RTS_QP,
192 [MLX5_QP_STATE_SQD] = MLX5_CMD_OP_SQD2SQD_QP,
193 },
194 [MLX5_QP_STATE_SQER] = {
195 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
196 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
197 [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
198 },
199 [MLX5_QP_STATE_ERR] = {
200 [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
201 [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
202 }
203 };
204
205 struct mlx5_modify_qp_mbox_out out;
206 int err = 0;
207 u16 op;
208
209 if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
210 !optab[cur_state][new_state])
211 return -EINVAL;
212
213 memset(&out, 0, sizeof(out));
214 op = optab[cur_state][new_state];
215 in->hdr.opcode = cpu_to_be16(op);
216 in->qpn = cpu_to_be32(qp->qpn);
217 err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
218 if (err)
219 return err;
220
221 return mlx5_cmd_status_to_err(&out.hdr);
222}
223EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
224
225void mlx5_init_qp_table(struct mlx5_core_dev *dev)
226{
227 struct mlx5_qp_table *table = &dev->priv.qp_table;
228
229 spin_lock_init(&table->lock);
230 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
231 mlx5_qp_debugfs_init(dev);
232}
233
234void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
235{
236 mlx5_qp_debugfs_cleanup(dev);
237}
238
239int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
240 struct mlx5_query_qp_mbox_out *out, int outlen)
241{
242 struct mlx5_query_qp_mbox_in in;
243 int err;
244
245 memset(&in, 0, sizeof(in));
246 memset(out, 0, outlen);
247 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP);
248 in.qpn = cpu_to_be32(qp->qpn);
249 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
250 if (err)
251 return err;
252
253 if (out->hdr.status)
254 return mlx5_cmd_status_to_err(&out->hdr);
255
256 return err;
257}
258EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
259
260int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
261{
262 struct mlx5_alloc_xrcd_mbox_in in;
263 struct mlx5_alloc_xrcd_mbox_out out;
264 int err;
265
266 memset(&in, 0, sizeof(in));
267 memset(&out, 0, sizeof(out));
268 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD);
269 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
270 if (err)
271 return err;
272
273 if (out.hdr.status)
274 err = mlx5_cmd_status_to_err(&out.hdr);
275 else
276 *xrcdn = be32_to_cpu(out.xrcdn);
277
278 return err;
279}
280EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
281
282int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
283{
284 struct mlx5_dealloc_xrcd_mbox_in in;
285 struct mlx5_dealloc_xrcd_mbox_out out;
286 int err;
287
288 memset(&in, 0, sizeof(in));
289 memset(&out, 0, sizeof(out));
290 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD);
291 in.xrcdn = cpu_to_be32(xrcdn);
292 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
293 if (err)
294 return err;
295
296 if (out.hdr.status)
297 err = mlx5_cmd_status_to_err(&out.hdr);
298
299 return err;
300}
301EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
new file mode 100644
index 000000000000..38bce93f8314
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include <linux/mlx5/srq.h>
38#include <rdma/ib_verbs.h>
39#include "mlx5_core.h"
40
41void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
42{
43 struct mlx5_srq_table *table = &dev->priv.srq_table;
44 struct mlx5_core_srq *srq;
45
46 spin_lock(&table->lock);
47
48 srq = radix_tree_lookup(&table->tree, srqn);
49 if (srq)
50 atomic_inc(&srq->refcount);
51
52 spin_unlock(&table->lock);
53
54 if (!srq) {
55 mlx5_core_warn(dev, "Async event for bogus SRQ 0x%08x\n", srqn);
56 return;
57 }
58
59 srq->event(srq, event_type);
60
61 if (atomic_dec_and_test(&srq->refcount))
62 complete(&srq->free);
63}
64
65struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn)
66{
67 struct mlx5_srq_table *table = &dev->priv.srq_table;
68 struct mlx5_core_srq *srq;
69
70 spin_lock(&table->lock);
71
72 srq = radix_tree_lookup(&table->tree, srqn);
73 if (srq)
74 atomic_inc(&srq->refcount);
75
76 spin_unlock(&table->lock);
77
78 return srq;
79}
80EXPORT_SYMBOL(mlx5_core_get_srq);
81
82int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
83 struct mlx5_create_srq_mbox_in *in, int inlen)
84{
85 struct mlx5_create_srq_mbox_out out;
86 struct mlx5_srq_table *table = &dev->priv.srq_table;
87 struct mlx5_destroy_srq_mbox_in din;
88 struct mlx5_destroy_srq_mbox_out dout;
89 int err;
90
91 memset(&out, 0, sizeof(out));
92 in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_SRQ);
93 err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
94 if (err)
95 return err;
96
97 if (out.hdr.status)
98 return mlx5_cmd_status_to_err(&out.hdr);
99
100 srq->srqn = be32_to_cpu(out.srqn) & 0xffffff;
101
102 atomic_set(&srq->refcount, 1);
103 init_completion(&srq->free);
104
105 spin_lock_irq(&table->lock);
106 err = radix_tree_insert(&table->tree, srq->srqn, srq);
107 spin_unlock_irq(&table->lock);
108 if (err) {
109 mlx5_core_warn(dev, "err %d, srqn 0x%x\n", err, srq->srqn);
110 goto err_cmd;
111 }
112
113 return 0;
114
115err_cmd:
116 memset(&din, 0, sizeof(din));
117 memset(&dout, 0, sizeof(dout));
118 din.srqn = cpu_to_be32(srq->srqn);
119 din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
120 mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
121 return err;
122}
123EXPORT_SYMBOL(mlx5_core_create_srq);
124
125int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
126{
127 struct mlx5_destroy_srq_mbox_in in;
128 struct mlx5_destroy_srq_mbox_out out;
129 struct mlx5_srq_table *table = &dev->priv.srq_table;
130 struct mlx5_core_srq *tmp;
131 int err;
132
133 spin_lock_irq(&table->lock);
134 tmp = radix_tree_delete(&table->tree, srq->srqn);
135 spin_unlock_irq(&table->lock);
136 if (!tmp) {
137 mlx5_core_warn(dev, "srq 0x%x not found in tree\n", srq->srqn);
138 return -EINVAL;
139 }
140 if (tmp != srq) {
141 mlx5_core_warn(dev, "corruption on srqn 0x%x\n", srq->srqn);
142 return -EINVAL;
143 }
144
145 memset(&in, 0, sizeof(in));
146 memset(&out, 0, sizeof(out));
147 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_SRQ);
148 in.srqn = cpu_to_be32(srq->srqn);
149 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
150 if (err)
151 return err;
152
153 if (out.hdr.status)
154 return mlx5_cmd_status_to_err(&out.hdr);
155
156 if (atomic_dec_and_test(&srq->refcount))
157 complete(&srq->free);
158 wait_for_completion(&srq->free);
159
160 return 0;
161}
162EXPORT_SYMBOL(mlx5_core_destroy_srq);
163
164int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
165 struct mlx5_query_srq_mbox_out *out)
166{
167 struct mlx5_query_srq_mbox_in in;
168 int err;
169
170 memset(&in, 0, sizeof(in));
171 memset(out, 0, sizeof(*out));
172
173 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SRQ);
174 in.srqn = cpu_to_be32(srq->srqn);
175 err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
176 if (err)
177 return err;
178
179 if (out->hdr.status)
180 return mlx5_cmd_status_to_err(&out->hdr);
181
182 return err;
183}
184EXPORT_SYMBOL(mlx5_core_query_srq);
185
186int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
187 u16 lwm, int is_srq)
188{
189 struct mlx5_arm_srq_mbox_in in;
190 struct mlx5_arm_srq_mbox_out out;
191 int err;
192
193 memset(&in, 0, sizeof(in));
194 memset(&out, 0, sizeof(out));
195
196 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ARM_RQ);
197 in.hdr.opmod = cpu_to_be16(!!is_srq);
198 in.srqn = cpu_to_be32(srq->srqn);
199 in.lwm = cpu_to_be16(lwm);
200
201 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
202 if (err)
203 return err;
204
205 if (out.hdr.status)
206 return mlx5_cmd_status_to_err(&out.hdr);
207
208 return err;
209}
210EXPORT_SYMBOL(mlx5_core_arm_srq);
211
212void mlx5_init_srq_table(struct mlx5_core_dev *dev)
213{
214 struct mlx5_srq_table *table = &dev->priv.srq_table;
215
216 spin_lock_init(&table->lock);
217 INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
218}
219
220void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev)
221{
222 /* nothing */
223}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
new file mode 100644
index 000000000000..71d4a3937200
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/mlx5/driver.h>
36#include <linux/mlx5/cmd.h>
37#include "mlx5_core.h"
38
39enum {
40 NUM_DRIVER_UARS = 4,
41 NUM_LOW_LAT_UUARS = 4,
42};
43
44
45struct mlx5_alloc_uar_mbox_in {
46 struct mlx5_inbox_hdr hdr;
47 u8 rsvd[8];
48};
49
50struct mlx5_alloc_uar_mbox_out {
51 struct mlx5_outbox_hdr hdr;
52 __be32 uarn;
53 u8 rsvd[4];
54};
55
56struct mlx5_free_uar_mbox_in {
57 struct mlx5_inbox_hdr hdr;
58 __be32 uarn;
59 u8 rsvd[4];
60};
61
62struct mlx5_free_uar_mbox_out {
63 struct mlx5_outbox_hdr hdr;
64 u8 rsvd[8];
65};
66
67int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
68{
69 struct mlx5_alloc_uar_mbox_in in;
70 struct mlx5_alloc_uar_mbox_out out;
71 int err;
72
73 memset(&in, 0, sizeof(in));
74 memset(&out, 0, sizeof(out));
75 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
76 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
77 if (err)
78 goto ex;
79
80 if (out.hdr.status) {
81 err = mlx5_cmd_status_to_err(&out.hdr);
82 goto ex;
83 }
84
85 *uarn = be32_to_cpu(out.uarn) & 0xffffff;
86
87ex:
88 return err;
89}
90EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
91
92int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
93{
94 struct mlx5_free_uar_mbox_in in;
95 struct mlx5_free_uar_mbox_out out;
96 int err;
97
98 memset(&in, 0, sizeof(in));
99 in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR);
100 in.uarn = cpu_to_be32(uarn);
101 err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
102 if (err)
103 goto ex;
104
105 if (out.hdr.status)
106 err = mlx5_cmd_status_to_err(&out.hdr);
107
108ex:
109 return err;
110}
111EXPORT_SYMBOL(mlx5_cmd_free_uar);
112
113static int need_uuar_lock(int uuarn)
114{
115 int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
116
	/* only uuar 0 and the dedicated low-latency uuars are unshared and need no lock */
 117	if (uuarn == 0 || uuarn >= tot_uuars - NUM_LOW_LAT_UUARS)
118 return 0;
119
120 return 1;
121}
122
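/*
 * Allocate and map the driver's UAR pages and carve each of them into
 * MLX5_BF_REGS_PER_PAGE blue-flame registers.
 */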
123int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
124{
125 int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
126 struct mlx5_bf *bf;
127 phys_addr_t addr;
128 int err;
129 int i;
130
131 uuari->num_uars = NUM_DRIVER_UARS;
132 uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS;
133
134 mutex_init(&uuari->lock);
135 uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL);
136 if (!uuari->uars)
137 return -ENOMEM;
138
139 uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL);
140 if (!uuari->bfs) {
141 err = -ENOMEM;
142 goto out_uars;
143 }
144
145 uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap),
146 GFP_KERNEL);
147 if (!uuari->bitmap) {
148 err = -ENOMEM;
149 goto out_bfs;
150 }
151
152 uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL);
153 if (!uuari->count) {
154 err = -ENOMEM;
155 goto out_bitmap;
156 }
157
158 for (i = 0; i < uuari->num_uars; i++) {
159 err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index);
160 if (err)
161 goto out_count;
162
163 addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT);
164 uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
165 if (!uuari->uars[i].map) {
 166			mlx5_cmd_free_uar(dev, uuari->uars[i].index);
			err = -ENOMEM;
 167			goto out_count;
168 }
169 mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
170 uuari->uars[i].index, uuari->uars[i].map);
171 }
172
173 for (i = 0; i < tot_uuars; i++) {
174 bf = &uuari->bfs[i];
175
176 bf->buf_size = dev->caps.bf_reg_size / 2;
177 bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
178 bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
179 bf->reg = NULL; /* Add WC support */
180 bf->offset = (i % MLX5_BF_REGS_PER_PAGE) * dev->caps.bf_reg_size +
181 MLX5_BF_OFFSET;
182 bf->need_lock = need_uuar_lock(i);
183 spin_lock_init(&bf->lock);
184 spin_lock_init(&bf->lock32);
185 bf->uuarn = i;
186 }
187
188 return 0;
189
190out_count:
191 for (i--; i >= 0; i--) {
192 iounmap(uuari->uars[i].map);
193 mlx5_cmd_free_uar(dev, uuari->uars[i].index);
194 }
195 kfree(uuari->count);
196
197out_bitmap:
198 kfree(uuari->bitmap);
199
200out_bfs:
201 kfree(uuari->bfs);
202
203out_uars:
204 kfree(uuari->uars);
205 return err;
206}
207
208int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
209{
210 int i = uuari->num_uars;
211
212 for (i--; i >= 0; i--) {
213 iounmap(uuari->uars[i].map);
214 mlx5_cmd_free_uar(dev, uuari->uars[i].index);
215 }
216
217 kfree(uuari->count);
218 kfree(uuari->bitmap);
219 kfree(uuari->bfs);
220 kfree(uuari->uars);
221
222 return 0;
223}
diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h
new file mode 100644
index 000000000000..2826a4b6071e
--- /dev/null
+++ b/include/linux/mlx5/cmd.h
@@ -0,0 +1,51 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_CMD_H
34#define MLX5_CMD_H
35
36#include <linux/types.h>
37
38struct manage_pages_layout {
39 u64 ptr;
40 u32 reserved;
41 u16 num_entries;
42 u16 func_id;
43};
44
45
46struct mlx5_cmd_alloc_uar_imm_out {
47 u32 rsvd[3];
48 u32 uarn;
49};
50
51#endif /* MLX5_CMD_H */
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
new file mode 100644
index 000000000000..3db67f73d96d
--- /dev/null
+++ b/include/linux/mlx5/cq.h
@@ -0,0 +1,165 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_CORE_CQ_H
34#define MLX5_CORE_CQ_H
35
36#include <rdma/ib_verbs.h>
37#include <linux/mlx5/driver.h>
38
39
40struct mlx5_core_cq {
41 u32 cqn;
42 int cqe_sz;
43 __be32 *set_ci_db;
44 __be32 *arm_db;
45 atomic_t refcount;
46 struct completion free;
47 unsigned vector;
48 int irqn;
49 void (*comp) (struct mlx5_core_cq *);
50 void (*event) (struct mlx5_core_cq *, enum mlx5_event);
51 struct mlx5_uar *uar;
52 u32 cons_index;
53 unsigned arm_sn;
54 struct mlx5_rsc_debug *dbg;
55 int pid;
56};
57
58
59enum {
60 MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR = 0x01,
61 MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR = 0x02,
62 MLX5_CQE_SYNDROME_LOCAL_PROT_ERR = 0x04,
63 MLX5_CQE_SYNDROME_WR_FLUSH_ERR = 0x05,
64 MLX5_CQE_SYNDROME_MW_BIND_ERR = 0x06,
65 MLX5_CQE_SYNDROME_BAD_RESP_ERR = 0x10,
66 MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR = 0x11,
67 MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR = 0x12,
68 MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR = 0x13,
69 MLX5_CQE_SYNDROME_REMOTE_OP_ERR = 0x14,
70 MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR = 0x15,
71 MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR = 0x16,
72 MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR = 0x22,
73};
74
75enum {
76 MLX5_CQE_OWNER_MASK = 1,
77 MLX5_CQE_REQ = 0,
78 MLX5_CQE_RESP_WR_IMM = 1,
79 MLX5_CQE_RESP_SEND = 2,
80 MLX5_CQE_RESP_SEND_IMM = 3,
81 MLX5_CQE_RESP_SEND_INV = 4,
82 MLX5_CQE_RESIZE_CQ = 0xff, /* TBD */
83 MLX5_CQE_REQ_ERR = 13,
84 MLX5_CQE_RESP_ERR = 14,
85};
86
87enum {
88 MLX5_CQ_MODIFY_RESEIZE = 0,
89 MLX5_CQ_MODIFY_MODER = 1,
90 MLX5_CQ_MODIFY_MAPPING = 2,
91};
92
93struct mlx5_cq_modify_params {
94 int type;
95 union {
96 struct {
97 u32 page_offset;
98 u8 log_cq_size;
99 } resize;
100
101 struct {
102 } moder;
103
104 struct {
105 } mapping;
106 } params;
107};
108
109enum {
110 CQE_SIZE_64 = 0,
111 CQE_SIZE_128 = 1,
112};
113
114static inline int cqe_sz_to_mlx_sz(u8 size)
115{
116 return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128;
117}
118
119static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq)
120{
121 *cq->set_ci_db = cpu_to_be32(cq->cons_index & 0xffffff);
122}
123
124enum {
125 MLX5_CQ_DB_REQ_NOT_SOL = 1 << 24,
126 MLX5_CQ_DB_REQ_NOT = 0 << 24
127};
128
129static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd,
130 void __iomem *uar_page,
131 spinlock_t *doorbell_lock)
132{
133 __be32 doorbell[2];
134 u32 sn;
135 u32 ci;
136
137 sn = cq->arm_sn & 3;
138 ci = cq->cons_index & 0xffffff;
139
140 *cq->arm_db = cpu_to_be32(sn << 28 | cmd | ci);
141
142 /* Make sure that the doorbell record in host memory is
143 * written before ringing the doorbell via PCI MMIO.
144 */
145 wmb();
146
147 doorbell[0] = cpu_to_be32(sn << 28 | cmd | ci);
148 doorbell[1] = cpu_to_be32(cq->cqn);
149
150 mlx5_write64(doorbell, uar_page + MLX5_CQ_DOORBELL, doorbell_lock);
151}
152
153int mlx5_init_cq_table(struct mlx5_core_dev *dev);
154void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
155int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
156 struct mlx5_create_cq_mbox_in *in, int inlen);
157int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
158int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
159 struct mlx5_query_cq_mbox_out *out);
160int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
161 int type, struct mlx5_cq_modify_params *params);
162int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
163void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
164
165#endif /* MLX5_CORE_CQ_H */
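/*
 * A minimal sketch of the consumer-side doorbell sequence the two inline
 * helpers above imply: after a (hypothetical) poll loop has consumed
 * 'polled' CQEs, publish the new consumer index and then re-arm the CQ.
 * The function name is illustrative; cq->uar and the cq_uar_lock doorbell
 * lock come from linux/mlx5/driver.h.
 */
static void example_cq_poll_done(struct mlx5_core_dev *dev,
				 struct mlx5_core_cq *cq, int polled)
{
	cq->cons_index += polled;	/* CQEs consumed by the poll loop */
	mlx5_cq_set_ci(cq);		/* update the set_ci doorbell record */

	/* request an interrupt on the next completion, solicited or not */
	mlx5_cq_arm(cq, MLX5_CQ_DB_REQ_NOT, cq->uar->map,
		    MLX5_GET_DOORBELL_LOCK(&dev->priv.cq_uar_lock));
}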
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
new file mode 100644
index 000000000000..8de8d8f22384
--- /dev/null
+++ b/include/linux/mlx5/device.h
@@ -0,0 +1,893 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_DEVICE_H
34#define MLX5_DEVICE_H
35
36#include <linux/types.h>
37#include <rdma/ib_verbs.h>
38
39#if defined(__LITTLE_ENDIAN)
40#define MLX5_SET_HOST_ENDIANNESS 0
41#elif defined(__BIG_ENDIAN)
42#define MLX5_SET_HOST_ENDIANNESS 0x80
43#else
44#error Host endianness not defined
45#endif
46
47enum {
48 MLX5_MAX_COMMANDS = 32,
49 MLX5_CMD_DATA_BLOCK_SIZE = 512,
50 MLX5_PCI_CMD_XPORT = 7,
51};
52
53enum {
54 MLX5_EXTENDED_UD_AV = 0x80000000,
55};
56
57enum {
58 MLX5_CQ_STATE_ARMED = 9,
59 MLX5_CQ_STATE_ALWAYS_ARMED = 0xb,
60 MLX5_CQ_STATE_FIRED = 0xa,
61};
62
63enum {
64 MLX5_STAT_RATE_OFFSET = 5,
65};
66
67enum {
68 MLX5_INLINE_SEG = 0x80000000,
69};
70
71enum {
72 MLX5_PERM_LOCAL_READ = 1 << 2,
73 MLX5_PERM_LOCAL_WRITE = 1 << 3,
74 MLX5_PERM_REMOTE_READ = 1 << 4,
75 MLX5_PERM_REMOTE_WRITE = 1 << 5,
76 MLX5_PERM_ATOMIC = 1 << 6,
77 MLX5_PERM_UMR_EN = 1 << 7,
78};
79
80enum {
81 MLX5_PCIE_CTRL_SMALL_FENCE = 1 << 0,
82 MLX5_PCIE_CTRL_RELAXED_ORDERING = 1 << 2,
83 MLX5_PCIE_CTRL_NO_SNOOP = 1 << 3,
84 MLX5_PCIE_CTRL_TLP_PROCE_EN = 1 << 6,
85 MLX5_PCIE_CTRL_TPH_MASK = 3 << 4,
86};
87
88enum {
89 MLX5_ACCESS_MODE_PA = 0,
90 MLX5_ACCESS_MODE_MTT = 1,
91 MLX5_ACCESS_MODE_KLM = 2
92};
93
94enum {
95 MLX5_MKEY_REMOTE_INVAL = 1 << 24,
96 MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
97 MLX5_MKEY_BSF_EN = 1 << 30,
98 MLX5_MKEY_LEN64 = 1 << 31,
99};
100
101enum {
102 MLX5_EN_RD = (u64)1,
103 MLX5_EN_WR = (u64)2
104};
105
106enum {
107 MLX5_BF_REGS_PER_PAGE = 4,
108 MLX5_MAX_UAR_PAGES = 1 << 8,
109 MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
110};
111
112enum {
113 MLX5_MKEY_MASK_LEN = 1ull << 0,
114 MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1,
115 MLX5_MKEY_MASK_START_ADDR = 1ull << 6,
116 MLX5_MKEY_MASK_PD = 1ull << 7,
117 MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8,
118 MLX5_MKEY_MASK_BSF_EN = 1ull << 12,
119 MLX5_MKEY_MASK_KEY = 1ull << 13,
120 MLX5_MKEY_MASK_QPN = 1ull << 14,
121 MLX5_MKEY_MASK_LR = 1ull << 17,
122 MLX5_MKEY_MASK_LW = 1ull << 18,
123 MLX5_MKEY_MASK_RR = 1ull << 19,
124 MLX5_MKEY_MASK_RW = 1ull << 20,
125 MLX5_MKEY_MASK_A = 1ull << 21,
126 MLX5_MKEY_MASK_SMALL_FENCE = 1ull << 23,
127 MLX5_MKEY_MASK_FREE = 1ull << 29,
128};
129
130enum mlx5_event {
131 MLX5_EVENT_TYPE_COMP = 0x0,
132
133 MLX5_EVENT_TYPE_PATH_MIG = 0x01,
134 MLX5_EVENT_TYPE_COMM_EST = 0x02,
135 MLX5_EVENT_TYPE_SQ_DRAINED = 0x03,
136 MLX5_EVENT_TYPE_SRQ_LAST_WQE = 0x13,
137 MLX5_EVENT_TYPE_SRQ_RQ_LIMIT = 0x14,
138
139 MLX5_EVENT_TYPE_CQ_ERROR = 0x04,
140 MLX5_EVENT_TYPE_WQ_CATAS_ERROR = 0x05,
141 MLX5_EVENT_TYPE_PATH_MIG_FAILED = 0x07,
142 MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR = 0x10,
143 MLX5_EVENT_TYPE_WQ_ACCESS_ERROR = 0x11,
144 MLX5_EVENT_TYPE_SRQ_CATAS_ERROR = 0x12,
145
146 MLX5_EVENT_TYPE_INTERNAL_ERROR = 0x08,
147 MLX5_EVENT_TYPE_PORT_CHANGE = 0x09,
148 MLX5_EVENT_TYPE_GPIO_EVENT = 0x15,
149 MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19,
150
151 MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a,
152 MLX5_EVENT_TYPE_STALL_EVENT = 0x1b,
153
154 MLX5_EVENT_TYPE_CMD = 0x0a,
155 MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb,
156};
157
158enum {
159 MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1,
160 MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4,
161 MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5,
162 MLX5_PORT_CHANGE_SUBTYPE_LID = 6,
163 MLX5_PORT_CHANGE_SUBTYPE_PKEY = 7,
164 MLX5_PORT_CHANGE_SUBTYPE_GUID = 8,
165 MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG = 9,
166};
167
168enum {
169 MLX5_DEV_CAP_FLAG_RC = 1LL << 0,
170 MLX5_DEV_CAP_FLAG_UC = 1LL << 1,
171 MLX5_DEV_CAP_FLAG_UD = 1LL << 2,
172 MLX5_DEV_CAP_FLAG_XRC = 1LL << 3,
173 MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6,
174 MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8,
175 MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9,
176 MLX5_DEV_CAP_FLAG_APM = 1LL << 17,
177 MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18,
178 MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24,
179 MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32,
180 MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38,
181 MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39,
182 MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40,
183 MLX5_DEV_CAP_FLAG_DCT = 1LL << 41,
184 MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 1LL << 46,
185};
186
187enum {
188 MLX5_OPCODE_NOP = 0x00,
189 MLX5_OPCODE_SEND_INVAL = 0x01,
190 MLX5_OPCODE_RDMA_WRITE = 0x08,
191 MLX5_OPCODE_RDMA_WRITE_IMM = 0x09,
192 MLX5_OPCODE_SEND = 0x0a,
193 MLX5_OPCODE_SEND_IMM = 0x0b,
194 MLX5_OPCODE_RDMA_READ = 0x10,
195 MLX5_OPCODE_ATOMIC_CS = 0x11,
196 MLX5_OPCODE_ATOMIC_FA = 0x12,
197 MLX5_OPCODE_ATOMIC_MASKED_CS = 0x14,
198 MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15,
199 MLX5_OPCODE_BIND_MW = 0x18,
200 MLX5_OPCODE_CONFIG_CMD = 0x1f,
201
202 MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
203 MLX5_RECV_OPCODE_SEND = 0x01,
204 MLX5_RECV_OPCODE_SEND_IMM = 0x02,
205 MLX5_RECV_OPCODE_SEND_INVAL = 0x03,
206
207 MLX5_CQE_OPCODE_ERROR = 0x1e,
208 MLX5_CQE_OPCODE_RESIZE = 0x16,
209
210 MLX5_OPCODE_SET_PSV = 0x20,
211 MLX5_OPCODE_GET_PSV = 0x21,
212 MLX5_OPCODE_CHECK_PSV = 0x22,
213 MLX5_OPCODE_RGET_PSV = 0x26,
214 MLX5_OPCODE_RCHECK_PSV = 0x27,
215
216 MLX5_OPCODE_UMR = 0x25,
217
218};
219
220enum {
221 MLX5_SET_PORT_RESET_QKEY = 0,
222 MLX5_SET_PORT_GUID0 = 16,
223 MLX5_SET_PORT_NODE_GUID = 17,
224 MLX5_SET_PORT_SYS_GUID = 18,
225 MLX5_SET_PORT_GID_TABLE = 19,
226 MLX5_SET_PORT_PKEY_TABLE = 20,
227};
228
229enum {
230 MLX5_MAX_PAGE_SHIFT = 31
231};
232
233struct mlx5_inbox_hdr {
234 __be16 opcode;
235 u8 rsvd[4];
236 __be16 opmod;
237};
238
239struct mlx5_outbox_hdr {
240 u8 status;
241 u8 rsvd[3];
242 __be32 syndrome;
243};
244
245struct mlx5_cmd_query_adapter_mbox_in {
246 struct mlx5_inbox_hdr hdr;
247 u8 rsvd[8];
248};
249
250struct mlx5_cmd_query_adapter_mbox_out {
251 struct mlx5_outbox_hdr hdr;
252 u8 rsvd0[24];
253 u8 intapin;
254 u8 rsvd1[13];
255 __be16 vsd_vendor_id;
256 u8 vsd[208];
257 u8 vsd_psid[16];
258};
259
260struct mlx5_hca_cap {
261 u8 rsvd1[16];
262 u8 log_max_srq_sz;
263 u8 log_max_qp_sz;
264 u8 rsvd2;
265 u8 log_max_qp;
266 u8 log_max_strq_sz;
267 u8 log_max_srqs;
268 u8 rsvd4[2];
269 u8 rsvd5;
270 u8 log_max_cq_sz;
271 u8 rsvd6;
272 u8 log_max_cq;
273 u8 log_max_eq_sz;
274 u8 log_max_mkey;
275 u8 rsvd7;
276 u8 log_max_eq;
277 u8 max_indirection;
278 u8 log_max_mrw_sz;
279 u8 log_max_bsf_list_sz;
280 u8 log_max_klm_list_sz;
281 u8 rsvd_8_0;
282 u8 log_max_ra_req_dc;
283 u8 rsvd_8_1;
284 u8 log_max_ra_res_dc;
285 u8 rsvd9;
286 u8 log_max_ra_req_qp;
287 u8 rsvd10;
288 u8 log_max_ra_res_qp;
289 u8 rsvd11[4];
290 __be16 max_qp_count;
291 __be16 rsvd12;
292 u8 rsvd13;
293 u8 local_ca_ack_delay;
294 u8 rsvd14;
295 u8 num_ports;
296 u8 log_max_msg;
297 u8 rsvd15[3];
298 __be16 stat_rate_support;
299 u8 rsvd16[2];
300 __be64 flags;
301 u8 rsvd17;
302 u8 uar_sz;
303 u8 rsvd18;
304 u8 log_pg_sz;
305 __be16 bf_log_bf_reg_size;
306 u8 rsvd19[4];
307 __be16 max_desc_sz_sq;
308 u8 rsvd20[2];
309 __be16 max_desc_sz_rq;
310 u8 rsvd21[2];
311 __be16 max_desc_sz_sq_dc;
312 u8 rsvd22[4];
313 __be16 max_qp_mcg;
314 u8 rsvd23;
315 u8 log_max_mcg;
316 u8 rsvd24;
317 u8 log_max_pd;
318 u8 rsvd25;
319 u8 log_max_xrcd;
320 u8 rsvd26[42];
321 __be16 log_uar_page_sz;
322 u8 rsvd27[28];
323 u8 log_msx_atomic_size_qp;
324 u8 rsvd28[2];
325 u8 log_msx_atomic_size_dc;
326 u8 rsvd29[76];
327};
328
329
330struct mlx5_cmd_query_hca_cap_mbox_in {
331 struct mlx5_inbox_hdr hdr;
332 u8 rsvd[8];
333};
334
335
336struct mlx5_cmd_query_hca_cap_mbox_out {
337 struct mlx5_outbox_hdr hdr;
338 u8 rsvd0[8];
339 struct mlx5_hca_cap hca_cap;
340};
341
342
343struct mlx5_cmd_set_hca_cap_mbox_in {
344 struct mlx5_inbox_hdr hdr;
345 u8 rsvd[8];
346 struct mlx5_hca_cap hca_cap;
347};
348
349
350struct mlx5_cmd_set_hca_cap_mbox_out {
351 struct mlx5_outbox_hdr hdr;
352 u8 rsvd0[8];
353};
354
355
356struct mlx5_cmd_init_hca_mbox_in {
357 struct mlx5_inbox_hdr hdr;
358 u8 rsvd0[2];
359 __be16 profile;
360 u8 rsvd1[4];
361};
362
363struct mlx5_cmd_init_hca_mbox_out {
364 struct mlx5_outbox_hdr hdr;
365 u8 rsvd[8];
366};
367
368struct mlx5_cmd_teardown_hca_mbox_in {
369 struct mlx5_inbox_hdr hdr;
370 u8 rsvd0[2];
371 __be16 profile;
372 u8 rsvd1[4];
373};
374
375struct mlx5_cmd_teardown_hca_mbox_out {
376 struct mlx5_outbox_hdr hdr;
377 u8 rsvd[8];
378};
379
380struct mlx5_cmd_layout {
381 u8 type;
382 u8 rsvd0[3];
383 __be32 inlen;
384 __be64 in_ptr;
385 __be32 in[4];
386 __be32 out[4];
387 __be64 out_ptr;
388 __be32 outlen;
389 u8 token;
390 u8 sig;
391 u8 rsvd1;
392 u8 status_own;
393};
394
395
396struct health_buffer {
397 __be32 assert_var[5];
398 __be32 rsvd0[3];
399 __be32 assert_exit_ptr;
400 __be32 assert_callra;
401 __be32 rsvd1[2];
402 __be32 fw_ver;
403 __be32 hw_id;
404 __be32 rsvd2;
405 u8 irisc_index;
406 u8 synd;
407 __be16 ext_sync;
408};
409
410struct mlx5_init_seg {
411 __be32 fw_rev;
412 __be32 cmdif_rev_fw_sub;
413 __be32 rsvd0[2];
414 __be32 cmdq_addr_h;
415 __be32 cmdq_addr_l_sz;
416 __be32 cmd_dbell;
417 __be32 rsvd1[121];
418 struct health_buffer health;
419 __be32 rsvd2[884];
420 __be32 health_counter;
421 __be32 rsvd3[1023];
422 __be64 ieee1588_clk;
423 __be32 ieee1588_clk_type;
424 __be32 clr_intx;
425};
426
427struct mlx5_eqe_comp {
428 __be32 reserved[6];
429 __be32 cqn;
430};
431
432struct mlx5_eqe_qp_srq {
433 __be32 reserved[6];
434 __be32 qp_srq_n;
435};
436
437struct mlx5_eqe_cq_err {
438 __be32 cqn;
439 u8 reserved1[7];
440 u8 syndrome;
441};
442
443struct mlx5_eqe_dropped_packet {
444};
445
446struct mlx5_eqe_port_state {
447 u8 reserved0[8];
448 u8 port;
449};
450
451struct mlx5_eqe_gpio {
452 __be32 reserved0[2];
453 __be64 gpio_event;
454};
455
456struct mlx5_eqe_congestion {
457 u8 type;
458 u8 rsvd0;
459 u8 congestion_level;
460};
461
462struct mlx5_eqe_stall_vl {
463 u8 rsvd0[3];
464 u8 port_vl;
465};
466
467struct mlx5_eqe_cmd {
468 __be32 vector;
469 __be32 rsvd[6];
470};
471
472struct mlx5_eqe_page_req {
473 u8 rsvd0[2];
474 __be16 func_id;
475 u8 rsvd1[2];
476 __be16 num_pages;
477 __be32 rsvd2[5];
478};
479
480union ev_data {
481 __be32 raw[7];
482 struct mlx5_eqe_cmd cmd;
483 struct mlx5_eqe_comp comp;
484 struct mlx5_eqe_qp_srq qp_srq;
485 struct mlx5_eqe_cq_err cq_err;
486 struct mlx5_eqe_dropped_packet dp;
487 struct mlx5_eqe_port_state port;
488 struct mlx5_eqe_gpio gpio;
489 struct mlx5_eqe_congestion cong;
490 struct mlx5_eqe_stall_vl stall_vl;
491 struct mlx5_eqe_page_req req_pages;
492} __packed;
493
494struct mlx5_eqe {
495 u8 rsvd0;
496 u8 type;
497 u8 rsvd1;
498 u8 sub_type;
499 __be32 rsvd2[7];
500 union ev_data data;
501 __be16 rsvd3;
502 u8 signature;
503 u8 owner;
504} __packed;
505
506struct mlx5_cmd_prot_block {
507 u8 data[MLX5_CMD_DATA_BLOCK_SIZE];
508 u8 rsvd0[48];
509 __be64 next;
510 __be32 block_num;
511 u8 rsvd1;
512 u8 token;
513 u8 ctrl_sig;
514 u8 sig;
515};
516
517struct mlx5_err_cqe {
518 u8 rsvd0[32];
519 __be32 srqn;
520 u8 rsvd1[18];
521 u8 vendor_err_synd;
522 u8 syndrome;
523 __be32 s_wqe_opcode_qpn;
524 __be16 wqe_counter;
525 u8 signature;
526 u8 op_own;
527};
528
529struct mlx5_cqe64 {
530 u8 rsvd0[17];
531 u8 ml_path;
532 u8 rsvd20[4];
533 __be16 slid;
534 __be32 flags_rqpn;
535 u8 rsvd28[4];
536 __be32 srqn;
537 __be32 imm_inval_pkey;
538 u8 rsvd40[4];
539 __be32 byte_cnt;
540 __be64 timestamp;
541 __be32 sop_drop_qpn;
542 __be16 wqe_counter;
543 u8 signature;
544 u8 op_own;
545};
546
547struct mlx5_wqe_srq_next_seg {
548 u8 rsvd0[2];
549 __be16 next_wqe_index;
550 u8 signature;
551 u8 rsvd1[11];
552};
553
554union mlx5_ext_cqe {
555 struct ib_grh grh;
556 u8 inl[64];
557};
558
559struct mlx5_cqe128 {
560 union mlx5_ext_cqe inl_grh;
561 struct mlx5_cqe64 cqe64;
562};
563
564struct mlx5_srq_ctx {
565 u8 state_log_sz;
566 u8 rsvd0[3];
567 __be32 flags_xrcd;
568 __be32 pgoff_cqn;
569 u8 rsvd1[4];
570 u8 log_pg_sz;
571 u8 rsvd2[7];
572 __be32 pd;
573 __be16 lwm;
574 __be16 wqe_cnt;
575 u8 rsvd3[8];
576 __be64 db_record;
577};
578
579struct mlx5_create_srq_mbox_in {
580 struct mlx5_inbox_hdr hdr;
581 __be32 input_srqn;
582 u8 rsvd0[4];
583 struct mlx5_srq_ctx ctx;
584 u8 rsvd1[208];
585 __be64 pas[0];
586};
587
588struct mlx5_create_srq_mbox_out {
589 struct mlx5_outbox_hdr hdr;
590 __be32 srqn;
591 u8 rsvd[4];
592};
593
594struct mlx5_destroy_srq_mbox_in {
595 struct mlx5_inbox_hdr hdr;
596 __be32 srqn;
597 u8 rsvd[4];
598};
599
600struct mlx5_destroy_srq_mbox_out {
601 struct mlx5_outbox_hdr hdr;
602 u8 rsvd[8];
603};
604
605struct mlx5_query_srq_mbox_in {
606 struct mlx5_inbox_hdr hdr;
607 __be32 srqn;
608 u8 rsvd0[4];
609};
610
611struct mlx5_query_srq_mbox_out {
612 struct mlx5_outbox_hdr hdr;
613 u8 rsvd0[8];
614 struct mlx5_srq_ctx ctx;
615 u8 rsvd1[32];
616 __be64 pas[0];
617};
618
619struct mlx5_arm_srq_mbox_in {
620 struct mlx5_inbox_hdr hdr;
621 __be32 srqn;
622 __be16 rsvd;
623 __be16 lwm;
624};
625
626struct mlx5_arm_srq_mbox_out {
627 struct mlx5_outbox_hdr hdr;
628 u8 rsvd[8];
629};
630
631struct mlx5_cq_context {
632 u8 status;
633 u8 cqe_sz_flags;
634 u8 st;
635 u8 rsvd3;
636 u8 rsvd4[6];
637 __be16 page_offset;
638 __be32 log_sz_usr_page;
639 __be16 cq_period;
640 __be16 cq_max_count;
641 __be16 rsvd20;
642 __be16 c_eqn;
643 u8 log_pg_sz;
644 u8 rsvd25[7];
645 __be32 last_notified_index;
646 __be32 solicit_producer_index;
647 __be32 consumer_counter;
648 __be32 producer_counter;
649 u8 rsvd48[8];
650 __be64 db_record_addr;
651};
652
653struct mlx5_create_cq_mbox_in {
654 struct mlx5_inbox_hdr hdr;
655 __be32 input_cqn;
656 u8 rsvdx[4];
657 struct mlx5_cq_context ctx;
658 u8 rsvd6[192];
659 __be64 pas[0];
660};
661
662struct mlx5_create_cq_mbox_out {
663 struct mlx5_outbox_hdr hdr;
664 __be32 cqn;
665 u8 rsvd0[4];
666};
667
668struct mlx5_destroy_cq_mbox_in {
669 struct mlx5_inbox_hdr hdr;
670 __be32 cqn;
671 u8 rsvd0[4];
672};
673
674struct mlx5_destroy_cq_mbox_out {
675 struct mlx5_outbox_hdr hdr;
676 u8 rsvd0[8];
677};
678
679struct mlx5_query_cq_mbox_in {
680 struct mlx5_inbox_hdr hdr;
681 __be32 cqn;
682 u8 rsvd0[4];
683};
684
685struct mlx5_query_cq_mbox_out {
686 struct mlx5_outbox_hdr hdr;
687 u8 rsvd0[8];
688 struct mlx5_cq_context ctx;
689 u8 rsvd6[16];
690 __be64 pas[0];
691};
692
693struct mlx5_eq_context {
694 u8 status;
695 u8 ec_oi;
696 u8 st;
697 u8 rsvd2[7];
698 __be16 page_pffset;
699 __be32 log_sz_usr_page;
700 u8 rsvd3[7];
701 u8 intr;
702 u8 log_page_size;
703 u8 rsvd4[15];
704 __be32 consumer_counter;
705 __be32 produser_counter;
706 u8 rsvd5[16];
707};
708
709struct mlx5_create_eq_mbox_in {
710 struct mlx5_inbox_hdr hdr;
711 u8 rsvd0[3];
712 u8 input_eqn;
713 u8 rsvd1[4];
714 struct mlx5_eq_context ctx;
715 u8 rsvd2[8];
716 __be64 events_mask;
717 u8 rsvd3[176];
718 __be64 pas[0];
719};
720
721struct mlx5_create_eq_mbox_out {
722 struct mlx5_outbox_hdr hdr;
723 u8 rsvd0[3];
724 u8 eq_number;
725 u8 rsvd1[4];
726};
727
728struct mlx5_destroy_eq_mbox_in {
729 struct mlx5_inbox_hdr hdr;
730 u8 rsvd0[3];
731 u8 eqn;
732 u8 rsvd1[4];
733};
734
735struct mlx5_destroy_eq_mbox_out {
736 struct mlx5_outbox_hdr hdr;
737 u8 rsvd[8];
738};
739
740struct mlx5_map_eq_mbox_in {
741 struct mlx5_inbox_hdr hdr;
742 __be64 mask;
743 u8 mu;
744 u8 rsvd0[2];
745 u8 eqn;
746 u8 rsvd1[24];
747};
748
749struct mlx5_map_eq_mbox_out {
750 struct mlx5_outbox_hdr hdr;
751 u8 rsvd[8];
752};
753
754struct mlx5_query_eq_mbox_in {
755 struct mlx5_inbox_hdr hdr;
756 u8 rsvd0[3];
757 u8 eqn;
758 u8 rsvd1[4];
759};
760
761struct mlx5_query_eq_mbox_out {
762 struct mlx5_outbox_hdr hdr;
763 u8 rsvd[8];
764 struct mlx5_eq_context ctx;
765};
766
767struct mlx5_mkey_seg {
768	/* This is a two-bit field occupying bits 31-30.
769	 * Bit 31 is always 0;
770	 * bit 30 is zero for regular MRs and 1 (i.e. free) for UMRs that do not have translation.
771 */
772 u8 status;
773 u8 pcie_control;
774 u8 flags;
775 u8 version;
776 __be32 qpn_mkey7_0;
777 u8 rsvd1[4];
778 __be32 flags_pd;
779 __be64 start_addr;
780 __be64 len;
781 __be32 bsfs_octo_size;
782 u8 rsvd2[16];
783 __be32 xlt_oct_size;
784 u8 rsvd3[3];
785 u8 log2_page_size;
786 u8 rsvd4[4];
787};
788
789struct mlx5_query_special_ctxs_mbox_in {
790 struct mlx5_inbox_hdr hdr;
791 u8 rsvd[8];
792};
793
794struct mlx5_query_special_ctxs_mbox_out {
795 struct mlx5_outbox_hdr hdr;
796 __be32 dump_fill_mkey;
797 __be32 reserved_lkey;
798};
799
800struct mlx5_create_mkey_mbox_in {
801 struct mlx5_inbox_hdr hdr;
802 __be32 input_mkey_index;
803 u8 rsvd0[4];
804 struct mlx5_mkey_seg seg;
805 u8 rsvd1[16];
806 __be32 xlat_oct_act_size;
807 __be32 bsf_coto_act_size;
808 u8 rsvd2[168];
809 __be64 pas[0];
810};
811
812struct mlx5_create_mkey_mbox_out {
813 struct mlx5_outbox_hdr hdr;
814 __be32 mkey;
815 u8 rsvd[4];
816};
817
818struct mlx5_destroy_mkey_mbox_in {
819 struct mlx5_inbox_hdr hdr;
820 __be32 mkey;
821 u8 rsvd[4];
822};
823
824struct mlx5_destroy_mkey_mbox_out {
825 struct mlx5_outbox_hdr hdr;
826 u8 rsvd[8];
827};
828
829struct mlx5_query_mkey_mbox_in {
830 struct mlx5_inbox_hdr hdr;
831 __be32 mkey;
832};
833
834struct mlx5_query_mkey_mbox_out {
835 struct mlx5_outbox_hdr hdr;
836 __be64 pas[0];
837};
838
839struct mlx5_modify_mkey_mbox_in {
840 struct mlx5_inbox_hdr hdr;
841 __be32 mkey;
842 __be64 pas[0];
843};
844
845struct mlx5_modify_mkey_mbox_out {
846 struct mlx5_outbox_hdr hdr;
847};
848
849struct mlx5_dump_mkey_mbox_in {
850 struct mlx5_inbox_hdr hdr;
851};
852
853struct mlx5_dump_mkey_mbox_out {
854 struct mlx5_outbox_hdr hdr;
855 __be32 mkey;
856};
857
858struct mlx5_mad_ifc_mbox_in {
859 struct mlx5_inbox_hdr hdr;
860 __be16 remote_lid;
861 u8 rsvd0;
862 u8 port;
863 u8 rsvd1[4];
864 u8 data[256];
865};
866
867struct mlx5_mad_ifc_mbox_out {
868 struct mlx5_outbox_hdr hdr;
869 u8 rsvd[8];
870 u8 data[256];
871};
872
873struct mlx5_access_reg_mbox_in {
874 struct mlx5_inbox_hdr hdr;
875 u8 rsvd0[2];
876 __be16 register_id;
877 __be32 arg;
878 __be32 data[0];
879};
880
881struct mlx5_access_reg_mbox_out {
882 struct mlx5_outbox_hdr hdr;
883 u8 rsvd[8];
884 __be32 data[0];
885};
886
887#define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
888
889enum {
890 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0
891};
892
893#endif /* MLX5_DEVICE_H */
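/*
 * A condensed sketch of the calling convention these mailbox layouts imply:
 * zero the inbox, set the big-endian opcode, execute the command, then
 * translate the outbox status/syndrome into an errno. mlx5_cmd_exec() and
 * mlx5_cmd_status_to_err() are declared in linux/mlx5/driver.h below; the
 * wrapper name here is illustrative only.
 */
static int example_query_hca_cap(struct mlx5_core_dev *dev,
				 struct mlx5_cmd_query_hca_cap_mbox_out *out)
{
	struct mlx5_cmd_query_hca_cap_mbox_in in;
	int err;

	memset(&in, 0, sizeof(in));
	memset(out, 0, sizeof(*out));
	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_HCA_CAP);

	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
	if (err)
		return err;

	if (out->hdr.status)
		return mlx5_cmd_status_to_err(&out->hdr);

	return 0;	/* out->hca_cap now holds the reported device limits */
}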
diff --git a/include/linux/mlx5/doorbell.h b/include/linux/mlx5/doorbell.h
new file mode 100644
index 000000000000..163a818411e7
--- /dev/null
+++ b/include/linux/mlx5/doorbell.h
@@ -0,0 +1,79 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_DOORBELL_H
34#define MLX5_DOORBELL_H
35
36#define MLX5_BF_OFFSET 0x800
37#define MLX5_CQ_DOORBELL 0x20
38
39#if BITS_PER_LONG == 64
40/* Assume that we can just write a 64-bit doorbell atomically. s390
41 * actually doesn't have writeq() but S/390 systems don't even have
42 * PCI so we won't worry about it.
43 */
44
45#define MLX5_DECLARE_DOORBELL_LOCK(name)
46#define MLX5_INIT_DOORBELL_LOCK(ptr) do { } while (0)
47#define MLX5_GET_DOORBELL_LOCK(ptr) (NULL)
48
49static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
50 spinlock_t *doorbell_lock)
51{
52 __raw_writeq(*(u64 *)val, dest);
53}
54
55#else
56
57/* Just fall back to a spinlock to protect the doorbell if
58 * BITS_PER_LONG is 32 -- there's no portable way to do atomic 64-bit
59 * MMIO writes.
60 */
61
62#define MLX5_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
63#define MLX5_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
64#define MLX5_GET_DOORBELL_LOCK(ptr) (ptr)
65
66static inline void mlx5_write64(__be32 val[2], void __iomem *dest,
67 spinlock_t *doorbell_lock)
68{
69 unsigned long flags;
70
71 spin_lock_irqsave(doorbell_lock, flags);
72 __raw_writel((__force u32) val[0], dest);
73 __raw_writel((__force u32) val[1], dest + 4);
74 spin_unlock_irqrestore(doorbell_lock, flags);
75}
76
77#endif
78
79#endif /* MLX5_DOORBELL_H */
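/*
 * A minimal sketch of how the three macros above fit together: the lock is
 * only declared, initialized and taken on 32-bit builds, while 64-bit builds
 * compile the same call down to a single 64-bit MMIO write. The context
 * structure and ring function are illustrative, not part of this patch.
 */
struct example_db_ctx {
	void __iomem *uar_page;
	MLX5_DECLARE_DOORBELL_LOCK(db_lock);	/* empty on 64-bit, spinlock_t on 32-bit */
};

static void example_ring_cq_doorbell(struct example_db_ctx *ctx, __be32 db[2])
{
	/* MLX5_INIT_DOORBELL_LOCK(&ctx->db_lock) must have run once at setup */
	mlx5_write64(db, ctx->uar_page + MLX5_CQ_DOORBELL,
		     MLX5_GET_DOORBELL_LOCK(&ctx->db_lock));
}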
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
new file mode 100644
index 000000000000..f22e4419839b
--- /dev/null
+++ b/include/linux/mlx5/driver.h
@@ -0,0 +1,769 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_DRIVER_H
34#define MLX5_DRIVER_H
35
36#include <linux/kernel.h>
37#include <linux/completion.h>
38#include <linux/pci.h>
39#include <linux/spinlock_types.h>
40#include <linux/semaphore.h>
41#include <linux/vmalloc.h>
42#include <linux/radix-tree.h>
43#include <linux/mlx5/device.h>
44#include <linux/mlx5/doorbell.h>
45
46enum {
47 MLX5_BOARD_ID_LEN = 64,
48 MLX5_MAX_NAME_LEN = 16,
49};
50
51enum {
 52	/* Two hours for the sake of bringup. Generally, commands must always
 53	 * complete, and we may need to tune this timeout value.
54 */
55 MLX5_CMD_TIMEOUT_MSEC = 7200 * 1000,
56 MLX5_CMD_WQ_MAX_NAME = 32,
57};
58
59enum {
60 CMD_OWNER_SW = 0x0,
61 CMD_OWNER_HW = 0x1,
62 CMD_STATUS_SUCCESS = 0,
63};
64
65enum mlx5_sqp_t {
66 MLX5_SQP_SMI = 0,
67 MLX5_SQP_GSI = 1,
68 MLX5_SQP_IEEE_1588 = 2,
69 MLX5_SQP_SNIFFER = 3,
70 MLX5_SQP_SYNC_UMR = 4,
71};
72
73enum {
74 MLX5_MAX_PORTS = 2,
75};
76
77enum {
78 MLX5_EQ_VEC_PAGES = 0,
79 MLX5_EQ_VEC_CMD = 1,
80 MLX5_EQ_VEC_ASYNC = 2,
81 MLX5_EQ_VEC_COMP_BASE,
82};
83
84enum {
85 MLX5_MAX_EQ_NAME = 20
86};
87
88enum {
89 MLX5_ATOMIC_MODE_IB_COMP = 1 << 16,
90 MLX5_ATOMIC_MODE_CX = 2 << 16,
91 MLX5_ATOMIC_MODE_8B = 3 << 16,
92 MLX5_ATOMIC_MODE_16B = 4 << 16,
93 MLX5_ATOMIC_MODE_32B = 5 << 16,
94 MLX5_ATOMIC_MODE_64B = 6 << 16,
95 MLX5_ATOMIC_MODE_128B = 7 << 16,
96 MLX5_ATOMIC_MODE_256B = 8 << 16,
97};
98
99enum {
100 MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
101 MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
102 MLX5_CMD_OP_INIT_HCA = 0x102,
103 MLX5_CMD_OP_TEARDOWN_HCA = 0x103,
104 MLX5_CMD_OP_QUERY_PAGES = 0x107,
105 MLX5_CMD_OP_MANAGE_PAGES = 0x108,
106 MLX5_CMD_OP_SET_HCA_CAP = 0x109,
107
108 MLX5_CMD_OP_CREATE_MKEY = 0x200,
109 MLX5_CMD_OP_QUERY_MKEY = 0x201,
110 MLX5_CMD_OP_DESTROY_MKEY = 0x202,
111 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS = 0x203,
112
113 MLX5_CMD_OP_CREATE_EQ = 0x301,
114 MLX5_CMD_OP_DESTROY_EQ = 0x302,
115 MLX5_CMD_OP_QUERY_EQ = 0x303,
116
117 MLX5_CMD_OP_CREATE_CQ = 0x400,
118 MLX5_CMD_OP_DESTROY_CQ = 0x401,
119 MLX5_CMD_OP_QUERY_CQ = 0x402,
120 MLX5_CMD_OP_MODIFY_CQ = 0x403,
121
122 MLX5_CMD_OP_CREATE_QP = 0x500,
123 MLX5_CMD_OP_DESTROY_QP = 0x501,
124 MLX5_CMD_OP_RST2INIT_QP = 0x502,
125 MLX5_CMD_OP_INIT2RTR_QP = 0x503,
126 MLX5_CMD_OP_RTR2RTS_QP = 0x504,
127 MLX5_CMD_OP_RTS2RTS_QP = 0x505,
128 MLX5_CMD_OP_SQERR2RTS_QP = 0x506,
129 MLX5_CMD_OP_2ERR_QP = 0x507,
130 MLX5_CMD_OP_RTS2SQD_QP = 0x508,
131 MLX5_CMD_OP_SQD2RTS_QP = 0x509,
132 MLX5_CMD_OP_2RST_QP = 0x50a,
133 MLX5_CMD_OP_QUERY_QP = 0x50b,
134 MLX5_CMD_OP_CONF_SQP = 0x50c,
135 MLX5_CMD_OP_MAD_IFC = 0x50d,
136 MLX5_CMD_OP_INIT2INIT_QP = 0x50e,
137 MLX5_CMD_OP_SUSPEND_QP = 0x50f,
138 MLX5_CMD_OP_UNSUSPEND_QP = 0x510,
139 MLX5_CMD_OP_SQD2SQD_QP = 0x511,
140 MLX5_CMD_OP_ALLOC_QP_COUNTER_SET = 0x512,
141 MLX5_CMD_OP_DEALLOC_QP_COUNTER_SET = 0x513,
142 MLX5_CMD_OP_QUERY_QP_COUNTER_SET = 0x514,
143
144 MLX5_CMD_OP_CREATE_PSV = 0x600,
145 MLX5_CMD_OP_DESTROY_PSV = 0x601,
146 MLX5_CMD_OP_QUERY_PSV = 0x602,
147 MLX5_CMD_OP_QUERY_SIG_RULE_TABLE = 0x603,
148 MLX5_CMD_OP_QUERY_BLOCK_SIZE_TABLE = 0x604,
149
150 MLX5_CMD_OP_CREATE_SRQ = 0x700,
151 MLX5_CMD_OP_DESTROY_SRQ = 0x701,
152 MLX5_CMD_OP_QUERY_SRQ = 0x702,
153 MLX5_CMD_OP_ARM_RQ = 0x703,
154 MLX5_CMD_OP_RESIZE_SRQ = 0x704,
155
156 MLX5_CMD_OP_ALLOC_PD = 0x800,
157 MLX5_CMD_OP_DEALLOC_PD = 0x801,
158 MLX5_CMD_OP_ALLOC_UAR = 0x802,
159 MLX5_CMD_OP_DEALLOC_UAR = 0x803,
160
161 MLX5_CMD_OP_ATTACH_TO_MCG = 0x806,
162 MLX5_CMD_OP_DETACH_FROM_MCG = 0x807,
163
164
165 MLX5_CMD_OP_ALLOC_XRCD = 0x80e,
166 MLX5_CMD_OP_DEALLOC_XRCD = 0x80f,
167
168 MLX5_CMD_OP_ACCESS_REG = 0x805,
169 MLX5_CMD_OP_MAX = 0x810,
170};
171
172enum {
173 MLX5_REG_PCAP = 0x5001,
174 MLX5_REG_PMTU = 0x5003,
175 MLX5_REG_PTYS = 0x5004,
176 MLX5_REG_PAOS = 0x5006,
177 MLX5_REG_PMAOS = 0x5012,
178 MLX5_REG_PUDE = 0x5009,
179 MLX5_REG_PMPE = 0x5010,
180 MLX5_REG_PELC = 0x500e,
181 MLX5_REG_PMLP = 0, /* TBD */
182 MLX5_REG_NODE_DESC = 0x6001,
183 MLX5_REG_HOST_ENDIANNESS = 0x7004,
184};
185
186enum dbg_rsc_type {
187 MLX5_DBG_RSC_QP,
188 MLX5_DBG_RSC_EQ,
189 MLX5_DBG_RSC_CQ,
190};
191
192struct mlx5_field_desc {
193 struct dentry *dent;
194 int i;
195};
196
197struct mlx5_rsc_debug {
198 struct mlx5_core_dev *dev;
199 void *object;
200 enum dbg_rsc_type type;
201 struct dentry *root;
202 struct mlx5_field_desc fields[0];
203};
204
205enum mlx5_dev_event {
206 MLX5_DEV_EVENT_SYS_ERROR,
207 MLX5_DEV_EVENT_PORT_UP,
208 MLX5_DEV_EVENT_PORT_DOWN,
209 MLX5_DEV_EVENT_PORT_INITIALIZED,
210 MLX5_DEV_EVENT_LID_CHANGE,
211 MLX5_DEV_EVENT_PKEY_CHANGE,
212 MLX5_DEV_EVENT_GUID_CHANGE,
213 MLX5_DEV_EVENT_CLIENT_REREG,
214};
215
216struct mlx5_uuar_info {
217 struct mlx5_uar *uars;
218 int num_uars;
219 int num_low_latency_uuars;
220 unsigned long *bitmap;
221 unsigned int *count;
222 struct mlx5_bf *bfs;
223
224 /*
225 * protect uuar allocation data structs
226 */
227 struct mutex lock;
228};
229
230struct mlx5_bf {
231 void __iomem *reg;
232 void __iomem *regreg;
233 int buf_size;
234 struct mlx5_uar *uar;
235 unsigned long offset;
236 int need_lock;
237 /* protect blue flame buffer selection when needed
238 */
239 spinlock_t lock;
240
241 /* serialize 64 bit writes when done as two 32 bit accesses
242 */
243 spinlock_t lock32;
244 int uuarn;
245};
246
247struct mlx5_cmd_first {
248 __be32 data[4];
249};
250
251struct mlx5_cmd_msg {
252 struct list_head list;
253 struct cache_ent *cache;
254 u32 len;
255 struct mlx5_cmd_first first;
256 struct mlx5_cmd_mailbox *next;
257};
258
259struct mlx5_cmd_debug {
260 struct dentry *dbg_root;
261 struct dentry *dbg_in;
262 struct dentry *dbg_out;
263 struct dentry *dbg_outlen;
264 struct dentry *dbg_status;
265 struct dentry *dbg_run;
266 void *in_msg;
267 void *out_msg;
268 u8 status;
269 u16 inlen;
270 u16 outlen;
271};
272
273struct cache_ent {
274 /* protect block chain allocations
275 */
276 spinlock_t lock;
277 struct list_head head;
278};
279
280struct cmd_msg_cache {
281 struct cache_ent large;
282 struct cache_ent med;
283
284};
285
286struct mlx5_cmd_stats {
287 u64 sum;
288 u64 n;
289 struct dentry *root;
290 struct dentry *avg;
291 struct dentry *count;
292 /* protect command average calculations */
293 spinlock_t lock;
294};
295
296struct mlx5_cmd {
297 void *cmd_buf;
298 dma_addr_t dma;
299 u16 cmdif_rev;
300 u8 log_sz;
301 u8 log_stride;
302 int max_reg_cmds;
303 int events;
304 u32 __iomem *vector;
305
306 /* protect command queue allocations
307 */
308 spinlock_t alloc_lock;
309
310 /* protect token allocations
311 */
312 spinlock_t token_lock;
313 u8 token;
314 unsigned long bitmask;
315 char wq_name[MLX5_CMD_WQ_MAX_NAME];
316 struct workqueue_struct *wq;
317 struct semaphore sem;
318 struct semaphore pages_sem;
319 int mode;
320 struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
321 struct pci_pool *pool;
322 struct mlx5_cmd_debug dbg;
323 struct cmd_msg_cache cache;
324 int checksum_disabled;
325 struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
326};
327
328struct mlx5_port_caps {
329 int gid_table_len;
330 int pkey_table_len;
331};
332
333struct mlx5_caps {
334 u8 log_max_eq;
335 u8 log_max_cq;
336 u8 log_max_qp;
337 u8 log_max_mkey;
338 u8 log_max_pd;
339 u8 log_max_srq;
340 u32 max_cqes;
341 int max_wqes;
342 int max_sq_desc_sz;
343 int max_rq_desc_sz;
344 u64 flags;
345 u16 stat_rate_support;
346 int log_max_msg;
347 int num_ports;
348 int max_ra_res_qp;
349 int max_ra_req_qp;
350 int max_srq_wqes;
351 int bf_reg_size;
352 int bf_regs_per_page;
353 struct mlx5_port_caps port[MLX5_MAX_PORTS];
354 u8 ext_port_cap[MLX5_MAX_PORTS];
355 int max_vf;
356 u32 reserved_lkey;
357 u8 local_ca_ack_delay;
358 u8 log_max_mcg;
359 u16 max_qp_mcg;
360 int min_page_sz;
361};
362
363struct mlx5_cmd_mailbox {
364 void *buf;
365 dma_addr_t dma;
366 struct mlx5_cmd_mailbox *next;
367};
368
369struct mlx5_buf_list {
370 void *buf;
371 dma_addr_t map;
372};
373
374struct mlx5_buf {
375 struct mlx5_buf_list direct;
376 struct mlx5_buf_list *page_list;
377 int nbufs;
378 int npages;
379 int page_shift;
380 int size;
381};
382
383struct mlx5_eq {
384 struct mlx5_core_dev *dev;
385 __be32 __iomem *doorbell;
386 u32 cons_index;
387 struct mlx5_buf buf;
388 int size;
389 u8 irqn;
390 u8 eqn;
391 int nent;
392 u64 mask;
393 char name[MLX5_MAX_EQ_NAME];
394 struct list_head list;
395 int index;
396 struct mlx5_rsc_debug *dbg;
397};
398
399
400struct mlx5_core_mr {
401 u64 iova;
402 u64 size;
403 u32 key;
404 u32 pd;
405 u32 access;
406};
407
408struct mlx5_core_srq {
409 u32 srqn;
410 int max;
411 int max_gs;
412 int max_avail_gather;
413 int wqe_shift;
414 void (*event) (struct mlx5_core_srq *, enum mlx5_event);
415
416 atomic_t refcount;
417 struct completion free;
418};
419
420struct mlx5_eq_table {
421 void __iomem *update_ci;
422 void __iomem *update_arm_ci;
423 struct list_head *comp_eq_head;
424 struct mlx5_eq pages_eq;
425 struct mlx5_eq async_eq;
426 struct mlx5_eq cmd_eq;
427 struct msix_entry *msix_arr;
428 int num_comp_vectors;
429 /* protect EQs list
430 */
431 spinlock_t lock;
432};
433
434struct mlx5_uar {
435 u32 index;
436 struct list_head bf_list;
437 unsigned free_bf_bmap;
438 void __iomem *wc_map;
439 void __iomem *map;
440};
441
442
443struct mlx5_core_health {
444 struct health_buffer __iomem *health;
445 __be32 __iomem *health_counter;
446 struct timer_list timer;
447 struct list_head list;
448 u32 prev;
449 int miss_counter;
450};
451
452struct mlx5_cq_table {
453 /* protect radix tree
454 */
455 spinlock_t lock;
456 struct radix_tree_root tree;
457};
458
459struct mlx5_qp_table {
460 /* protect radix tree
461 */
462 spinlock_t lock;
463 struct radix_tree_root tree;
464};
465
466struct mlx5_srq_table {
467 /* protect radix tree
468 */
469 spinlock_t lock;
470 struct radix_tree_root tree;
471};
472
473struct mlx5_priv {
474 char name[MLX5_MAX_NAME_LEN];
475 struct mlx5_eq_table eq_table;
476 struct mlx5_uuar_info uuari;
477 MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
478
479 /* pages stuff */
480 struct workqueue_struct *pg_wq;
481 struct rb_root page_root;
482 int fw_pages;
483 int reg_pages;
484
485 struct mlx5_core_health health;
486
487 struct mlx5_srq_table srq_table;
488
489	/* start: qp stuff */
490 struct mlx5_qp_table qp_table;
491 struct dentry *qp_debugfs;
492 struct dentry *eq_debugfs;
493 struct dentry *cq_debugfs;
494 struct dentry *cmdif_debugfs;
495	/* end: qp stuff */
496
497	/* start: cq stuff */
498 struct mlx5_cq_table cq_table;
499	/* end: cq stuff */
500
501	/* start: alloc stuff */
502 struct mutex pgdir_mutex;
503 struct list_head pgdir_list;
504	/* end: alloc stuff */
505 struct dentry *dbg_root;
506
507	/* protect generation of the mkey variant byte (mkey_key) */
508 spinlock_t mkey_lock;
509 u8 mkey_key;
510};
511
512struct mlx5_core_dev {
513 struct pci_dev *pdev;
514 u8 rev_id;
515 char board_id[MLX5_BOARD_ID_LEN];
516 struct mlx5_cmd cmd;
517 struct mlx5_caps caps;
518 phys_addr_t iseg_base;
519 struct mlx5_init_seg __iomem *iseg;
520 void (*event) (struct mlx5_core_dev *dev,
521 enum mlx5_dev_event event,
522 void *data);
523 struct mlx5_priv priv;
524 struct mlx5_profile *profile;
525 atomic_t num_qps;
526};
527
528struct mlx5_db {
529 __be32 *db;
530 union {
531 struct mlx5_db_pgdir *pgdir;
532 struct mlx5_ib_user_db_page *user_page;
533 } u;
534 dma_addr_t dma;
535 int index;
536};
537
538enum {
539 MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES,
540};
541
542enum {
543 MLX5_COMP_EQ_SIZE = 1024,
544};
545
546struct mlx5_db_pgdir {
547 struct list_head list;
548 DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE);
549 __be32 *db_page;
550 dma_addr_t db_dma;
551};
552
553typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
554
555struct mlx5_cmd_work_ent {
556 struct mlx5_cmd_msg *in;
557 struct mlx5_cmd_msg *out;
558 mlx5_cmd_cbk_t callback;
559 void *context;
560 int idx;
561 struct completion done;
562 struct mlx5_cmd *cmd;
563 struct work_struct work;
564 struct mlx5_cmd_layout *lay;
565 int ret;
566 int page_queue;
567 u8 status;
568 u8 token;
569 struct timespec ts1;
570 struct timespec ts2;
571};
572
573struct mlx5_pas {
574 u64 pa;
575 u8 log_sz;
576};
577
578static inline void *mlx5_buf_offset(struct mlx5_buf *buf, int offset)
579{
580 if (likely(BITS_PER_LONG == 64 || buf->nbufs == 1))
581 return buf->direct.buf + offset;
582 else
583 return buf->page_list[offset >> PAGE_SHIFT].buf +
584 (offset & (PAGE_SIZE - 1));
585}
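/*
 * For example, assuming 64-byte CQEs, a completion queue entry can be
 * fetched the same way from either layout (single contiguous buffer or
 * page list); the helper name is illustrative only.
 */
static inline void *example_get_cqe(struct mlx5_buf *buf, int n)
{
	return mlx5_buf_offset(buf, n * 64);
}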
586
587extern struct workqueue_struct *mlx5_core_wq;
588
589#define STRUCT_FIELD(header, field) \
590 .struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
591 .struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field
592
593struct ib_field {
594 size_t struct_offset_bytes;
595 size_t struct_size_bytes;
596 int offset_bits;
597 int size_bits;
598};
599
600static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
601{
602 return pci_get_drvdata(pdev);
603}
604
605extern struct dentry *mlx5_debugfs_root;
606
607static inline u16 fw_rev_maj(struct mlx5_core_dev *dev)
608{
609 return ioread32be(&dev->iseg->fw_rev) & 0xffff;
610}
611
612static inline u16 fw_rev_min(struct mlx5_core_dev *dev)
613{
614 return ioread32be(&dev->iseg->fw_rev) >> 16;
615}
616
617static inline u16 fw_rev_sub(struct mlx5_core_dev *dev)
618{
619 return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff;
620}
621
622static inline u16 cmdif_rev(struct mlx5_core_dev *dev)
623{
624 return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
625}
626
627static inline void *mlx5_vzalloc(unsigned long size)
628{
629 void *rtn;
630
631 rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
632 if (!rtn)
633 rtn = vzalloc(size);
634 return rtn;
635}
636
637static inline void mlx5_vfree(const void *addr)
638{
639 if (addr && is_vmalloc_addr(addr))
640 vfree(addr);
641 else
642 kfree(addr);
643}
644
645int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev);
646void mlx5_dev_cleanup(struct mlx5_core_dev *dev);
647int mlx5_cmd_init(struct mlx5_core_dev *dev);
648void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
649void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
650void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
651int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
652int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
653 int out_size);
654int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
655int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
656int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
657int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
658void mlx5_health_cleanup(void);
659void __init mlx5_health_init(void);
660void mlx5_start_health_poll(struct mlx5_core_dev *dev);
661void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
662int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, int max_direct,
663 struct mlx5_buf *buf);
664void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
665struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
666 gfp_t flags, int npages);
667void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
668 struct mlx5_cmd_mailbox *head);
669int mlx5_core_create_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
670 struct mlx5_create_srq_mbox_in *in, int inlen);
671int mlx5_core_destroy_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq);
672int mlx5_core_query_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
673 struct mlx5_query_srq_mbox_out *out);
674int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
675 u16 lwm, int is_srq);
676int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
677 struct mlx5_create_mkey_mbox_in *in, int inlen);
678int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr);
679int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
680 struct mlx5_query_mkey_mbox_out *out, int outlen);
681int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
682 u32 *mkey);
683int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
684int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
685int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, void *inb, void *outb,
686 u16 opmod, int port);
687void mlx5_pagealloc_init(struct mlx5_core_dev *dev);
688void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
689int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
690void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
691void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
692 s16 npages);
693int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev);
694int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
695void mlx5_register_debugfs(void);
696void mlx5_unregister_debugfs(void);
697int mlx5_eq_init(struct mlx5_core_dev *dev);
698void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
699void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
700void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
701void mlx5_qp_event(struct mlx5_core_dev *dev, u32 qpn, int event_type);
702void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
703struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
704void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
705void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
706int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
707 int nent, u64 mask, const char *name, struct mlx5_uar *uar);
708int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
709int mlx5_start_eqs(struct mlx5_core_dev *dev);
710int mlx5_stop_eqs(struct mlx5_core_dev *dev);
711int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
712int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
713
714int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
715void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
716int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
717 int size_in, void *data_out, int size_out,
718 u16 reg_num, int arg, int write);
719int mlx5_set_port_caps(struct mlx5_core_dev *dev, int port_num, u32 caps);
720
721int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
722void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
723int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
724 struct mlx5_query_eq_mbox_out *out, int outlen);
725int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
726void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
727int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
728void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
729int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
730void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
731
732typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size);
733int mlx5_register_health_report_handler(health_handler_t handler);
734void mlx5_unregister_health_report_handler(void);
735const char *mlx5_command_str(int command);
736int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
737void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
738
739static inline u32 mlx5_mkey_to_idx(u32 mkey)
740{
741 return mkey >> 8;
742}
743
744static inline u32 mlx5_idx_to_mkey(u32 mkey_idx)
745{
746 return mkey_idx << 8;
747}
748
749enum {
750 MLX5_PROF_MASK_QP_SIZE = (u64)1 << 0,
751 MLX5_PROF_MASK_CMDIF_CSUM = (u64)1 << 1,
752 MLX5_PROF_MASK_MR_CACHE = (u64)1 << 2,
753};
754
755enum {
756 MAX_MR_CACHE_ENTRIES = 16,
757};
758
759struct mlx5_profile {
760 u64 mask;
761 u32 log_max_qp;
762 int cmdif_csum;
763 struct {
764 int size;
765 int limit;
766 } mr_cache[MAX_MR_CACHE_ENTRIES];
767};
768
769#endif /* MLX5_DRIVER_H */
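/*
 * A sketch of how the allocation helpers above combine with a variable
 * length command inbox (assuming linux/mlx5/cq.h for mlx5_core_create_cq()):
 * the mailbox ends in a flexible pas[] array, so the inbox is sized at run
 * time and allocated with the kzalloc-then-vzalloc fallback helper. CQ
 * context setup and keeping 'buf' in the caller's CQ object are elided;
 * the function name is illustrative only.
 */
static int example_create_cq(struct mlx5_core_dev *dev,
			     struct mlx5_core_cq *cq, int nent)
{
	struct mlx5_create_cq_mbox_in *in;
	struct mlx5_buf buf;
	int inlen;
	int err;

	err = mlx5_buf_alloc(dev, nent * 64, 2 * PAGE_SIZE, &buf);
	if (err)
		return err;

	inlen = sizeof(*in) + sizeof(__be64) * buf.npages;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_buf;
	}

	mlx5_fill_page_array(&buf, in->pas);
	/* ... fill in->ctx (log_sz_usr_page, c_eqn, log_pg_sz, db_record_addr) ... */

	err = mlx5_core_create_cq(dev, cq, in, inlen);
	mlx5_vfree(in);
	if (err)
		goto err_buf;

	return 0;

err_buf:
	mlx5_buf_free(dev, &buf);
	return err;
}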
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
new file mode 100644
index 000000000000..d9e3eacb3a7f
--- /dev/null
+++ b/include/linux/mlx5/qp.h
@@ -0,0 +1,467 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_QP_H
34#define MLX5_QP_H
35
36#include <linux/mlx5/device.h>
37#include <linux/mlx5/driver.h>
38
39#define MLX5_INVALID_LKEY 0x100
40
41enum mlx5_qp_optpar {
42 MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
43 MLX5_QP_OPTPAR_RRE = 1 << 1,
44 MLX5_QP_OPTPAR_RAE = 1 << 2,
45 MLX5_QP_OPTPAR_RWE = 1 << 3,
46 MLX5_QP_OPTPAR_PKEY_INDEX = 1 << 4,
47 MLX5_QP_OPTPAR_Q_KEY = 1 << 5,
48 MLX5_QP_OPTPAR_RNR_TIMEOUT = 1 << 6,
49 MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH = 1 << 7,
50 MLX5_QP_OPTPAR_SRA_MAX = 1 << 8,
51 MLX5_QP_OPTPAR_RRA_MAX = 1 << 9,
52 MLX5_QP_OPTPAR_PM_STATE = 1 << 10,
53 MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12,
54 MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13,
55 MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
56 MLX5_QP_OPTPAR_PRI_PORT = 1 << 16,
57 MLX5_QP_OPTPAR_SRQN = 1 << 18,
58 MLX5_QP_OPTPAR_CQN_RCV = 1 << 19,
59 MLX5_QP_OPTPAR_DC_HS = 1 << 20,
60 MLX5_QP_OPTPAR_DC_KEY = 1 << 21,
61};
62
63enum mlx5_qp_state {
64 MLX5_QP_STATE_RST = 0,
65 MLX5_QP_STATE_INIT = 1,
66 MLX5_QP_STATE_RTR = 2,
67 MLX5_QP_STATE_RTS = 3,
68 MLX5_QP_STATE_SQER = 4,
69 MLX5_QP_STATE_SQD = 5,
70 MLX5_QP_STATE_ERR = 6,
71 MLX5_QP_STATE_SQ_DRAINING = 7,
72 MLX5_QP_STATE_SUSPENDED = 9,
73 MLX5_QP_NUM_STATE
74};
75
76enum {
77 MLX5_QP_ST_RC = 0x0,
78 MLX5_QP_ST_UC = 0x1,
79 MLX5_QP_ST_UD = 0x2,
80 MLX5_QP_ST_XRC = 0x3,
81 MLX5_QP_ST_MLX = 0x4,
82 MLX5_QP_ST_DCI = 0x5,
83 MLX5_QP_ST_DCT = 0x6,
84 MLX5_QP_ST_QP0 = 0x7,
85 MLX5_QP_ST_QP1 = 0x8,
86 MLX5_QP_ST_RAW_ETHERTYPE = 0x9,
87 MLX5_QP_ST_RAW_IPV6 = 0xa,
88 MLX5_QP_ST_SNIFFER = 0xb,
89 MLX5_QP_ST_SYNC_UMR = 0xe,
90 MLX5_QP_ST_PTP_1588 = 0xd,
91 MLX5_QP_ST_REG_UMR = 0xc,
92 MLX5_QP_ST_MAX
93};
94
95enum {
96 MLX5_QP_PM_MIGRATED = 0x3,
97 MLX5_QP_PM_ARMED = 0x0,
98 MLX5_QP_PM_REARM = 0x1
99};
100
101enum {
102 MLX5_NON_ZERO_RQ = 0 << 24,
103 MLX5_SRQ_RQ = 1 << 24,
104 MLX5_CRQ_RQ = 2 << 24,
105 MLX5_ZERO_LEN_RQ = 3 << 24
106};
107
108enum {
109 /* params1 */
110 MLX5_QP_BIT_SRE = 1 << 15,
111 MLX5_QP_BIT_SWE = 1 << 14,
112 MLX5_QP_BIT_SAE = 1 << 13,
113 /* params2 */
114 MLX5_QP_BIT_RRE = 1 << 15,
115 MLX5_QP_BIT_RWE = 1 << 14,
116 MLX5_QP_BIT_RAE = 1 << 13,
117 MLX5_QP_BIT_RIC = 1 << 4,
118};
119
120enum {
121 MLX5_WQE_CTRL_CQ_UPDATE = 2 << 2,
122 MLX5_WQE_CTRL_SOLICITED = 1 << 1,
123};
124
125enum {
126 MLX5_SEND_WQE_BB = 64,
127};
128
129enum {
130 MLX5_WQE_FMR_PERM_LOCAL_READ = 1 << 27,
131 MLX5_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28,
132 MLX5_WQE_FMR_PERM_REMOTE_READ = 1 << 29,
133 MLX5_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30,
134 MLX5_WQE_FMR_PERM_ATOMIC = 1 << 31
135};
136
137enum {
138 MLX5_FENCE_MODE_NONE = 0 << 5,
139 MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5,
140 MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5,
141 MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5,
142};
143
144enum {
145 MLX5_QP_LAT_SENSITIVE = 1 << 28,
146 MLX5_QP_ENABLE_SIG = 1 << 31,
147};
148
149enum {
150 MLX5_RCV_DBR = 0,
151 MLX5_SND_DBR = 1,
152};
153
154struct mlx5_wqe_fmr_seg {
155 __be32 flags;
156 __be32 mem_key;
157 __be64 buf_list;
158 __be64 start_addr;
159 __be64 reg_len;
160 __be32 offset;
161 __be32 page_size;
162 u32 reserved[2];
163};
164
165struct mlx5_wqe_ctrl_seg {
166 __be32 opmod_idx_opcode;
167 __be32 qpn_ds;
168 u8 signature;
169 u8 rsvd[2];
170 u8 fm_ce_se;
171 __be32 imm;
172};
173
174struct mlx5_wqe_xrc_seg {
175 __be32 xrc_srqn;
176 u8 rsvd[12];
177};
178
179struct mlx5_wqe_masked_atomic_seg {
180 __be64 swap_add;
181 __be64 compare;
182 __be64 swap_add_mask;
183 __be64 compare_mask;
184};
185
186struct mlx5_av {
187 union {
188 struct {
189 __be32 qkey;
190 __be32 reserved;
191 } qkey;
192 __be64 dc_key;
193 } key;
194 __be32 dqp_dct;
195 u8 stat_rate_sl;
196 u8 fl_mlid;
197 __be16 rlid;
198 u8 reserved0[10];
199 u8 tclass;
200 u8 hop_limit;
201 __be32 grh_gid_fl;
202 u8 rgid[16];
203};
204
205struct mlx5_wqe_datagram_seg {
206 struct mlx5_av av;
207};
208
209struct mlx5_wqe_raddr_seg {
210 __be64 raddr;
211 __be32 rkey;
212 u32 reserved;
213};
214
215struct mlx5_wqe_atomic_seg {
216 __be64 swap_add;
217 __be64 compare;
218};
219
220struct mlx5_wqe_data_seg {
221 __be32 byte_count;
222 __be32 lkey;
223 __be64 addr;
224};
225
226struct mlx5_wqe_umr_ctrl_seg {
227 u8 flags;
228 u8 rsvd0[3];
229 __be16 klm_octowords;
230 __be16 bsf_octowords;
231 __be64 mkey_mask;
232 u8 rsvd1[32];
233};
234
235struct mlx5_seg_set_psv {
236 __be32 psv_num;
237 __be16 syndrome;
238 __be16 status;
239 __be32 transient_sig;
240 __be32 ref_tag;
241};
242
243struct mlx5_seg_get_psv {
244 u8 rsvd[19];
245 u8 num_psv;
246 __be32 l_key;
247 __be64 va;
248 __be32 psv_index[4];
249};
250
251struct mlx5_seg_check_psv {
252 u8 rsvd0[2];
253 __be16 err_coalescing_op;
254 u8 rsvd1[2];
255 __be16 xport_err_op;
256 u8 rsvd2[2];
257 __be16 xport_err_mask;
258 u8 rsvd3[7];
259 u8 num_psv;
260 __be32 l_key;
261 __be64 va;
262 __be32 psv_index[4];
263};
264
265struct mlx5_rwqe_sig {
266 u8 rsvd0[4];
267 u8 signature;
268 u8 rsvd1[11];
269};
270
271struct mlx5_wqe_signature_seg {
272 u8 rsvd0[4];
273 u8 signature;
274 u8 rsvd1[11];
275};
276
277struct mlx5_wqe_inline_seg {
278 __be32 byte_count;
279};
280
281struct mlx5_core_qp {
282 void (*event) (struct mlx5_core_qp *, int);
283 int qpn;
284 atomic_t refcount;
285 struct completion free;
286 struct mlx5_rsc_debug *dbg;
287 int pid;
288};
289
290struct mlx5_qp_path {
291 u8 fl;
292 u8 rsvd3;
293 u8 free_ar;
294 u8 pkey_index;
295 u8 rsvd0;
296 u8 grh_mlid;
297 __be16 rlid;
298 u8 ackto_lt;
299 u8 mgid_index;
300 u8 static_rate;
301 u8 hop_limit;
302 __be32 tclass_flowlabel;
303 u8 rgid[16];
304 u8 rsvd1[4];
305 u8 sl;
306 u8 port;
307 u8 rsvd2[6];
308};
309
310struct mlx5_qp_context {
311 __be32 flags;
312 __be32 flags_pd;
313 u8 mtu_msgmax;
314 u8 rq_size_stride;
315 __be16 sq_crq_size;
316 __be32 qp_counter_set_usr_page;
317 __be32 wire_qpn;
318 __be32 log_pg_sz_remote_qpn;
319 struct mlx5_qp_path pri_path;
320 struct mlx5_qp_path alt_path;
321 __be32 params1;
322 u8 reserved2[4];
323 __be32 next_send_psn;
324 __be32 cqn_send;
325 u8 reserved3[8];
326 __be32 last_acked_psn;
327 __be32 ssn;
328 __be32 params2;
329 __be32 rnr_nextrecvpsn;
330 __be32 xrcd;
331 __be32 cqn_recv;
332 __be64 db_rec_addr;
333 __be32 qkey;
334 __be32 rq_type_srqn;
335 __be32 rmsn;
336 __be16 hw_sq_wqe_counter;
337 __be16 sw_sq_wqe_counter;
338 __be16 hw_rcyclic_byte_counter;
339 __be16 hw_rq_counter;
340 __be16 sw_rcyclic_byte_counter;
341 __be16 sw_rq_counter;
342 u8 rsvd0[5];
343 u8 cgs;
344 u8 cs_req;
345 u8 cs_res;
346 __be64 dc_access_key;
347 u8 rsvd1[24];
348};
349
350struct mlx5_create_qp_mbox_in {
351 struct mlx5_inbox_hdr hdr;
352 __be32 input_qpn;
353 u8 rsvd0[4];
354 __be32 opt_param_mask;
355 u8 rsvd1[4];
356 struct mlx5_qp_context ctx;
357 u8 rsvd3[16];
358 __be64 pas[0];
359};
360
361struct mlx5_create_qp_mbox_out {
362 struct mlx5_outbox_hdr hdr;
363 __be32 qpn;
364 u8 rsvd0[4];
365};
366
367struct mlx5_destroy_qp_mbox_in {
368 struct mlx5_inbox_hdr hdr;
369 __be32 qpn;
370 u8 rsvd0[4];
371};
372
373struct mlx5_destroy_qp_mbox_out {
374 struct mlx5_outbox_hdr hdr;
375 u8 rsvd0[8];
376};
377
378struct mlx5_modify_qp_mbox_in {
379 struct mlx5_inbox_hdr hdr;
380 __be32 qpn;
381 u8 rsvd1[4];
382 __be32 optparam;
383 u8 rsvd0[4];
384 struct mlx5_qp_context ctx;
385};
386
387struct mlx5_modify_qp_mbox_out {
388 struct mlx5_outbox_hdr hdr;
389 u8 rsvd0[8];
390};
391
392struct mlx5_query_qp_mbox_in {
393 struct mlx5_inbox_hdr hdr;
394 __be32 qpn;
395 u8 rsvd[4];
396};
397
398struct mlx5_query_qp_mbox_out {
399 struct mlx5_outbox_hdr hdr;
400 u8 rsvd1[8];
401 __be32 optparam;
402 u8 rsvd0[4];
403 struct mlx5_qp_context ctx;
404 u8 rsvd2[16];
405 __be64 pas[0];
406};
407
408struct mlx5_conf_sqp_mbox_in {
409 struct mlx5_inbox_hdr hdr;
410 __be32 qpn;
411 u8 rsvd[3];
412 u8 type;
413};
414
415struct mlx5_conf_sqp_mbox_out {
416 struct mlx5_outbox_hdr hdr;
417 u8 rsvd[8];
418};
419
420struct mlx5_alloc_xrcd_mbox_in {
421 struct mlx5_inbox_hdr hdr;
422 u8 rsvd[8];
423};
424
425struct mlx5_alloc_xrcd_mbox_out {
426 struct mlx5_outbox_hdr hdr;
427 __be32 xrcdn;
428 u8 rsvd[4];
429};
430
431struct mlx5_dealloc_xrcd_mbox_in {
432 struct mlx5_inbox_hdr hdr;
433 __be32 xrcdn;
434 u8 rsvd[4];
435};
436
437struct mlx5_dealloc_xrcd_mbox_out {
438 struct mlx5_outbox_hdr hdr;
439 u8 rsvd[8];
440};
441
442static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn)
443{
444 return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
445}
446
447int mlx5_core_create_qp(struct mlx5_core_dev *dev,
448 struct mlx5_core_qp *qp,
449 struct mlx5_create_qp_mbox_in *in,
450 int inlen);
451int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
452 enum mlx5_qp_state new_state,
453 struct mlx5_modify_qp_mbox_in *in, int sqd_event,
454 struct mlx5_core_qp *qp);
455int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
456 struct mlx5_core_qp *qp);
457int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
458 struct mlx5_query_qp_mbox_out *out, int outlen);
459
460int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);
461int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);
462void mlx5_init_qp_table(struct mlx5_core_dev *dev);
463void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
464int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
465void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
466
467#endif /* MLX5_QP_H */
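Editor's example - the QP table declared above is keyed by QP number through a radix tree, and __mlx5_qp_lookup() plus the refcount/completion pair in struct mlx5_core_qp suggest the usual lookup-and-dispatch pattern. A minimal sketch, assuming a table spinlock in dev->priv.qp_table and treating everything not declared above as illustrative:

/* Illustrative only: routing a hardware event to the owning QP.
 * The qp_table lock name is an assumption, not part of this patch.
 */
static void example_qp_event_dispatch(struct mlx5_core_dev *dev, u32 qpn, int event_type)
{
	struct mlx5_core_qp *qp;

	spin_lock(&dev->priv.qp_table.lock);	/* assumed table lock */
	qp = __mlx5_qp_lookup(dev, qpn);
	if (qp)
		atomic_inc(&qp->refcount);
	spin_unlock(&dev->priv.qp_table.lock);

	if (!qp)
		return;

	qp->event(qp, event_type);		/* callback installed by the QP owner */

	if (atomic_dec_and_test(&qp->refcount))
		complete(&qp->free);		/* the destroy path waits on this */
}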

diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
new file mode 100644
index 000000000000..e1a363a33663
--- /dev/null
+++ b/include/linux/mlx5/srq.h
@@ -0,0 +1,41 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_SRQ_H
34#define MLX5_SRQ_H
35
36#include <linux/mlx5/driver.h>
37
38void mlx5_init_srq_table(struct mlx5_core_dev *dev);
39void mlx5_cleanup_srq_table(struct mlx5_core_dev *dev);
40
41#endif /* MLX5_SRQ_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index b10ce4b341ea..230c04bda3e2 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -167,6 +167,7 @@ struct ucred {
 #define AF_PPPOX	24	/* PPPoX sockets		*/
 #define AF_WANPIPE	25	/* Wanpipe API Sockets */
 #define AF_LLC		26	/* Linux LLC			*/
+#define AF_IB		27	/* Native InfiniBand address	*/
 #define AF_CAN		29	/* Controller Area Network      */
 #define AF_TIPC		30	/* TIPC sockets			*/
 #define AF_BLUETOOTH	31	/* Bluetooth sockets		*/
@@ -211,6 +212,7 @@ struct ucred {
 #define PF_PPPOX	AF_PPPOX
 #define PF_WANPIPE	AF_WANPIPE
 #define PF_LLC		AF_LLC
+#define PF_IB		AF_IB
 #define PF_CAN		AF_CAN
 #define PF_TIPC		AF_TIPC
 #define PF_BLUETOOTH	AF_BLUETOOTH
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
new file mode 100644
index 000000000000..cf8f9e700e48
--- /dev/null
+++ b/include/rdma/ib.h
@@ -0,0 +1,89 @@
1/*
2 * Copyright (c) 2010 Intel Corporation. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#if !defined(_RDMA_IB_H)
34#define _RDMA_IB_H
35
36#include <linux/types.h>
37
38struct ib_addr {
39 union {
40 __u8 uib_addr8[16];
41 __be16 uib_addr16[8];
42 __be32 uib_addr32[4];
43 __be64 uib_addr64[2];
44 } ib_u;
45#define sib_addr8 ib_u.uib_addr8
46#define sib_addr16 ib_u.uib_addr16
47#define sib_addr32 ib_u.uib_addr32
48#define sib_addr64 ib_u.uib_addr64
49#define sib_raw ib_u.uib_addr8
50#define sib_subnet_prefix ib_u.uib_addr64[0]
51#define sib_interface_id ib_u.uib_addr64[1]
52};
53
54static inline int ib_addr_any(const struct ib_addr *a)
55{
56 return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
57}
58
59static inline int ib_addr_loopback(const struct ib_addr *a)
60{
61 return ((a->sib_addr32[0] | a->sib_addr32[1] |
62 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
63}
64
65static inline void ib_addr_set(struct ib_addr *addr,
66 __be32 w1, __be32 w2, __be32 w3, __be32 w4)
67{
68 addr->sib_addr32[0] = w1;
69 addr->sib_addr32[1] = w2;
70 addr->sib_addr32[2] = w3;
71 addr->sib_addr32[3] = w4;
72}
73
74static inline int ib_addr_cmp(const struct ib_addr *a1, const struct ib_addr *a2)
75{
76 return memcmp(a1, a2, sizeof(struct ib_addr));
77}
78
79struct sockaddr_ib {
80 unsigned short int sib_family; /* AF_IB */
81 __be16 sib_pkey;
82 __be32 sib_flowinfo;
83 struct ib_addr sib_addr;
84 __be64 sib_sid;
85 __be64 sib_sid_mask;
86 __u64 sib_scope_id;
87};
88
89#endif /* _RDMA_IB_H */
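Editor's example - struct sockaddr_ib carries a full 128-bit GID, a P_Key and a service ID under the new AF_IB family. A minimal sketch of filling one by hand, using only the helpers defined in this header; the GID words and P_Key below are placeholder values:

/* Illustrative only: building a sockaddr_ib. All concrete values are made up. */
static void example_fill_sockaddr_ib(struct sockaddr_ib *sib)
{
	memset(sib, 0, sizeof(*sib));
	sib->sib_family = AF_IB;
	sib->sib_pkey = htons(0xffff);			/* default partition, placeholder */

	/* sib_addr holds the GID: subnet prefix in the upper 64 bits,
	 * interface ID in the lower 64 bits.
	 */
	ib_addr_set(&sib->sib_addr, htonl(0xfe800000), 0,
		    htonl(0x00025500), htonl(0x12345678));

	if (ib_addr_any(&sib->sib_addr) || ib_addr_loopback(&sib->sib_addr))
		return;		/* not expected with the values above */
}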
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 99965395c5f3..f3ac0f2c4c66 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -102,11 +102,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr);
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 		   const unsigned char *dst_dev_addr);
 
-static inline int ip_addr_size(struct sockaddr *addr)
-{
-	return addr->sa_family == AF_INET6 ?
-		sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
-}
+int rdma_addr_size(struct sockaddr *addr);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
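Editor's note - the inline ip_addr_size() only understood IPv4/IPv6, so it is replaced by an exported rdma_addr_size(), presumably defined in drivers/infiniband/core/addr.c so it can also account for AF_IB. A hedged sketch of what such a helper could look like; the real implementation may differ:

/* Sketch only: address length by family, now including AF_IB. */
int rdma_addr_size(struct sockaddr *addr)
{
	switch (addr->sa_family) {
	case AF_INET:
		return sizeof(struct sockaddr_in);
	case AF_INET6:
		return sizeof(struct sockaddr_in6);
	case AF_IB:
		return sizeof(struct sockaddr_ib);
	default:
		return 0;
	}
}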
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 8275e539bace..125f8714301d 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -402,6 +402,12 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 			 struct ib_ah_attr *ah_attr);
 
 /**
+ * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec
+ * to IB MAD wire format.
+ */
+void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute);
+
+/**
  * ib_sa_unpack_path - Convert a path record from MAD format to struct
  * ib_sa_path_rec.
  */
@@ -418,4 +424,5 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
 			      void *context),
 			      void *context,
 			      struct ib_sa_query **sa_query);
+
 #endif /* IB_SA_H */
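Editor's example - ib_sa_pack_path() is the mirror of the existing ib_sa_unpack_path(). A minimal round-trip sketch; the 64-byte attribute size is an assumption about the path record wire format, not stated by this patch:

/* Sketch only: struct -> wire format -> struct. */
static void example_pack_unpack(struct ib_sa_path_rec *rec)
{
	u8 wire[64];			/* assumed path record attribute size */
	struct ib_sa_path_rec copy;

	ib_sa_pack_path(rec, wire);	/* to IB MAD wire format */
	ib_sa_unpack_path(wire, &copy);	/* back to struct ib_sa_path_rec */
}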
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 98cc4b29fc5b..645c3cedce9c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -610,7 +610,21 @@ enum ib_qp_type {
 	IB_QPT_RAW_PACKET = 8,
 	IB_QPT_XRC_INI = 9,
 	IB_QPT_XRC_TGT,
-	IB_QPT_MAX
+	IB_QPT_MAX,
+	/* Reserve a range for qp types internal to the low level driver.
+	 * These qp types will not be visible at the IB core layer, so the
+	 * IB_QPT_MAX usages should not be affected in the core layer
+	 */
+	IB_QPT_RESERVED1 = 0x1000,
+	IB_QPT_RESERVED2,
+	IB_QPT_RESERVED3,
+	IB_QPT_RESERVED4,
+	IB_QPT_RESERVED5,
+	IB_QPT_RESERVED6,
+	IB_QPT_RESERVED7,
+	IB_QPT_RESERVED8,
+	IB_QPT_RESERVED9,
+	IB_QPT_RESERVED10,
 };
 
 enum ib_qp_create_flags {
@@ -766,6 +780,19 @@ enum ib_wr_opcode {
 	IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
 	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
 	IB_WR_BIND_MW,
+	/* reserve values for low level drivers' internal use.
+	 * These values will not be used at all in the ib core layer.
+	 */
+	IB_WR_RESERVED1 = 0xf0,
+	IB_WR_RESERVED2,
+	IB_WR_RESERVED3,
+	IB_WR_RESERVED4,
+	IB_WR_RESERVED5,
+	IB_WR_RESERVED6,
+	IB_WR_RESERVED7,
+	IB_WR_RESERVED8,
+	IB_WR_RESERVED9,
+	IB_WR_RESERVED10,
 };
 
 enum ib_send_flags {
@@ -773,7 +800,11 @@ enum ib_send_flags {
 	IB_SEND_SIGNALED	= (1<<1),
 	IB_SEND_SOLICITED	= (1<<2),
 	IB_SEND_INLINE		= (1<<3),
-	IB_SEND_IP_CSUM		= (1<<4)
+	IB_SEND_IP_CSUM		= (1<<4),
+
+	/* reserve bits 26-31 for low level drivers' internal use */
+	IB_SEND_RESERVED_START	= (1 << 26),
+	IB_SEND_RESERVED_END	= (1 << 31),
 };
 
 struct ib_sge {
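Editor's example - the reserved QP types, work-request opcodes, and send-flag bits carve out ranges a low-level driver can use for internal verbs without colliding with future core values. A hedged illustration of how a driver-private header might map onto them; the EXAMPLE_DRV_* names are invented for this sketch:

/* Illustrative driver-private definitions built on the reserved ranges. */
enum {
	EXAMPLE_DRV_QPT_INTERNAL	= IB_QPT_RESERVED1,	  /* driver-only QP type   */
};

enum {
	EXAMPLE_DRV_WR_SPECIAL		= IB_WR_RESERVED1,	  /* driver-only WR opcode */
};

enum {
	EXAMPLE_DRV_SEND_PRIVATE_FLAG	= IB_SEND_RESERVED_START, /* first reserved bit    */
};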
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index ad3a3142383a..1ed2088dc9f5 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -70,6 +70,11 @@ enum rdma_port_space {
 	RDMA_PS_UDP   = 0x0111,
 };
 
+#define RDMA_IB_IP_PS_MASK   0xFFFFFFFFFFFF0000ULL
+#define RDMA_IB_IP_PS_TCP    0x0000000001060000ULL
+#define RDMA_IB_IP_PS_UDP    0x0000000001110000ULL
+#define RDMA_IB_IP_PS_IB     0x00000000013F0000ULL
+
 struct rdma_addr {
 	struct sockaddr_storage src_addr;
 	struct sockaddr_storage dst_addr;
@@ -93,6 +98,7 @@ struct rdma_conn_param {
 	/* Fields below ignored if a QP is created on the rdma_cm_id. */
 	u8 srq;
 	u32 qp_num;
+	u32 qkey;
 };
 
 struct rdma_ud_param {
@@ -367,4 +373,11 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
  */
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
+/**
+ * rdma_get_service_id - Return the IB service ID for a specified address.
+ * @id: Communication identifier associated with the address.
+ * @addr: Address for the service ID.
+ */
+__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+
 #endif /* RDMA_CM_H */
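Editor's example - the RDMA_IB_IP_PS_* values place an IP port space in the upper bits of a 64-bit IB service ID, leaving the low 16 bits for the port number, which is what rdma_get_service_id() reports for an IP address. A hedged sketch of that composition; the helper name is invented:

/* Sketch only: port space prefix | 16-bit port -> IB service ID. */
static inline __be64 example_ip_service_id(u64 ps_prefix, __be16 port)
{
	return cpu_to_be64(ps_prefix | (u64)be16_to_cpu(port));
}

/* e.g. TCP port 5000: example_ip_service_id(RDMA_IB_IP_PS_TCP, htons(5000)) */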
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 1ee9239ff8c2..99b80abf360a 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -45,8 +45,8 @@
 enum {
 	RDMA_USER_CM_CMD_CREATE_ID,
 	RDMA_USER_CM_CMD_DESTROY_ID,
-	RDMA_USER_CM_CMD_BIND_ADDR,
-	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_BIND_IP,
+	RDMA_USER_CM_CMD_RESOLVE_IP,
 	RDMA_USER_CM_CMD_RESOLVE_ROUTE,
 	RDMA_USER_CM_CMD_QUERY_ROUTE,
 	RDMA_USER_CM_CMD_CONNECT,
@@ -59,9 +59,13 @@ enum {
 	RDMA_USER_CM_CMD_GET_OPTION,
 	RDMA_USER_CM_CMD_SET_OPTION,
 	RDMA_USER_CM_CMD_NOTIFY,
-	RDMA_USER_CM_CMD_JOIN_MCAST,
+	RDMA_USER_CM_CMD_JOIN_IP_MCAST,
 	RDMA_USER_CM_CMD_LEAVE_MCAST,
-	RDMA_USER_CM_CMD_MIGRATE_ID
+	RDMA_USER_CM_CMD_MIGRATE_ID,
+	RDMA_USER_CM_CMD_QUERY,
+	RDMA_USER_CM_CMD_BIND,
+	RDMA_USER_CM_CMD_RESOLVE_ADDR,
+	RDMA_USER_CM_CMD_JOIN_MCAST
 };
 
 /*
@@ -95,28 +99,51 @@ struct rdma_ucm_destroy_id_resp {
 	__u32 events_reported;
 };
 
-struct rdma_ucm_bind_addr {
+struct rdma_ucm_bind_ip {
 	__u64 response;
 	struct sockaddr_in6 addr;
 	__u32 id;
 };
 
-struct rdma_ucm_resolve_addr {
+struct rdma_ucm_bind {
+	__u32 id;
+	__u16 addr_size;
+	__u16 reserved;
+	struct sockaddr_storage addr;
+};
+
+struct rdma_ucm_resolve_ip {
 	struct sockaddr_in6 src_addr;
 	struct sockaddr_in6 dst_addr;
 	__u32 id;
 	__u32 timeout_ms;
 };
 
+struct rdma_ucm_resolve_addr {
+	__u32 id;
+	__u32 timeout_ms;
+	__u16 src_size;
+	__u16 dst_size;
+	__u32 reserved;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+};
+
 struct rdma_ucm_resolve_route {
 	__u32 id;
 	__u32 timeout_ms;
 };
 
-struct rdma_ucm_query_route {
+enum {
+	RDMA_USER_CM_QUERY_ADDR,
+	RDMA_USER_CM_QUERY_PATH,
+	RDMA_USER_CM_QUERY_GID
+};
+
+struct rdma_ucm_query {
 	__u64 response;
 	__u32 id;
-	__u32 reserved;
+	__u32 option;
 };
 
 struct rdma_ucm_query_route_resp {
@@ -129,9 +156,26 @@ struct rdma_ucm_query_route_resp {
 	__u8 reserved[3];
 };
 
+struct rdma_ucm_query_addr_resp {
+	__u64 node_guid;
+	__u8  port_num;
+	__u8  reserved;
+	__u16 pkey;
+	__u16 src_size;
+	__u16 dst_size;
+	struct sockaddr_storage src_addr;
+	struct sockaddr_storage dst_addr;
+};
+
+struct rdma_ucm_query_path_resp {
+	__u32 num_paths;
+	__u32 reserved;
+	struct ib_path_rec_data path_data[0];
+};
+
 struct rdma_ucm_conn_param {
 	__u32 qp_num;
-	__u32 reserved;
+	__u32 qkey;
 	__u8 private_data[RDMA_MAX_PRIVATE_DATA];
 	__u8 private_data_len;
 	__u8 srq;
@@ -192,13 +236,22 @@ struct rdma_ucm_notify {
 	__u32 event;
 };
 
-struct rdma_ucm_join_mcast {
+struct rdma_ucm_join_ip_mcast {
 	__u64 response;		/* rdma_ucm_create_id_resp */
 	__u64 uid;
 	struct sockaddr_in6 addr;
 	__u32 id;
 };
 
+struct rdma_ucm_join_mcast {
+	__u64 response;		/* rdma_ucma_create_id_resp */
+	__u64 uid;
+	__u32 id;
+	__u16 addr_size;
+	__u16 reserved;
+	struct sockaddr_storage addr;
+};
+
 struct rdma_ucm_get_event {
 	__u64 response;
 };
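Editor's example - the new commands carry a struct sockaddr_storage plus an explicit addr_size rather than a fixed sockaddr_in6, which is what lets AF_IB addresses pass through the ucma ABI. A hedged userspace-side sketch of issuing the new BIND command; the rdma_ucm_cmd_hdr framing follows the existing ucma write() convention, but the surrounding helper and its error handling are illustrative only:

/* Illustrative only: binding an rdma_cm id to an AF_IB (or IP) address
 * through the new RDMA_USER_CM_CMD_BIND command.
 */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <rdma/rdma_user_cm.h>

static int example_ucma_bind(int cm_fd, __u32 cm_id,
			     const struct sockaddr *sa, __u16 sa_len)
{
	struct {
		struct rdma_ucm_cmd_hdr hdr;
		struct rdma_ucm_bind	cmd;
	} msg;

	memset(&msg, 0, sizeof(msg));
	msg.hdr.cmd = RDMA_USER_CM_CMD_BIND;
	msg.hdr.in  = sizeof(msg.cmd);
	msg.hdr.out = 0;

	msg.cmd.id = cm_id;
	msg.cmd.addr_size = sa_len;		/* e.g. sizeof(struct sockaddr_ib) */
	memcpy(&msg.cmd.addr, sa, sa_len);

	return write(cm_fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
}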