author    Linus Torvalds <torvalds@linux-foundation.org>  2013-07-13 15:57:21 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-07-13 15:57:21 -0400
commit    c55244137306b626bc64023fd7160985443205a7 (patch)
tree      459acfb5c9b41e3e1616fb36aafda68a07ddbf54 /drivers/infiniband
parent    858655116bfc722837e3aec0909b8e9d08f96996 (diff)
parent    e04abfa2436e3ab016b23eb1afb2c5578b8dc2cf (diff)
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull InfiniBand/RDMA changes from Roland Dreier:

 - AF_IB (native IB addressing) for CMA from Sean Hefty

 - new mlx5 driver for Mellanox Connect-IB adapters (including post
   merge request fixes)

 - SRP fixes from Bart Van Assche (including fix to first merge request)

 - qib HW driver updates

 - resurrection of ocrdma HW driver development

 - uverbs conversion to create fds with O_CLOEXEC set

 - other small changes and fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
  mlx5: Return -EFAULT instead of -EPERM
  IB/qib: Log all SDMA errors unconditionally
  IB/qib: Fix module-level leak
  mlx5_core: Adjust hca_cap.uar_page_sz to conform to Connect-IB spec
  IB/srp: Let srp_abort() return FAST_IO_FAIL if TL offline
  IB/uverbs: Use get_unused_fd_flags(O_CLOEXEC) instead of get_unused_fd()
  mlx5_core: Fixes for sparse warnings
  IB/mlx5: Make profile[] static in main.c
  mlx5: Fix parameter type of health_handler_t
  mlx5: Add driver for Mellanox Connect-IB adapters
  IB/core: Add reserved values to enums for low-level driver use
  IB/srp: Bump driver version and release date
  IB/srp: Make HCA completion vector configurable
  IB/srp: Maintain a single connection per I_T nexus
  IB/srp: Fail I/O fast if target offline
  IB/srp: Skip host settle delay
  IB/srp: Avoid skipping srp_reset_host() after a transport error
  IB/srp: Fix remove_one crash due to resource exhaustion
  IB/qib: New transmitter tunning settings for Dell 1.1 backplane
  IB/core: Fix error return code in add_port()
  ...
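One of the items above, the uverbs switch to get_unused_fd_flags(O_CLOEXEC), is kernel-internal but has a simple userspace-visible meaning: the file descriptors uverbs hands back (event channels, completion channels) now carry FD_CLOEXEC and no longer leak across exec(). A minimal userspace sketch of the same close-on-exec behaviour, using only standard POSIX calls and a placeholder path rather than a real uverbs device:

/* Illustrative only: shows the close-on-exec semantics that
 * get_unused_fd_flags(O_CLOEXEC) now gives uverbs-created fds. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Path is a placeholder; any file works for the demonstration. */
	int fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* FD_CLOEXEC is set, so the descriptor is closed automatically
	 * across execve() instead of leaking into the new program. */
	int flags = fcntl(fd, F_GETFD);
	printf("FD_CLOEXEC set: %s\n", (flags & FD_CLOEXEC) ? "yes" : "no");

	close(fd);
	return 0;
}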
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/Kconfig  1
-rw-r--r--  drivers/infiniband/Makefile  1
-rw-r--r--  drivers/infiniband/core/addr.c  20
-rw-r--r--  drivers/infiniband/core/cma.c  906
-rw-r--r--  drivers/infiniband/core/sa_query.c  6
-rw-r--r--  drivers/infiniband/core/sysfs.c  8
-rw-r--r--  drivers/infiniband/core/ucma.c  321
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c  4
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c  3
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_main.c  1
-rw-r--r--  drivers/infiniband/hw/mlx5/Kconfig  10
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile  3
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c  92
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c  843
-rw-r--r--  drivers/infiniband/hw/mlx5/doorbell.c  100
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c  139
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c  1504
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c  162
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h  545
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c  1007
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c  2524
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c  473
-rw-r--r--  drivers/infiniband/hw/mlx5/user.h  121
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma.h  63
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_hw.c  86
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_main.c  6
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_sli.h  35
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  135
-rw-r--r--  drivers/infiniband/hw/qib/Kconfig  8
-rw-r--r--  drivers/infiniband/hw/qib/Makefile  1
-rw-r--r--  drivers/infiniband/hw/qib/qib.h  63
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h  2
-rw-r--r--  drivers/infiniband/hw/qib/qib_cq.c  67
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.c  283
-rw-r--r--  drivers/infiniband/hw/qib/qib_debugfs.h  45
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c  1
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c  176
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c  10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c  10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c  507
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c  145
-rw-r--r--  drivers/infiniband/hw/qib/qib_qp.c  123
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c  56
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c  8
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h  33
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c  89
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.h  1
47 files changed, 9961 insertions, 786 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index c85b56c28099..5ceda710f516 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -50,6 +50,7 @@ source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
+source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
 source "drivers/infiniband/hw/ocrdma/Kconfig"
 
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index b126fefe0b1c..1fe69888515f 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)	+= hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_CXGB4)	+= hw/cxgb4/
 obj-$(CONFIG_MLX4_INFINIBAND)	+= hw/mlx4/
+obj-$(CONFIG_MLX5_INFINIBAND)	+= hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES)	+= hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)	+= hw/ocrdma/
 obj-$(CONFIG_INFINIBAND_IPOIB)	+= ulp/ipoib/
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index eaec8d7a3b73..e90f2b2eabd7 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -45,6 +45,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
+#include <rdma/ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("IB Address Translation");
@@ -70,6 +71,21 @@ static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
+int rdma_addr_size(struct sockaddr *addr)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		return sizeof(struct sockaddr_in);
+	case AF_INET6:
+		return sizeof(struct sockaddr_in6);
+	case AF_IB:
+		return sizeof(struct sockaddr_ib);
+	default:
+		return 0;
+	}
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -369,12 +385,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
 			goto err;
 		}
 
-		memcpy(src_in, src_addr, ip_addr_size(src_addr));
+		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
 	} else {
 		src_in->sa_family = dst_addr->sa_family;
 	}
 
-	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
+	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
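The addr.c hunk above adds rdma_addr_size(), which replaces the IPv4/IPv6-only ip_addr_size() and reports the sockaddr length for AF_INET, AF_INET6 and the new AF_IB family (0 for anything else). A standalone sketch of that mapping; struct sockaddr_ib lives in the kernel's <rdma/ib.h>, so a local stand-in with the same fields is used here purely for illustration, and the AF_IB value is an assumption taken from the kernel headers of this era:

#include <stdio.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Stand-in mirroring the kernel's struct sockaddr_ib from <rdma/ib.h>;
 * defined locally only so sizeof() has something to measure. */
struct sockaddr_ib_demo {
	unsigned short     sib_family;	/* AF_IB */
	unsigned short     sib_pkey;
	unsigned int       sib_flowinfo;
	unsigned char      sib_addr[16];	/* 128-bit GID */
	unsigned long long sib_sid;		/* service ID (carries the "port") */
	unsigned long long sib_sid_mask;
	unsigned long long sib_scope_id;
};

#ifndef AF_IB
#define AF_IB 27	/* assumed kernel value; verify against <linux/socket.h> */
#endif

static size_t demo_addr_size(int family)
{
	switch (family) {
	case AF_INET:	return sizeof(struct sockaddr_in);
	case AF_INET6:	return sizeof(struct sockaddr_in6);
	case AF_IB:	return sizeof(struct sockaddr_ib_demo);
	default:	return 0;	/* unknown family */
	}
}

int main(void)
{
	printf("AF_INET  -> %zu bytes\n", demo_addr_size(AF_INET));
	printf("AF_INET6 -> %zu bytes\n", demo_addr_size(AF_INET6));
	printf("AF_IB    -> %zu bytes\n", demo_addr_size(AF_IB));
	return 0;
}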
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 34fbc2f60a09..f1c279fabe64 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -50,6 +50,7 @@
50#include <rdma/rdma_cm.h> 50#include <rdma/rdma_cm.h>
51#include <rdma/rdma_cm_ib.h> 51#include <rdma/rdma_cm_ib.h>
52#include <rdma/rdma_netlink.h> 52#include <rdma/rdma_netlink.h>
53#include <rdma/ib.h>
53#include <rdma/ib_cache.h> 54#include <rdma/ib_cache.h>
54#include <rdma/ib_cm.h> 55#include <rdma/ib_cm.h>
55#include <rdma/ib_sa.h> 56#include <rdma/ib_sa.h>
@@ -79,7 +80,6 @@ static LIST_HEAD(dev_list);
79static LIST_HEAD(listen_any_list); 80static LIST_HEAD(listen_any_list);
80static DEFINE_MUTEX(lock); 81static DEFINE_MUTEX(lock);
81static struct workqueue_struct *cma_wq; 82static struct workqueue_struct *cma_wq;
82static DEFINE_IDR(sdp_ps);
83static DEFINE_IDR(tcp_ps); 83static DEFINE_IDR(tcp_ps);
84static DEFINE_IDR(udp_ps); 84static DEFINE_IDR(udp_ps);
85static DEFINE_IDR(ipoib_ps); 85static DEFINE_IDR(ipoib_ps);
@@ -195,24 +195,7 @@ struct cma_hdr {
195 union cma_ip_addr dst_addr; 195 union cma_ip_addr dst_addr;
196}; 196};
197 197
198struct sdp_hh {
199 u8 bsdh[16];
200 u8 sdp_version; /* Major version: 7:4 */
201 u8 ip_version; /* IP version: 7:4 */
202 u8 sdp_specific1[10];
203 __be16 port;
204 __be16 sdp_specific2;
205 union cma_ip_addr src_addr;
206 union cma_ip_addr dst_addr;
207};
208
209struct sdp_hah {
210 u8 bsdh[16];
211 u8 sdp_version;
212};
213
214#define CMA_VERSION 0x00 198#define CMA_VERSION 0x00
215#define SDP_MAJ_VERSION 0x2
216 199
217static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 200static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
218{ 201{
@@ -261,21 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
261 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 244 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
262} 245}
263 246
264static inline u8 sdp_get_majv(u8 sdp_version)
265{
266 return sdp_version >> 4;
267}
268
269static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
270{
271 return hh->ip_version >> 4;
272}
273
274static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
275{
276 hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
277}
278
279static void cma_attach_to_dev(struct rdma_id_private *id_priv, 247static void cma_attach_to_dev(struct rdma_id_private *id_priv,
280 struct cma_device *cma_dev) 248 struct cma_device *cma_dev)
281{ 249{
@@ -310,16 +278,40 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
310 mutex_unlock(&lock); 278 mutex_unlock(&lock);
311} 279}
312 280
313static int cma_set_qkey(struct rdma_id_private *id_priv) 281static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
282{
283 return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
284}
285
286static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
287{
288 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
289}
290
291static inline unsigned short cma_family(struct rdma_id_private *id_priv)
292{
293 return id_priv->id.route.addr.src_addr.ss_family;
294}
295
296static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
314{ 297{
315 struct ib_sa_mcmember_rec rec; 298 struct ib_sa_mcmember_rec rec;
316 int ret = 0; 299 int ret = 0;
317 300
318 if (id_priv->qkey) 301 if (id_priv->qkey) {
302 if (qkey && id_priv->qkey != qkey)
303 return -EINVAL;
304 return 0;
305 }
306
307 if (qkey) {
308 id_priv->qkey = qkey;
319 return 0; 309 return 0;
310 }
320 311
321 switch (id_priv->id.ps) { 312 switch (id_priv->id.ps) {
322 case RDMA_PS_UDP: 313 case RDMA_PS_UDP:
314 case RDMA_PS_IB:
323 id_priv->qkey = RDMA_UDP_QKEY; 315 id_priv->qkey = RDMA_UDP_QKEY;
324 break; 316 break;
325 case RDMA_PS_IPOIB: 317 case RDMA_PS_IPOIB:
@@ -358,6 +350,27 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
358 return -EADDRNOTAVAIL; 350 return -EADDRNOTAVAIL;
359} 351}
360 352
353static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
354{
355 dev_addr->dev_type = ARPHRD_INFINIBAND;
356 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
357 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
358}
359
360static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
361{
362 int ret;
363
364 if (addr->sa_family != AF_IB) {
365 ret = rdma_translate_ip(addr, dev_addr);
366 } else {
367 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
368 ret = 0;
369 }
370
371 return ret;
372}
373
361static int cma_acquire_dev(struct rdma_id_private *id_priv) 374static int cma_acquire_dev(struct rdma_id_private *id_priv)
362{ 375{
363 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 376 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -401,6 +414,61 @@ out:
401 return ret; 414 return ret;
402} 415}
403 416
417/*
418 * Select the source IB device and address to reach the destination IB address.
419 */
420static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
421{
422 struct cma_device *cma_dev, *cur_dev;
423 struct sockaddr_ib *addr;
424 union ib_gid gid, sgid, *dgid;
425 u16 pkey, index;
426 u8 port, p;
427 int i;
428
429 cma_dev = NULL;
430 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
431 dgid = (union ib_gid *) &addr->sib_addr;
432 pkey = ntohs(addr->sib_pkey);
433
434 list_for_each_entry(cur_dev, &dev_list, list) {
435 if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
436 continue;
437
438 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
439 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
440 continue;
441
442 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
443 if (!memcmp(&gid, dgid, sizeof(gid))) {
444 cma_dev = cur_dev;
445 sgid = gid;
446 port = p;
447 goto found;
448 }
449
450 if (!cma_dev && (gid.global.subnet_prefix ==
451 dgid->global.subnet_prefix)) {
452 cma_dev = cur_dev;
453 sgid = gid;
454 port = p;
455 }
456 }
457 }
458 }
459
460 if (!cma_dev)
461 return -ENODEV;
462
463found:
464 cma_attach_to_dev(id_priv, cma_dev);
465 id_priv->id.port_num = port;
466 addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
467 memcpy(&addr->sib_addr, &sgid, sizeof sgid);
468 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
469 return 0;
470}
471
404static void cma_deref_id(struct rdma_id_private *id_priv) 472static void cma_deref_id(struct rdma_id_private *id_priv)
405{ 473{
406 if (atomic_dec_and_test(&id_priv->refcount)) 474 if (atomic_dec_and_test(&id_priv->refcount))
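The cma_resolve_ib_dev() routine added above scans every IB device and port, prefers a port whose cached GID table holds the exact destination GID, and otherwise settles for the first port that shares the destination's 64-bit subnet prefix. A simplified standalone model of that preference order (the pkey check and the cached-GID API are left out; GIDs are plain 16-byte arrays here):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* A GID is a 128-bit address: 64-bit subnet prefix + 64-bit interface ID. */
typedef struct { uint8_t raw[16]; } demo_gid;

static int same_subnet(const demo_gid *a, const demo_gid *b)
{
	return memcmp(a->raw, b->raw, 8) == 0;	/* compare subnet prefix only */
}

/* Return index of the best local GID for dgid: an exact match wins,
 * otherwise the first GID on the same subnet, otherwise -1. */
static int pick_gid(const demo_gid *local, int n, const demo_gid *dgid)
{
	int fallback = -1;

	for (int i = 0; i < n; i++) {
		if (!memcmp(local[i].raw, dgid->raw, 16))
			return i;			/* exact match */
		if (fallback < 0 && same_subnet(&local[i], dgid))
			fallback = i;			/* remember subnet match */
	}
	return fallback;
}

int main(void)
{
	demo_gid table[2] = {
		{ .raw = { 0xfe, 0x80, 0,0,0,0,0,0,  0,0,0,0,0,0,0,1 } },
		{ .raw = { 0xfe, 0x80, 0,0,0,0,0,0,  0,0,0,0,0,0,0,2 } },
	};
	demo_gid dst = { .raw = { 0xfe, 0x80, 0,0,0,0,0,0,  0,0,0,0,0,0,0,9 } };

	/* No exact match, so index 0 is chosen on subnet prefix alone. */
	printf("chosen local GID index: %d\n", pick_gid(table, 2, &dst));
	return 0;
}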
@@ -630,7 +698,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
630 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 698 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
631 699
632 if (id_priv->id.qp_type == IB_QPT_UD) { 700 if (id_priv->id.qp_type == IB_QPT_UD) {
633 ret = cma_set_qkey(id_priv); 701 ret = cma_set_qkey(id_priv, 0);
634 if (ret) 702 if (ret)
635 return ret; 703 return ret;
636 704
@@ -679,26 +747,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr);
679 747
680static inline int cma_zero_addr(struct sockaddr *addr) 748static inline int cma_zero_addr(struct sockaddr *addr)
681{ 749{
682 struct in6_addr *ip6; 750 switch (addr->sa_family) {
683 751 case AF_INET:
684 if (addr->sa_family == AF_INET) 752 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
685 return ipv4_is_zeronet( 753 case AF_INET6:
686 ((struct sockaddr_in *)addr)->sin_addr.s_addr); 754 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
687 else { 755 case AF_IB:
688 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; 756 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
689 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | 757 default:
690 ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; 758 return 0;
691 } 759 }
692} 760}
693 761
694static inline int cma_loopback_addr(struct sockaddr *addr) 762static inline int cma_loopback_addr(struct sockaddr *addr)
695{ 763{
696 if (addr->sa_family == AF_INET) 764 switch (addr->sa_family) {
697 return ipv4_is_loopback( 765 case AF_INET:
698 ((struct sockaddr_in *) addr)->sin_addr.s_addr); 766 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
699 else 767 case AF_INET6:
700 return ipv6_addr_loopback( 768 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
701 &((struct sockaddr_in6 *) addr)->sin6_addr); 769 case AF_IB:
770 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
771 default:
772 return 0;
773 }
702} 774}
703 775
704static inline int cma_any_addr(struct sockaddr *addr) 776static inline int cma_any_addr(struct sockaddr *addr)
@@ -715,18 +787,31 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
715 case AF_INET: 787 case AF_INET:
716 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 788 return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
717 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 789 ((struct sockaddr_in *) dst)->sin_addr.s_addr;
718 default: 790 case AF_INET6:
719 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 791 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
720 &((struct sockaddr_in6 *) dst)->sin6_addr); 792 &((struct sockaddr_in6 *) dst)->sin6_addr);
793 default:
794 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
795 &((struct sockaddr_ib *) dst)->sib_addr);
721 } 796 }
722} 797}
723 798
724static inline __be16 cma_port(struct sockaddr *addr) 799static __be16 cma_port(struct sockaddr *addr)
725{ 800{
726 if (addr->sa_family == AF_INET) 801 struct sockaddr_ib *sib;
802
803 switch (addr->sa_family) {
804 case AF_INET:
727 return ((struct sockaddr_in *) addr)->sin_port; 805 return ((struct sockaddr_in *) addr)->sin_port;
728 else 806 case AF_INET6:
729 return ((struct sockaddr_in6 *) addr)->sin6_port; 807 return ((struct sockaddr_in6 *) addr)->sin6_port;
808 case AF_IB:
809 sib = (struct sockaddr_ib *) addr;
810 return htons((u16) (be64_to_cpu(sib->sib_sid) &
811 be64_to_cpu(sib->sib_sid_mask)));
812 default:
813 return 0;
814 }
730} 815}
731 816
732static inline int cma_any_port(struct sockaddr *addr) 817static inline int cma_any_port(struct sockaddr *addr)
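For AF_IB, cma_port() has no sin_port-style field to read: the 16-bit port is carried in the low bits of the 64-bit service ID, and only the bits covered by sib_sid_mask count. A standalone illustration of that extraction, done in host byte order for readability (the kernel operates on big-endian sib_sid/sib_sid_mask); the 0x0106 port-space value is RDMA_PS_TCP as defined in <rdma/rdma_cm.h>:

#include <stdio.h>
#include <stdint.h>

/* Host-order version of the AF_IB branch of cma_port(). */
static uint16_t demo_ib_port(uint64_t sid, uint64_t sid_mask)
{
	return (uint16_t)(sid & sid_mask);	/* low 16 bits carry the port */
}

int main(void)
{
	/* Service ID for the RDMA_PS_TCP space (0x0106) with port 5000. */
	uint64_t sid      = ((uint64_t)0x0106 << 16) | 5000;
	uint64_t sid_mask = 0xffffffffffffffffULL;

	printf("port = %u\n", (unsigned)demo_ib_port(sid, sid_mask));	/* prints 5000 */
	return 0;
}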
@@ -734,83 +819,92 @@ static inline int cma_any_port(struct sockaddr *addr)
734 return !cma_port(addr); 819 return !cma_port(addr);
735} 820}
736 821
737static int cma_get_net_info(void *hdr, enum rdma_port_space ps, 822static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
738 u8 *ip_ver, __be16 *port, 823 struct ib_sa_path_rec *path)
739 union cma_ip_addr **src, union cma_ip_addr **dst)
740{ 824{
741 switch (ps) { 825 struct sockaddr_ib *listen_ib, *ib;
742 case RDMA_PS_SDP:
743 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
744 SDP_MAJ_VERSION)
745 return -EINVAL;
746 826
747 *ip_ver = sdp_get_ip_ver(hdr); 827 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr;
748 *port = ((struct sdp_hh *) hdr)->port; 828 ib = (struct sockaddr_ib *) &id->route.addr.src_addr;
749 *src = &((struct sdp_hh *) hdr)->src_addr; 829 ib->sib_family = listen_ib->sib_family;
750 *dst = &((struct sdp_hh *) hdr)->dst_addr; 830 ib->sib_pkey = path->pkey;
751 break; 831 ib->sib_flowinfo = path->flow_label;
752 default: 832 memcpy(&ib->sib_addr, &path->sgid, 16);
753 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) 833 ib->sib_sid = listen_ib->sib_sid;
754 return -EINVAL; 834 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL);
835 ib->sib_scope_id = listen_ib->sib_scope_id;
755 836
756 *ip_ver = cma_get_ip_ver(hdr); 837 ib = (struct sockaddr_ib *) &id->route.addr.dst_addr;
757 *port = ((struct cma_hdr *) hdr)->port; 838 ib->sib_family = listen_ib->sib_family;
758 *src = &((struct cma_hdr *) hdr)->src_addr; 839 ib->sib_pkey = path->pkey;
759 *dst = &((struct cma_hdr *) hdr)->dst_addr; 840 ib->sib_flowinfo = path->flow_label;
760 break; 841 memcpy(&ib->sib_addr, &path->dgid, 16);
761 }
762
763 if (*ip_ver != 4 && *ip_ver != 6)
764 return -EINVAL;
765 return 0;
766} 842}
767 843
768static void cma_save_net_info(struct rdma_addr *addr, 844static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
769 struct rdma_addr *listen_addr, 845 struct cma_hdr *hdr)
770 u8 ip_ver, __be16 port,
771 union cma_ip_addr *src, union cma_ip_addr *dst)
772{ 846{
773 struct sockaddr_in *listen4, *ip4; 847 struct sockaddr_in *listen4, *ip4;
848
849 listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr;
850 ip4 = (struct sockaddr_in *) &id->route.addr.src_addr;
851 ip4->sin_family = listen4->sin_family;
852 ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
853 ip4->sin_port = listen4->sin_port;
854
855 ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr;
856 ip4->sin_family = listen4->sin_family;
857 ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
858 ip4->sin_port = hdr->port;
859}
860
861static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
862 struct cma_hdr *hdr)
863{
774 struct sockaddr_in6 *listen6, *ip6; 864 struct sockaddr_in6 *listen6, *ip6;
775 865
776 switch (ip_ver) { 866 listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr;
867 ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr;
868 ip6->sin6_family = listen6->sin6_family;
869 ip6->sin6_addr = hdr->dst_addr.ip6;
870 ip6->sin6_port = listen6->sin6_port;
871
872 ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr;
873 ip6->sin6_family = listen6->sin6_family;
874 ip6->sin6_addr = hdr->src_addr.ip6;
875 ip6->sin6_port = hdr->port;
876}
877
878static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id,
879 struct ib_cm_event *ib_event)
880{
881 struct cma_hdr *hdr;
882
883 if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
884 cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
885 return 0;
886 }
887
888 hdr = ib_event->private_data;
889 if (hdr->cma_version != CMA_VERSION)
890 return -EINVAL;
891
892 switch (cma_get_ip_ver(hdr)) {
777 case 4: 893 case 4:
778 listen4 = (struct sockaddr_in *) &listen_addr->src_addr; 894 cma_save_ip4_info(id, listen_id, hdr);
779 ip4 = (struct sockaddr_in *) &addr->src_addr;
780 ip4->sin_family = listen4->sin_family;
781 ip4->sin_addr.s_addr = dst->ip4.addr;
782 ip4->sin_port = listen4->sin_port;
783
784 ip4 = (struct sockaddr_in *) &addr->dst_addr;
785 ip4->sin_family = listen4->sin_family;
786 ip4->sin_addr.s_addr = src->ip4.addr;
787 ip4->sin_port = port;
788 break; 895 break;
789 case 6: 896 case 6:
790 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; 897 cma_save_ip6_info(id, listen_id, hdr);
791 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
792 ip6->sin6_family = listen6->sin6_family;
793 ip6->sin6_addr = dst->ip6;
794 ip6->sin6_port = listen6->sin6_port;
795
796 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
797 ip6->sin6_family = listen6->sin6_family;
798 ip6->sin6_addr = src->ip6;
799 ip6->sin6_port = port;
800 break; 898 break;
801 default: 899 default:
802 break; 900 return -EINVAL;
803 } 901 }
902 return 0;
804} 903}
805 904
806static inline int cma_user_data_offset(enum rdma_port_space ps) 905static inline int cma_user_data_offset(struct rdma_id_private *id_priv)
807{ 906{
808 switch (ps) { 907 return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);
809 case RDMA_PS_SDP:
810 return 0;
811 default:
812 return sizeof(struct cma_hdr);
813 }
814} 908}
815 909
816static void cma_cancel_route(struct rdma_id_private *id_priv) 910static void cma_cancel_route(struct rdma_id_private *id_priv)
@@ -861,8 +955,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
861 cma_cancel_route(id_priv); 955 cma_cancel_route(id_priv);
862 break; 956 break;
863 case RDMA_CM_LISTEN: 957 case RDMA_CM_LISTEN:
864 if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) 958 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)
865 && !id_priv->cma_dev)
866 cma_cancel_listens(id_priv); 959 cma_cancel_listens(id_priv);
867 break; 960 break;
868 default: 961 default:
@@ -977,16 +1070,6 @@ reject:
977 return ret; 1070 return ret;
978} 1071}
979 1072
980static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
981{
982 if (id_priv->id.ps == RDMA_PS_SDP &&
983 sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
984 SDP_MAJ_VERSION)
985 return -EINVAL;
986
987 return 0;
988}
989
990static void cma_set_rep_event_data(struct rdma_cm_event *event, 1073static void cma_set_rep_event_data(struct rdma_cm_event *event,
991 struct ib_cm_rep_event_param *rep_data, 1074 struct ib_cm_rep_event_param *rep_data,
992 void *private_data) 1075 void *private_data)
@@ -1021,15 +1104,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1021 event.status = -ETIMEDOUT; 1104 event.status = -ETIMEDOUT;
1022 break; 1105 break;
1023 case IB_CM_REP_RECEIVED: 1106 case IB_CM_REP_RECEIVED:
1024 event.status = cma_verify_rep(id_priv, ib_event->private_data); 1107 if (id_priv->id.qp) {
1025 if (event.status)
1026 event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1027 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
1028 event.status = cma_rep_recv(id_priv); 1108 event.status = cma_rep_recv(id_priv);
1029 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : 1109 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
1030 RDMA_CM_EVENT_ESTABLISHED; 1110 RDMA_CM_EVENT_ESTABLISHED;
1031 } else 1111 } else {
1032 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1112 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
1113 }
1033 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1114 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
1034 ib_event->private_data); 1115 ib_event->private_data);
1035 break; 1116 break;
@@ -1085,22 +1166,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1085 struct rdma_id_private *id_priv; 1166 struct rdma_id_private *id_priv;
1086 struct rdma_cm_id *id; 1167 struct rdma_cm_id *id;
1087 struct rdma_route *rt; 1168 struct rdma_route *rt;
1088 union cma_ip_addr *src, *dst;
1089 __be16 port;
1090 u8 ip_ver;
1091 int ret; 1169 int ret;
1092 1170
1093 if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1094 &ip_ver, &port, &src, &dst))
1095 return NULL;
1096
1097 id = rdma_create_id(listen_id->event_handler, listen_id->context, 1171 id = rdma_create_id(listen_id->event_handler, listen_id->context,
1098 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1172 listen_id->ps, ib_event->param.req_rcvd.qp_type);
1099 if (IS_ERR(id)) 1173 if (IS_ERR(id))
1100 return NULL; 1174 return NULL;
1101 1175
1102 cma_save_net_info(&id->route.addr, &listen_id->route.addr, 1176 id_priv = container_of(id, struct rdma_id_private, id);
1103 ip_ver, port, src, dst); 1177 if (cma_save_net_info(id, listen_id, ib_event))
1178 goto err;
1104 1179
1105 rt = &id->route; 1180 rt = &id->route;
1106 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; 1181 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
@@ -1113,19 +1188,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1113 if (rt->num_paths == 2) 1188 if (rt->num_paths == 2)
1114 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1189 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1115 1190
1116 if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { 1191 if (cma_any_addr(cma_src_addr(id_priv))) {
1117 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1192 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1118 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1193 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1119 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1194 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1120 } else { 1195 } else {
1121 ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, 1196 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);
1122 &rt->addr.dev_addr);
1123 if (ret) 1197 if (ret)
1124 goto err; 1198 goto err;
1125 } 1199 }
1126 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1200 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1127 1201
1128 id_priv = container_of(id, struct rdma_id_private, id);
1129 id_priv->state = RDMA_CM_CONNECT; 1202 id_priv->state = RDMA_CM_CONNECT;
1130 return id_priv; 1203 return id_priv;
1131 1204
@@ -1139,9 +1212,6 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1139{ 1212{
1140 struct rdma_id_private *id_priv; 1213 struct rdma_id_private *id_priv;
1141 struct rdma_cm_id *id; 1214 struct rdma_cm_id *id;
1142 union cma_ip_addr *src, *dst;
1143 __be16 port;
1144 u8 ip_ver;
1145 int ret; 1215 int ret;
1146 1216
1147 id = rdma_create_id(listen_id->event_handler, listen_id->context, 1217 id = rdma_create_id(listen_id->event_handler, listen_id->context,
@@ -1149,22 +1219,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1149 if (IS_ERR(id)) 1219 if (IS_ERR(id))
1150 return NULL; 1220 return NULL;
1151 1221
1152 1222 id_priv = container_of(id, struct rdma_id_private, id);
1153 if (cma_get_net_info(ib_event->private_data, listen_id->ps, 1223 if (cma_save_net_info(id, listen_id, ib_event))
1154 &ip_ver, &port, &src, &dst))
1155 goto err; 1224 goto err;
1156 1225
1157 cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1158 ip_ver, port, src, dst);
1159
1160 if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { 1226 if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
1161 ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, 1227 ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);
1162 &id->route.addr.dev_addr);
1163 if (ret) 1228 if (ret)
1164 goto err; 1229 goto err;
1165 } 1230 }
1166 1231
1167 id_priv = container_of(id, struct rdma_id_private, id);
1168 id_priv->state = RDMA_CM_CONNECT; 1232 id_priv->state = RDMA_CM_CONNECT;
1169 return id_priv; 1233 return id_priv;
1170err: 1234err:
@@ -1210,7 +1274,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1210 return -ECONNABORTED; 1274 return -ECONNABORTED;
1211 1275
1212 memset(&event, 0, sizeof event); 1276 memset(&event, 0, sizeof event);
1213 offset = cma_user_data_offset(listen_id->id.ps); 1277 offset = cma_user_data_offset(listen_id);
1214 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1278 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1215 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1279 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
1216 conn_id = cma_new_udp_id(&listen_id->id, ib_event); 1280 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
@@ -1272,58 +1336,44 @@ err1:
1272 return ret; 1336 return ret;
1273} 1337}
1274 1338
1275static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) 1339__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)
1276{ 1340{
1277 return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); 1341 if (addr->sa_family == AF_IB)
1342 return ((struct sockaddr_ib *) addr)->sib_sid;
1343
1344 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));
1278} 1345}
1346EXPORT_SYMBOL(rdma_get_service_id);
1279 1347
1280static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, 1348static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1281 struct ib_cm_compare_data *compare) 1349 struct ib_cm_compare_data *compare)
1282{ 1350{
1283 struct cma_hdr *cma_data, *cma_mask; 1351 struct cma_hdr *cma_data, *cma_mask;
1284 struct sdp_hh *sdp_data, *sdp_mask;
1285 __be32 ip4_addr; 1352 __be32 ip4_addr;
1286 struct in6_addr ip6_addr; 1353 struct in6_addr ip6_addr;
1287 1354
1288 memset(compare, 0, sizeof *compare); 1355 memset(compare, 0, sizeof *compare);
1289 cma_data = (void *) compare->data; 1356 cma_data = (void *) compare->data;
1290 cma_mask = (void *) compare->mask; 1357 cma_mask = (void *) compare->mask;
1291 sdp_data = (void *) compare->data;
1292 sdp_mask = (void *) compare->mask;
1293 1358
1294 switch (addr->sa_family) { 1359 switch (addr->sa_family) {
1295 case AF_INET: 1360 case AF_INET:
1296 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; 1361 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1297 if (ps == RDMA_PS_SDP) { 1362 cma_set_ip_ver(cma_data, 4);
1298 sdp_set_ip_ver(sdp_data, 4); 1363 cma_set_ip_ver(cma_mask, 0xF);
1299 sdp_set_ip_ver(sdp_mask, 0xF); 1364 if (!cma_any_addr(addr)) {
1300 sdp_data->dst_addr.ip4.addr = ip4_addr; 1365 cma_data->dst_addr.ip4.addr = ip4_addr;
1301 sdp_mask->dst_addr.ip4.addr = htonl(~0); 1366 cma_mask->dst_addr.ip4.addr = htonl(~0);
1302 } else {
1303 cma_set_ip_ver(cma_data, 4);
1304 cma_set_ip_ver(cma_mask, 0xF);
1305 if (!cma_any_addr(addr)) {
1306 cma_data->dst_addr.ip4.addr = ip4_addr;
1307 cma_mask->dst_addr.ip4.addr = htonl(~0);
1308 }
1309 } 1367 }
1310 break; 1368 break;
1311 case AF_INET6: 1369 case AF_INET6:
1312 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; 1370 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1313 if (ps == RDMA_PS_SDP) { 1371 cma_set_ip_ver(cma_data, 6);
1314 sdp_set_ip_ver(sdp_data, 6); 1372 cma_set_ip_ver(cma_mask, 0xF);
1315 sdp_set_ip_ver(sdp_mask, 0xF); 1373 if (!cma_any_addr(addr)) {
1316 sdp_data->dst_addr.ip6 = ip6_addr; 1374 cma_data->dst_addr.ip6 = ip6_addr;
1317 memset(&sdp_mask->dst_addr.ip6, 0xFF, 1375 memset(&cma_mask->dst_addr.ip6, 0xFF,
1318 sizeof sdp_mask->dst_addr.ip6); 1376 sizeof cma_mask->dst_addr.ip6);
1319 } else {
1320 cma_set_ip_ver(cma_data, 6);
1321 cma_set_ip_ver(cma_mask, 0xF);
1322 if (!cma_any_addr(addr)) {
1323 cma_data->dst_addr.ip6 = ip6_addr;
1324 memset(&cma_mask->dst_addr.ip6, 0xFF,
1325 sizeof cma_mask->dst_addr.ip6);
1326 }
1327 } 1377 }
1328 break; 1378 break;
1329 default: 1379 default:
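rdma_get_service_id() above returns an AF_IB destination's sib_sid unchanged, while IP-addressed IDs keep the classic encoding of port space in the upper bits and port in the low 16 bits; later in this diff, cma_select_ib_ps() inverts that encoding to pick the matching idr. A sketch of the round trip; the RDMA_IB_IP_PS_* prefix values are not part of this excerpt and are reproduced locally, as an assumption, from the <rdma/rdma_cm.h> of this era:

#include <stdio.h>
#include <stdint.h>

/* Assumed prefix values mirroring RDMA_IB_IP_PS_* in <rdma/rdma_cm.h>;
 * reproduced locally so the snippet stands alone. */
#define DEMO_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL
#define DEMO_IB_IP_PS_TCP  0x0000000001060000ULL	/* RDMA_PS_TCP = 0x0106 */
#define DEMO_IB_IP_PS_UDP  0x0000000001110000ULL	/* RDMA_PS_UDP = 0x0111 */

/* IP-addressed IDs: service ID = (port space << 16) | port. */
static uint64_t demo_service_id(uint16_t ps, uint16_t port)
{
	return ((uint64_t)ps << 16) | port;
}

/* AF_IB listeners: recover the port space from the sid prefix. */
static const char *demo_classify(uint64_t sid)
{
	uint64_t prefix = sid & DEMO_IB_IP_PS_MASK;

	if (prefix == DEMO_IB_IP_PS_TCP)
		return "tcp_ps";
	if (prefix == DEMO_IB_IP_PS_UDP)
		return "udp_ps";
	return "unknown";
}

int main(void)
{
	uint64_t sid = demo_service_id(0x0106, 5000);	/* RDMA_PS_TCP, port 5000 */

	printf("sid = 0x%016llx -> %s, port %u\n",
	       (unsigned long long)sid, demo_classify(sid),
	       (unsigned)(sid & 0xffff));
	return 0;
}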
@@ -1347,9 +1397,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1347 event.event = RDMA_CM_EVENT_DISCONNECTED; 1397 event.event = RDMA_CM_EVENT_DISCONNECTED;
1348 break; 1398 break;
1349 case IW_CM_EVENT_CONNECT_REPLY: 1399 case IW_CM_EVENT_CONNECT_REPLY:
1350 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; 1400 sin = (struct sockaddr_in *) cma_src_addr(id_priv);
1351 *sin = iw_event->local_addr; 1401 *sin = iw_event->local_addr;
1352 sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; 1402 sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
1353 *sin = iw_event->remote_addr; 1403 *sin = iw_event->remote_addr;
1354 switch (iw_event->status) { 1404 switch (iw_event->status) {
1355 case 0: 1405 case 0:
@@ -1447,9 +1497,9 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1447 cm_id->context = conn_id; 1497 cm_id->context = conn_id;
1448 cm_id->cm_handler = cma_iw_handler; 1498 cm_id->cm_handler = cma_iw_handler;
1449 1499
1450 sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; 1500 sin = (struct sockaddr_in *) cma_src_addr(conn_id);
1451 *sin = iw_event->local_addr; 1501 *sin = iw_event->local_addr;
1452 sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; 1502 sin = (struct sockaddr_in *) cma_dst_addr(conn_id);
1453 *sin = iw_event->remote_addr; 1503 *sin = iw_event->remote_addr;
1454 1504
1455 ret = ib_query_device(conn_id->id.device, &attr); 1505 ret = ib_query_device(conn_id->id.device, &attr);
@@ -1506,8 +1556,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
1506 1556
1507 id_priv->cm_id.ib = id; 1557 id_priv->cm_id.ib = id;
1508 1558
1509 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; 1559 addr = cma_src_addr(id_priv);
1510 svc_id = cma_get_service_id(id_priv->id.ps, addr); 1560 svc_id = rdma_get_service_id(&id_priv->id, addr);
1511 if (cma_any_addr(addr) && !id_priv->afonly) 1561 if (cma_any_addr(addr) && !id_priv->afonly)
1512 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); 1562 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1513 else { 1563 else {
@@ -1537,7 +1587,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1537 1587
1538 id_priv->cm_id.iw = id; 1588 id_priv->cm_id.iw = id;
1539 1589
1540 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; 1590 sin = (struct sockaddr_in *) cma_src_addr(id_priv);
1541 id_priv->cm_id.iw->local_addr = *sin; 1591 id_priv->cm_id.iw->local_addr = *sin;
1542 1592
1543 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 1593 ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
@@ -1567,6 +1617,10 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1567 struct rdma_cm_id *id; 1617 struct rdma_cm_id *id;
1568 int ret; 1618 int ret;
1569 1619
1620 if (cma_family(id_priv) == AF_IB &&
1621 rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB)
1622 return;
1623
1570 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, 1624 id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
1571 id_priv->id.qp_type); 1625 id_priv->id.qp_type);
1572 if (IS_ERR(id)) 1626 if (IS_ERR(id))
@@ -1575,8 +1629,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1575 dev_id_priv = container_of(id, struct rdma_id_private, id); 1629 dev_id_priv = container_of(id, struct rdma_id_private, id);
1576 1630
1577 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 1631 dev_id_priv->state = RDMA_CM_ADDR_BOUND;
1578 memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, 1632 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
1579 ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); 1633 rdma_addr_size(cma_src_addr(id_priv)));
1580 1634
1581 cma_attach_to_dev(dev_id_priv, cma_dev); 1635 cma_attach_to_dev(dev_id_priv, cma_dev);
1582 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 1636 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
@@ -1634,31 +1688,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1634static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 1688static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1635 struct cma_work *work) 1689 struct cma_work *work)
1636{ 1690{
1637 struct rdma_addr *addr = &id_priv->id.route.addr; 1691 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
1638 struct ib_sa_path_rec path_rec; 1692 struct ib_sa_path_rec path_rec;
1639 ib_sa_comp_mask comp_mask; 1693 ib_sa_comp_mask comp_mask;
1640 struct sockaddr_in6 *sin6; 1694 struct sockaddr_in6 *sin6;
1695 struct sockaddr_ib *sib;
1641 1696
1642 memset(&path_rec, 0, sizeof path_rec); 1697 memset(&path_rec, 0, sizeof path_rec);
1643 rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); 1698 rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
1644 rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); 1699 rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
1645 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); 1700 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
1646 path_rec.numb_path = 1; 1701 path_rec.numb_path = 1;
1647 path_rec.reversible = 1; 1702 path_rec.reversible = 1;
1648 path_rec.service_id = cma_get_service_id(id_priv->id.ps, 1703 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
1649 (struct sockaddr *) &addr->dst_addr);
1650 1704
1651 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 1705 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1652 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 1706 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1653 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 1707 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;
1654 1708
1655 if (addr->src_addr.ss_family == AF_INET) { 1709 switch (cma_family(id_priv)) {
1710 case AF_INET:
1656 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 1711 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
1657 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 1712 comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
1658 } else { 1713 break;
1659 sin6 = (struct sockaddr_in6 *) &addr->src_addr; 1714 case AF_INET6:
1715 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
1660 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 1716 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
1661 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 1717 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1718 break;
1719 case AF_IB:
1720 sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
1721 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
1722 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
1723 break;
1662 } 1724 }
1663 1725
1664 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 1726 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
@@ -1800,14 +1862,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1800 struct rdma_addr *addr = &route->addr; 1862 struct rdma_addr *addr = &route->addr;
1801 struct cma_work *work; 1863 struct cma_work *work;
1802 int ret; 1864 int ret;
1803 struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
1804 struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
1805 struct net_device *ndev = NULL; 1865 struct net_device *ndev = NULL;
1806 u16 vid; 1866 u16 vid;
1807 1867
1808 if (src_addr->sin_family != dst_addr->sin_family)
1809 return -EINVAL;
1810
1811 work = kzalloc(sizeof *work, GFP_KERNEL); 1868 work = kzalloc(sizeof *work, GFP_KERNEL);
1812 if (!work) 1869 if (!work)
1813 return -ENOMEM; 1870 return -ENOMEM;
@@ -1913,28 +1970,57 @@ err:
1913} 1970}
1914EXPORT_SYMBOL(rdma_resolve_route); 1971EXPORT_SYMBOL(rdma_resolve_route);
1915 1972
1973static void cma_set_loopback(struct sockaddr *addr)
1974{
1975 switch (addr->sa_family) {
1976 case AF_INET:
1977 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1978 break;
1979 case AF_INET6:
1980 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr,
1981 0, 0, 0, htonl(1));
1982 break;
1983 default:
1984 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr,
1985 0, 0, 0, htonl(1));
1986 break;
1987 }
1988}
1989
1916static int cma_bind_loopback(struct rdma_id_private *id_priv) 1990static int cma_bind_loopback(struct rdma_id_private *id_priv)
1917{ 1991{
1918 struct cma_device *cma_dev; 1992 struct cma_device *cma_dev, *cur_dev;
1919 struct ib_port_attr port_attr; 1993 struct ib_port_attr port_attr;
1920 union ib_gid gid; 1994 union ib_gid gid;
1921 u16 pkey; 1995 u16 pkey;
1922 int ret; 1996 int ret;
1923 u8 p; 1997 u8 p;
1924 1998
1999 cma_dev = NULL;
1925 mutex_lock(&lock); 2000 mutex_lock(&lock);
1926 if (list_empty(&dev_list)) { 2001 list_for_each_entry(cur_dev, &dev_list, list) {
2002 if (cma_family(id_priv) == AF_IB &&
2003 rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB)
2004 continue;
2005
2006 if (!cma_dev)
2007 cma_dev = cur_dev;
2008
2009 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
2010 if (!ib_query_port(cur_dev->device, p, &port_attr) &&
2011 port_attr.state == IB_PORT_ACTIVE) {
2012 cma_dev = cur_dev;
2013 goto port_found;
2014 }
2015 }
2016 }
2017
2018 if (!cma_dev) {
1927 ret = -ENODEV; 2019 ret = -ENODEV;
1928 goto out; 2020 goto out;
1929 } 2021 }
1930 list_for_each_entry(cma_dev, &dev_list, list)
1931 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1932 if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1933 port_attr.state == IB_PORT_ACTIVE)
1934 goto port_found;
1935 2022
1936 p = 1; 2023 p = 1;
1937 cma_dev = list_entry(dev_list.next, struct cma_device, list);
1938 2024
1939port_found: 2025port_found:
1940 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); 2026 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
@@ -1953,6 +2039,7 @@ port_found:
1953 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2039 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1954 id_priv->id.port_num = p; 2040 id_priv->id.port_num = p;
1955 cma_attach_to_dev(id_priv, cma_dev); 2041 cma_attach_to_dev(id_priv, cma_dev);
2042 cma_set_loopback(cma_src_addr(id_priv));
1956out: 2043out:
1957 mutex_unlock(&lock); 2044 mutex_unlock(&lock);
1958 return ret; 2045 return ret;
@@ -1980,8 +2067,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
1980 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2067 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1981 event.status = status; 2068 event.status = status;
1982 } else { 2069 } else {
1983 memcpy(&id_priv->id.route.addr.src_addr, src_addr, 2070 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
1984 ip_addr_size(src_addr));
1985 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2071 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1986 } 2072 }
1987 2073
@@ -2000,7 +2086,6 @@ out:
2000static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2086static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2001{ 2087{
2002 struct cma_work *work; 2088 struct cma_work *work;
2003 struct sockaddr *src, *dst;
2004 union ib_gid gid; 2089 union ib_gid gid;
2005 int ret; 2090 int ret;
2006 2091
@@ -2017,18 +2102,36 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2017 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2102 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
2018 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2103 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
2019 2104
2020 src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; 2105 work->id = id_priv;
2021 if (cma_zero_addr(src)) { 2106 INIT_WORK(&work->work, cma_work_handler);
2022 dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 2107 work->old_state = RDMA_CM_ADDR_QUERY;
2023 if ((src->sa_family = dst->sa_family) == AF_INET) { 2108 work->new_state = RDMA_CM_ADDR_RESOLVED;
2024 ((struct sockaddr_in *)src)->sin_addr = 2109 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
2025 ((struct sockaddr_in *)dst)->sin_addr; 2110 queue_work(cma_wq, &work->work);
2026 } else { 2111 return 0;
2027 ((struct sockaddr_in6 *)src)->sin6_addr = 2112err:
2028 ((struct sockaddr_in6 *)dst)->sin6_addr; 2113 kfree(work);
2029 } 2114 return ret;
2115}
2116
2117static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
2118{
2119 struct cma_work *work;
2120 int ret;
2121
2122 work = kzalloc(sizeof *work, GFP_KERNEL);
2123 if (!work)
2124 return -ENOMEM;
2125
2126 if (!id_priv->cma_dev) {
2127 ret = cma_resolve_ib_dev(id_priv);
2128 if (ret)
2129 goto err;
2030 } 2130 }
2031 2131
2132 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *)
2133 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr));
2134
2032 work->id = id_priv; 2135 work->id = id_priv;
2033 INIT_WORK(&work->work, cma_work_handler); 2136 INIT_WORK(&work->work, cma_work_handler);
2034 work->old_state = RDMA_CM_ADDR_QUERY; 2137 work->old_state = RDMA_CM_ADDR_QUERY;
@@ -2046,9 +2149,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2046{ 2149{
2047 if (!src_addr || !src_addr->sa_family) { 2150 if (!src_addr || !src_addr->sa_family) {
2048 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2151 src_addr = (struct sockaddr *) &id->route.addr.src_addr;
2049 if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { 2152 src_addr->sa_family = dst_addr->sa_family;
2153 if (dst_addr->sa_family == AF_INET6) {
2050 ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = 2154 ((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
2051 ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; 2155 ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
2156 } else if (dst_addr->sa_family == AF_IB) {
2157 ((struct sockaddr_ib *) src_addr)->sib_pkey =
2158 ((struct sockaddr_ib *) dst_addr)->sib_pkey;
2052 } 2159 }
2053 } 2160 }
2054 return rdma_bind_addr(id, src_addr); 2161 return rdma_bind_addr(id, src_addr);
@@ -2067,17 +2174,25 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
2067 return ret; 2174 return ret;
2068 } 2175 }
2069 2176
2177 if (cma_family(id_priv) != dst_addr->sa_family)
2178 return -EINVAL;
2179
2070 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 2180 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
2071 return -EINVAL; 2181 return -EINVAL;
2072 2182
2073 atomic_inc(&id_priv->refcount); 2183 atomic_inc(&id_priv->refcount);
2074 memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); 2184 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
2075 if (cma_any_addr(dst_addr)) 2185 if (cma_any_addr(dst_addr)) {
2076 ret = cma_resolve_loopback(id_priv); 2186 ret = cma_resolve_loopback(id_priv);
2077 else 2187 } else {
2078 ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, 2188 if (dst_addr->sa_family == AF_IB) {
2079 dst_addr, &id->route.addr.dev_addr, 2189 ret = cma_resolve_ib_addr(id_priv);
2080 timeout_ms, addr_handler, id_priv); 2190 } else {
2191 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
2192 dst_addr, &id->route.addr.dev_addr,
2193 timeout_ms, addr_handler, id_priv);
2194 }
2195 }
2081 if (ret) 2196 if (ret)
2082 goto err; 2197 goto err;
2083 2198
@@ -2097,7 +2212,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2097 2212
2098 id_priv = container_of(id, struct rdma_id_private, id); 2213 id_priv = container_of(id, struct rdma_id_private, id);
2099 spin_lock_irqsave(&id_priv->lock, flags); 2214 spin_lock_irqsave(&id_priv->lock, flags);
2100 if (id_priv->state == RDMA_CM_IDLE) { 2215 if (reuse || id_priv->state == RDMA_CM_IDLE) {
2101 id_priv->reuseaddr = reuse; 2216 id_priv->reuseaddr = reuse;
2102 ret = 0; 2217 ret = 0;
2103 } else { 2218 } else {
@@ -2131,10 +2246,29 @@ EXPORT_SYMBOL(rdma_set_afonly);
2131static void cma_bind_port(struct rdma_bind_list *bind_list, 2246static void cma_bind_port(struct rdma_bind_list *bind_list,
2132 struct rdma_id_private *id_priv) 2247 struct rdma_id_private *id_priv)
2133{ 2248{
2134 struct sockaddr_in *sin; 2249 struct sockaddr *addr;
2250 struct sockaddr_ib *sib;
2251 u64 sid, mask;
2252 __be16 port;
2135 2253
2136 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; 2254 addr = cma_src_addr(id_priv);
2137 sin->sin_port = htons(bind_list->port); 2255 port = htons(bind_list->port);
2256
2257 switch (addr->sa_family) {
2258 case AF_INET:
2259 ((struct sockaddr_in *) addr)->sin_port = port;
2260 break;
2261 case AF_INET6:
2262 ((struct sockaddr_in6 *) addr)->sin6_port = port;
2263 break;
2264 case AF_IB:
2265 sib = (struct sockaddr_ib *) addr;
2266 sid = be64_to_cpu(sib->sib_sid);
2267 mask = be64_to_cpu(sib->sib_sid_mask);
2268 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port));
2269 sib->sib_sid_mask = cpu_to_be64(~0ULL);
2270 break;
2271 }
2138 id_priv->bind_list = bind_list; 2272 id_priv->bind_list = bind_list;
2139 hlist_add_head(&id_priv->node, &bind_list->owners); 2273 hlist_add_head(&id_priv->node, &bind_list->owners);
2140} 2274}
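On the bind side, the AF_IB branch of cma_bind_port() above keeps whatever service-ID prefix the caller asked for (sid & mask), drops the allocated port into the low 16 bits, and then widens the mask to all ones so the ID matches exactly one service from then on. The same arithmetic in host byte order, as a minimal sketch:

#include <stdio.h>
#include <stdint.h>

/* Host-order version of the AF_IB branch of cma_bind_port(); the kernel
 * does the same arithmetic on big-endian sib_sid/sib_sid_mask. */
static void demo_bind_port(uint64_t *sid, uint64_t *mask, uint16_t port)
{
	*sid  = (*sid & *mask) | port;	/* keep requested prefix, set port */
	*mask = ~0ULL;			/* from now on match the full sid */
}

int main(void)
{
	uint64_t sid  = 0x0000000001060000ULL;	/* "any service in the TCP space" */
	uint64_t mask = 0xFFFFFFFFFFFF0000ULL;	/* port bits were wildcarded */

	demo_bind_port(&sid, &mask, 5000);
	printf("bound sid = 0x%016llx\n", (unsigned long long)sid);
	return 0;
}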
@@ -2205,7 +2339,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
2205 struct rdma_id_private *cur_id; 2339 struct rdma_id_private *cur_id;
2206 struct sockaddr *addr, *cur_addr; 2340 struct sockaddr *addr, *cur_addr;
2207 2341
2208 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; 2342 addr = cma_src_addr(id_priv);
2209 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 2343 hlist_for_each_entry(cur_id, &bind_list->owners, node) {
2210 if (id_priv == cur_id) 2344 if (id_priv == cur_id)
2211 continue; 2345 continue;
@@ -2214,7 +2348,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
2214 cur_id->reuseaddr) 2348 cur_id->reuseaddr)
2215 continue; 2349 continue;
2216 2350
2217 cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; 2351 cur_addr = cma_src_addr(cur_id);
2218 if (id_priv->afonly && cur_id->afonly && 2352 if (id_priv->afonly && cur_id->afonly &&
2219 (addr->sa_family != cur_addr->sa_family)) 2353 (addr->sa_family != cur_addr->sa_family))
2220 continue; 2354 continue;
@@ -2234,7 +2368,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
2234 unsigned short snum; 2368 unsigned short snum;
2235 int ret; 2369 int ret;
2236 2370
2237 snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); 2371 snum = ntohs(cma_port(cma_src_addr(id_priv)));
2238 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 2372 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
2239 return -EACCES; 2373 return -EACCES;
2240 2374
@@ -2261,33 +2395,67 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
2261 return ret; 2395 return ret;
2262} 2396}
2263 2397
2264static int cma_get_port(struct rdma_id_private *id_priv) 2398static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv)
2265{ 2399{
2266 struct idr *ps;
2267 int ret;
2268
2269 switch (id_priv->id.ps) { 2400 switch (id_priv->id.ps) {
2270 case RDMA_PS_SDP:
2271 ps = &sdp_ps;
2272 break;
2273 case RDMA_PS_TCP: 2401 case RDMA_PS_TCP:
2274 ps = &tcp_ps; 2402 return &tcp_ps;
2275 break;
2276 case RDMA_PS_UDP: 2403 case RDMA_PS_UDP:
2277 ps = &udp_ps; 2404 return &udp_ps;
2278 break;
2279 case RDMA_PS_IPOIB: 2405 case RDMA_PS_IPOIB:
2280 ps = &ipoib_ps; 2406 return &ipoib_ps;
2281 break;
2282 case RDMA_PS_IB: 2407 case RDMA_PS_IB:
2283 ps = &ib_ps; 2408 return &ib_ps;
2284 break;
2285 default: 2409 default:
2286 return -EPROTONOSUPPORT; 2410 return NULL;
2411 }
2412}
2413
2414static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv)
2415{
2416 struct idr *ps = NULL;
2417 struct sockaddr_ib *sib;
2418 u64 sid_ps, mask, sid;
2419
2420 sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
2421 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK;
2422 sid = be64_to_cpu(sib->sib_sid) & mask;
2423
2424 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) {
2425 sid_ps = RDMA_IB_IP_PS_IB;
2426 ps = &ib_ps;
2427 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) &&
2428 (sid == (RDMA_IB_IP_PS_TCP & mask))) {
2429 sid_ps = RDMA_IB_IP_PS_TCP;
2430 ps = &tcp_ps;
2431 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) &&
2432 (sid == (RDMA_IB_IP_PS_UDP & mask))) {
2433 sid_ps = RDMA_IB_IP_PS_UDP;
2434 ps = &udp_ps;
2287 } 2435 }
2288 2436
2437 if (ps) {
2438 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib)));
2439 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK |
2440 be64_to_cpu(sib->sib_sid_mask));
2441 }
2442 return ps;
2443}
2444
2445static int cma_get_port(struct rdma_id_private *id_priv)
2446{
2447 struct idr *ps;
2448 int ret;
2449
2450 if (cma_family(id_priv) != AF_IB)
2451 ps = cma_select_inet_ps(id_priv);
2452 else
2453 ps = cma_select_ib_ps(id_priv);
2454 if (!ps)
2455 return -EPROTONOSUPPORT;
2456
2289 mutex_lock(&lock); 2457 mutex_lock(&lock);
2290 if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) 2458 if (cma_any_port(cma_src_addr(id_priv)))
2291 ret = cma_alloc_any_port(ps, id_priv); 2459 ret = cma_alloc_any_port(ps, id_priv);
2292 else 2460 else
2293 ret = cma_use_port(ps, id_priv); 2461 ret = cma_use_port(ps, id_priv);
@@ -2322,8 +2490,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
2322 2490
2323 id_priv = container_of(id, struct rdma_id_private, id); 2491 id_priv = container_of(id, struct rdma_id_private, id);
2324 if (id_priv->state == RDMA_CM_IDLE) { 2492 if (id_priv->state == RDMA_CM_IDLE) {
2325 ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; 2493 id->route.addr.src_addr.ss_family = AF_INET;
2326 ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); 2494 ret = rdma_bind_addr(id, cma_src_addr(id_priv));
2327 if (ret) 2495 if (ret)
2328 return ret; 2496 return ret;
2329 } 2497 }
@@ -2370,7 +2538,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2370 struct rdma_id_private *id_priv; 2538 struct rdma_id_private *id_priv;
2371 int ret; 2539 int ret;
2372 2540
2373 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) 2541 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
2542 addr->sa_family != AF_IB)
2374 return -EAFNOSUPPORT; 2543 return -EAFNOSUPPORT;
2375 2544
2376 id_priv = container_of(id, struct rdma_id_private, id); 2545 id_priv = container_of(id, struct rdma_id_private, id);
@@ -2382,7 +2551,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2382 goto err1; 2551 goto err1;
2383 2552
2384 if (!cma_any_addr(addr)) { 2553 if (!cma_any_addr(addr)) {
2385 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); 2554 ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
2386 if (ret) 2555 if (ret)
2387 goto err1; 2556 goto err1;
2388 2557
@@ -2391,7 +2560,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2391 goto err1; 2560 goto err1;
2392 } 2561 }
2393 2562
2394 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); 2563 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
2395 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 2564 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
2396 if (addr->sa_family == AF_INET) 2565 if (addr->sa_family == AF_INET)
2397 id_priv->afonly = 1; 2566 id_priv->afonly = 1;
@@ -2414,62 +2583,32 @@ err1:
2414} 2583}
2415EXPORT_SYMBOL(rdma_bind_addr); 2584EXPORT_SYMBOL(rdma_bind_addr);
2416 2585
2417static int cma_format_hdr(void *hdr, enum rdma_port_space ps, 2586static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
2418 struct rdma_route *route)
2419{ 2587{
2420 struct cma_hdr *cma_hdr; 2588 struct cma_hdr *cma_hdr;
2421 struct sdp_hh *sdp_hdr;
2422 2589
2423 if (route->addr.src_addr.ss_family == AF_INET) { 2590 cma_hdr = hdr;
2591 cma_hdr->cma_version = CMA_VERSION;
2592 if (cma_family(id_priv) == AF_INET) {
2424 struct sockaddr_in *src4, *dst4; 2593 struct sockaddr_in *src4, *dst4;
2425 2594
2426 src4 = (struct sockaddr_in *) &route->addr.src_addr; 2595 src4 = (struct sockaddr_in *) cma_src_addr(id_priv);
2427 dst4 = (struct sockaddr_in *) &route->addr.dst_addr; 2596 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv);
2428 2597
2429 switch (ps) { 2598 cma_set_ip_ver(cma_hdr, 4);
2430 case RDMA_PS_SDP: 2599 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2431 sdp_hdr = hdr; 2600 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2432 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) 2601 cma_hdr->port = src4->sin_port;
2433 return -EINVAL; 2602 } else if (cma_family(id_priv) == AF_INET6) {
2434 sdp_set_ip_ver(sdp_hdr, 4);
2435 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2436 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2437 sdp_hdr->port = src4->sin_port;
2438 break;
2439 default:
2440 cma_hdr = hdr;
2441 cma_hdr->cma_version = CMA_VERSION;
2442 cma_set_ip_ver(cma_hdr, 4);
2443 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2444 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2445 cma_hdr->port = src4->sin_port;
2446 break;
2447 }
2448 } else {
2449 struct sockaddr_in6 *src6, *dst6; 2603 struct sockaddr_in6 *src6, *dst6;
2450 2604
2451 src6 = (struct sockaddr_in6 *) &route->addr.src_addr; 2605 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
2452 dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; 2606 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv);
2453 2607
2454 switch (ps) { 2608 cma_set_ip_ver(cma_hdr, 6);
2455 case RDMA_PS_SDP: 2609 cma_hdr->src_addr.ip6 = src6->sin6_addr;
2456 sdp_hdr = hdr; 2610 cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2457 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) 2611 cma_hdr->port = src6->sin6_port;
2458 return -EINVAL;
2459 sdp_set_ip_ver(sdp_hdr, 6);
2460 sdp_hdr->src_addr.ip6 = src6->sin6_addr;
2461 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr;
2462 sdp_hdr->port = src6->sin6_port;
2463 break;
2464 default:
2465 cma_hdr = hdr;
2466 cma_hdr->cma_version = CMA_VERSION;
2467 cma_set_ip_ver(cma_hdr, 6);
2468 cma_hdr->src_addr.ip6 = src6->sin6_addr;
2469 cma_hdr->dst_addr.ip6 = dst6->sin6_addr;
2470 cma_hdr->port = src6->sin6_port;
2471 break;
2472 }
2473 } 2612 }
2474 return 0; 2613 return 0;
2475} 2614}
@@ -2499,15 +2638,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2499 event.status = ib_event->param.sidr_rep_rcvd.status; 2638 event.status = ib_event->param.sidr_rep_rcvd.status;
2500 break; 2639 break;
2501 } 2640 }
2502 ret = cma_set_qkey(id_priv); 2641 ret = cma_set_qkey(id_priv, rep->qkey);
2503 if (ret) { 2642 if (ret) {
2504 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2643 event.event = RDMA_CM_EVENT_ADDR_ERROR;
2505 event.status = -EINVAL; 2644 event.status = ret;
2506 break;
2507 }
2508 if (id_priv->qkey != rep->qkey) {
2509 event.event = RDMA_CM_EVENT_UNREACHABLE;
2510 event.status = -EINVAL;
2511 break; 2645 break;
2512 } 2646 }
2513 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, 2647 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
@@ -2542,27 +2676,31 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2542 struct rdma_conn_param *conn_param) 2676 struct rdma_conn_param *conn_param)
2543{ 2677{
2544 struct ib_cm_sidr_req_param req; 2678 struct ib_cm_sidr_req_param req;
2545 struct rdma_route *route;
2546 struct ib_cm_id *id; 2679 struct ib_cm_id *id;
2547 int ret; 2680 int offset, ret;
2548 2681
2549 req.private_data_len = sizeof(struct cma_hdr) + 2682 offset = cma_user_data_offset(id_priv);
2550 conn_param->private_data_len; 2683 req.private_data_len = offset + conn_param->private_data_len;
2551 if (req.private_data_len < conn_param->private_data_len) 2684 if (req.private_data_len < conn_param->private_data_len)
2552 return -EINVAL; 2685 return -EINVAL;
2553 2686
2554 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2687 if (req.private_data_len) {
2555 if (!req.private_data) 2688 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2556 return -ENOMEM; 2689 if (!req.private_data)
2690 return -ENOMEM;
2691 } else {
2692 req.private_data = NULL;
2693 }
2557 2694
2558 if (conn_param->private_data && conn_param->private_data_len) 2695 if (conn_param->private_data && conn_param->private_data_len)
2559 memcpy((void *) req.private_data + sizeof(struct cma_hdr), 2696 memcpy((void *) req.private_data + offset,
2560 conn_param->private_data, conn_param->private_data_len); 2697 conn_param->private_data, conn_param->private_data_len);
2561 2698
2562 route = &id_priv->id.route; 2699 if (req.private_data) {
2563 ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); 2700 ret = cma_format_hdr((void *) req.private_data, id_priv);
2564 if (ret) 2701 if (ret)
2565 goto out; 2702 goto out;
2703 }
2566 2704
2567 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 2705 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
2568 id_priv); 2706 id_priv);
@@ -2572,9 +2710,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2572 } 2710 }
2573 id_priv->cm_id.ib = id; 2711 id_priv->cm_id.ib = id;
2574 2712
2575 req.path = route->path_rec; 2713 req.path = id_priv->id.route.path_rec;
2576 req.service_id = cma_get_service_id(id_priv->id.ps, 2714 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2577 (struct sockaddr *) &route->addr.dst_addr);
2578 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 2715 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2579 req.max_cm_retries = CMA_MAX_CM_RETRIES; 2716 req.max_cm_retries = CMA_MAX_CM_RETRIES;
2580 2717
@@ -2598,14 +2735,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2598 int offset, ret; 2735 int offset, ret;
2599 2736
2600 memset(&req, 0, sizeof req); 2737 memset(&req, 0, sizeof req);
2601 offset = cma_user_data_offset(id_priv->id.ps); 2738 offset = cma_user_data_offset(id_priv);
2602 req.private_data_len = offset + conn_param->private_data_len; 2739 req.private_data_len = offset + conn_param->private_data_len;
2603 if (req.private_data_len < conn_param->private_data_len) 2740 if (req.private_data_len < conn_param->private_data_len)
2604 return -EINVAL; 2741 return -EINVAL;
2605 2742
2606 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2743 if (req.private_data_len) {
2607 if (!private_data) 2744 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2608 return -ENOMEM; 2745 if (!private_data)
2746 return -ENOMEM;
2747 } else {
2748 private_data = NULL;
2749 }
2609 2750
2610 if (conn_param->private_data && conn_param->private_data_len) 2751 if (conn_param->private_data && conn_param->private_data_len)
2611 memcpy(private_data + offset, conn_param->private_data, 2752 memcpy(private_data + offset, conn_param->private_data,
@@ -2619,17 +2760,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2619 id_priv->cm_id.ib = id; 2760 id_priv->cm_id.ib = id;
2620 2761
2621 route = &id_priv->id.route; 2762 route = &id_priv->id.route;
2622 ret = cma_format_hdr(private_data, id_priv->id.ps, route); 2763 if (private_data) {
2623 if (ret) 2764 ret = cma_format_hdr(private_data, id_priv);
2624 goto out; 2765 if (ret)
2625 req.private_data = private_data; 2766 goto out;
2767 req.private_data = private_data;
2768 }
2626 2769
2627 req.primary_path = &route->path_rec[0]; 2770 req.primary_path = &route->path_rec[0];
2628 if (route->num_paths == 2) 2771 if (route->num_paths == 2)
2629 req.alternate_path = &route->path_rec[1]; 2772 req.alternate_path = &route->path_rec[1];
2630 2773
2631 req.service_id = cma_get_service_id(id_priv->id.ps, 2774 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
2632 (struct sockaddr *) &route->addr.dst_addr);
2633 req.qp_num = id_priv->qp_num; 2775 req.qp_num = id_priv->qp_num;
2634 req.qp_type = id_priv->id.qp_type; 2776 req.qp_type = id_priv->id.qp_type;
2635 req.starting_psn = id_priv->seq_num; 2777 req.starting_psn = id_priv->seq_num;
@@ -2668,10 +2810,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
2668 2810
2669 id_priv->cm_id.iw = cm_id; 2811 id_priv->cm_id.iw = cm_id;
2670 2812
2671 sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; 2813 sin = (struct sockaddr_in *) cma_src_addr(id_priv);
2672 cm_id->local_addr = *sin; 2814 cm_id->local_addr = *sin;
2673 2815
2674 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; 2816 sin = (struct sockaddr_in *) cma_dst_addr(id_priv);
2675 cm_id->remote_addr = *sin; 2817 cm_id->remote_addr = *sin;
2676 2818
2677 ret = cma_modify_qp_rtr(id_priv, conn_param); 2819 ret = cma_modify_qp_rtr(id_priv, conn_param);
@@ -2789,7 +2931,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
2789} 2931}
2790 2932
2791static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 2933static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2792 enum ib_cm_sidr_status status, 2934 enum ib_cm_sidr_status status, u32 qkey,
2793 const void *private_data, int private_data_len) 2935 const void *private_data, int private_data_len)
2794{ 2936{
2795 struct ib_cm_sidr_rep_param rep; 2937 struct ib_cm_sidr_rep_param rep;
@@ -2798,7 +2940,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2798 memset(&rep, 0, sizeof rep); 2940 memset(&rep, 0, sizeof rep);
2799 rep.status = status; 2941 rep.status = status;
2800 if (status == IB_SIDR_SUCCESS) { 2942 if (status == IB_SIDR_SUCCESS) {
2801 ret = cma_set_qkey(id_priv); 2943 ret = cma_set_qkey(id_priv, qkey);
2802 if (ret) 2944 if (ret)
2803 return ret; 2945 return ret;
2804 rep.qp_num = id_priv->qp_num; 2946 rep.qp_num = id_priv->qp_num;
@@ -2832,11 +2974,12 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2832 if (id->qp_type == IB_QPT_UD) { 2974 if (id->qp_type == IB_QPT_UD) {
2833 if (conn_param) 2975 if (conn_param)
2834 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2976 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2977 conn_param->qkey,
2835 conn_param->private_data, 2978 conn_param->private_data,
2836 conn_param->private_data_len); 2979 conn_param->private_data_len);
2837 else 2980 else
2838 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2981 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2839 NULL, 0); 2982 0, NULL, 0);
2840 } else { 2983 } else {
2841 if (conn_param) 2984 if (conn_param)
2842 ret = cma_accept_ib(id_priv, conn_param); 2985 ret = cma_accept_ib(id_priv, conn_param);
@@ -2897,7 +3040,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2897 switch (rdma_node_get_transport(id->device->node_type)) { 3040 switch (rdma_node_get_transport(id->device->node_type)) {
2898 case RDMA_TRANSPORT_IB: 3041 case RDMA_TRANSPORT_IB:
2899 if (id->qp_type == IB_QPT_UD) 3042 if (id->qp_type == IB_QPT_UD)
2900 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 3043 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,
2901 private_data, private_data_len); 3044 private_data, private_data_len);
2902 else 3045 else
2903 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3046 ret = ib_send_cm_rej(id_priv->cm_id.ib,
@@ -2958,6 +3101,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2958 cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) 3101 cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
2959 return 0; 3102 return 0;
2960 3103
3104 if (!status)
3105 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
2961 mutex_lock(&id_priv->qp_mutex); 3106 mutex_lock(&id_priv->qp_mutex);
2962 if (!status && id_priv->id.qp) 3107 if (!status && id_priv->id.qp)
2963 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3108 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
@@ -3004,6 +3149,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
3004 0xFF10A01B)) { 3149 0xFF10A01B)) {
3005 /* IPv6 address is an SA assigned MGID. */ 3150 /* IPv6 address is an SA assigned MGID. */
3006 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3151 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
3152 } else if (addr->sa_family == AF_IB) {
3153 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
3007 } else if ((addr->sa_family == AF_INET6)) { 3154 } else if ((addr->sa_family == AF_INET6)) {
3008 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 3155 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
3009 if (id_priv->id.ps == RDMA_PS_UDP) 3156 if (id_priv->id.ps == RDMA_PS_UDP)
@@ -3031,9 +3178,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3031 if (ret) 3178 if (ret)
3032 return ret; 3179 return ret;
3033 3180
3181 ret = cma_set_qkey(id_priv, 0);
3182 if (ret)
3183 return ret;
3184
3034 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 3185 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
3035 if (id_priv->id.ps == RDMA_PS_UDP) 3186 rec.qkey = cpu_to_be32(id_priv->qkey);
3036 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
3037 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3187 rdma_addr_get_sgid(dev_addr, &rec.port_gid);
3038 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3188 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
3039 rec.join_state = 1; 3189 rec.join_state = 1;
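
With the qkey now resolved once through cma_set_qkey() (whose body is outside this excerpt), the multicast join simply reuses id_priv->qkey instead of hard-coding RDMA_UDP_QKEY. The sketch below is a rough, hypothetical rendering of the policy the call sites imply: a caller-supplied qkey wins, otherwise the RDMA_PS_UDP default applies; the RDMA_PS_IPOIB case, where the real code queries the SA for the group qkey, is deliberately omitted.

    #include <stdint.h>

    #define RDMA_UDP_QKEY 0x01234567   /* default UD qkey from <rdma/rdma_cm.h> */

    /* Hypothetical helper, not the real cma_set_qkey(): a caller-supplied
     * qkey wins; otherwise fall back to the port-space default.  Returning 0
     * means no qkey could be chosen here. */
    uint32_t choose_qkey(uint32_t requested, int ps_is_udp)
    {
            if (requested)
                    return requested;
            return ps_is_udp ? RDMA_UDP_QKEY : 0;
    }
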
@@ -3170,7 +3320,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
3170 if (!mc) 3320 if (!mc)
3171 return -ENOMEM; 3321 return -ENOMEM;
3172 3322
3173 memcpy(&mc->addr, addr, ip_addr_size(addr)); 3323 memcpy(&mc->addr, addr, rdma_addr_size(addr));
3174 mc->context = context; 3324 mc->context = context;
3175 mc->id_priv = id_priv; 3325 mc->id_priv = id_priv;
3176 3326
@@ -3215,7 +3365,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3215 id_priv = container_of(id, struct rdma_id_private, id); 3365 id_priv = container_of(id, struct rdma_id_private, id);
3216 spin_lock_irq(&id_priv->lock); 3366 spin_lock_irq(&id_priv->lock);
3217 list_for_each_entry(mc, &id_priv->mc_list, list) { 3367 list_for_each_entry(mc, &id_priv->mc_list, list) {
3218 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { 3368 if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
3219 list_del(&mc->list); 3369 list_del(&mc->list);
3220 spin_unlock_irq(&id_priv->lock); 3370 spin_unlock_irq(&id_priv->lock);
3221 3371
@@ -3436,33 +3586,16 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
3436 id_stats->bound_dev_if = 3586 id_stats->bound_dev_if =
3437 id->route.addr.dev_addr.bound_dev_if; 3587 id->route.addr.dev_addr.bound_dev_if;
3438 3588
3439 if (id->route.addr.src_addr.ss_family == AF_INET) { 3589 if (ibnl_put_attr(skb, nlh,
3440 if (ibnl_put_attr(skb, nlh, 3590 rdma_addr_size(cma_src_addr(id_priv)),
3441 sizeof(struct sockaddr_in), 3591 cma_src_addr(id_priv),
3442 &id->route.addr.src_addr, 3592 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
3443 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { 3593 goto out;
3444 goto out; 3594 if (ibnl_put_attr(skb, nlh,
3445 } 3595 rdma_addr_size(cma_src_addr(id_priv)),
3446 if (ibnl_put_attr(skb, nlh, 3596 cma_dst_addr(id_priv),
3447 sizeof(struct sockaddr_in), 3597 RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
3448 &id->route.addr.dst_addr, 3598 goto out;
3449 RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3450 goto out;
3451 }
3452 } else if (id->route.addr.src_addr.ss_family == AF_INET6) {
3453 if (ibnl_put_attr(skb, nlh,
3454 sizeof(struct sockaddr_in6),
3455 &id->route.addr.src_addr,
3456 RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) {
3457 goto out;
3458 }
3459 if (ibnl_put_attr(skb, nlh,
3460 sizeof(struct sockaddr_in6),
3461 &id->route.addr.dst_addr,
3462 RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) {
3463 goto out;
3464 }
3465 }
3466 3599
3467 id_stats->pid = id_priv->owner; 3600 id_stats->pid = id_priv->owner;
3468 id_stats->port_space = id->ps; 3601 id_stats->port_space = id->ps;
@@ -3527,7 +3660,6 @@ static void __exit cma_cleanup(void)
3527 rdma_addr_unregister_client(&addr_client); 3660 rdma_addr_unregister_client(&addr_client);
3528 ib_sa_unregister_client(&sa_client); 3661 ib_sa_unregister_client(&sa_client);
3529 destroy_workqueue(cma_wq); 3662 destroy_workqueue(cma_wq);
3530 idr_destroy(&sdp_ps);
3531 idr_destroy(&tcp_ps); 3663 idr_destroy(&tcp_ps);
3532 idr_destroy(&udp_ps); 3664 idr_destroy(&udp_ps);
3533 idr_destroy(&ipoib_ps); 3665 idr_destroy(&ipoib_ps);
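
Several hunks above replace ip_addr_size() and the per-family sizeof() logic with rdma_addr_size(), which lives in include/rdma/ib_addr.h and is therefore outside this diffstat. The kernel-context sketch below shows the behavior those call sites rely on; AF_IB and struct sockaddr_ib are introduced elsewhere in this series, so check the headers for the exact definition.

    #include <rdma/ib.h>          /* struct sockaddr_ib, AF_IB (added by this series) */
    #include <linux/socket.h>
    #include <linux/in.h>
    #include <linux/in6.h>

    /* Expected behavior of rdma_addr_size(): the byte length of the sockaddr
     * variant selected by sa_family, or 0 for unsupported families. */
    static inline int rdma_addr_size_sketch(struct sockaddr *addr)
    {
            switch (addr->sa_family) {
            case AF_INET:
                    return sizeof(struct sockaddr_in);
            case AF_INET6:
                    return sizeof(struct sockaddr_in6);
            case AF_IB:
                    return sizeof(struct sockaddr_ib);
            default:
                    return 0;
            }
    }
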
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 934f45e79e5e..9838ca484389 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -652,6 +652,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
652} 652}
653EXPORT_SYMBOL(ib_sa_unpack_path); 653EXPORT_SYMBOL(ib_sa_unpack_path);
654 654
655void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute)
656{
657 ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute);
658}
659EXPORT_SYMBOL(ib_sa_pack_path);
660
655static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, 661static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
656 int status, 662 int status,
657 struct ib_sa_mad *mad) 663 struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 99904f7d59e3..cde1e7b5b85d 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -545,8 +545,10 @@ static int add_port(struct ib_device *device, int port_num,
545 545
546 p->gid_group.name = "gids"; 546 p->gid_group.name = "gids";
547 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); 547 p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
548 if (!p->gid_group.attrs) 548 if (!p->gid_group.attrs) {
549 ret = -ENOMEM;
549 goto err_remove_pma; 550 goto err_remove_pma;
551 }
550 552
551 ret = sysfs_create_group(&p->kobj, &p->gid_group); 553 ret = sysfs_create_group(&p->kobj, &p->gid_group);
552 if (ret) 554 if (ret)
@@ -555,8 +557,10 @@ static int add_port(struct ib_device *device, int port_num,
555 p->pkey_group.name = "pkeys"; 557 p->pkey_group.name = "pkeys";
556 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, 558 p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
557 attr.pkey_tbl_len); 559 attr.pkey_tbl_len);
558 if (!p->pkey_group.attrs) 560 if (!p->pkey_group.attrs) {
561 ret = -ENOMEM;
559 goto err_remove_gid; 562 goto err_remove_gid;
563 }
560 564
561 ret = sysfs_create_group(&p->kobj, &p->pkey_group); 565 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
562 if (ret) 566 if (ret)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 5ca44cd9b00c..b0f189be543b 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -47,6 +47,8 @@
47#include <rdma/ib_marshall.h> 47#include <rdma/ib_marshall.h>
48#include <rdma/rdma_cm.h> 48#include <rdma/rdma_cm.h>
49#include <rdma/rdma_cm_ib.h> 49#include <rdma/rdma_cm_ib.h>
50#include <rdma/ib_addr.h>
51#include <rdma/ib.h>
50 52
51MODULE_AUTHOR("Sean Hefty"); 53MODULE_AUTHOR("Sean Hefty");
52MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); 54MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -510,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
510 return ret; 512 return ret;
511} 513}
512 514
513static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, 515static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,
514 int in_len, int out_len) 516 int in_len, int out_len)
515{ 517{
516 struct rdma_ucm_bind_addr cmd; 518 struct rdma_ucm_bind_ip cmd;
517 struct ucma_context *ctx; 519 struct ucma_context *ctx;
518 int ret; 520 int ret;
519 521
@@ -529,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,
529 return ret; 531 return ret;
530} 532}
531 533
534static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf,
535 int in_len, int out_len)
536{
537 struct rdma_ucm_bind cmd;
538 struct sockaddr *addr;
539 struct ucma_context *ctx;
540 int ret;
541
542 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
543 return -EFAULT;
544
545 addr = (struct sockaddr *) &cmd.addr;
546 if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr)))
547 return -EINVAL;
548
549 ctx = ucma_get_ctx(file, cmd.id);
550 if (IS_ERR(ctx))
551 return PTR_ERR(ctx);
552
553 ret = rdma_bind_addr(ctx->cm_id, addr);
554 ucma_put_ctx(ctx);
555 return ret;
556}
557
558static ssize_t ucma_resolve_ip(struct ucma_file *file,
559 const char __user *inbuf,
560 int in_len, int out_len)
561{
562 struct rdma_ucm_resolve_ip cmd;
563 struct ucma_context *ctx;
564 int ret;
565
566 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
567 return -EFAULT;
568
569 ctx = ucma_get_ctx(file, cmd.id);
570 if (IS_ERR(ctx))
571 return PTR_ERR(ctx);
572
573 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr,
574 (struct sockaddr *) &cmd.dst_addr,
575 cmd.timeout_ms);
576 ucma_put_ctx(ctx);
577 return ret;
578}
579
532static ssize_t ucma_resolve_addr(struct ucma_file *file, 580static ssize_t ucma_resolve_addr(struct ucma_file *file,
533 const char __user *inbuf, 581 const char __user *inbuf,
534 int in_len, int out_len) 582 int in_len, int out_len)
535{ 583{
536 struct rdma_ucm_resolve_addr cmd; 584 struct rdma_ucm_resolve_addr cmd;
585 struct sockaddr *src, *dst;
537 struct ucma_context *ctx; 586 struct ucma_context *ctx;
538 int ret; 587 int ret;
539 588
540 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 589 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
541 return -EFAULT; 590 return -EFAULT;
542 591
592 src = (struct sockaddr *) &cmd.src_addr;
593 dst = (struct sockaddr *) &cmd.dst_addr;
594 if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) ||
595 !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst)))
596 return -EINVAL;
597
543 ctx = ucma_get_ctx(file, cmd.id); 598 ctx = ucma_get_ctx(file, cmd.id);
544 if (IS_ERR(ctx)) 599 if (IS_ERR(ctx))
545 return PTR_ERR(ctx); 600 return PTR_ERR(ctx);
546 601
547 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 602 ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);
548 (struct sockaddr *) &cmd.dst_addr,
549 cmd.timeout_ms);
550 ucma_put_ctx(ctx); 603 ucma_put_ctx(ctx);
551 return ret; 604 return ret;
552} 605}
@@ -649,7 +702,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
649 const char __user *inbuf, 702 const char __user *inbuf,
650 int in_len, int out_len) 703 int in_len, int out_len)
651{ 704{
652 struct rdma_ucm_query_route cmd; 705 struct rdma_ucm_query cmd;
653 struct rdma_ucm_query_route_resp resp; 706 struct rdma_ucm_query_route_resp resp;
654 struct ucma_context *ctx; 707 struct ucma_context *ctx;
655 struct sockaddr *addr; 708 struct sockaddr *addr;
@@ -709,7 +762,162 @@ out:
709 return ret; 762 return ret;
710} 763}
711 764
712static void ucma_copy_conn_param(struct rdma_conn_param *dst, 765static void ucma_query_device_addr(struct rdma_cm_id *cm_id,
766 struct rdma_ucm_query_addr_resp *resp)
767{
768 if (!cm_id->device)
769 return;
770
771 resp->node_guid = (__force __u64) cm_id->device->node_guid;
772 resp->port_num = cm_id->port_num;
773 resp->pkey = (__force __u16) cpu_to_be16(
774 ib_addr_get_pkey(&cm_id->route.addr.dev_addr));
775}
776
777static ssize_t ucma_query_addr(struct ucma_context *ctx,
778 void __user *response, int out_len)
779{
780 struct rdma_ucm_query_addr_resp resp;
781 struct sockaddr *addr;
782 int ret = 0;
783
784 if (out_len < sizeof(resp))
785 return -ENOSPC;
786
787 memset(&resp, 0, sizeof resp);
788
789 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr;
790 resp.src_size = rdma_addr_size(addr);
791 memcpy(&resp.src_addr, addr, resp.src_size);
792
793 addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr;
794 resp.dst_size = rdma_addr_size(addr);
795 memcpy(&resp.dst_addr, addr, resp.dst_size);
796
797 ucma_query_device_addr(ctx->cm_id, &resp);
798
799 if (copy_to_user(response, &resp, sizeof(resp)))
800 ret = -EFAULT;
801
802 return ret;
803}
804
805static ssize_t ucma_query_path(struct ucma_context *ctx,
806 void __user *response, int out_len)
807{
808 struct rdma_ucm_query_path_resp *resp;
809 int i, ret = 0;
810
811 if (out_len < sizeof(*resp))
812 return -ENOSPC;
813
814 resp = kzalloc(out_len, GFP_KERNEL);
815 if (!resp)
816 return -ENOMEM;
817
818 resp->num_paths = ctx->cm_id->route.num_paths;
819 for (i = 0, out_len -= sizeof(*resp);
820 i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data);
821 i++, out_len -= sizeof(struct ib_path_rec_data)) {
822
823 resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY |
824 IB_PATH_BIDIRECTIONAL;
825 ib_sa_pack_path(&ctx->cm_id->route.path_rec[i],
826 &resp->path_data[i].path_rec);
827 }
828
829 if (copy_to_user(response, resp,
830 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data))))
831 ret = -EFAULT;
832
833 kfree(resp);
834 return ret;
835}
836
837static ssize_t ucma_query_gid(struct ucma_context *ctx,
838 void __user *response, int out_len)
839{
840 struct rdma_ucm_query_addr_resp resp;
841 struct sockaddr_ib *addr;
842 int ret = 0;
843
844 if (out_len < sizeof(resp))
845 return -ENOSPC;
846
847 memset(&resp, 0, sizeof resp);
848
849 ucma_query_device_addr(ctx->cm_id, &resp);
850
851 addr = (struct sockaddr_ib *) &resp.src_addr;
852 resp.src_size = sizeof(*addr);
853 if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) {
854 memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size);
855 } else {
856 addr->sib_family = AF_IB;
857 addr->sib_pkey = (__force __be16) resp.pkey;
858 rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr,
859 (union ib_gid *) &addr->sib_addr);
860 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
861 &ctx->cm_id->route.addr.src_addr);
862 }
863
864 addr = (struct sockaddr_ib *) &resp.dst_addr;
865 resp.dst_size = sizeof(*addr);
866 if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) {
867 memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size);
868 } else {
869 addr->sib_family = AF_IB;
870 addr->sib_pkey = (__force __be16) resp.pkey;
871 rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr,
872 (union ib_gid *) &addr->sib_addr);
873 addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *)
874 &ctx->cm_id->route.addr.dst_addr);
875 }
876
877 if (copy_to_user(response, &resp, sizeof(resp)))
878 ret = -EFAULT;
879
880 return ret;
881}
882
883static ssize_t ucma_query(struct ucma_file *file,
884 const char __user *inbuf,
885 int in_len, int out_len)
886{
887 struct rdma_ucm_query cmd;
888 struct ucma_context *ctx;
889 void __user *response;
890 int ret;
891
892 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
893 return -EFAULT;
894
895 response = (void __user *)(unsigned long) cmd.response;
896 ctx = ucma_get_ctx(file, cmd.id);
897 if (IS_ERR(ctx))
898 return PTR_ERR(ctx);
899
900 switch (cmd.option) {
901 case RDMA_USER_CM_QUERY_ADDR:
902 ret = ucma_query_addr(ctx, response, out_len);
903 break;
904 case RDMA_USER_CM_QUERY_PATH:
905 ret = ucma_query_path(ctx, response, out_len);
906 break;
907 case RDMA_USER_CM_QUERY_GID:
908 ret = ucma_query_gid(ctx, response, out_len);
909 break;
910 default:
911 ret = -ENOSYS;
912 break;
913 }
914
915 ucma_put_ctx(ctx);
916 return ret;
917}
918
919static void ucma_copy_conn_param(struct rdma_cm_id *id,
920 struct rdma_conn_param *dst,
713 struct rdma_ucm_conn_param *src) 921 struct rdma_ucm_conn_param *src)
714{ 922{
715 dst->private_data = src->private_data; 923 dst->private_data = src->private_data;
@@ -721,6 +929,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst,
721 dst->rnr_retry_count = src->rnr_retry_count; 929 dst->rnr_retry_count = src->rnr_retry_count;
722 dst->srq = src->srq; 930 dst->srq = src->srq;
723 dst->qp_num = src->qp_num; 931 dst->qp_num = src->qp_num;
932 dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0;
724} 933}
725 934
726static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, 935static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
@@ -741,7 +950,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,
741 if (IS_ERR(ctx)) 950 if (IS_ERR(ctx))
742 return PTR_ERR(ctx); 951 return PTR_ERR(ctx);
743 952
744 ucma_copy_conn_param(&conn_param, &cmd.conn_param); 953 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
745 ret = rdma_connect(ctx->cm_id, &conn_param); 954 ret = rdma_connect(ctx->cm_id, &conn_param);
746 ucma_put_ctx(ctx); 955 ucma_put_ctx(ctx);
747 return ret; 956 return ret;
@@ -784,7 +993,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
784 return PTR_ERR(ctx); 993 return PTR_ERR(ctx);
785 994
786 if (cmd.conn_param.valid) { 995 if (cmd.conn_param.valid) {
787 ucma_copy_conn_param(&conn_param, &cmd.conn_param); 996 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
788 mutex_lock(&file->mut); 997 mutex_lock(&file->mut);
789 ret = rdma_accept(ctx->cm_id, &conn_param); 998 ret = rdma_accept(ctx->cm_id, &conn_param);
790 if (!ret) 999 if (!ret)
@@ -1020,23 +1229,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,
1020 return ret; 1229 return ret;
1021} 1230}
1022 1231
1023static ssize_t ucma_join_multicast(struct ucma_file *file, 1232static ssize_t ucma_process_join(struct ucma_file *file,
1024 const char __user *inbuf, 1233 struct rdma_ucm_join_mcast *cmd, int out_len)
1025 int in_len, int out_len)
1026{ 1234{
1027 struct rdma_ucm_join_mcast cmd;
1028 struct rdma_ucm_create_id_resp resp; 1235 struct rdma_ucm_create_id_resp resp;
1029 struct ucma_context *ctx; 1236 struct ucma_context *ctx;
1030 struct ucma_multicast *mc; 1237 struct ucma_multicast *mc;
1238 struct sockaddr *addr;
1031 int ret; 1239 int ret;
1032 1240
1033 if (out_len < sizeof(resp)) 1241 if (out_len < sizeof(resp))
1034 return -ENOSPC; 1242 return -ENOSPC;
1035 1243
1036 if (copy_from_user(&cmd, inbuf, sizeof(cmd))) 1244 addr = (struct sockaddr *) &cmd->addr;
1037 return -EFAULT; 1245 if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr)))
1246 return -EINVAL;
1038 1247
1039 ctx = ucma_get_ctx(file, cmd.id); 1248 ctx = ucma_get_ctx(file, cmd->id);
1040 if (IS_ERR(ctx)) 1249 if (IS_ERR(ctx))
1041 return PTR_ERR(ctx); 1250 return PTR_ERR(ctx);
1042 1251
@@ -1047,14 +1256,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,
1047 goto err1; 1256 goto err1;
1048 } 1257 }
1049 1258
1050 mc->uid = cmd.uid; 1259 mc->uid = cmd->uid;
1051 memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); 1260 memcpy(&mc->addr, addr, cmd->addr_size);
1052 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); 1261 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);
1053 if (ret) 1262 if (ret)
1054 goto err2; 1263 goto err2;
1055 1264
1056 resp.id = mc->id; 1265 resp.id = mc->id;
1057 if (copy_to_user((void __user *)(unsigned long)cmd.response, 1266 if (copy_to_user((void __user *)(unsigned long) cmd->response,
1058 &resp, sizeof(resp))) { 1267 &resp, sizeof(resp))) {
1059 ret = -EFAULT; 1268 ret = -EFAULT;
1060 goto err3; 1269 goto err3;
@@ -1079,6 +1288,38 @@ err1:
1079 return ret; 1288 return ret;
1080} 1289}
1081 1290
1291static ssize_t ucma_join_ip_multicast(struct ucma_file *file,
1292 const char __user *inbuf,
1293 int in_len, int out_len)
1294{
1295 struct rdma_ucm_join_ip_mcast cmd;
1296 struct rdma_ucm_join_mcast join_cmd;
1297
1298 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1299 return -EFAULT;
1300
1301 join_cmd.response = cmd.response;
1302 join_cmd.uid = cmd.uid;
1303 join_cmd.id = cmd.id;
1304 join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr);
1305 join_cmd.reserved = 0;
1306 memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size);
1307
1308 return ucma_process_join(file, &join_cmd, out_len);
1309}
1310
1311static ssize_t ucma_join_multicast(struct ucma_file *file,
1312 const char __user *inbuf,
1313 int in_len, int out_len)
1314{
1315 struct rdma_ucm_join_mcast cmd;
1316
1317 if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
1318 return -EFAULT;
1319
1320 return ucma_process_join(file, &cmd, out_len);
1321}
1322
1082static ssize_t ucma_leave_multicast(struct ucma_file *file, 1323static ssize_t ucma_leave_multicast(struct ucma_file *file,
1083 const char __user *inbuf, 1324 const char __user *inbuf,
1084 int in_len, int out_len) 1325 int in_len, int out_len)
@@ -1221,25 +1462,29 @@ file_put:
1221static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, 1462static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,
1222 const char __user *inbuf, 1463 const char __user *inbuf,
1223 int in_len, int out_len) = { 1464 int in_len, int out_len) = {
1224 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, 1465 [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id,
1225 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, 1466 [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id,
1226 [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, 1467 [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip,
1227 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, 1468 [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip,
1228 [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, 1469 [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route,
1229 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, 1470 [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route,
1230 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, 1471 [RDMA_USER_CM_CMD_CONNECT] = ucma_connect,
1231 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, 1472 [RDMA_USER_CM_CMD_LISTEN] = ucma_listen,
1232 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, 1473 [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept,
1233 [RDMA_USER_CM_CMD_REJECT] = ucma_reject, 1474 [RDMA_USER_CM_CMD_REJECT] = ucma_reject,
1234 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, 1475 [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect,
1235 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, 1476 [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr,
1236 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, 1477 [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event,
1237 [RDMA_USER_CM_CMD_GET_OPTION] = NULL, 1478 [RDMA_USER_CM_CMD_GET_OPTION] = NULL,
1238 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, 1479 [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option,
1239 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, 1480 [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify,
1240 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, 1481 [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast,
1241 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, 1482 [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast,
1242 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id 1483 [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id,
1484 [RDMA_USER_CM_CMD_QUERY] = ucma_query,
1485 [RDMA_USER_CM_CMD_BIND] = ucma_bind,
1486 [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr,
1487 [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast
1243}; 1488};
1244 1489
1245static ssize_t ucma_write(struct file *filp, const char __user *buf, 1490static ssize_t ucma_write(struct file *filp, const char __user *buf,
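
The reworked ucma_cmd_table keeps the renamed *_IP entries (BIND_IP, RESOLVE_IP, JOIN_IP_MCAST) in the slots of the old address commands and appends the new generic QUERY, BIND, RESOLVE_ADDR and JOIN_MCAST handlers, so ucma_write() can keep dispatching by command index. A toy, self-contained sketch of that table-driven dispatch pattern is shown below; the names and command numbers are illustrative only, not the real rdma_ucm ABI.

    #include <stddef.h>
    #include <stdio.h>

    typedef int (*cmd_handler)(const void *inbuf, size_t in_len);

    static int handle_create(const void *in, size_t len) { (void)in; (void)len; return 0; }
    static int handle_query(const void *in, size_t len)  { (void)in; (void)len; return 0; }

    /* Index == command number, as in ucma_cmd_table[]; out-of-range indices
     * and NULL slots are rejected before dispatch, much as ucma_write() does. */
    static const cmd_handler cmd_table[] = {
            handle_create,          /* 0 */
            NULL,                   /* 1: not implemented */
            handle_query,           /* 2 */
    };

    static int dispatch(unsigned int cmd, const void *inbuf, size_t in_len)
    {
            if (cmd >= sizeof(cmd_table) / sizeof(cmd_table[0]))
                    return -1;      /* unknown command */
            if (!cmd_table[cmd])
                    return -1;      /* command without a handler */
            return cmd_table[cmd](inbuf, in_len);
    }

    int main(void)
    {
            printf("%d %d\n", dispatch(2, NULL, 0), dispatch(9, NULL, 0));
            return 0;
    }
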
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a7d00f6b3bc1..b3c07b0c9f26 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -334,7 +334,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
334 334
335 resp.num_comp_vectors = file->device->num_comp_vectors; 335 resp.num_comp_vectors = file->device->num_comp_vectors;
336 336
337 ret = get_unused_fd(); 337 ret = get_unused_fd_flags(O_CLOEXEC);
338 if (ret < 0) 338 if (ret < 0)
339 goto err_free; 339 goto err_free;
340 resp.async_fd = ret; 340 resp.async_fd = ret;
@@ -1184,7 +1184,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
1184 if (copy_from_user(&cmd, buf, sizeof cmd)) 1184 if (copy_from_user(&cmd, buf, sizeof cmd))
1185 return -EFAULT; 1185 return -EFAULT;
1186 1186
1187 ret = get_unused_fd(); 1187 ret = get_unused_fd_flags(O_CLOEXEC);
1188 if (ret < 0) 1188 if (ret < 0)
1189 return ret; 1189 return ret;
1190 resp.fd = ret; 1190 resp.fd = ret;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index e5649e8b215d..b57c0befd962 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -883,7 +883,8 @@ u16 iwch_rqes_posted(struct iwch_qp *qhp)
883{ 883{
884 union t3_wr *wqe = qhp->wq.queue; 884 union t3_wr *wqe = qhp->wq.queue;
885 u16 count = 0; 885 u16 count = 0;
886 while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) { 886
887 while (count < USHRT_MAX && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
887 count++; 888 count++;
888 wqe++; 889 wqe++;
889 } 890 }
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 982e3efd98d3..cd8d290a09fc 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -211,6 +211,7 @@ static int ehca_create_slab_caches(void)
211 if (!ctblk_cache) { 211 if (!ctblk_cache) {
212 ehca_gen_err("Cannot create ctblk SLAB cache."); 212 ehca_gen_err("Cannot create ctblk SLAB cache.");
213 ehca_cleanup_small_qp_cache(); 213 ehca_cleanup_small_qp_cache();
214 ret = -ENOMEM;
214 goto create_slab_caches6; 215 goto create_slab_caches6;
215 } 216 }
216#endif 217#endif
diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
new file mode 100644
index 000000000000..8e6aebfaf8a4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -0,0 +1,10 @@
1config MLX5_INFINIBAND
2 tristate "Mellanox Connect-IB HCA support"
3 depends on NETDEVICES && ETHERNET && PCI && X86
4 select NET_VENDOR_MELLANOX
5 select MLX5_CORE
6 ---help---
7 This driver provides low-level InfiniBand support for
8 Mellanox Connect-IB PCI Express host channel adapters (HCAs).
9 This is required to use InfiniBand protocols such as
10 IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
new file mode 100644
index 000000000000..4ea0135af484
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
2
3mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
new file mode 100644
index 000000000000..39ab0caefdf9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx5_ib.h"
34
35struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
36 struct mlx5_ib_ah *ah)
37{
38 if (ah_attr->ah_flags & IB_AH_GRH) {
39 memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
40 ah->av.grh_gid_fl = cpu_to_be32(ah_attr->grh.flow_label |
41 (1 << 30) |
42 ah_attr->grh.sgid_index << 20);
43 ah->av.hop_limit = ah_attr->grh.hop_limit;
44 ah->av.tclass = ah_attr->grh.traffic_class;
45 }
46
47 ah->av.rlid = cpu_to_be16(ah_attr->dlid);
48 ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
49 ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
50
51 return &ah->ibah;
52}
53
54struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
55{
56 struct mlx5_ib_ah *ah;
57
58 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
59 if (!ah)
60 return ERR_PTR(-ENOMEM);
61
62 return create_ib_ah(ah_attr, ah); /* never fails */
63}
64
65int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
66{
67 struct mlx5_ib_ah *ah = to_mah(ibah);
68 u32 tmp;
69
70 memset(ah_attr, 0, sizeof(*ah_attr));
71
72 tmp = be32_to_cpu(ah->av.grh_gid_fl);
73 if (tmp & (1 << 30)) {
74 ah_attr->ah_flags = IB_AH_GRH;
75 ah_attr->grh.sgid_index = (tmp >> 20) & 0xff;
76 ah_attr->grh.flow_label = tmp & 0xfffff;
77 memcpy(&ah_attr->grh.dgid, ah->av.rgid, 16);
78 ah_attr->grh.hop_limit = ah->av.hop_limit;
79 ah_attr->grh.traffic_class = ah->av.tclass;
80 }
81 ah_attr->dlid = be16_to_cpu(ah->av.rlid);
82 ah_attr->static_rate = ah->av.stat_rate_sl >> 4;
83 ah_attr->sl = ah->av.stat_rate_sl & 0xf;
84
85 return 0;
86}
87
88int mlx5_ib_destroy_ah(struct ib_ah *ah)
89{
90 kfree(to_mah(ah));
91 return 0;
92}
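
create_ib_ah() and mlx5_ib_query_ah() above pack the GRH-valid flag, SGID index and flow label into the single 32-bit av.grh_gid_fl word. The standalone sketch below reproduces that bit layout as the code above uses it (bit 30 = GRH valid, bits 20-27 = SGID index, bits 0-19 = flow label); it works in host byte order, whereas the driver additionally byte-swaps with cpu_to_be32()/be32_to_cpu().

    #include <stdint.h>
    #include <stdio.h>

    #define GRH_VALID (1u << 30)

    /* Pack the fields the same way create_ib_ah() does. */
    static uint32_t pack_grh_gid_fl(uint8_t sgid_index, uint32_t flow_label)
    {
            return GRH_VALID | ((uint32_t)sgid_index << 20) | (flow_label & 0xfffff);
    }

    int main(void)
    {
            uint32_t w = pack_grh_gid_fl(3, 0x12345);

            /* Unpack as mlx5_ib_query_ah() does. */
            if (w & GRH_VALID)
                    printf("sgid_index %u, flow_label 0x%05x\n",
                           (w >> 20) & 0xff, w & 0xfffff);
            return 0;
    }
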
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
new file mode 100644
index 000000000000..344ab03948a3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -0,0 +1,843 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
39{
40 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
41
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
46{
47 struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
48 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
49 struct ib_cq *ibcq = &cq->ibcq;
50 struct ib_event event;
51
52 if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
53 mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
54 type, mcq->cqn);
55 return;
56 }
57
58 if (ibcq->event_handler) {
59 event.device = &dev->ib_dev;
60 event.event = IB_EVENT_CQ_ERR;
61 event.element.cq = ibcq;
62 ibcq->event_handler(&event, ibcq->cq_context);
63 }
64}
65
66static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
67{
68 return mlx5_buf_offset(&buf->buf, n * size);
69}
70
71static void *get_cqe(struct mlx5_ib_cq *cq, int n)
72{
73 return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
74}
75
76static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
77{
78 void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
79 struct mlx5_cqe64 *cqe64;
80
81 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
82 return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
83 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
84}
85
86static void *next_cqe_sw(struct mlx5_ib_cq *cq)
87{
88 return get_sw_cqe(cq, cq->mcq.cons_index);
89}
90
91static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
92{
93 switch (wq->wr_data[idx]) {
94 case MLX5_IB_WR_UMR:
95 return 0;
96
97 case IB_WR_LOCAL_INV:
98 return IB_WC_LOCAL_INV;
99
100 case IB_WR_FAST_REG_MR:
101 return IB_WC_FAST_REG_MR;
102
103 default:
104 pr_warn("unknown completion status\n");
105 return 0;
106 }
107}
108
109static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
110 struct mlx5_ib_wq *wq, int idx)
111{
112 wc->wc_flags = 0;
113 switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
114 case MLX5_OPCODE_RDMA_WRITE_IMM:
115 wc->wc_flags |= IB_WC_WITH_IMM;
116 case MLX5_OPCODE_RDMA_WRITE:
117 wc->opcode = IB_WC_RDMA_WRITE;
118 break;
119 case MLX5_OPCODE_SEND_IMM:
120 wc->wc_flags |= IB_WC_WITH_IMM;
121 case MLX5_OPCODE_SEND:
122 case MLX5_OPCODE_SEND_INVAL:
123 wc->opcode = IB_WC_SEND;
124 break;
125 case MLX5_OPCODE_RDMA_READ:
126 wc->opcode = IB_WC_RDMA_READ;
127 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
128 break;
129 case MLX5_OPCODE_ATOMIC_CS:
130 wc->opcode = IB_WC_COMP_SWAP;
131 wc->byte_len = 8;
132 break;
133 case MLX5_OPCODE_ATOMIC_FA:
134 wc->opcode = IB_WC_FETCH_ADD;
135 wc->byte_len = 8;
136 break;
137 case MLX5_OPCODE_ATOMIC_MASKED_CS:
138 wc->opcode = IB_WC_MASKED_COMP_SWAP;
139 wc->byte_len = 8;
140 break;
141 case MLX5_OPCODE_ATOMIC_MASKED_FA:
142 wc->opcode = IB_WC_MASKED_FETCH_ADD;
143 wc->byte_len = 8;
144 break;
145 case MLX5_OPCODE_BIND_MW:
146 wc->opcode = IB_WC_BIND_MW;
147 break;
148 case MLX5_OPCODE_UMR:
149 wc->opcode = get_umr_comp(wq, idx);
150 break;
151 }
152}
153
154enum {
155 MLX5_GRH_IN_BUFFER = 1,
156 MLX5_GRH_IN_CQE = 2,
157};
158
159static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
160 struct mlx5_ib_qp *qp)
161{
162 struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
163 struct mlx5_ib_srq *srq;
164 struct mlx5_ib_wq *wq;
165 u16 wqe_ctr;
166 u8 g;
167
168 if (qp->ibqp.srq || qp->ibqp.xrcd) {
169 struct mlx5_core_srq *msrq = NULL;
170
171 if (qp->ibqp.xrcd) {
172 msrq = mlx5_core_get_srq(&dev->mdev,
173 be32_to_cpu(cqe->srqn));
174 srq = to_mibsrq(msrq);
175 } else {
176 srq = to_msrq(qp->ibqp.srq);
177 }
178 if (srq) {
179 wqe_ctr = be16_to_cpu(cqe->wqe_counter);
180 wc->wr_id = srq->wrid[wqe_ctr];
181 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
182 if (msrq && atomic_dec_and_test(&msrq->refcount))
183 complete(&msrq->free);
184 }
185 } else {
186 wq = &qp->rq;
187 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
188 ++wq->tail;
189 }
190 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
191
192 switch (cqe->op_own >> 4) {
193 case MLX5_CQE_RESP_WR_IMM:
194 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
195 wc->wc_flags = IB_WC_WITH_IMM;
196 wc->ex.imm_data = cqe->imm_inval_pkey;
197 break;
198 case MLX5_CQE_RESP_SEND:
199 wc->opcode = IB_WC_RECV;
200 wc->wc_flags = 0;
201 break;
202 case MLX5_CQE_RESP_SEND_IMM:
203 wc->opcode = IB_WC_RECV;
204 wc->wc_flags = IB_WC_WITH_IMM;
205 wc->ex.imm_data = cqe->imm_inval_pkey;
206 break;
207 case MLX5_CQE_RESP_SEND_INV:
208 wc->opcode = IB_WC_RECV;
209 wc->wc_flags = IB_WC_WITH_INVALIDATE;
210 wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
211 break;
212 }
213 wc->slid = be16_to_cpu(cqe->slid);
214 wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
215 wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
216 wc->dlid_path_bits = cqe->ml_path;
217 g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
218 wc->wc_flags |= g ? IB_WC_GRH : 0;
219 wc->pkey_index = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
220}
221
222static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
223{
224 __be32 *p = (__be32 *)cqe;
225 int i;
226
227 mlx5_ib_warn(dev, "dump error cqe\n");
228 for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
229 pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
230 be32_to_cpu(p[1]), be32_to_cpu(p[2]),
231 be32_to_cpu(p[3]));
232}
233
234static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
235 struct mlx5_err_cqe *cqe,
236 struct ib_wc *wc)
237{
238 int dump = 1;
239
240 switch (cqe->syndrome) {
241 case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
242 wc->status = IB_WC_LOC_LEN_ERR;
243 break;
244 case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
245 wc->status = IB_WC_LOC_QP_OP_ERR;
246 break;
247 case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
248 wc->status = IB_WC_LOC_PROT_ERR;
249 break;
250 case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
251 dump = 0;
252 wc->status = IB_WC_WR_FLUSH_ERR;
253 break;
254 case MLX5_CQE_SYNDROME_MW_BIND_ERR:
255 wc->status = IB_WC_MW_BIND_ERR;
256 break;
257 case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
258 wc->status = IB_WC_BAD_RESP_ERR;
259 break;
260 case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
261 wc->status = IB_WC_LOC_ACCESS_ERR;
262 break;
263 case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
264 wc->status = IB_WC_REM_INV_REQ_ERR;
265 break;
266 case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
267 wc->status = IB_WC_REM_ACCESS_ERR;
268 break;
269 case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
270 wc->status = IB_WC_REM_OP_ERR;
271 break;
272 case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
273 wc->status = IB_WC_RETRY_EXC_ERR;
274 dump = 0;
275 break;
276 case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
277 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
278 dump = 0;
279 break;
280 case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
281 wc->status = IB_WC_REM_ABORT_ERR;
282 break;
283 default:
284 wc->status = IB_WC_GENERAL_ERR;
285 break;
286 }
287
288 wc->vendor_err = cqe->vendor_err_synd;
289 if (dump)
290 dump_cqe(dev, cqe);
291}
292
293static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
294{
295 /* TBD: waiting decision
296 */
297 return 0;
298}
299
300static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
301{
302 struct mlx5_wqe_data_seg *dpseg;
303 void *addr;
304
305 dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
306 sizeof(struct mlx5_wqe_raddr_seg) +
307 sizeof(struct mlx5_wqe_atomic_seg);
308 addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
309 return addr;
310}
311
312static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
313 uint16_t idx)
314{
315 void *addr;
316 int byte_count;
317 int i;
318
319 if (!is_atomic_response(qp, idx))
320 return;
321
322 byte_count = be32_to_cpu(cqe64->byte_cnt);
323 addr = mlx5_get_atomic_laddr(qp, idx);
324
325 if (byte_count == 4) {
326 *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
327 } else {
328 for (i = 0; i < byte_count; i += 8) {
329 *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
330 addr += 8;
331 }
332 }
333
334 return;
335}
336
337static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
338 u16 tail, u16 head)
339{
340 int idx;
341
342 do {
343 idx = tail & (qp->sq.wqe_cnt - 1);
344 handle_atomic(qp, cqe64, idx);
345 if (idx == head)
346 break;
347
348 tail = qp->sq.w_list[idx].next;
349 } while (1);
350 tail = qp->sq.w_list[idx].next;
351 qp->sq.last_poll = tail;
352}
353
354static int mlx5_poll_one(struct mlx5_ib_cq *cq,
355 struct mlx5_ib_qp **cur_qp,
356 struct ib_wc *wc)
357{
358 struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
359 struct mlx5_err_cqe *err_cqe;
360 struct mlx5_cqe64 *cqe64;
361 struct mlx5_core_qp *mqp;
362 struct mlx5_ib_wq *wq;
363 uint8_t opcode;
364 uint32_t qpn;
365 u16 wqe_ctr;
366 void *cqe;
367 int idx;
368
369 cqe = next_cqe_sw(cq);
370 if (!cqe)
371 return -EAGAIN;
372
373 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
374
375 ++cq->mcq.cons_index;
376
377 /* Make sure we read CQ entry contents after we've checked the
378 * ownership bit.
379 */
380 rmb();
381
382 /* TBD: resize CQ */
383
384 qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
385 if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
386 /* We do not have to take the QP table lock here,
387 * because CQs will be locked while QPs are removed
388 * from the table.
389 */
390 mqp = __mlx5_qp_lookup(&dev->mdev, qpn);
391 if (unlikely(!mqp)) {
392 mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
393 cq->mcq.cqn, qpn);
394 return -EINVAL;
395 }
396
397 *cur_qp = to_mibqp(mqp);
398 }
399
400 wc->qp = &(*cur_qp)->ibqp;
401 opcode = cqe64->op_own >> 4;
402 switch (opcode) {
403 case MLX5_CQE_REQ:
404 wq = &(*cur_qp)->sq;
405 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
406 idx = wqe_ctr & (wq->wqe_cnt - 1);
407 handle_good_req(wc, cqe64, wq, idx);
408 handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
409 wc->wr_id = wq->wrid[idx];
410 wq->tail = wq->wqe_head[idx] + 1;
411 wc->status = IB_WC_SUCCESS;
412 break;
413 case MLX5_CQE_RESP_WR_IMM:
414 case MLX5_CQE_RESP_SEND:
415 case MLX5_CQE_RESP_SEND_IMM:
416 case MLX5_CQE_RESP_SEND_INV:
417 handle_responder(wc, cqe64, *cur_qp);
418 wc->status = IB_WC_SUCCESS;
419 break;
420 case MLX5_CQE_RESIZE_CQ:
421 break;
422 case MLX5_CQE_REQ_ERR:
423 case MLX5_CQE_RESP_ERR:
424 err_cqe = (struct mlx5_err_cqe *)cqe64;
425 mlx5_handle_error_cqe(dev, err_cqe, wc);
426 mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
427 opcode == MLX5_CQE_REQ_ERR ?
428 "Requestor" : "Responder", cq->mcq.cqn);
429 mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
430 err_cqe->syndrome, err_cqe->vendor_err_synd);
431 if (opcode == MLX5_CQE_REQ_ERR) {
432 wq = &(*cur_qp)->sq;
433 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
434 idx = wqe_ctr & (wq->wqe_cnt - 1);
435 wc->wr_id = wq->wrid[idx];
436 wq->tail = wq->wqe_head[idx] + 1;
437 } else {
438 struct mlx5_ib_srq *srq;
439
440 if ((*cur_qp)->ibqp.srq) {
441 srq = to_msrq((*cur_qp)->ibqp.srq);
442 wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
443 wc->wr_id = srq->wrid[wqe_ctr];
444 mlx5_ib_free_srq_wqe(srq, wqe_ctr);
445 } else {
446 wq = &(*cur_qp)->rq;
447 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
448 ++wq->tail;
449 }
450 }
451 break;
452 }
453
454 return 0;
455}
456
457int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
458{
459 struct mlx5_ib_cq *cq = to_mcq(ibcq);
460 struct mlx5_ib_qp *cur_qp = NULL;
461 unsigned long flags;
462 int npolled;
463 int err = 0;
464
465 spin_lock_irqsave(&cq->lock, flags);
466
467 for (npolled = 0; npolled < num_entries; npolled++) {
468 err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
469 if (err)
470 break;
471 }
472
473 if (npolled)
474 mlx5_cq_set_ci(&cq->mcq);
475
476 spin_unlock_irqrestore(&cq->lock, flags);
477
478 if (err == 0 || err == -EAGAIN)
479 return npolled;
480 else
481 return err;
482}
483
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
485{
486 mlx5_cq_arm(&to_mcq(ibcq)->mcq,
487 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
488 MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
489 to_mdev(ibcq->device)->mdev.priv.uuari.uars[0].map,
490 MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev.priv.cq_uar_lock));
491
492 return 0;
493}
494
495static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
496 int nent, int cqe_size)
497{
498 int err;
499
500 err = mlx5_buf_alloc(&dev->mdev, nent * cqe_size,
501 PAGE_SIZE * 2, &buf->buf);
502 if (err)
503 return err;
504
505 buf->cqe_size = cqe_size;
506
507 return 0;
508}
509
510static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
511{
512 mlx5_buf_free(&dev->mdev, &buf->buf);
513}
514
515static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
516 struct ib_ucontext *context, struct mlx5_ib_cq *cq,
517 int entries, struct mlx5_create_cq_mbox_in **cqb,
518 int *cqe_size, int *index, int *inlen)
519{
520 struct mlx5_ib_create_cq ucmd;
521 int page_shift;
522 int npages;
523 int ncont;
524 int err;
525
526 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)))
527 return -EFAULT;
528
529 if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
530 return -EINVAL;
531
532 *cqe_size = ucmd.cqe_size;
533
534 cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
535 entries * ucmd.cqe_size,
536 IB_ACCESS_LOCAL_WRITE, 1);
537 if (IS_ERR(cq->buf.umem)) {
538 err = PTR_ERR(cq->buf.umem);
539 return err;
540 }
541
542 err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
543 &cq->db);
544 if (err)
545 goto err_umem;
546
547 mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
548 &ncont, NULL);
549 mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
550 ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
551
552 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
553 *cqb = mlx5_vzalloc(*inlen);
554 if (!*cqb) {
555 err = -ENOMEM;
556 goto err_db;
557 }
558 mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
559 (*cqb)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
560
561 *index = to_mucontext(context)->uuari.uars[0].index;
562
563 return 0;
564
565err_db:
566 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
567
568err_umem:
569 ib_umem_release(cq->buf.umem);
570 return err;
571}
572
573static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
574{
575 mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
576 ib_umem_release(cq->buf.umem);
577}
578
579static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
580{
581 int i;
582 void *cqe;
583 struct mlx5_cqe64 *cqe64;
584
585 for (i = 0; i < nent; i++) {
586 cqe = get_cqe(cq, i);
587 cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
588 cqe64->op_own = 0xf1;
589 }
590}
591
592static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
593 int entries, int cqe_size,
594 struct mlx5_create_cq_mbox_in **cqb,
595 int *index, int *inlen)
596{
597 int err;
598
599 err = mlx5_db_alloc(&dev->mdev, &cq->db);
600 if (err)
601 return err;
602
603 cq->mcq.set_ci_db = cq->db.db;
604 cq->mcq.arm_db = cq->db.db + 1;
605 *cq->mcq.set_ci_db = 0;
606 *cq->mcq.arm_db = 0;
607 cq->mcq.cqe_sz = cqe_size;
608
609 err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
610 if (err)
611 goto err_db;
612
613 init_cq_buf(cq, entries);
614
615 *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
616 *cqb = mlx5_vzalloc(*inlen);
617 if (!*cqb) {
618 err = -ENOMEM;
619 goto err_buf;
620 }
621 mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
622
623 (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - PAGE_SHIFT;
624 *index = dev->mdev.priv.uuari.uars[0].index;
625
626 return 0;
627
628err_buf:
629 free_cq_buf(dev, &cq->buf);
630
631err_db:
632 mlx5_db_free(&dev->mdev, &cq->db);
633 return err;
634}
635
636static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
637{
638 free_cq_buf(dev, &cq->buf);
639 mlx5_db_free(&dev->mdev, &cq->db);
640}
641
642struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
643 int vector, struct ib_ucontext *context,
644 struct ib_udata *udata)
645{
646 struct mlx5_create_cq_mbox_in *cqb = NULL;
647 struct mlx5_ib_dev *dev = to_mdev(ibdev);
648 struct mlx5_ib_cq *cq;
649 int uninitialized_var(index);
650 int uninitialized_var(inlen);
651 int cqe_size;
652 int irqn;
653 int eqn;
654 int err;
655
656 entries = roundup_pow_of_two(entries + 1);
657 if (entries < 1 || entries > dev->mdev.caps.max_cqes)
658 return ERR_PTR(-EINVAL);
659
660 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
661 if (!cq)
662 return ERR_PTR(-ENOMEM);
663
664 cq->ibcq.cqe = entries - 1;
665 mutex_init(&cq->resize_mutex);
666 spin_lock_init(&cq->lock);
667 cq->resize_buf = NULL;
668 cq->resize_umem = NULL;
669
670 if (context) {
671 err = create_cq_user(dev, udata, context, cq, entries,
672 &cqb, &cqe_size, &index, &inlen);
673 if (err)
674 goto err_create;
675 } else {
676 /* for now choose 64 bytes till we have a proper interface */
677 cqe_size = 64;
678 err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
679 &index, &inlen);
680 if (err)
681 goto err_create;
682 }
683
684 cq->cqe_size = cqe_size;
685 cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
686 cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
687 err = mlx5_vector2eqn(dev, vector, &eqn, &irqn);
688 if (err)
689 goto err_cqb;
690
691 cqb->ctx.c_eqn = cpu_to_be16(eqn);
692 cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
693
694 err = mlx5_core_create_cq(&dev->mdev, &cq->mcq, cqb, inlen);
695 if (err)
696 goto err_cqb;
697
698 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
699 cq->mcq.irqn = irqn;
700 cq->mcq.comp = mlx5_ib_cq_comp;
701 cq->mcq.event = mlx5_ib_cq_event;
702
703 if (context)
704 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
705 err = -EFAULT;
706 goto err_cmd;
707 }
708
709
710 mlx5_vfree(cqb);
711 return &cq->ibcq;
712
713err_cmd:
714 mlx5_core_destroy_cq(&dev->mdev, &cq->mcq);
715
716err_cqb:
717 mlx5_vfree(cqb);
718 if (context)
719 destroy_cq_user(cq, context);
720 else
721 destroy_cq_kernel(dev, cq);
722
723err_create:
724 kfree(cq);
725
726 return ERR_PTR(err);
727}
728
729
730int mlx5_ib_destroy_cq(struct ib_cq *cq)
731{
732 struct mlx5_ib_dev *dev = to_mdev(cq->device);
733 struct mlx5_ib_cq *mcq = to_mcq(cq);
734 struct ib_ucontext *context = NULL;
735
736 if (cq->uobject)
737 context = cq->uobject->context;
738
739 mlx5_core_destroy_cq(&dev->mdev, &mcq->mcq);
740 if (context)
741 destroy_cq_user(mcq, context);
742 else
743 destroy_cq_kernel(dev, mcq);
744
745 kfree(mcq);
746
747 return 0;
748}
749
750static int is_equal_rsn(struct mlx5_cqe64 *cqe64, struct mlx5_ib_srq *srq,
751 u32 rsn)
752{
753 u32 lrsn;
754
755 if (srq)
756 lrsn = be32_to_cpu(cqe64->srqn) & 0xffffff;
757 else
758 lrsn = be32_to_cpu(cqe64->sop_drop_qpn) & 0xffffff;
759
760 return rsn == lrsn;
761}
762
763void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
764{
765 struct mlx5_cqe64 *cqe64, *dest64;
766 void *cqe, *dest;
767 u32 prod_index;
768 int nfreed = 0;
769 u8 owner_bit;
770
771 if (!cq)
772 return;
773
774 /* First we need to find the current producer index, so we
775 * know where to start cleaning from. It doesn't matter if HW
776 * adds new entries after this loop -- the QP we're worried
777 * about is already in RESET, so the new entries won't come
778 * from our QP and therefore don't need to be checked.
779 */
780 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
781 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
782 break;
783
784 /* Now sweep backwards through the CQ, removing CQ entries
785 * that match our QP by copying older entries on top of them.
786 */
787 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
788 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
789 cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
790 if (is_equal_rsn(cqe64, srq, rsn)) {
791 if (srq)
792 mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
793 ++nfreed;
794 } else if (nfreed) {
795 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
796 dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
797 owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
798 memcpy(dest, cqe, cq->mcq.cqe_sz);
799 dest64->op_own = owner_bit |
800 (dest64->op_own & ~MLX5_CQE_OWNER_MASK);
801 }
802 }
803
804 if (nfreed) {
805 cq->mcq.cons_index += nfreed;
806 /* Make sure update of buffer contents is done before
807 * updating consumer index.
808 */
809 wmb();
810 mlx5_cq_set_ci(&cq->mcq);
811 }
812}
813
814void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
815{
816 if (!cq)
817 return;
818
819 spin_lock_irq(&cq->lock);
820 __mlx5_ib_cq_clean(cq, qpn, srq);
821 spin_unlock_irq(&cq->lock);
822}
823
824int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
825{
826 return -ENOSYS;
827}
828
829int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
830{
831 return -ENOSYS;
832}
833
834int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
835{
836 struct mlx5_ib_cq *cq;
837
838 if (!ibcq)
839 return 128;
840
841 cq = to_mcq(ibcq);
842 return cq->cqe_size;
843}
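
The polling routine above is wired up as the device's poll_cq verb in main.c below, so consumers reach it through the generic verbs API rather than calling it directly. The fragment that follows is an illustrative sketch only and not part of the patch; example_drain_cq() is a hypothetical helper showing how a kernel ULP would drain such a CQ via ib_poll_cq().

	/*
	 * Illustrative sketch, not part of the patch: a hypothetical ULP helper
	 * that drains completions in batches through the generic wrapper, which
	 * dispatches to mlx5_ib_poll_cq() for mlx5 devices. Assumes
	 * <rdma/ib_verbs.h> is included.
	 */
	static void example_drain_cq(struct ib_cq *cq)
	{
		struct ib_wc wc[16];
		int n, i;

		while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
			for (i = 0; i < n; i++) {
				if (wc[i].status != IB_WC_SUCCESS)
					pr_warn("wr_id 0x%llx completed with status %d\n",
						(unsigned long long)wc[i].wr_id,
						wc[i].status);
			}
		}
	}
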
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
new file mode 100644
index 000000000000..256a23344f28
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/kref.h>
34#include <linux/slab.h>
35#include <rdma/ib_umem.h>
36
37#include "mlx5_ib.h"
38
39struct mlx5_ib_user_db_page {
40 struct list_head list;
41 struct ib_umem *umem;
42 unsigned long user_virt;
43 int refcnt;
44};
45
46int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
47 struct mlx5_db *db)
48{
49 struct mlx5_ib_user_db_page *page;
50 struct ib_umem_chunk *chunk;
51 int err = 0;
52
53 mutex_lock(&context->db_page_mutex);
54
55 list_for_each_entry(page, &context->db_page_list, list)
56 if (page->user_virt == (virt & PAGE_MASK))
57 goto found;
58
59 page = kmalloc(sizeof(*page), GFP_KERNEL);
60 if (!page) {
61 err = -ENOMEM;
62 goto out;
63 }
64
65 page->user_virt = (virt & PAGE_MASK);
66 page->refcnt = 0;
67 page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
68 PAGE_SIZE, 0, 0);
69 if (IS_ERR(page->umem)) {
70 err = PTR_ERR(page->umem);
71 kfree(page);
72 goto out;
73 }
74
75 list_add(&page->list, &context->db_page_list);
76
77found:
78 chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
79 db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
80 db->u.user_page = page;
81 ++page->refcnt;
82
83out:
84 mutex_unlock(&context->db_page_mutex);
85
86 return err;
87}
88
89void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
90{
91 mutex_lock(&context->db_page_mutex);
92
93 if (!--db->u.user_page->refcnt) {
94 list_del(&db->u.user_page->list);
95 ib_umem_release(db->u.user_page->umem);
96 kfree(db->u.user_page);
97 }
98
99 mutex_unlock(&context->db_page_mutex);
100}
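
The map/unmap pair above keeps one pinned umem per user page and reference-counts it, so several doorbell records in the same page share a single pinning. The fragment below is an illustrative sketch only and not part of the patch; create_cq_user() in cq.c is a real caller of the same pattern, while example_setup_user_db() and its arguments are hypothetical.

	/*
	 * Illustrative sketch, not part of the patch: typical use of the
	 * helpers above when creating an object with a user-supplied
	 * doorbell record.
	 */
	static int example_setup_user_db(struct mlx5_ib_ucontext *uctx,
					 unsigned long db_uaddr,
					 struct mlx5_db *db)
	{
		int err;

		err = mlx5_ib_db_map_user(uctx, db_uaddr, db);	/* pins the page */
		if (err)
			return err;

		/*
		 * db->dma now points at the doorbell record inside the pinned
		 * page and can be passed to firmware; teardown or an error path
		 * must call mlx5_ib_db_unmap_user(uctx, db) to drop the
		 * page reference.
		 */
		return 0;
	}
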
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
new file mode 100644
index 000000000000..5c8938be0e08
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -0,0 +1,139 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx5/cmd.h>
34#include <rdma/ib_mad.h>
35#include <rdma/ib_smi.h>
36#include "mlx5_ib.h"
37
38enum {
39 MLX5_IB_VENDOR_CLASS1 = 0x9,
40 MLX5_IB_VENDOR_CLASS2 = 0xa
41};
42
43int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
44 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
45 void *in_mad, void *response_mad)
46{
47 u8 op_modifier = 0;
48
49 /* Key check traps can't be generated unless we have in_wc to
50 * tell us where to send the trap.
51 */
52 if (ignore_mkey || !in_wc)
53 op_modifier |= 0x1;
54 if (ignore_bkey || !in_wc)
55 op_modifier |= 0x2;
56
57 return mlx5_core_mad_ifc(&dev->mdev, in_mad, response_mad, op_modifier, port);
58}
59
60int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
61 struct ib_wc *in_wc, struct ib_grh *in_grh,
62 struct ib_mad *in_mad, struct ib_mad *out_mad)
63{
64 u16 slid;
65 int err;
66
67 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
68
69 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0)
70 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
71
72 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
73 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
74 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
75 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
76 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
77 return IB_MAD_RESULT_SUCCESS;
78
79 /* Don't process SMInfo queries -- the SMA can't handle them.
80 */
81 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
82 return IB_MAD_RESULT_SUCCESS;
83 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
84 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS1 ||
85 in_mad->mad_hdr.mgmt_class == MLX5_IB_VENDOR_CLASS2 ||
86 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
87 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
88 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
89 return IB_MAD_RESULT_SUCCESS;
90 } else {
91 return IB_MAD_RESULT_SUCCESS;
92 }
93
94 err = mlx5_MAD_IFC(to_mdev(ibdev),
95 mad_flags & IB_MAD_IGNORE_MKEY,
96 mad_flags & IB_MAD_IGNORE_BKEY,
97 port_num, in_wc, in_grh, in_mad, out_mad);
98 if (err)
99 return IB_MAD_RESULT_FAILURE;
100
101 /* set return bit in status of directed route responses */
102 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
103 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
104
105 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
106 /* no response for trap repress */
107 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
108
109 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
110}
111
112int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
113{
114 struct ib_smp *in_mad = NULL;
115 struct ib_smp *out_mad = NULL;
116 int err = -ENOMEM;
117 u16 packet_error;
118
119 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
120 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
121 if (!in_mad || !out_mad)
122 goto out;
123
124 init_query_mad(in_mad);
125 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
126 in_mad->attr_mod = cpu_to_be32(port);
127
128 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
129
130 packet_error = be16_to_cpu(out_mad->status);
131
132 dev->mdev.caps.ext_port_cap[port - 1] = (!err && !packet_error) ?
133 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0;
134
135out:
136 kfree(in_mad);
137 kfree(out_mad);
138 return err;
139}
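
mlx5_ib_process_mad() reports its outcome as a bitmask that the ib_mad layer interprets. The fragment below is an illustrative sketch only and not part of the patch; example_handle_mad_result() is a hypothetical name showing how a caller reads that mask.

	/*
	 * Illustrative sketch, not part of the patch: interpreting the bitmask
	 * returned by a process_mad handler such as mlx5_ib_process_mad().
	 */
	static void example_handle_mad_result(int ret, struct ib_mad *out_mad)
	{
		if (!(ret & IB_MAD_RESULT_SUCCESS)) {
			/* IB_MAD_RESULT_FAILURE: drop the MAD, send nothing */
			return;
		}
		if (ret & IB_MAD_RESULT_CONSUMED) {
			/* fully handled, no response generated (e.g. trap repress) */
			return;
		}
		if (ret & IB_MAD_RESULT_REPLY) {
			/* out_mad now holds the response to return to the sender */
		}
	}
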
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
new file mode 100644
index 000000000000..8000fff4d444
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -0,0 +1,1504 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <asm-generic/kmap_types.h>
34#include <linux/module.h>
35#include <linux/init.h>
36#include <linux/errno.h>
37#include <linux/pci.h>
38#include <linux/dma-mapping.h>
39#include <linux/slab.h>
40#include <linux/io-mapping.h>
41#include <linux/sched.h>
42#include <rdma/ib_user_verbs.h>
43#include <rdma/ib_smi.h>
44#include <rdma/ib_umem.h>
45#include "user.h"
46#include "mlx5_ib.h"
47
48#define DRIVER_NAME "mlx5_ib"
49#define DRIVER_VERSION "1.0"
50#define DRIVER_RELDATE "June 2013"
51
52MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
53MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
54MODULE_LICENSE("Dual BSD/GPL");
55MODULE_VERSION(DRIVER_VERSION);
56
57static int prof_sel = 2;
58module_param_named(prof_sel, prof_sel, int, 0444);
59MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
60
61static char mlx5_version[] =
62	DRIVER_NAME ": Mellanox Connect-IB InfiniBand driver v"
63 DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
64
65static struct mlx5_profile profile[] = {
66 [0] = {
67 .mask = 0,
68 },
69 [1] = {
70 .mask = MLX5_PROF_MASK_QP_SIZE,
71 .log_max_qp = 12,
72 },
73 [2] = {
74 .mask = MLX5_PROF_MASK_QP_SIZE |
75 MLX5_PROF_MASK_MR_CACHE,
76 .log_max_qp = 17,
77 .mr_cache[0] = {
78 .size = 500,
79 .limit = 250
80 },
81 .mr_cache[1] = {
82 .size = 500,
83 .limit = 250
84 },
85 .mr_cache[2] = {
86 .size = 500,
87 .limit = 250
88 },
89 .mr_cache[3] = {
90 .size = 500,
91 .limit = 250
92 },
93 .mr_cache[4] = {
94 .size = 500,
95 .limit = 250
96 },
97 .mr_cache[5] = {
98 .size = 500,
99 .limit = 250
100 },
101 .mr_cache[6] = {
102 .size = 500,
103 .limit = 250
104 },
105 .mr_cache[7] = {
106 .size = 500,
107 .limit = 250
108 },
109 .mr_cache[8] = {
110 .size = 500,
111 .limit = 250
112 },
113 .mr_cache[9] = {
114 .size = 500,
115 .limit = 250
116 },
117 .mr_cache[10] = {
118 .size = 500,
119 .limit = 250
120 },
121 .mr_cache[11] = {
122 .size = 500,
123 .limit = 250
124 },
125 .mr_cache[12] = {
126 .size = 64,
127 .limit = 32
128 },
129 .mr_cache[13] = {
130 .size = 32,
131 .limit = 16
132 },
133 .mr_cache[14] = {
134 .size = 16,
135 .limit = 8
136 },
137 .mr_cache[15] = {
138 .size = 8,
139 .limit = 4
140 },
141 },
142};
143
144int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn)
145{
146 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
147 struct mlx5_eq *eq, *n;
148 int err = -ENOENT;
149
150 spin_lock(&table->lock);
151 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
152 if (eq->index == vector) {
153 *eqn = eq->eqn;
154 *irqn = eq->irqn;
155 err = 0;
156 break;
157 }
158 }
159 spin_unlock(&table->lock);
160
161 return err;
162}
163
164static int alloc_comp_eqs(struct mlx5_ib_dev *dev)
165{
166 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
167 struct mlx5_eq *eq, *n;
168 int ncomp_vec;
169 int nent;
170 int err;
171 int i;
172
173 INIT_LIST_HEAD(&dev->eqs_list);
174 ncomp_vec = table->num_comp_vectors;
175 nent = MLX5_COMP_EQ_SIZE;
176 for (i = 0; i < ncomp_vec; i++) {
177 eq = kzalloc(sizeof(*eq), GFP_KERNEL);
178 if (!eq) {
179 err = -ENOMEM;
180 goto clean;
181 }
182
183 snprintf(eq->name, MLX5_MAX_EQ_NAME, "mlx5_comp%d", i);
184 err = mlx5_create_map_eq(&dev->mdev, eq,
185 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
186 eq->name,
187 &dev->mdev.priv.uuari.uars[0]);
188 if (err) {
189 kfree(eq);
190 goto clean;
191 }
192 mlx5_ib_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
193 eq->index = i;
194 spin_lock(&table->lock);
195 list_add_tail(&eq->list, &dev->eqs_list);
196 spin_unlock(&table->lock);
197 }
198
199 dev->num_comp_vectors = ncomp_vec;
200 return 0;
201
202clean:
203 spin_lock(&table->lock);
204 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
205 list_del(&eq->list);
206 spin_unlock(&table->lock);
207 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
208 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
209 kfree(eq);
210 spin_lock(&table->lock);
211 }
212 spin_unlock(&table->lock);
213 return err;
214}
215
216static void free_comp_eqs(struct mlx5_ib_dev *dev)
217{
218 struct mlx5_eq_table *table = &dev->mdev.priv.eq_table;
219 struct mlx5_eq *eq, *n;
220
221 spin_lock(&table->lock);
222 list_for_each_entry_safe(eq, n, &dev->eqs_list, list) {
223 list_del(&eq->list);
224 spin_unlock(&table->lock);
225 if (mlx5_destroy_unmap_eq(&dev->mdev, eq))
226 mlx5_ib_warn(dev, "failed to destroy EQ 0x%x\n", eq->eqn);
227 kfree(eq);
228 spin_lock(&table->lock);
229 }
230 spin_unlock(&table->lock);
231}
232
233static int mlx5_ib_query_device(struct ib_device *ibdev,
234 struct ib_device_attr *props)
235{
236 struct mlx5_ib_dev *dev = to_mdev(ibdev);
237 struct ib_smp *in_mad = NULL;
238 struct ib_smp *out_mad = NULL;
239 int err = -ENOMEM;
240 int max_rq_sg;
241 int max_sq_sg;
242 u64 flags;
243
244 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
245 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
246 if (!in_mad || !out_mad)
247 goto out;
248
249 init_query_mad(in_mad);
250 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
251
252 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
253 if (err)
254 goto out;
255
256 memset(props, 0, sizeof(*props));
257
258 props->fw_ver = ((u64)fw_rev_maj(&dev->mdev) << 32) |
259 (fw_rev_min(&dev->mdev) << 16) |
260 fw_rev_sub(&dev->mdev);
261 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
262 IB_DEVICE_PORT_ACTIVE_EVENT |
263 IB_DEVICE_SYS_IMAGE_GUID |
264 IB_DEVICE_RC_RNR_NAK_GEN |
265 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
266 flags = dev->mdev.caps.flags;
267 if (flags & MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR)
268 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
269 if (flags & MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR)
270 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
271 if (flags & MLX5_DEV_CAP_FLAG_APM)
272 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
273 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
274 if (flags & MLX5_DEV_CAP_FLAG_XRC)
275 props->device_cap_flags |= IB_DEVICE_XRC;
276 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
277
278 props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) &
279 0xffffff;
280 props->vendor_part_id = be16_to_cpup((__be16 *)(out_mad->data + 30));
281 props->hw_ver = be32_to_cpup((__be32 *)(out_mad->data + 32));
282 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
283
284 props->max_mr_size = ~0ull;
285 props->page_size_cap = dev->mdev.caps.min_page_sz;
286 props->max_qp = 1 << dev->mdev.caps.log_max_qp;
287 props->max_qp_wr = dev->mdev.caps.max_wqes;
288 max_rq_sg = dev->mdev.caps.max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
289 max_sq_sg = (dev->mdev.caps.max_sq_desc_sz - sizeof(struct mlx5_wqe_ctrl_seg)) /
290 sizeof(struct mlx5_wqe_data_seg);
291 props->max_sge = min(max_rq_sg, max_sq_sg);
292 props->max_cq = 1 << dev->mdev.caps.log_max_cq;
293 props->max_cqe = dev->mdev.caps.max_cqes - 1;
294 props->max_mr = 1 << dev->mdev.caps.log_max_mkey;
295 props->max_pd = 1 << dev->mdev.caps.log_max_pd;
296 props->max_qp_rd_atom = dev->mdev.caps.max_ra_req_qp;
297 props->max_qp_init_rd_atom = dev->mdev.caps.max_ra_res_qp;
298 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
299 props->max_srq = 1 << dev->mdev.caps.log_max_srq;
300 props->max_srq_wr = dev->mdev.caps.max_srq_wqes - 1;
301 props->max_srq_sge = max_rq_sg - 1;
302 props->max_fast_reg_page_list_len = (unsigned int)-1;
303 props->local_ca_ack_delay = dev->mdev.caps.local_ca_ack_delay;
304 props->atomic_cap = dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_ATOMIC ?
305 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
306 props->masked_atomic_cap = IB_ATOMIC_HCA;
307 props->max_pkeys = be16_to_cpup((__be16 *)(out_mad->data + 28));
308 props->max_mcast_grp = 1 << dev->mdev.caps.log_max_mcg;
309 props->max_mcast_qp_attach = dev->mdev.caps.max_qp_mcg;
310 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
311 props->max_mcast_grp;
312 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
313
314out:
315 kfree(in_mad);
316 kfree(out_mad);
317
318 return err;
319}
320
321int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
322 struct ib_port_attr *props)
323{
324 struct mlx5_ib_dev *dev = to_mdev(ibdev);
325 struct ib_smp *in_mad = NULL;
326 struct ib_smp *out_mad = NULL;
327 int ext_active_speed;
328 int err = -ENOMEM;
329
330 if (port < 1 || port > dev->mdev.caps.num_ports) {
331 mlx5_ib_warn(dev, "invalid port number %d\n", port);
332 return -EINVAL;
333 }
334
335 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
336 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
337 if (!in_mad || !out_mad)
338 goto out;
339
340 memset(props, 0, sizeof(*props));
341
342 init_query_mad(in_mad);
343 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
344 in_mad->attr_mod = cpu_to_be32(port);
345
346 err = mlx5_MAD_IFC(dev, 1, 1, port, NULL, NULL, in_mad, out_mad);
347 if (err) {
348 mlx5_ib_warn(dev, "err %d\n", err);
349 goto out;
350 }
351
352
353 props->lid = be16_to_cpup((__be16 *)(out_mad->data + 16));
354 props->lmc = out_mad->data[34] & 0x7;
355 props->sm_lid = be16_to_cpup((__be16 *)(out_mad->data + 18));
356 props->sm_sl = out_mad->data[36] & 0xf;
357 props->state = out_mad->data[32] & 0xf;
358 props->phys_state = out_mad->data[33] >> 4;
359 props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20));
360 props->gid_tbl_len = out_mad->data[50];
361 props->max_msg_sz = 1 << to_mdev(ibdev)->mdev.caps.log_max_msg;
362 props->pkey_tbl_len = to_mdev(ibdev)->mdev.caps.port[port - 1].pkey_table_len;
363 props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46));
364 props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48));
365 props->active_width = out_mad->data[31] & 0xf;
366 props->active_speed = out_mad->data[35] >> 4;
367 props->max_mtu = out_mad->data[41] & 0xf;
368 props->active_mtu = out_mad->data[36] >> 4;
369 props->subnet_timeout = out_mad->data[51] & 0x1f;
370 props->max_vl_num = out_mad->data[37] >> 4;
371 props->init_type_reply = out_mad->data[41] >> 4;
372
373 /* Check if extended speeds (EDR/FDR/...) are supported */
374 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
375 ext_active_speed = out_mad->data[62] >> 4;
376
377 switch (ext_active_speed) {
378 case 1:
379 props->active_speed = 16; /* FDR */
380 break;
381 case 2:
382 props->active_speed = 32; /* EDR */
383 break;
384 }
385 }
386
387 /* If reported active speed is QDR, check if is FDR-10 */
388 if (props->active_speed == 4) {
389 if (dev->mdev.caps.ext_port_cap[port - 1] &
390 MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) {
391 init_query_mad(in_mad);
392 in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO;
393 in_mad->attr_mod = cpu_to_be32(port);
394
395 err = mlx5_MAD_IFC(dev, 1, 1, port,
396 NULL, NULL, in_mad, out_mad);
397 if (err)
398 goto out;
399
400 /* Checking LinkSpeedActive for FDR-10 */
401 if (out_mad->data[15] & 0x1)
402 props->active_speed = 8;
403 }
404 }
405
406out:
407 kfree(in_mad);
408 kfree(out_mad);
409
410 return err;
411}
412
413static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
414 union ib_gid *gid)
415{
416 struct ib_smp *in_mad = NULL;
417 struct ib_smp *out_mad = NULL;
418 int err = -ENOMEM;
419
420 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
421 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
422 if (!in_mad || !out_mad)
423 goto out;
424
425 init_query_mad(in_mad);
426 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
427 in_mad->attr_mod = cpu_to_be32(port);
428
429 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
430 if (err)
431 goto out;
432
433 memcpy(gid->raw, out_mad->data + 8, 8);
434
435 init_query_mad(in_mad);
436 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
437 in_mad->attr_mod = cpu_to_be32(index / 8);
438
439 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
440 if (err)
441 goto out;
442
443 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
444
445out:
446 kfree(in_mad);
447 kfree(out_mad);
448 return err;
449}
450
451static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
452 u16 *pkey)
453{
454 struct ib_smp *in_mad = NULL;
455 struct ib_smp *out_mad = NULL;
456 int err = -ENOMEM;
457
458 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
459 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
460 if (!in_mad || !out_mad)
461 goto out;
462
463 init_query_mad(in_mad);
464 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
465 in_mad->attr_mod = cpu_to_be32(index / 32);
466
467 err = mlx5_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
468 if (err)
469 goto out;
470
471 *pkey = be16_to_cpu(((__be16 *)out_mad->data)[index % 32]);
472
473out:
474 kfree(in_mad);
475 kfree(out_mad);
476 return err;
477}
478
479struct mlx5_reg_node_desc {
480 u8 desc[64];
481};
482
483static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
484 struct ib_device_modify *props)
485{
486 struct mlx5_ib_dev *dev = to_mdev(ibdev);
487 struct mlx5_reg_node_desc in;
488 struct mlx5_reg_node_desc out;
489 int err;
490
491 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
492 return -EOPNOTSUPP;
493
494 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
495 return 0;
496
497 /*
498 * If possible, pass node desc to FW, so it can generate
499 * a 144 trap. If cmd fails, just ignore.
500 */
501 memcpy(&in, props->node_desc, 64);
502 err = mlx5_core_access_reg(&dev->mdev, &in, sizeof(in), &out,
503 sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
504 if (err)
505 return err;
506
507 memcpy(ibdev->node_desc, props->node_desc, 64);
508
509 return err;
510}
511
512static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
513 struct ib_port_modify *props)
514{
515 struct mlx5_ib_dev *dev = to_mdev(ibdev);
516 struct ib_port_attr attr;
517 u32 tmp;
518 int err;
519
520 mutex_lock(&dev->cap_mask_mutex);
521
522 err = mlx5_ib_query_port(ibdev, port, &attr);
523 if (err)
524 goto out;
525
526 tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
527 ~props->clr_port_cap_mask;
528
529 err = mlx5_set_port_caps(&dev->mdev, port, tmp);
530
531out:
532 mutex_unlock(&dev->cap_mask_mutex);
533 return err;
534}
535
536static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
537 struct ib_udata *udata)
538{
539 struct mlx5_ib_dev *dev = to_mdev(ibdev);
540 struct mlx5_ib_alloc_ucontext_req req;
541 struct mlx5_ib_alloc_ucontext_resp resp;
542 struct mlx5_ib_ucontext *context;
543 struct mlx5_uuar_info *uuari;
544 struct mlx5_uar *uars;
545 int num_uars;
546 int uuarn;
547 int err;
548 int i;
549
550 if (!dev->ib_active)
551 return ERR_PTR(-EAGAIN);
552
553 err = ib_copy_from_udata(&req, udata, sizeof(req));
554 if (err)
555 return ERR_PTR(err);
556
557 if (req.total_num_uuars > MLX5_MAX_UUARS)
558 return ERR_PTR(-ENOMEM);
559
560 if (req.total_num_uuars == 0)
561 return ERR_PTR(-EINVAL);
562
563 req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
564 if (req.num_low_latency_uuars > req.total_num_uuars - 1)
565 return ERR_PTR(-EINVAL);
566
567 num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
568 resp.qp_tab_size = 1 << dev->mdev.caps.log_max_qp;
569 resp.bf_reg_size = dev->mdev.caps.bf_reg_size;
570 resp.cache_line_size = L1_CACHE_BYTES;
571 resp.max_sq_desc_sz = dev->mdev.caps.max_sq_desc_sz;
572 resp.max_rq_desc_sz = dev->mdev.caps.max_rq_desc_sz;
573 resp.max_send_wqebb = dev->mdev.caps.max_wqes;
574 resp.max_recv_wr = dev->mdev.caps.max_wqes;
575 resp.max_srq_recv_wr = dev->mdev.caps.max_srq_wqes;
576
577 context = kzalloc(sizeof(*context), GFP_KERNEL);
578 if (!context)
579 return ERR_PTR(-ENOMEM);
580
581 uuari = &context->uuari;
582 mutex_init(&uuari->lock);
583 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
584 if (!uars) {
585 err = -ENOMEM;
586 goto out_ctx;
587 }
588
589 uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
590 sizeof(*uuari->bitmap),
591 GFP_KERNEL);
592 if (!uuari->bitmap) {
593 err = -ENOMEM;
594 goto out_uar_ctx;
595 }
596 /*
597 * clear all fast path uuars
598 */
599 for (i = 0; i < req.total_num_uuars; i++) {
600 uuarn = i & 3;
601 if (uuarn == 2 || uuarn == 3)
602 set_bit(i, uuari->bitmap);
603 }
604
605 uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
606 if (!uuari->count) {
607 err = -ENOMEM;
608 goto out_bitmap;
609 }
610
611 for (i = 0; i < num_uars; i++) {
612 err = mlx5_cmd_alloc_uar(&dev->mdev, &uars[i].index);
613 if (err)
614 goto out_count;
615 }
616
617 INIT_LIST_HEAD(&context->db_page_list);
618 mutex_init(&context->db_page_mutex);
619
620 resp.tot_uuars = req.total_num_uuars;
621 resp.num_ports = dev->mdev.caps.num_ports;
622 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
623 if (err)
624 goto out_uars;
625
626 uuari->num_low_latency_uuars = req.num_low_latency_uuars;
627 uuari->uars = uars;
628 uuari->num_uars = num_uars;
629 return &context->ibucontext;
630
631out_uars:
632 for (i--; i >= 0; i--)
633 mlx5_cmd_free_uar(&dev->mdev, uars[i].index);
634out_count:
635 kfree(uuari->count);
636
637out_bitmap:
638 kfree(uuari->bitmap);
639
640out_uar_ctx:
641 kfree(uars);
642
643out_ctx:
644 kfree(context);
645 return ERR_PTR(err);
646}
647
648static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
649{
650 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
651 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
652 struct mlx5_uuar_info *uuari = &context->uuari;
653 int i;
654
655 for (i = 0; i < uuari->num_uars; i++) {
656 if (mlx5_cmd_free_uar(&dev->mdev, uuari->uars[i].index))
657 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
658 }
659
660 kfree(uuari->count);
661 kfree(uuari->bitmap);
662 kfree(uuari->uars);
663 kfree(context);
664
665 return 0;
666}
667
668static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
669{
670 return (pci_resource_start(dev->mdev.pdev, 0) >> PAGE_SHIFT) + index;
671}
672
673static int get_command(unsigned long offset)
674{
675 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
676}
677
678static int get_arg(unsigned long offset)
679{
680 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
681}
682
683static int get_index(unsigned long offset)
684{
685 return get_arg(offset);
686}
687
688static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
689{
690 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
691 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
692 struct mlx5_uuar_info *uuari = &context->uuari;
693 unsigned long command;
694 unsigned long idx;
695 phys_addr_t pfn;
696
697 command = get_command(vma->vm_pgoff);
698 switch (command) {
699 case MLX5_IB_MMAP_REGULAR_PAGE:
700 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
701 return -EINVAL;
702
703		idx = get_index(vma->vm_pgoff);
704		if (idx >= uuari->num_uars)
705			return -EINVAL;
706
707		pfn = uar_index2pfn(dev, uuari->uars[idx].index);
708		mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
709			    (unsigned long long)pfn);
710
711 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
712 if (io_remap_pfn_range(vma, vma->vm_start, pfn,
713 PAGE_SIZE, vma->vm_page_prot))
714 return -EAGAIN;
715
716 mlx5_ib_dbg(dev, "mapped WC at 0x%lx, PA 0x%llx\n",
717 vma->vm_start,
718 (unsigned long long)pfn << PAGE_SHIFT);
719 break;
720
721 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
722 return -ENOSYS;
723
724 default:
725 return -EINVAL;
726 }
727
728 return 0;
729}
730
731static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
732{
733 struct mlx5_create_mkey_mbox_in *in;
734 struct mlx5_mkey_seg *seg;
735 struct mlx5_core_mr mr;
736 int err;
737
738 in = kzalloc(sizeof(*in), GFP_KERNEL);
739 if (!in)
740 return -ENOMEM;
741
742 seg = &in->seg;
743 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
744 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
745 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
746 seg->start_addr = 0;
747
748 err = mlx5_core_create_mkey(&dev->mdev, &mr, in, sizeof(*in));
749 if (err) {
750 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
751 goto err_in;
752 }
753
754 kfree(in);
755 *key = mr.key;
756
757 return 0;
758
759err_in:
760 kfree(in);
761
762 return err;
763}
764
765static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
766{
767 struct mlx5_core_mr mr;
768 int err;
769
770 memset(&mr, 0, sizeof(mr));
771 mr.key = key;
772 err = mlx5_core_destroy_mkey(&dev->mdev, &mr);
773 if (err)
774 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
775}
776
777static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
778 struct ib_ucontext *context,
779 struct ib_udata *udata)
780{
781 struct mlx5_ib_alloc_pd_resp resp;
782 struct mlx5_ib_pd *pd;
783 int err;
784
785 pd = kmalloc(sizeof(*pd), GFP_KERNEL);
786 if (!pd)
787 return ERR_PTR(-ENOMEM);
788
789 err = mlx5_core_alloc_pd(&to_mdev(ibdev)->mdev, &pd->pdn);
790 if (err) {
791 kfree(pd);
792 return ERR_PTR(err);
793 }
794
795 if (context) {
796 resp.pdn = pd->pdn;
797 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
798 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
799 kfree(pd);
800 return ERR_PTR(-EFAULT);
801 }
802 } else {
803 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
804 if (err) {
805 mlx5_core_dealloc_pd(&to_mdev(ibdev)->mdev, pd->pdn);
806 kfree(pd);
807 return ERR_PTR(err);
808 }
809 }
810
811 return &pd->ibpd;
812}
813
814static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
815{
816 struct mlx5_ib_dev *mdev = to_mdev(pd->device);
817 struct mlx5_ib_pd *mpd = to_mpd(pd);
818
819 if (!pd->uobject)
820 free_pa_mkey(mdev, mpd->pa_lkey);
821
822 mlx5_core_dealloc_pd(&mdev->mdev, mpd->pdn);
823 kfree(mpd);
824
825 return 0;
826}
827
828static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
829{
830 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
831 int err;
832
833 err = mlx5_core_attach_mcg(&dev->mdev, gid, ibqp->qp_num);
834 if (err)
835 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
836 ibqp->qp_num, gid->raw);
837
838 return err;
839}
840
841static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
842{
843 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
844 int err;
845
846 err = mlx5_core_detach_mcg(&dev->mdev, gid, ibqp->qp_num);
847 if (err)
848 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
849 ibqp->qp_num, gid->raw);
850
851 return err;
852}
853
854static int init_node_data(struct mlx5_ib_dev *dev)
855{
856 struct ib_smp *in_mad = NULL;
857 struct ib_smp *out_mad = NULL;
858 int err = -ENOMEM;
859
860 in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
861 out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
862 if (!in_mad || !out_mad)
863 goto out;
864
865 init_query_mad(in_mad);
866 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
867
868 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
869 if (err)
870 goto out;
871
872 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
873
874 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
875
876 err = mlx5_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
877 if (err)
878 goto out;
879
880 dev->mdev.rev_id = be32_to_cpup((__be32 *)(out_mad->data + 32));
881 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
882
883out:
884 kfree(in_mad);
885 kfree(out_mad);
886 return err;
887}
888
889static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
890 char *buf)
891{
892 struct mlx5_ib_dev *dev =
893 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
894
895 return sprintf(buf, "%d\n", dev->mdev.priv.fw_pages);
896}
897
898static ssize_t show_reg_pages(struct device *device,
899 struct device_attribute *attr, char *buf)
900{
901 struct mlx5_ib_dev *dev =
902 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
903
904 return sprintf(buf, "%d\n", dev->mdev.priv.reg_pages);
905}
906
907static ssize_t show_hca(struct device *device, struct device_attribute *attr,
908 char *buf)
909{
910 struct mlx5_ib_dev *dev =
911 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
912 return sprintf(buf, "MT%d\n", dev->mdev.pdev->device);
913}
914
915static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
916 char *buf)
917{
918 struct mlx5_ib_dev *dev =
919 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
920 return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(&dev->mdev),
921 fw_rev_min(&dev->mdev), fw_rev_sub(&dev->mdev));
922}
923
924static ssize_t show_rev(struct device *device, struct device_attribute *attr,
925 char *buf)
926{
927 struct mlx5_ib_dev *dev =
928 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
929 return sprintf(buf, "%x\n", dev->mdev.rev_id);
930}
931
932static ssize_t show_board(struct device *device, struct device_attribute *attr,
933 char *buf)
934{
935 struct mlx5_ib_dev *dev =
936 container_of(device, struct mlx5_ib_dev, ib_dev.dev);
937 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
938 dev->mdev.board_id);
939}
940
941static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
942static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
943static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
944static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
945static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
946static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
947
948static struct device_attribute *mlx5_class_attributes[] = {
949 &dev_attr_hw_rev,
950 &dev_attr_fw_ver,
951 &dev_attr_hca_type,
952 &dev_attr_board_id,
953 &dev_attr_fw_pages,
954 &dev_attr_reg_pages,
955};
956
957static void mlx5_ib_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
958 void *data)
959{
960 struct mlx5_ib_dev *ibdev = container_of(dev, struct mlx5_ib_dev, mdev);
961 struct ib_event ibev;
962 u8 port = 0;
963
964 switch (event) {
965 case MLX5_DEV_EVENT_SYS_ERROR:
966 ibdev->ib_active = false;
967 ibev.event = IB_EVENT_DEVICE_FATAL;
968 break;
969
970 case MLX5_DEV_EVENT_PORT_UP:
971 ibev.event = IB_EVENT_PORT_ACTIVE;
972 port = *(u8 *)data;
973 break;
974
975 case MLX5_DEV_EVENT_PORT_DOWN:
976 ibev.event = IB_EVENT_PORT_ERR;
977 port = *(u8 *)data;
978 break;
979
980 case MLX5_DEV_EVENT_PORT_INITIALIZED:
981 /* not used by ULPs */
982 return;
983
984 case MLX5_DEV_EVENT_LID_CHANGE:
985 ibev.event = IB_EVENT_LID_CHANGE;
986 port = *(u8 *)data;
987 break;
988
989 case MLX5_DEV_EVENT_PKEY_CHANGE:
990 ibev.event = IB_EVENT_PKEY_CHANGE;
991 port = *(u8 *)data;
992 break;
993
994 case MLX5_DEV_EVENT_GUID_CHANGE:
995 ibev.event = IB_EVENT_GID_CHANGE;
996 port = *(u8 *)data;
997 break;
998
999 case MLX5_DEV_EVENT_CLIENT_REREG:
1000 ibev.event = IB_EVENT_CLIENT_REREGISTER;
1001 port = *(u8 *)data;
1002 break;
1003 }
1004
1005 ibev.device = &ibdev->ib_dev;
1006 ibev.element.port_num = port;
1007
1008 if (ibdev->ib_active)
1009 ib_dispatch_event(&ibev);
1010}
1011
1012static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1013{
1014 int port;
1015
1016 for (port = 1; port <= dev->mdev.caps.num_ports; port++)
1017 mlx5_query_ext_port_caps(dev, port);
1018}
1019
1020static int get_port_caps(struct mlx5_ib_dev *dev)
1021{
1022 struct ib_device_attr *dprops = NULL;
1023 struct ib_port_attr *pprops = NULL;
1024 int err = 0;
1025 int port;
1026
1027 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1028 if (!pprops)
1029 goto out;
1030
1031 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1032 if (!dprops)
1033 goto out;
1034
1035 err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1036 if (err) {
1037 mlx5_ib_warn(dev, "query_device failed %d\n", err);
1038 goto out;
1039 }
1040
1041 for (port = 1; port <= dev->mdev.caps.num_ports; port++) {
1042 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1043 if (err) {
1044 mlx5_ib_warn(dev, "query_port %d failed %d\n", port, err);
1045 break;
1046 }
1047 dev->mdev.caps.port[port - 1].pkey_table_len = dprops->max_pkeys;
1048 dev->mdev.caps.port[port - 1].gid_table_len = pprops->gid_tbl_len;
1049 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1050 dprops->max_pkeys, pprops->gid_tbl_len);
1051 }
1052
1053out:
1054 kfree(pprops);
1055 kfree(dprops);
1056
1057 return err;
1058}
1059
1060static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1061{
1062 int err;
1063
1064 err = mlx5_mr_cache_cleanup(dev);
1065 if (err)
1066 mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1067
1068 mlx5_ib_destroy_qp(dev->umrc.qp);
1069 ib_destroy_cq(dev->umrc.cq);
1070 ib_dereg_mr(dev->umrc.mr);
1071 ib_dealloc_pd(dev->umrc.pd);
1072}
1073
1074enum {
1075 MAX_UMR_WR = 128,
1076};
1077
1078static int create_umr_res(struct mlx5_ib_dev *dev)
1079{
1080 struct ib_qp_init_attr *init_attr = NULL;
1081 struct ib_qp_attr *attr = NULL;
1082 struct ib_pd *pd;
1083 struct ib_cq *cq;
1084 struct ib_qp *qp;
1085 struct ib_mr *mr;
1086 int ret;
1087
1088 attr = kzalloc(sizeof(*attr), GFP_KERNEL);
1089 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
1090 if (!attr || !init_attr) {
1091 ret = -ENOMEM;
1092 goto error_0;
1093 }
1094
1095 pd = ib_alloc_pd(&dev->ib_dev);
1096 if (IS_ERR(pd)) {
1097 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1098 ret = PTR_ERR(pd);
1099 goto error_0;
1100 }
1101
1102 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE);
1103 if (IS_ERR(mr)) {
1104 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1105 ret = PTR_ERR(mr);
1106 goto error_1;
1107 }
1108
1109 cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL, 128,
1110 0);
1111 if (IS_ERR(cq)) {
1112 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
1113 ret = PTR_ERR(cq);
1114 goto error_2;
1115 }
1116 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1117
1118 init_attr->send_cq = cq;
1119 init_attr->recv_cq = cq;
1120 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1121 init_attr->cap.max_send_wr = MAX_UMR_WR;
1122 init_attr->cap.max_send_sge = 1;
1123 init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
1124 init_attr->port_num = 1;
1125 qp = mlx5_ib_create_qp(pd, init_attr, NULL);
1126 if (IS_ERR(qp)) {
1127 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
1128 ret = PTR_ERR(qp);
1129 goto error_3;
1130 }
1131 qp->device = &dev->ib_dev;
1132 qp->real_qp = qp;
1133 qp->uobject = NULL;
1134 qp->qp_type = MLX5_IB_QPT_REG_UMR;
1135
1136 attr->qp_state = IB_QPS_INIT;
1137 attr->port_num = 1;
1138 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
1139 IB_QP_PORT, NULL);
1140 if (ret) {
1141 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
1142 goto error_4;
1143 }
1144
1145 memset(attr, 0, sizeof(*attr));
1146 attr->qp_state = IB_QPS_RTR;
1147 attr->path_mtu = IB_MTU_256;
1148
1149 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1150 if (ret) {
1151 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
1152 goto error_4;
1153 }
1154
1155 memset(attr, 0, sizeof(*attr));
1156 attr->qp_state = IB_QPS_RTS;
1157 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
1158 if (ret) {
1159 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
1160 goto error_4;
1161 }
1162
1163 dev->umrc.qp = qp;
1164 dev->umrc.cq = cq;
1165 dev->umrc.mr = mr;
1166 dev->umrc.pd = pd;
1167
1168 sema_init(&dev->umrc.sem, MAX_UMR_WR);
1169 ret = mlx5_mr_cache_init(dev);
1170 if (ret) {
1171 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1172 goto error_4;
1173 }
1174
1175 kfree(attr);
1176 kfree(init_attr);
1177
1178 return 0;
1179
1180error_4:
1181 mlx5_ib_destroy_qp(qp);
1182
1183error_3:
1184 ib_destroy_cq(cq);
1185
1186error_2:
1187 ib_dereg_mr(mr);
1188
1189error_1:
1190 ib_dealloc_pd(pd);
1191
1192error_0:
1193 kfree(attr);
1194 kfree(init_attr);
1195 return ret;
1196}
1197
1198static int create_dev_resources(struct mlx5_ib_resources *devr)
1199{
1200 struct ib_srq_init_attr attr;
1201 struct mlx5_ib_dev *dev;
1202 int ret = 0;
1203
1204 dev = container_of(devr, struct mlx5_ib_dev, devr);
1205
1206 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1207 if (IS_ERR(devr->p0)) {
1208 ret = PTR_ERR(devr->p0);
1209 goto error0;
1210 }
1211 devr->p0->device = &dev->ib_dev;
1212 devr->p0->uobject = NULL;
1213 atomic_set(&devr->p0->usecnt, 0);
1214
1215 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
1216 if (IS_ERR(devr->c0)) {
1217 ret = PTR_ERR(devr->c0);
1218 goto error1;
1219 }
1220 devr->c0->device = &dev->ib_dev;
1221 devr->c0->uobject = NULL;
1222 devr->c0->comp_handler = NULL;
1223 devr->c0->event_handler = NULL;
1224 devr->c0->cq_context = NULL;
1225 atomic_set(&devr->c0->usecnt, 0);
1226
1227 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1228 if (IS_ERR(devr->x0)) {
1229 ret = PTR_ERR(devr->x0);
1230 goto error2;
1231 }
1232 devr->x0->device = &dev->ib_dev;
1233 devr->x0->inode = NULL;
1234 atomic_set(&devr->x0->usecnt, 0);
1235 mutex_init(&devr->x0->tgt_qp_mutex);
1236 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1237
1238 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1239 if (IS_ERR(devr->x1)) {
1240 ret = PTR_ERR(devr->x1);
1241 goto error3;
1242 }
1243 devr->x1->device = &dev->ib_dev;
1244 devr->x1->inode = NULL;
1245 atomic_set(&devr->x1->usecnt, 0);
1246 mutex_init(&devr->x1->tgt_qp_mutex);
1247 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1248
1249 memset(&attr, 0, sizeof(attr));
1250 attr.attr.max_sge = 1;
1251 attr.attr.max_wr = 1;
1252 attr.srq_type = IB_SRQT_XRC;
1253 attr.ext.xrc.cq = devr->c0;
1254 attr.ext.xrc.xrcd = devr->x0;
1255
1256 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1257 if (IS_ERR(devr->s0)) {
1258 ret = PTR_ERR(devr->s0);
1259 goto error4;
1260 }
1261 devr->s0->device = &dev->ib_dev;
1262 devr->s0->pd = devr->p0;
1263 devr->s0->uobject = NULL;
1264 devr->s0->event_handler = NULL;
1265 devr->s0->srq_context = NULL;
1266 devr->s0->srq_type = IB_SRQT_XRC;
1267 devr->s0->ext.xrc.xrcd = devr->x0;
1268 devr->s0->ext.xrc.cq = devr->c0;
1269 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1270 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1271 atomic_inc(&devr->p0->usecnt);
1272 atomic_set(&devr->s0->usecnt, 0);
1273
1274 return 0;
1275
1276error4:
1277 mlx5_ib_dealloc_xrcd(devr->x1);
1278error3:
1279 mlx5_ib_dealloc_xrcd(devr->x0);
1280error2:
1281 mlx5_ib_destroy_cq(devr->c0);
1282error1:
1283 mlx5_ib_dealloc_pd(devr->p0);
1284error0:
1285 return ret;
1286}
1287
1288static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1289{
1290 mlx5_ib_destroy_srq(devr->s0);
1291 mlx5_ib_dealloc_xrcd(devr->x0);
1292 mlx5_ib_dealloc_xrcd(devr->x1);
1293 mlx5_ib_destroy_cq(devr->c0);
1294 mlx5_ib_dealloc_pd(devr->p0);
1295}
1296
1297static int init_one(struct pci_dev *pdev,
1298 const struct pci_device_id *id)
1299{
1300 struct mlx5_core_dev *mdev;
1301 struct mlx5_ib_dev *dev;
1302 int err;
1303 int i;
1304
1305 printk_once(KERN_INFO "%s", mlx5_version);
1306
1307 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
1308 if (!dev)
1309 return -ENOMEM;
1310
1311 mdev = &dev->mdev;
1312 mdev->event = mlx5_ib_event;
1313 if (prof_sel >= ARRAY_SIZE(profile)) {
1314		pr_warn("selected profile out of range, selecting default\n");
1315 prof_sel = 0;
1316 }
1317 mdev->profile = &profile[prof_sel];
1318 err = mlx5_dev_init(mdev, pdev);
1319 if (err)
1320 goto err_free;
1321
1322 err = get_port_caps(dev);
1323 if (err)
1324 goto err_cleanup;
1325
1326 get_ext_port_caps(dev);
1327
1328 err = alloc_comp_eqs(dev);
1329 if (err)
1330 goto err_cleanup;
1331
1332 MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
1333
1334 strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
1335 dev->ib_dev.owner = THIS_MODULE;
1336 dev->ib_dev.node_type = RDMA_NODE_IB_CA;
1337 dev->ib_dev.local_dma_lkey = mdev->caps.reserved_lkey;
1338 dev->num_ports = mdev->caps.num_ports;
1339 dev->ib_dev.phys_port_cnt = dev->num_ports;
1340 dev->ib_dev.num_comp_vectors = dev->num_comp_vectors;
1341 dev->ib_dev.dma_device = &mdev->pdev->dev;
1342
1343 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
1344 dev->ib_dev.uverbs_cmd_mask =
1345 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1346 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
1347 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
1348 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
1349 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
1350 (1ull << IB_USER_VERBS_CMD_REG_MR) |
1351 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1352 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1353 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1354 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
1355 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1356 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1357 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1358 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
1359 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1360 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
1361 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
1362 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1363 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1364 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1365 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
1366 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
1367 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1368
1369 dev->ib_dev.query_device = mlx5_ib_query_device;
1370 dev->ib_dev.query_port = mlx5_ib_query_port;
1371 dev->ib_dev.query_gid = mlx5_ib_query_gid;
1372 dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
1373 dev->ib_dev.modify_device = mlx5_ib_modify_device;
1374 dev->ib_dev.modify_port = mlx5_ib_modify_port;
1375 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
1376 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
1377 dev->ib_dev.mmap = mlx5_ib_mmap;
1378 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
1379 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
1380 dev->ib_dev.create_ah = mlx5_ib_create_ah;
1381 dev->ib_dev.query_ah = mlx5_ib_query_ah;
1382 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
1383 dev->ib_dev.create_srq = mlx5_ib_create_srq;
1384 dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
1385 dev->ib_dev.query_srq = mlx5_ib_query_srq;
1386 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
1387 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
1388 dev->ib_dev.create_qp = mlx5_ib_create_qp;
1389 dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
1390 dev->ib_dev.query_qp = mlx5_ib_query_qp;
1391 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
1392 dev->ib_dev.post_send = mlx5_ib_post_send;
1393 dev->ib_dev.post_recv = mlx5_ib_post_recv;
1394 dev->ib_dev.create_cq = mlx5_ib_create_cq;
1395 dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
1396 dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
1397 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
1398 dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
1399 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
1400 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
1401 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
1402 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
1403 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
1404 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
1405 dev->ib_dev.process_mad = mlx5_ib_process_mad;
1406 dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
1407 dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
1408 dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;
1409
1410 if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) {
1411 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
1412 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
1413 dev->ib_dev.uverbs_cmd_mask |=
1414 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1415 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1416 }
1417
1418 err = init_node_data(dev);
1419 if (err)
1420 goto err_eqs;
1421
1422 mutex_init(&dev->cap_mask_mutex);
1423 spin_lock_init(&dev->mr_lock);
1424
1425 err = create_dev_resources(&dev->devr);
1426 if (err)
1427 goto err_eqs;
1428
1429 if (ib_register_device(&dev->ib_dev, NULL))
1430 goto err_rsrc;
1431
1432 err = create_umr_res(dev);
1433 if (err)
1434 goto err_dev;
1435
1436 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
1437 if (device_create_file(&dev->ib_dev.dev,
1438 mlx5_class_attributes[i]))
1439 goto err_umrc;
1440 }
1441
1442 dev->ib_active = true;
1443
1444 return 0;
1445
1446err_umrc:
1447 destroy_umrc_res(dev);
1448
1449err_dev:
1450 ib_unregister_device(&dev->ib_dev);
1451
1452err_rsrc:
1453 destroy_dev_resources(&dev->devr);
1454
1455err_eqs:
1456 free_comp_eqs(dev);
1457
1458err_cleanup:
1459 mlx5_dev_cleanup(mdev);
1460
1461err_free:
1462 ib_dealloc_device((struct ib_device *)dev);
1463
1464 return err;
1465}
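/*
 * The error labels above unwind the setup steps in reverse order:
 * UMR resources, IB device registration, device resources, completion
 * EQs, core device init and, finally, the ib_device allocation, so a
 * failure at any stage releases only what was already acquired.
 */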
1466
1467static void remove_one(struct pci_dev *pdev)
1468{
1469 struct mlx5_ib_dev *dev = mlx5_pci2ibdev(pdev);
1470
1471 destroy_umrc_res(dev);
1472 ib_unregister_device(&dev->ib_dev);
1473 destroy_dev_resources(&dev->devr);
1474 free_comp_eqs(dev);
1475 mlx5_dev_cleanup(&dev->mdev);
1476 ib_dealloc_device(&dev->ib_dev);
1477}
1478
1479static DEFINE_PCI_DEVICE_TABLE(mlx5_ib_pci_table) = {
1480 { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */
1481 { 0, }
1482};
1483
1484MODULE_DEVICE_TABLE(pci, mlx5_ib_pci_table);
1485
1486static struct pci_driver mlx5_ib_driver = {
1487 .name = DRIVER_NAME,
1488 .id_table = mlx5_ib_pci_table,
1489 .probe = init_one,
1490 .remove = remove_one
1491};
1492
1493static int __init mlx5_ib_init(void)
1494{
1495 return pci_register_driver(&mlx5_ib_driver);
1496}
1497
1498static void __exit mlx5_ib_cleanup(void)
1499{
1500 pci_unregister_driver(&mlx5_ib_driver);
1501}
1502
1503module_init(mlx5_ib_init);
1504module_exit(mlx5_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
new file mode 100644
index 000000000000..3a5322870b96
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -0,0 +1,162 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36
37/* @umem: umem object to scan
38 * @addr: ib virtual address requested by the user
39 * @count: number of PAGE_SIZE pages covered by umem
40 * @shift: page shift for the compound pages found in the region
 41 * @ncont: number of compound pages
42 * @order: log2 of the number of compound pages
43 */
44void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
45 int *ncont, int *order)
46{
47 struct ib_umem_chunk *chunk;
48 unsigned long tmp;
49 unsigned long m;
50 int i, j, k;
51 u64 base = 0;
52 int p = 0;
53 int skip;
54 int mask;
55 u64 len;
56 u64 pfn;
57
58 addr = addr >> PAGE_SHIFT;
59 tmp = (unsigned long)addr;
 60	addr = addr >> PAGE_SHIFT;
 60	m = find_first_bit(&tmp, BITS_PER_LONG);
61 skip = 1 << m;
62 mask = skip - 1;
63 i = 0;
64 list_for_each_entry(chunk, &umem->chunk_list, list)
65 for (j = 0; j < chunk->nmap; j++) {
66 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
67 pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT;
68 for (k = 0; k < len; k++) {
69 if (!(i & mask)) {
70 tmp = (unsigned long)pfn;
 71					m = min(m, find_first_bit(&tmp, BITS_PER_LONG));
72 skip = 1 << m;
73 mask = skip - 1;
74 base = pfn;
75 p = 0;
76 } else {
77 if (base + p != pfn) {
78 tmp = (unsigned long)p;
 79						m = find_first_bit(&tmp, BITS_PER_LONG);
80 skip = 1 << m;
81 mask = skip - 1;
82 base = pfn;
83 p = 0;
84 }
85 }
86 p++;
87 i++;
88 }
89 }
90
91 if (i) {
92 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
93
94 if (order)
95 *order = ilog2(roundup_pow_of_two(i) >> m);
96
97 *ncont = DIV_ROUND_UP(i, (1 << m));
98 } else {
99 m = 0;
100
101 if (order)
102 *order = 0;
103
104 *ncont = 0;
105 }
106 *shift = PAGE_SHIFT + m;
107 *count = i;
108}
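/*
 * Rough illustration (assuming a 4 KB PAGE_SIZE): a buffer covering 64
 * pages whose DMA addresses are contiguous and whose start address and
 * IOVA are both aligned on a 64-page boundary would come back with
 * *count = 64, *shift = PAGE_SHIFT + 6 (i.e. 256 KB compound pages),
 * *ncont = 1 and, if requested, *order = 0.
 */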
109
110void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
111 int page_shift, __be64 *pas, int umr)
112{
113 int shift = page_shift - PAGE_SHIFT;
114 int mask = (1 << shift) - 1;
115 struct ib_umem_chunk *chunk;
116 int i, j, k;
117 u64 cur = 0;
118 u64 base;
119 int len;
120
121 i = 0;
122 list_for_each_entry(chunk, &umem->chunk_list, list)
123 for (j = 0; j < chunk->nmap; j++) {
124 len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT;
125 base = sg_dma_address(&chunk->page_list[j]);
126 for (k = 0; k < len; k++) {
127 if (!(i & mask)) {
128 cur = base + (k << PAGE_SHIFT);
129 if (umr)
130 cur |= 3;
131
132 pas[i >> shift] = cpu_to_be64(cur);
133 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
134 i >> shift, be64_to_cpu(pas[i >> shift]));
135 } else
136 mlx5_ib_dbg(dev, "=====> 0x%llx\n",
137 base + (k << PAGE_SHIFT));
138 i++;
139 }
140 }
141}
142
143int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
144{
145 u64 page_size;
146 u64 page_mask;
147 u64 off_size;
148 u64 off_mask;
149 u64 buf_off;
150
151 page_size = 1 << page_shift;
152 page_mask = page_size - 1;
153 buf_off = addr & page_mask;
154 off_size = page_size >> 6;
155 off_mask = off_size - 1;
156
157 if (buf_off & off_mask)
158 return -EINVAL;
159
160 *offset = buf_off >> ilog2(off_size);
161 return 0;
162}
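/*
 * Example with page_shift = 12: the offset granularity is
 * page_size >> 6 = 64 bytes, so a buffer starting 128 bytes into its
 * page passes the check (128 & 63 == 0) and yields *offset = 2, while
 * one starting at byte 100 is rejected with -EINVAL.
 */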
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
new file mode 100644
index 000000000000..836be9157242
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -0,0 +1,545 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_H
34#define MLX5_IB_H
35
36#include <linux/kernel.h>
37#include <linux/sched.h>
38#include <rdma/ib_verbs.h>
39#include <rdma/ib_smi.h>
40#include <linux/mlx5/driver.h>
41#include <linux/mlx5/cq.h>
42#include <linux/mlx5/qp.h>
43#include <linux/mlx5/srq.h>
44#include <linux/types.h>
45
46#define mlx5_ib_dbg(dev, format, arg...) \
47pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
48 __LINE__, current->pid, ##arg)
49
50#define mlx5_ib_err(dev, format, arg...) \
51pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
52 __LINE__, current->pid, ##arg)
53
54#define mlx5_ib_warn(dev, format, arg...) \
55pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
56 __LINE__, current->pid, ##arg)
57
58enum {
59 MLX5_IB_MMAP_CMD_SHIFT = 8,
60 MLX5_IB_MMAP_CMD_MASK = 0xff,
61};
62
63enum mlx5_ib_mmap_cmd {
64 MLX5_IB_MMAP_REGULAR_PAGE = 0,
65 MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
66};
67
68enum {
69 MLX5_RES_SCAT_DATA32_CQE = 0x1,
70 MLX5_RES_SCAT_DATA64_CQE = 0x2,
71 MLX5_REQ_SCAT_DATA32_CQE = 0x11,
72 MLX5_REQ_SCAT_DATA64_CQE = 0x22,
73};
74
75enum mlx5_ib_latency_class {
76 MLX5_IB_LATENCY_CLASS_LOW,
77 MLX5_IB_LATENCY_CLASS_MEDIUM,
78 MLX5_IB_LATENCY_CLASS_HIGH,
79 MLX5_IB_LATENCY_CLASS_FAST_PATH
80};
81
82enum mlx5_ib_mad_ifc_flags {
83 MLX5_MAD_IFC_IGNORE_MKEY = 1,
84 MLX5_MAD_IFC_IGNORE_BKEY = 2,
85 MLX5_MAD_IFC_NET_VIEW = 4,
86};
87
88struct mlx5_ib_ucontext {
89 struct ib_ucontext ibucontext;
90 struct list_head db_page_list;
91
92 /* protect doorbell record alloc/free
93 */
94 struct mutex db_page_mutex;
95 struct mlx5_uuar_info uuari;
96};
97
98static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
99{
100 return container_of(ibucontext, struct mlx5_ib_ucontext, ibucontext);
101}
102
103struct mlx5_ib_pd {
104 struct ib_pd ibpd;
105 u32 pdn;
106 u32 pa_lkey;
107};
108
 109/* Use macros here so that we don't have to duplicate
110 * enum ib_send_flags and enum ib_qp_type for low-level driver
111 */
112
113#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
114#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
115#define MLX5_IB_WR_UMR IB_WR_RESERVED1
116
117struct wr_list {
118 u16 opcode;
119 u16 next;
120};
121
122struct mlx5_ib_wq {
123 u64 *wrid;
124 u32 *wr_data;
125 struct wr_list *w_list;
126 unsigned *wqe_head;
127 u16 unsig_count;
128
129 /* serialize post to the work queue
130 */
131 spinlock_t lock;
132 int wqe_cnt;
133 int max_post;
134 int max_gs;
135 int offset;
136 int wqe_shift;
137 unsigned head;
138 unsigned tail;
139 u16 cur_post;
140 u16 last_poll;
141 void *qend;
142};
143
144enum {
145 MLX5_QP_USER,
146 MLX5_QP_KERNEL,
147 MLX5_QP_EMPTY
148};
149
150struct mlx5_ib_qp {
151 struct ib_qp ibqp;
152 struct mlx5_core_qp mqp;
153 struct mlx5_buf buf;
154
155 struct mlx5_db db;
156 struct mlx5_ib_wq rq;
157
158 u32 doorbell_qpn;
159 u8 sq_signal_bits;
160 u8 fm_cache;
161 int sq_max_wqes_per_wr;
162 int sq_spare_wqes;
163 struct mlx5_ib_wq sq;
164
165 struct ib_umem *umem;
166 int buf_size;
167
168 /* serialize qp state modifications
169 */
170 struct mutex mutex;
171 u16 xrcdn;
172 u32 flags;
173 u8 port;
174 u8 alt_port;
175 u8 atomic_rd_en;
176 u8 resp_depth;
177 u8 state;
178 int mlx_type;
179 int wq_sig;
180 int scat_cqe;
181 int max_inline_data;
182 struct mlx5_bf *bf;
183 int has_rq;
184
185 /* only for user space QPs. For kernel
186 * we have it from the bf object
187 */
188 int uuarn;
189
190 int create_type;
191 u32 pa_lkey;
192};
193
194struct mlx5_ib_cq_buf {
195 struct mlx5_buf buf;
196 struct ib_umem *umem;
197 int cqe_size;
198};
199
200enum mlx5_ib_qp_flags {
201 MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
202 MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
203};
204
205struct mlx5_shared_mr_info {
206 int mr_id;
207 struct ib_umem *umem;
208};
209
210struct mlx5_ib_cq {
211 struct ib_cq ibcq;
212 struct mlx5_core_cq mcq;
213 struct mlx5_ib_cq_buf buf;
214 struct mlx5_db db;
215
216 /* serialize access to the CQ
217 */
218 spinlock_t lock;
219
220 /* protect resize cq
221 */
222 struct mutex resize_mutex;
223 struct mlx5_ib_cq_resize *resize_buf;
224 struct ib_umem *resize_umem;
225 int cqe_size;
226};
227
228struct mlx5_ib_srq {
229 struct ib_srq ibsrq;
230 struct mlx5_core_srq msrq;
231 struct mlx5_buf buf;
232 struct mlx5_db db;
233 u64 *wrid;
 234	/* protect SRQ handling
235 */
236 spinlock_t lock;
237 int head;
238 int tail;
239 u16 wqe_ctr;
240 struct ib_umem *umem;
241 /* serialize arming a SRQ
242 */
243 struct mutex mutex;
244 int wq_sig;
245};
246
247struct mlx5_ib_xrcd {
248 struct ib_xrcd ibxrcd;
249 u32 xrcdn;
250};
251
252struct mlx5_ib_mr {
253 struct ib_mr ibmr;
254 struct mlx5_core_mr mmr;
255 struct ib_umem *umem;
256 struct mlx5_shared_mr_info *smr_info;
257 struct list_head list;
258 int order;
259 int umred;
260 __be64 *pas;
261 dma_addr_t dma;
262 int npages;
263 struct completion done;
264 enum ib_wc_status status;
265};
266
267struct mlx5_ib_fast_reg_page_list {
268 struct ib_fast_reg_page_list ibfrpl;
269 __be64 *mapped_page_list;
270 dma_addr_t map;
271};
272
273struct umr_common {
274 struct ib_pd *pd;
275 struct ib_cq *cq;
276 struct ib_qp *qp;
277 struct ib_mr *mr;
278 /* control access to UMR QP
279 */
280 struct semaphore sem;
281};
282
283enum {
284 MLX5_FMR_INVALID,
285 MLX5_FMR_VALID,
286 MLX5_FMR_BUSY,
287};
288
289struct mlx5_ib_fmr {
290 struct ib_fmr ibfmr;
291 struct mlx5_core_mr mr;
292 int access_flags;
293 int state;
294 /* protect fmr state
295 */
296 spinlock_t lock;
297 u64 wrid;
298 struct ib_send_wr wr[2];
299 u8 page_shift;
300 struct ib_fast_reg_page_list page_list;
301};
302
303struct mlx5_cache_ent {
304 struct list_head head;
 305	/* sync access to the cache entry
306 */
307 spinlock_t lock;
308
309
310 struct dentry *dir;
311 char name[4];
312 u32 order;
313 u32 size;
314 u32 cur;
315 u32 miss;
316 u32 limit;
317
318 struct dentry *fsize;
319 struct dentry *fcur;
320 struct dentry *fmiss;
321 struct dentry *flimit;
322
323 struct mlx5_ib_dev *dev;
324 struct work_struct work;
325 struct delayed_work dwork;
326};
327
328struct mlx5_mr_cache {
329 struct workqueue_struct *wq;
330 struct mlx5_cache_ent ent[MAX_MR_CACHE_ENTRIES];
331 int stopped;
332 struct dentry *root;
333 unsigned long last_add;
334};
335
336struct mlx5_ib_resources {
337 struct ib_cq *c0;
338 struct ib_xrcd *x0;
339 struct ib_xrcd *x1;
340 struct ib_pd *p0;
341 struct ib_srq *s0;
342};
343
344struct mlx5_ib_dev {
345 struct ib_device ib_dev;
346 struct mlx5_core_dev mdev;
347 MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
348 struct list_head eqs_list;
349 int num_ports;
350 int num_comp_vectors;
351 /* serialize update of capability mask
352 */
353 struct mutex cap_mask_mutex;
354 bool ib_active;
355 struct umr_common umrc;
356 /* sync used page count stats
357 */
358 spinlock_t mr_lock;
359 struct mlx5_ib_resources devr;
360 struct mlx5_mr_cache cache;
361};
362
363static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
364{
365 return container_of(mcq, struct mlx5_ib_cq, mcq);
366}
367
368static inline struct mlx5_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
369{
370 return container_of(ibxrcd, struct mlx5_ib_xrcd, ibxrcd);
371}
372
373static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
374{
375 return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
376}
377
378static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
379{
380 return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
381}
382
383static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
384{
385 return container_of(ibcq, struct mlx5_ib_cq, ibcq);
386}
387
388static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
389{
390 return container_of(mqp, struct mlx5_ib_qp, mqp);
391}
392
393static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
394{
395 return container_of(ibpd, struct mlx5_ib_pd, ibpd);
396}
397
398static inline struct mlx5_ib_srq *to_msrq(struct ib_srq *ibsrq)
399{
400 return container_of(ibsrq, struct mlx5_ib_srq, ibsrq);
401}
402
403static inline struct mlx5_ib_qp *to_mqp(struct ib_qp *ibqp)
404{
405 return container_of(ibqp, struct mlx5_ib_qp, ibqp);
406}
407
408static inline struct mlx5_ib_srq *to_mibsrq(struct mlx5_core_srq *msrq)
409{
410 return container_of(msrq, struct mlx5_ib_srq, msrq);
411}
412
413static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
414{
415 return container_of(ibmr, struct mlx5_ib_mr, ibmr);
416}
417
418static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
419{
420 return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
421}
422
423struct mlx5_ib_ah {
424 struct ib_ah ibah;
425 struct mlx5_av av;
426};
427
428static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
429{
430 return container_of(ibah, struct mlx5_ib_ah, ibah);
431}
432
433static inline struct mlx5_ib_dev *mlx5_core2ibdev(struct mlx5_core_dev *dev)
434{
435 return container_of(dev, struct mlx5_ib_dev, mdev);
436}
437
438static inline struct mlx5_ib_dev *mlx5_pci2ibdev(struct pci_dev *pdev)
439{
440 return mlx5_core2ibdev(pci2mlx5_core_dev(pdev));
441}
442
443int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
444 struct mlx5_db *db);
445void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
446void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
447void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
448void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
449int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
450 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
451 void *in_mad, void *response_mad);
452struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
453 struct mlx5_ib_ah *ah);
454struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
455int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
456int mlx5_ib_destroy_ah(struct ib_ah *ah);
457struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
458 struct ib_srq_init_attr *init_attr,
459 struct ib_udata *udata);
460int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
461 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
462int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
463int mlx5_ib_destroy_srq(struct ib_srq *srq);
464int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
465 struct ib_recv_wr **bad_wr);
466struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
467 struct ib_qp_init_attr *init_attr,
468 struct ib_udata *udata);
469int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
470 int attr_mask, struct ib_udata *udata);
471int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
472 struct ib_qp_init_attr *qp_init_attr);
473int mlx5_ib_destroy_qp(struct ib_qp *qp);
474int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
475 struct ib_send_wr **bad_wr);
476int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
477 struct ib_recv_wr **bad_wr);
478void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
479struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
480 int vector, struct ib_ucontext *context,
481 struct ib_udata *udata);
482int mlx5_ib_destroy_cq(struct ib_cq *cq);
483int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
484int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
485int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
486int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
487struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
488struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
489 u64 virt_addr, int access_flags,
490 struct ib_udata *udata);
491int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
492struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
493 int max_page_list_len);
494struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
495 int page_list_len);
496void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
497struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
498 struct ib_fmr_attr *fmr_attr);
499int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
500 int npages, u64 iova);
501int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
502int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
503int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
504 struct ib_wc *in_wc, struct ib_grh *in_grh,
505 struct ib_mad *in_mad, struct ib_mad *out_mad);
506struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
507 struct ib_ucontext *context,
508 struct ib_udata *udata);
509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd);
510int mlx5_vector2eqn(struct mlx5_ib_dev *dev, int vector, int *eqn, int *irqn);
511int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset);
512int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port);
513int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
514 struct ib_port_attr *props);
515int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
516void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
517void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
518 int *ncont, int *order);
519void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
520 int page_shift, __be64 *pas, int umr);
521void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
522int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
523int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
524int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
525int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
526void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
527
528static inline void init_query_mad(struct ib_smp *mad)
529{
530 mad->base_version = 1;
531 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
532 mad->class_version = 1;
533 mad->method = IB_MGMT_METHOD_GET;
534}
535
536static inline u8 convert_access(int acc)
537{
538 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
539 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
540 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
541 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
542 MLX5_PERM_LOCAL_READ;
543}
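/*
 * For example, convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ)
 * yields MLX5_PERM_LOCAL_WRITE | MLX5_PERM_REMOTE_READ | MLX5_PERM_LOCAL_READ;
 * local read permission is always granted.
 */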
544
545#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
new file mode 100644
index 000000000000..bd41df95b6f0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -0,0 +1,1007 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/kref.h>
35#include <linux/random.h>
36#include <linux/debugfs.h>
37#include <linux/export.h>
38#include <rdma/ib_umem.h>
39#include "mlx5_ib.h"
40
41enum {
42 DEF_CACHE_SIZE = 10,
43};
44
45static __be64 *mr_align(__be64 *ptr, int align)
46{
47 unsigned long mask = align - 1;
48
49 return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
50}
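/*
 * mr_align() rounds ptr up to the next align-byte boundary (align must be a
 * power of two). It is used below with 0x40 so that the PAS array handed to
 * the device is 64-byte aligned, presumably matching the alignment the HCA
 * expects for translation tables (compare the WARN_ON(map & 0x3f) in
 * mlx5_ib_alloc_fast_reg_page_list()).
 */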
51
52static int order2idx(struct mlx5_ib_dev *dev, int order)
53{
54 struct mlx5_mr_cache *cache = &dev->cache;
55
56 if (order < cache->ent[0].order)
57 return 0;
58 else
59 return order - cache->ent[0].order;
60}
61
62static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
63{
64 struct device *ddev = dev->ib_dev.dma_device;
65 struct mlx5_mr_cache *cache = &dev->cache;
66 struct mlx5_cache_ent *ent = &cache->ent[c];
67 struct mlx5_create_mkey_mbox_in *in;
68 struct mlx5_ib_mr *mr;
69 int npages = 1 << ent->order;
70 int size = sizeof(u64) * npages;
71 int err = 0;
72 int i;
73
74 in = kzalloc(sizeof(*in), GFP_KERNEL);
75 if (!in)
76 return -ENOMEM;
77
78 for (i = 0; i < num; i++) {
79 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
80 if (!mr) {
81 err = -ENOMEM;
82 goto out;
83 }
84 mr->order = ent->order;
85 mr->umred = 1;
86 mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
87 if (!mr->pas) {
88 kfree(mr);
89 err = -ENOMEM;
90 goto out;
91 }
92 mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
93 DMA_TO_DEVICE);
94 if (dma_mapping_error(ddev, mr->dma)) {
95 kfree(mr->pas);
96 kfree(mr);
97 err = -ENOMEM;
98 goto out;
99 }
100
101 in->seg.status = 1 << 6;
102 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
103 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
104 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
105 in->seg.log2_page_size = 12;
106
107 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
108 sizeof(*in));
109 if (err) {
110 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
111 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
112 kfree(mr->pas);
113 kfree(mr);
114 goto out;
115 }
116 cache->last_add = jiffies;
117
118 spin_lock(&ent->lock);
119 list_add_tail(&mr->list, &ent->head);
120 ent->cur++;
121 ent->size++;
122 spin_unlock(&ent->lock);
123 }
124
125out:
126 kfree(in);
127 return err;
128}
129
130static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
131{
132 struct device *ddev = dev->ib_dev.dma_device;
133 struct mlx5_mr_cache *cache = &dev->cache;
134 struct mlx5_cache_ent *ent = &cache->ent[c];
135 struct mlx5_ib_mr *mr;
136 int size;
137 int err;
138 int i;
139
140 for (i = 0; i < num; i++) {
141 spin_lock(&ent->lock);
142 if (list_empty(&ent->head)) {
143 spin_unlock(&ent->lock);
144 return;
145 }
146 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
147 list_del(&mr->list);
148 ent->cur--;
149 ent->size--;
150 spin_unlock(&ent->lock);
151 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
152 if (err) {
153 mlx5_ib_warn(dev, "failed destroy mkey\n");
154 } else {
155 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
156 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
157 kfree(mr->pas);
158 kfree(mr);
159 }
160 }
161}
162
163static ssize_t size_write(struct file *filp, const char __user *buf,
164 size_t count, loff_t *pos)
165{
166 struct mlx5_cache_ent *ent = filp->private_data;
167 struct mlx5_ib_dev *dev = ent->dev;
168 char lbuf[20];
169 u32 var;
170 int err;
171 int c;
172
173 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
174 return -EFAULT;
175
176 c = order2idx(dev, ent->order);
177 lbuf[sizeof(lbuf) - 1] = 0;
178
179 if (sscanf(lbuf, "%u", &var) != 1)
180 return -EINVAL;
181
182 if (var < ent->limit)
183 return -EINVAL;
184
185 if (var > ent->size) {
186 err = add_keys(dev, c, var - ent->size);
187 if (err)
188 return err;
189 } else if (var < ent->size) {
190 remove_keys(dev, c, ent->size - var);
191 }
192
193 return count;
194}
195
196static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
197 loff_t *pos)
198{
199 struct mlx5_cache_ent *ent = filp->private_data;
200 char lbuf[20];
201 int err;
202
203 if (*pos)
204 return 0;
205
206 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
207 if (err < 0)
208 return err;
209
210 if (copy_to_user(buf, lbuf, err))
211 return -EFAULT;
212
213 *pos += err;
214
215 return err;
216}
217
218static const struct file_operations size_fops = {
219 .owner = THIS_MODULE,
220 .open = simple_open,
221 .write = size_write,
222 .read = size_read,
223};
224
225static ssize_t limit_write(struct file *filp, const char __user *buf,
226 size_t count, loff_t *pos)
227{
228 struct mlx5_cache_ent *ent = filp->private_data;
229 struct mlx5_ib_dev *dev = ent->dev;
230 char lbuf[20];
231 u32 var;
232 int err;
233 int c;
234
235 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
236 return -EFAULT;
237
238 c = order2idx(dev, ent->order);
239 lbuf[sizeof(lbuf) - 1] = 0;
240
241 if (sscanf(lbuf, "%u", &var) != 1)
242 return -EINVAL;
243
244 if (var > ent->size)
245 return -EINVAL;
246
247 ent->limit = var;
248
249 if (ent->cur < ent->limit) {
250 err = add_keys(dev, c, 2 * ent->limit - ent->cur);
251 if (err)
252 return err;
253 }
254
255 return count;
256}
257
258static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
259 loff_t *pos)
260{
261 struct mlx5_cache_ent *ent = filp->private_data;
262 char lbuf[20];
263 int err;
264
265 if (*pos)
266 return 0;
267
268 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
269 if (err < 0)
270 return err;
271
272 if (copy_to_user(buf, lbuf, err))
273 return -EFAULT;
274
275 *pos += err;
276
277 return err;
278}
279
280static const struct file_operations limit_fops = {
281 .owner = THIS_MODULE,
282 .open = simple_open,
283 .write = limit_write,
284 .read = limit_read,
285};
286
287static int someone_adding(struct mlx5_mr_cache *cache)
288{
289 int i;
290
291 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
292 if (cache->ent[i].cur < cache->ent[i].limit)
293 return 1;
294 }
295
296 return 0;
297}
298
299static void __cache_work_func(struct mlx5_cache_ent *ent)
300{
301 struct mlx5_ib_dev *dev = ent->dev;
302 struct mlx5_mr_cache *cache = &dev->cache;
303 int i = order2idx(dev, ent->order);
304
305 if (cache->stopped)
306 return;
307
308 ent = &dev->cache.ent[i];
309 if (ent->cur < 2 * ent->limit) {
310 add_keys(dev, i, 1);
311 if (ent->cur < 2 * ent->limit)
312 queue_work(cache->wq, &ent->work);
313 } else if (ent->cur > 2 * ent->limit) {
314 if (!someone_adding(cache) &&
315 time_after(jiffies, cache->last_add + 60 * HZ)) {
316 remove_keys(dev, i, 1);
317 if (ent->cur > ent->limit)
318 queue_work(cache->wq, &ent->work);
319 } else {
320 queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
321 }
322 }
323}
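/*
 * The worker keeps each cache entry at roughly twice its limit: keys are
 * added one at a time while cur < 2 * limit, and removed (only when no
 * entry is below its limit and nothing was added in the last 60 seconds)
 * while cur > 2 * limit; otherwise shrinking is retried after a 60 second
 * delay.
 */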
324
325static void delayed_cache_work_func(struct work_struct *work)
326{
327 struct mlx5_cache_ent *ent;
328
329 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
330 __cache_work_func(ent);
331}
332
333static void cache_work_func(struct work_struct *work)
334{
335 struct mlx5_cache_ent *ent;
336
337 ent = container_of(work, struct mlx5_cache_ent, work);
338 __cache_work_func(ent);
339}
340
341static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
342{
343 struct mlx5_mr_cache *cache = &dev->cache;
344 struct mlx5_ib_mr *mr = NULL;
345 struct mlx5_cache_ent *ent;
346 int c;
347 int i;
348
349 c = order2idx(dev, order);
350 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
351 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
352 return NULL;
353 }
354
355 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
356 ent = &cache->ent[i];
357
358 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
359
360 spin_lock(&ent->lock);
361 if (!list_empty(&ent->head)) {
362 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
363 list);
364 list_del(&mr->list);
365 ent->cur--;
366 spin_unlock(&ent->lock);
367 if (ent->cur < ent->limit)
368 queue_work(cache->wq, &ent->work);
369 break;
370 }
371 spin_unlock(&ent->lock);
372
373 queue_work(cache->wq, &ent->work);
374
375 if (mr)
376 break;
377 }
378
379 if (!mr)
380 cache->ent[c].miss++;
381
382 return mr;
383}
384
385static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
386{
387 struct mlx5_mr_cache *cache = &dev->cache;
388 struct mlx5_cache_ent *ent;
389 int shrink = 0;
390 int c;
391
392 c = order2idx(dev, mr->order);
393 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
394 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
395 return;
396 }
397 ent = &cache->ent[c];
398 spin_lock(&ent->lock);
399 list_add_tail(&mr->list, &ent->head);
400 ent->cur++;
401 if (ent->cur > 2 * ent->limit)
402 shrink = 1;
403 spin_unlock(&ent->lock);
404
405 if (shrink)
406 queue_work(cache->wq, &ent->work);
407}
408
409static void clean_keys(struct mlx5_ib_dev *dev, int c)
410{
411 struct device *ddev = dev->ib_dev.dma_device;
412 struct mlx5_mr_cache *cache = &dev->cache;
413 struct mlx5_cache_ent *ent = &cache->ent[c];
414 struct mlx5_ib_mr *mr;
415 int size;
416 int err;
417
418 while (1) {
419 spin_lock(&ent->lock);
420 if (list_empty(&ent->head)) {
421 spin_unlock(&ent->lock);
422 return;
423 }
424 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
425 list_del(&mr->list);
426 ent->cur--;
427 ent->size--;
428 spin_unlock(&ent->lock);
429 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
430 if (err) {
431 mlx5_ib_warn(dev, "failed destroy mkey\n");
432 } else {
433 size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
434 dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
435 kfree(mr->pas);
436 kfree(mr);
437 }
438 }
439}
440
441static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
442{
443 struct mlx5_mr_cache *cache = &dev->cache;
444 struct mlx5_cache_ent *ent;
445 int i;
446
447 if (!mlx5_debugfs_root)
448 return 0;
449
450 cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
451 if (!cache->root)
452 return -ENOMEM;
453
454 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
455 ent = &cache->ent[i];
456 sprintf(ent->name, "%d", ent->order);
457 ent->dir = debugfs_create_dir(ent->name, cache->root);
458 if (!ent->dir)
459 return -ENOMEM;
460
461 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
462 &size_fops);
463 if (!ent->fsize)
464 return -ENOMEM;
465
466 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
467 &limit_fops);
468 if (!ent->flimit)
469 return -ENOMEM;
470
471 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
472 &ent->cur);
473 if (!ent->fcur)
474 return -ENOMEM;
475
476 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
477 &ent->miss);
478 if (!ent->fmiss)
479 return -ENOMEM;
480 }
481
482 return 0;
483}
484
485static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
486{
487 if (!mlx5_debugfs_root)
488 return;
489
490 debugfs_remove_recursive(dev->cache.root);
491}
492
493int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
494{
495 struct mlx5_mr_cache *cache = &dev->cache;
496 struct mlx5_cache_ent *ent;
497 int limit;
498 int size;
499 int err;
500 int i;
501
502 cache->wq = create_singlethread_workqueue("mkey_cache");
503 if (!cache->wq) {
504 mlx5_ib_warn(dev, "failed to create work queue\n");
505 return -ENOMEM;
506 }
507
508 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
509 INIT_LIST_HEAD(&cache->ent[i].head);
510 spin_lock_init(&cache->ent[i].lock);
511
512 ent = &cache->ent[i];
513 INIT_LIST_HEAD(&ent->head);
514 spin_lock_init(&ent->lock);
515 ent->order = i + 2;
516 ent->dev = dev;
517
518 if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
519 size = dev->mdev.profile->mr_cache[i].size;
520 limit = dev->mdev.profile->mr_cache[i].limit;
521 } else {
522 size = DEF_CACHE_SIZE;
523 limit = 0;
524 }
525 INIT_WORK(&ent->work, cache_work_func);
526 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
527 ent->limit = limit;
528 queue_work(cache->wq, &ent->work);
529 }
530
531 err = mlx5_mr_cache_debugfs_init(dev);
532 if (err)
533 mlx5_ib_warn(dev, "cache debugfs failure\n");
534
535 return 0;
536}
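/*
 * Entry i of the cache manages MRs of order i + 2 (4 pages and up), so with
 * MAX_MR_CACHE_ENTRIES entries the cache covers orders 2 through
 * MAX_MR_CACHE_ENTRIES + 1, which is presumably meant to line up with the
 * use_umr() cutoff in the registration path below.
 */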
537
538int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
539{
540 int i;
541
542 dev->cache.stopped = 1;
543 destroy_workqueue(dev->cache.wq);
544
545 mlx5_mr_cache_debugfs_cleanup(dev);
546
547 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
548 clean_keys(dev, i);
549
550 return 0;
551}
552
553struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
554{
555 struct mlx5_ib_dev *dev = to_mdev(pd->device);
556 struct mlx5_core_dev *mdev = &dev->mdev;
557 struct mlx5_create_mkey_mbox_in *in;
558 struct mlx5_mkey_seg *seg;
559 struct mlx5_ib_mr *mr;
560 int err;
561
562 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
563 if (!mr)
564 return ERR_PTR(-ENOMEM);
565
566 in = kzalloc(sizeof(*in), GFP_KERNEL);
567 if (!in) {
568 err = -ENOMEM;
569 goto err_free;
570 }
571
572 seg = &in->seg;
573 seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
574 seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
575 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
576 seg->start_addr = 0;
577
578 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
579 if (err)
580 goto err_in;
581
582 kfree(in);
583 mr->ibmr.lkey = mr->mmr.key;
584 mr->ibmr.rkey = mr->mmr.key;
585 mr->umem = NULL;
586
587 return &mr->ibmr;
588
589err_in:
590 kfree(in);
591
592err_free:
593 kfree(mr);
594
595 return ERR_PTR(err);
596}
597
598static int get_octo_len(u64 addr, u64 len, int page_size)
599{
600 u64 offset;
601 int npages;
602
603 offset = addr & (page_size - 1);
604 npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
605 return (npages + 1) / 2;
606}
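/*
 * Example: a 2 MB region starting on a page boundary with 4 KB pages spans
 * 512 pages, for which get_octo_len() returns (512 + 1) / 2 = 256; the
 * translation size is expressed in 16-byte units, each holding two 8-byte
 * page addresses, hence the divide by two (rounded up for odd page counts).
 */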
607
608static int use_umr(int order)
609{
610 return order <= 17;
611}
612
613static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
614 struct ib_sge *sg, u64 dma, int n, u32 key,
615 int page_shift, u64 virt_addr, u64 len,
616 int access_flags)
617{
618 struct mlx5_ib_dev *dev = to_mdev(pd->device);
619 struct ib_mr *mr = dev->umrc.mr;
620
621 sg->addr = dma;
622 sg->length = ALIGN(sizeof(u64) * n, 64);
623 sg->lkey = mr->lkey;
624
625 wr->next = NULL;
626 wr->send_flags = 0;
627 wr->sg_list = sg;
628 if (n)
629 wr->num_sge = 1;
630 else
631 wr->num_sge = 0;
632
633 wr->opcode = MLX5_IB_WR_UMR;
634 wr->wr.fast_reg.page_list_len = n;
635 wr->wr.fast_reg.page_shift = page_shift;
636 wr->wr.fast_reg.rkey = key;
637 wr->wr.fast_reg.iova_start = virt_addr;
638 wr->wr.fast_reg.length = len;
639 wr->wr.fast_reg.access_flags = access_flags;
640 wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
641}
642
643static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
644 struct ib_send_wr *wr, u32 key)
645{
646 wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
647 wr->opcode = MLX5_IB_WR_UMR;
648 wr->wr.fast_reg.rkey = key;
649}
650
651void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
652{
653 struct mlx5_ib_mr *mr;
654 struct ib_wc wc;
655 int err;
656
657 while (1) {
658 err = ib_poll_cq(cq, 1, &wc);
659 if (err < 0) {
660 pr_warn("poll cq error %d\n", err);
661 return;
662 }
663 if (err == 0)
664 break;
665
666 mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
667 mr->status = wc.status;
668 complete(&mr->done);
669 }
670 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
671}
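/*
 * The UMR CQ handler drains all available completions, records the status
 * in the mlx5_ib_mr stashed in wr_id and wakes the waiter through
 * complete(&mr->done); reg_umr() and unreg_umr() below post a single UMR
 * work request and block on that completion while holding umrc->sem.
 */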
672
673static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
674 u64 virt_addr, u64 len, int npages,
675 int page_shift, int order, int access_flags)
676{
677 struct mlx5_ib_dev *dev = to_mdev(pd->device);
678 struct umr_common *umrc = &dev->umrc;
679 struct ib_send_wr wr, *bad;
680 struct mlx5_ib_mr *mr;
681 struct ib_sge sg;
682 int err;
683 int i;
684
685 for (i = 0; i < 10; i++) {
686 mr = alloc_cached_mr(dev, order);
687 if (mr)
688 break;
689
690 err = add_keys(dev, order2idx(dev, order), 1);
691 if (err) {
692 mlx5_ib_warn(dev, "add_keys failed\n");
693 break;
694 }
695 }
696
697 if (!mr)
698 return ERR_PTR(-EAGAIN);
699
700 mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);
701
702 memset(&wr, 0, sizeof(wr));
703 wr.wr_id = (u64)(unsigned long)mr;
704 prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
705
706 /* We serialize polls so one process does not kidnap another's
707 * completion. This is not a problem since wr is completed in
708 * around 1 usec
709 */
710 down(&umrc->sem);
711 init_completion(&mr->done);
712 err = ib_post_send(umrc->qp, &wr, &bad);
713 if (err) {
714 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
715 up(&umrc->sem);
716 goto error;
717 }
718 wait_for_completion(&mr->done);
719 up(&umrc->sem);
720
721 if (mr->status != IB_WC_SUCCESS) {
722 mlx5_ib_warn(dev, "reg umr failed\n");
723 err = -EFAULT;
724 goto error;
725 }
726
727 return mr;
728
729error:
730 free_cached_mr(dev, mr);
731 return ERR_PTR(err);
732}
733
734static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
735 u64 length, struct ib_umem *umem,
736 int npages, int page_shift,
737 int access_flags)
738{
739 struct mlx5_ib_dev *dev = to_mdev(pd->device);
740 struct mlx5_create_mkey_mbox_in *in;
741 struct mlx5_ib_mr *mr;
742 int inlen;
743 int err;
744
745 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
746 if (!mr)
747 return ERR_PTR(-ENOMEM);
748
749 inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
750 in = mlx5_vzalloc(inlen);
751 if (!in) {
752 err = -ENOMEM;
753 goto err_1;
754 }
755 mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
756
757 in->seg.flags = convert_access(access_flags) |
758 MLX5_ACCESS_MODE_MTT;
759 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
760 in->seg.start_addr = cpu_to_be64(virt_addr);
761 in->seg.len = cpu_to_be64(length);
762 in->seg.bsfs_octo_size = 0;
763 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
764 in->seg.log2_page_size = page_shift;
765 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
766 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
767 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
768 if (err) {
769 mlx5_ib_warn(dev, "create mkey failed\n");
770 goto err_2;
771 }
772 mr->umem = umem;
773 mlx5_vfree(in);
774
775 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
776
777 return mr;
778
779err_2:
780 mlx5_vfree(in);
781
782err_1:
783 kfree(mr);
784
785 return ERR_PTR(err);
786}
787
788struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
789 u64 virt_addr, int access_flags,
790 struct ib_udata *udata)
791{
792 struct mlx5_ib_dev *dev = to_mdev(pd->device);
793 struct mlx5_ib_mr *mr = NULL;
794 struct ib_umem *umem;
795 int page_shift;
796 int npages;
797 int ncont;
798 int order;
799 int err;
800
801 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
802 start, virt_addr, length);
803 umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
804 0);
805 if (IS_ERR(umem)) {
806 mlx5_ib_dbg(dev, "umem get failed\n");
807 return (void *)umem;
808 }
809
810 mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
811 if (!npages) {
812 mlx5_ib_warn(dev, "avoid zero region\n");
813 err = -EINVAL;
814 goto error;
815 }
816
817 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
818 npages, ncont, order, page_shift);
819
820 if (use_umr(order)) {
821 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
822 order, access_flags);
823 if (PTR_ERR(mr) == -EAGAIN) {
 824			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
825 mr = NULL;
826 }
827 }
828
829 if (!mr)
830 mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
831 access_flags);
832
833 if (IS_ERR(mr)) {
834 err = PTR_ERR(mr);
835 goto error;
836 }
837
838 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
839
840 mr->umem = umem;
841 mr->npages = npages;
842 spin_lock(&dev->mr_lock);
843 dev->mdev.priv.reg_pages += npages;
844 spin_unlock(&dev->mr_lock);
845 mr->ibmr.lkey = mr->mmr.key;
846 mr->ibmr.rkey = mr->mmr.key;
847
848 return &mr->ibmr;
849
850error:
851 ib_umem_release(umem);
852 return ERR_PTR(err);
853}
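/*
 * Registration flow, in short: ib_umem_get() pins the buffer,
 * mlx5_ib_cont_pages() picks the best compound page size, and for orders
 * accepted by use_umr() the MR comes from the cache and is programmed with
 * a posted UMR work request (reg_umr()); if the cache is empty (-EAGAIN) or
 * the region is too large, reg_create() falls back to a full CREATE_MKEY
 * firmware command.
 */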
854
855static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
856{
857 struct umr_common *umrc = &dev->umrc;
858 struct ib_send_wr wr, *bad;
859 int err;
860
861 memset(&wr, 0, sizeof(wr));
862 wr.wr_id = (u64)(unsigned long)mr;
863 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
864
865 down(&umrc->sem);
866 init_completion(&mr->done);
867 err = ib_post_send(umrc->qp, &wr, &bad);
868 if (err) {
869 up(&umrc->sem);
870 mlx5_ib_dbg(dev, "err %d\n", err);
871 goto error;
872 }
873 wait_for_completion(&mr->done);
874 up(&umrc->sem);
875 if (mr->status != IB_WC_SUCCESS) {
876 mlx5_ib_warn(dev, "unreg umr failed\n");
877 err = -EFAULT;
878 goto error;
879 }
880 return 0;
881
882error:
883 return err;
884}
885
886int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
887{
888 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
889 struct mlx5_ib_mr *mr = to_mmr(ibmr);
890 struct ib_umem *umem = mr->umem;
891 int npages = mr->npages;
892 int umred = mr->umred;
893 int err;
894
895 if (!umred) {
896 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
897 if (err) {
898 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
899 mr->mmr.key, err);
900 return err;
901 }
902 } else {
903 err = unreg_umr(dev, mr);
904 if (err) {
905 mlx5_ib_warn(dev, "failed unregister\n");
906 return err;
907 }
908 free_cached_mr(dev, mr);
909 }
910
911 if (umem) {
912 ib_umem_release(umem);
913 spin_lock(&dev->mr_lock);
914 dev->mdev.priv.reg_pages -= npages;
915 spin_unlock(&dev->mr_lock);
916 }
917
918 if (!umred)
919 kfree(mr);
920
921 return 0;
922}
923
924struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
925 int max_page_list_len)
926{
927 struct mlx5_ib_dev *dev = to_mdev(pd->device);
928 struct mlx5_create_mkey_mbox_in *in;
929 struct mlx5_ib_mr *mr;
930 int err;
931
932 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
933 if (!mr)
934 return ERR_PTR(-ENOMEM);
935
936 in = kzalloc(sizeof(*in), GFP_KERNEL);
937 if (!in) {
938 err = -ENOMEM;
939 goto err_free;
940 }
941
942 in->seg.status = 1 << 6; /* free */
943 in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
944 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
945 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
946 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
947 /*
948 * TBD not needed - issue 197292 */
949 in->seg.log2_page_size = PAGE_SHIFT;
950
951 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
952 kfree(in);
953 if (err)
954 goto err_free;
955
956 mr->ibmr.lkey = mr->mmr.key;
957 mr->ibmr.rkey = mr->mmr.key;
958 mr->umem = NULL;
959
960 return &mr->ibmr;
961
962err_free:
963 kfree(mr);
964 return ERR_PTR(err);
965}
966
967struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
968 int page_list_len)
969{
970 struct mlx5_ib_fast_reg_page_list *mfrpl;
971 int size = page_list_len * sizeof(u64);
972
973 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
974 if (!mfrpl)
975 return ERR_PTR(-ENOMEM);
976
977 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
978 if (!mfrpl->ibfrpl.page_list)
979 goto err_free;
980
981 mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
982 size, &mfrpl->map,
983 GFP_KERNEL);
984 if (!mfrpl->mapped_page_list)
985 goto err_free;
986
987 WARN_ON(mfrpl->map & 0x3f);
988
989 return &mfrpl->ibfrpl;
990
991err_free:
992 kfree(mfrpl->ibfrpl.page_list);
993 kfree(mfrpl);
994 return ERR_PTR(-ENOMEM);
995}
996
997void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
998{
999 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
1000 struct mlx5_ib_dev *dev = to_mdev(page_list->device);
1001 int size = page_list->max_page_list_len * sizeof(u64);
1002
1003 dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
1004 mfrpl->map);
1005 kfree(mfrpl->ibfrpl.page_list);
1006 kfree(mfrpl);
1007}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
new file mode 100644
index 000000000000..16ac54c9819f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -0,0 +1,2524 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
35#include "mlx5_ib.h"
36#include "user.h"
37
38/* not supported currently */
39static int wq_signature;
40
41enum {
42 MLX5_IB_ACK_REQ_FREQ = 8,
43};
44
45enum {
46 MLX5_IB_DEFAULT_SCHED_QUEUE = 0x83,
47 MLX5_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f,
48 MLX5_IB_LINK_TYPE_IB = 0,
49 MLX5_IB_LINK_TYPE_ETH = 1
50};
51
52enum {
53 MLX5_IB_SQ_STRIDE = 6,
54 MLX5_IB_CACHE_LINE_SIZE = 64,
55};
56
57static const u32 mlx5_ib_opcode[] = {
58 [IB_WR_SEND] = MLX5_OPCODE_SEND,
59 [IB_WR_SEND_WITH_IMM] = MLX5_OPCODE_SEND_IMM,
60 [IB_WR_RDMA_WRITE] = MLX5_OPCODE_RDMA_WRITE,
61 [IB_WR_RDMA_WRITE_WITH_IMM] = MLX5_OPCODE_RDMA_WRITE_IMM,
62 [IB_WR_RDMA_READ] = MLX5_OPCODE_RDMA_READ,
63 [IB_WR_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_CS,
64 [IB_WR_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_FA,
65 [IB_WR_SEND_WITH_INV] = MLX5_OPCODE_SEND_INVAL,
66 [IB_WR_LOCAL_INV] = MLX5_OPCODE_UMR,
67 [IB_WR_FAST_REG_MR] = MLX5_OPCODE_UMR,
68 [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = MLX5_OPCODE_ATOMIC_MASKED_CS,
69 [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = MLX5_OPCODE_ATOMIC_MASKED_FA,
70 [MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
71};
72
73struct umr_wr {
74 u64 virt_addr;
75 struct ib_pd *pd;
76 unsigned int page_shift;
77 unsigned int npages;
78 u32 length;
79 int access_flags;
80 u32 mkey;
81};
82
83static int is_qp0(enum ib_qp_type qp_type)
84{
85 return qp_type == IB_QPT_SMI;
86}
87
88static int is_qp1(enum ib_qp_type qp_type)
89{
90 return qp_type == IB_QPT_GSI;
91}
92
93static int is_sqp(enum ib_qp_type qp_type)
94{
95 return is_qp0(qp_type) || is_qp1(qp_type);
96}
97
98static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
99{
100 return mlx5_buf_offset(&qp->buf, offset);
101}
102
103static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
104{
105 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
106}
107
108void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
109{
110 return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
111}
112
113static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
114{
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116 struct ib_event event;
117
118 if (type == MLX5_EVENT_TYPE_PATH_MIG)
119 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
120
121 if (ibqp->event_handler) {
122 event.device = ibqp->device;
123 event.element.qp = ibqp;
124 switch (type) {
125 case MLX5_EVENT_TYPE_PATH_MIG:
126 event.event = IB_EVENT_PATH_MIG;
127 break;
128 case MLX5_EVENT_TYPE_COMM_EST:
129 event.event = IB_EVENT_COMM_EST;
130 break;
131 case MLX5_EVENT_TYPE_SQ_DRAINED:
132 event.event = IB_EVENT_SQ_DRAINED;
133 break;
134 case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
135 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
136 break;
137 case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
138 event.event = IB_EVENT_QP_FATAL;
139 break;
140 case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
141 event.event = IB_EVENT_PATH_MIG_ERR;
142 break;
143 case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
144 event.event = IB_EVENT_QP_REQ_ERR;
145 break;
146 case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
147 event.event = IB_EVENT_QP_ACCESS_ERR;
148 break;
149 default:
150 pr_warn("mlx5_ib: Unexpected event type %d on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
159 int has_rq, struct mlx5_ib_qp *qp, struct mlx5_ib_create_qp *ucmd)
160{
161 int wqe_size;
162 int wq_size;
163
164 /* Sanity check RQ size before proceeding */
165 if (cap->max_recv_wr > dev->mdev.caps.max_wqes)
166 return -EINVAL;
167
168 if (!has_rq) {
169 qp->rq.max_gs = 0;
170 qp->rq.wqe_cnt = 0;
171 qp->rq.wqe_shift = 0;
172 } else {
173 if (ucmd) {
174 qp->rq.wqe_cnt = ucmd->rq_wqe_count;
175 qp->rq.wqe_shift = ucmd->rq_wqe_shift;
176 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
177 qp->rq.max_post = qp->rq.wqe_cnt;
178 } else {
179 wqe_size = qp->wq_sig ? sizeof(struct mlx5_wqe_signature_seg) : 0;
180 wqe_size += cap->max_recv_sge * sizeof(struct mlx5_wqe_data_seg);
181 wqe_size = roundup_pow_of_two(wqe_size);
182 wq_size = roundup_pow_of_two(cap->max_recv_wr) * wqe_size;
183 wq_size = max_t(int, wq_size, MLX5_SEND_WQE_BB);
184 qp->rq.wqe_cnt = wq_size / wqe_size;
185 if (wqe_size > dev->mdev.caps.max_rq_desc_sz) {
186 mlx5_ib_dbg(dev, "wqe_size %d, max %d\n",
187 wqe_size,
188 dev->mdev.caps.max_rq_desc_sz);
189 return -EINVAL;
190 }
191 qp->rq.wqe_shift = ilog2(wqe_size);
192 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof(struct mlx5_wqe_data_seg) - qp->wq_sig;
193 qp->rq.max_post = qp->rq.wqe_cnt;
194 }
195 }
196
197 return 0;
198}
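/*
 * Rough example for a kernel QP (no ucmd, no WQ signature), assuming the
 * usual 16-byte mlx5_wqe_data_seg: max_recv_wr = 100 and max_recv_sge = 2
 * give wqe_size = 32, wq_size = roundup_pow_of_two(100) * 32 = 4096, so
 * rq.wqe_cnt = 128, rq.wqe_shift = 5 and rq.max_gs = 2.
 */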
199
200static int sq_overhead(enum ib_qp_type qp_type)
201{
 202	int size = 0;
203
204 switch (qp_type) {
205 case IB_QPT_XRC_INI:
206 size = sizeof(struct mlx5_wqe_xrc_seg);
207 /* fall through */
208 case IB_QPT_RC:
209 size += sizeof(struct mlx5_wqe_ctrl_seg) +
210 sizeof(struct mlx5_wqe_atomic_seg) +
211 sizeof(struct mlx5_wqe_raddr_seg);
212 break;
213
214 case IB_QPT_UC:
215 size = sizeof(struct mlx5_wqe_ctrl_seg) +
216 sizeof(struct mlx5_wqe_raddr_seg);
217 break;
218
219 case IB_QPT_UD:
220 case IB_QPT_SMI:
221 case IB_QPT_GSI:
222 size = sizeof(struct mlx5_wqe_ctrl_seg) +
223 sizeof(struct mlx5_wqe_datagram_seg);
224 break;
225
226 case MLX5_IB_QPT_REG_UMR:
227 size = sizeof(struct mlx5_wqe_ctrl_seg) +
228 sizeof(struct mlx5_wqe_umr_ctrl_seg) +
229 sizeof(struct mlx5_mkey_seg);
230 break;
231
232 default:
233 return -EINVAL;
234 }
235
236 return size;
237}
238
239static int calc_send_wqe(struct ib_qp_init_attr *attr)
240{
241 int inl_size = 0;
242 int size;
243
244 size = sq_overhead(attr->qp_type);
245 if (size < 0)
246 return size;
247
248 if (attr->cap.max_inline_data) {
249 inl_size = size + sizeof(struct mlx5_wqe_inline_seg) +
250 attr->cap.max_inline_data;
251 }
252
253 size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg);
254
255 return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
256}
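/*
 * Rough example, assuming the usual 16-byte control, remote-address, atomic
 * and data segments and 64-byte basic blocks: an RC QP with max_send_sge = 2
 * and no inline data has sq_overhead() = 48, so calc_send_wqe() returns
 * ALIGN(48 + 2 * 16, MLX5_SEND_WQE_BB) = 128, i.e. two basic blocks per
 * send WQE.
 */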
257
258static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
259 struct mlx5_ib_qp *qp)
260{
261 int wqe_size;
262 int wq_size;
263
264 if (!attr->cap.max_send_wr)
265 return 0;
266
267 wqe_size = calc_send_wqe(attr);
268 mlx5_ib_dbg(dev, "wqe_size %d\n", wqe_size);
269 if (wqe_size < 0)
270 return wqe_size;
271
272 if (wqe_size > dev->mdev.caps.max_sq_desc_sz) {
273		mlx5_ib_dbg(dev, "wqe_size %d > max_sq_desc_sz %d\n", wqe_size, dev->mdev.caps.max_sq_desc_sz);
274 return -EINVAL;
275 }
276
277 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
278 sizeof(struct mlx5_wqe_inline_seg);
279 attr->cap.max_inline_data = qp->max_inline_data;
280
281 wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
282 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
283 qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
284 qp->sq.max_gs = attr->cap.max_send_sge;
285 qp->sq.max_post = 1 << ilog2(wq_size / wqe_size);
286
287 return wq_size;
288}
289
290static int set_user_buf_size(struct mlx5_ib_dev *dev,
291 struct mlx5_ib_qp *qp,
292 struct mlx5_ib_create_qp *ucmd)
293{
294 int desc_sz = 1 << qp->sq.wqe_shift;
295
296 if (desc_sz > dev->mdev.caps.max_sq_desc_sz) {
297 mlx5_ib_warn(dev, "desc_sz %d, max_sq_desc_sz %d\n",
298 desc_sz, dev->mdev.caps.max_sq_desc_sz);
299 return -EINVAL;
300 }
301
302 if (ucmd->sq_wqe_count && ((1 << ilog2(ucmd->sq_wqe_count)) != ucmd->sq_wqe_count)) {
303		mlx5_ib_warn(dev, "sq_wqe_count %d is not a power of two\n",
304			ucmd->sq_wqe_count);
305 return -EINVAL;
306 }
307
308 qp->sq.wqe_cnt = ucmd->sq_wqe_count;
309
310 if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) {
311 mlx5_ib_warn(dev, "wqe_cnt %d, max_wqes %d\n",
312 qp->sq.wqe_cnt, dev->mdev.caps.max_wqes);
313 return -EINVAL;
314 }
315
316 qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
317 (qp->sq.wqe_cnt << 6);
318
319 return 0;
320}
321
322static int qp_has_rq(struct ib_qp_init_attr *attr)
323{
324 if (attr->qp_type == IB_QPT_XRC_INI ||
325 attr->qp_type == IB_QPT_XRC_TGT || attr->srq ||
326 attr->qp_type == MLX5_IB_QPT_REG_UMR ||
327 !attr->cap.max_recv_wr)
328 return 0;
329
330 return 1;
331}
332
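/*
 * UUAR (micro UAR / BlueFlame register) bookkeeping.  Each UAR page is
 * carved into MLX5_BF_REGS_PER_PAGE registers.  In this driver's
 * scheme, index 0 backs the LOW latency class and is always shared
 * (only its use count is tracked); the top num_low_latency_uuars
 * indices are handed out exclusively to HIGH class requesters via the
 * bitmap; the rest are shared by MEDIUM class requesters, least used
 * first, skipping indices 2 and 3 of every group of four (index 2 is
 * what FAST_PATH requesters such as the REG_UMR QP get).
 */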
333static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
334{
335 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
336 int start_uuar;
337 int i;
338
339 start_uuar = nuuars - uuari->num_low_latency_uuars;
340 for (i = start_uuar; i < nuuars; i++) {
341 if (!test_bit(i, uuari->bitmap)) {
342 set_bit(i, uuari->bitmap);
343 uuari->count[i]++;
344 return i;
345 }
346 }
347
348 return -ENOMEM;
349}
350
351static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
352{
353 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
354 int minidx = 1;
355 int uuarn;
356 int end;
357 int i;
358
359 end = nuuars - uuari->num_low_latency_uuars;
360
361 for (i = 1; i < end; i++) {
362 uuarn = i & 3;
363 if (uuarn == 2 || uuarn == 3)
364 continue;
365
366 if (uuari->count[i] < uuari->count[minidx])
367 minidx = i;
368 }
369
370 uuari->count[minidx]++;
371 return minidx;
372}
373
374static int alloc_uuar(struct mlx5_uuar_info *uuari,
375 enum mlx5_ib_latency_class lat)
376{
377 int uuarn = -EINVAL;
378
379 mutex_lock(&uuari->lock);
380 switch (lat) {
381 case MLX5_IB_LATENCY_CLASS_LOW:
382 uuarn = 0;
383 uuari->count[uuarn]++;
384 break;
385
386 case MLX5_IB_LATENCY_CLASS_MEDIUM:
387 uuarn = alloc_med_class_uuar(uuari);
388 break;
389
390 case MLX5_IB_LATENCY_CLASS_HIGH:
391 uuarn = alloc_high_class_uuar(uuari);
392 break;
393
394 case MLX5_IB_LATENCY_CLASS_FAST_PATH:
395 uuarn = 2;
396 break;
397 }
398 mutex_unlock(&uuari->lock);
399
400 return uuarn;
401}
402
403static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
404{
405 clear_bit(uuarn, uuari->bitmap);
406 --uuari->count[uuarn];
407}
408
409static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
410{
411 clear_bit(uuarn, uuari->bitmap);
412 --uuari->count[uuarn];
413}
414
415static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
416{
417 int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
418 int high_uuar = nuuars - uuari->num_low_latency_uuars;
419
420 mutex_lock(&uuari->lock);
421 if (uuarn == 0) {
422 --uuari->count[uuarn];
423 goto out;
424 }
425
426 if (uuarn < high_uuar) {
427 free_med_class_uuar(uuari, uuarn);
428 goto out;
429 }
430
431 free_high_class_uuar(uuari, uuarn);
432
433out:
434 mutex_unlock(&uuari->lock);
435}
436
437static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
438{
439 switch (state) {
440 case IB_QPS_RESET: return MLX5_QP_STATE_RST;
441 case IB_QPS_INIT: return MLX5_QP_STATE_INIT;
442 case IB_QPS_RTR: return MLX5_QP_STATE_RTR;
443 case IB_QPS_RTS: return MLX5_QP_STATE_RTS;
444 case IB_QPS_SQD: return MLX5_QP_STATE_SQD;
445 case IB_QPS_SQE: return MLX5_QP_STATE_SQER;
446 case IB_QPS_ERR: return MLX5_QP_STATE_ERR;
447 default: return -1;
448 }
449}
450
451static int to_mlx5_st(enum ib_qp_type type)
452{
453 switch (type) {
454 case IB_QPT_RC: return MLX5_QP_ST_RC;
455 case IB_QPT_UC: return MLX5_QP_ST_UC;
456 case IB_QPT_UD: return MLX5_QP_ST_UD;
457 case MLX5_IB_QPT_REG_UMR: return MLX5_QP_ST_REG_UMR;
458 case IB_QPT_XRC_INI:
459 case IB_QPT_XRC_TGT: return MLX5_QP_ST_XRC;
460 case IB_QPT_SMI: return MLX5_QP_ST_QP0;
461 case IB_QPT_GSI: return MLX5_QP_ST_QP1;
462 case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
463 case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
464 case IB_QPT_RAW_PACKET:
465 case IB_QPT_MAX:
466 default: return -EINVAL;
467 }
468}
469
470static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
471{
472 return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
473}
474
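/*
 * Userspace-backed QP creation: pick a UUAR for the doorbell, pin the
 * user buffer with ib_umem_get(), translate it into the physical
 * address array of the create mailbox, map the user doorbell record,
 * and report the chosen UUAR index back through the response.  The
 * error unwind mirrors the setup order.
 */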
475static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
476 struct mlx5_ib_qp *qp, struct ib_udata *udata,
477 struct mlx5_create_qp_mbox_in **in,
478 struct mlx5_ib_create_qp_resp *resp, int *inlen)
479{
480 struct mlx5_ib_ucontext *context;
481 struct mlx5_ib_create_qp ucmd;
482 int page_shift;
483 int uar_index;
484 int npages;
485 u32 offset;
486 int uuarn;
487 int ncont;
488 int err;
489
490 err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
491 if (err) {
492 mlx5_ib_dbg(dev, "copy failed\n");
493 return err;
494 }
495
496 context = to_mucontext(pd->uobject->context);
497 /*
498 * TBD: should come from the verbs when we have the API
499 */
500 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
501 if (uuarn < 0) {
502 mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
503 mlx5_ib_dbg(dev, "reverting to high latency\n");
504 uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
505 if (uuarn < 0) {
506 mlx5_ib_dbg(dev, "uuar allocation failed\n");
507 return uuarn;
508 }
509 }
510
511 uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
512 mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
513
514 err = set_user_buf_size(dev, qp, &ucmd);
515 if (err)
516 goto err_uuar;
517
518 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
519 qp->buf_size, 0, 0);
520 if (IS_ERR(qp->umem)) {
521 mlx5_ib_dbg(dev, "umem_get failed\n");
522 err = PTR_ERR(qp->umem);
523 goto err_uuar;
524 }
525
526 mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
527 &ncont, NULL);
528 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
529 if (err) {
530 mlx5_ib_warn(dev, "bad offset\n");
531 goto err_umem;
532 }
533 mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
534 ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
535
536 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
537 *in = mlx5_vzalloc(*inlen);
538 if (!*in) {
539 err = -ENOMEM;
540 goto err_umem;
541 }
542 mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
543 (*in)->ctx.log_pg_sz_remote_qpn =
544 cpu_to_be32((page_shift - PAGE_SHIFT) << 24);
545 (*in)->ctx.params2 = cpu_to_be32(offset << 6);
546
547 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
548 resp->uuar_index = uuarn;
549 qp->uuarn = uuarn;
550
551 err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
552 if (err) {
553 mlx5_ib_dbg(dev, "map failed\n");
554 goto err_free;
555 }
556
557 err = ib_copy_to_udata(udata, resp, sizeof(*resp));
558 if (err) {
559 mlx5_ib_dbg(dev, "copy failed\n");
560 goto err_unmap;
561 }
562 qp->create_type = MLX5_QP_USER;
563
564 return 0;
565
566err_unmap:
567 mlx5_ib_db_unmap_user(context, &qp->db);
568
569err_free:
570 mlx5_vfree(*in);
571
572err_umem:
573 ib_umem_release(qp->umem);
574
575err_uuar:
576 free_uuar(&context->uuari, uuarn);
577 return err;
578}
579
580static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
581{
582 struct mlx5_ib_ucontext *context;
583
584 context = to_mucontext(pd->uobject->context);
585 mlx5_ib_db_unmap_user(context, &qp->db);
586 ib_umem_release(qp->umem);
587 free_uuar(&context->uuari, qp->uuarn);
588}
589
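/*
 * Kernel QP variant: the WQ buffer, doorbell record and bookkeeping
 * arrays (wrid, wr_data, w_list, wqe_head) are allocated in the
 * kernel, and the QP context advertises fast-register support for all
 * kernel QPs.
 */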
590static int create_kernel_qp(struct mlx5_ib_dev *dev,
591 struct ib_qp_init_attr *init_attr,
592 struct mlx5_ib_qp *qp,
593 struct mlx5_create_qp_mbox_in **in, int *inlen)
594{
595 enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
596 struct mlx5_uuar_info *uuari;
597 int uar_index;
598 int uuarn;
599 int err;
600
601 uuari = &dev->mdev.priv.uuari;
602 if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
603 qp->flags |= MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK;
604
605 if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
606 lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
607
608 uuarn = alloc_uuar(uuari, lc);
609 if (uuarn < 0) {
610		mlx5_ib_dbg(dev, "uuar allocation failed\n");
611 return -ENOMEM;
612 }
613
614 qp->bf = &uuari->bfs[uuarn];
615 uar_index = qp->bf->uar->index;
616
617 err = calc_sq_size(dev, init_attr, qp);
618 if (err < 0) {
619 mlx5_ib_dbg(dev, "err %d\n", err);
620 goto err_uuar;
621 }
622
623 qp->rq.offset = 0;
624 qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
625 qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
626
627 err = mlx5_buf_alloc(&dev->mdev, qp->buf_size, PAGE_SIZE * 2, &qp->buf);
628 if (err) {
629 mlx5_ib_dbg(dev, "err %d\n", err);
630 goto err_uuar;
631 }
632
633 qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
634 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
635 *in = mlx5_vzalloc(*inlen);
636 if (!*in) {
637 err = -ENOMEM;
638 goto err_buf;
639 }
640 (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
641 (*in)->ctx.log_pg_sz_remote_qpn = cpu_to_be32((qp->buf.page_shift - PAGE_SHIFT) << 24);
642 /* Set "fast registration enabled" for all kernel QPs */
643 (*in)->ctx.params1 |= cpu_to_be32(1 << 11);
644 (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
645
646 mlx5_fill_page_array(&qp->buf, (*in)->pas);
647
648 err = mlx5_db_alloc(&dev->mdev, &qp->db);
649 if (err) {
650 mlx5_ib_dbg(dev, "err %d\n", err);
651 goto err_free;
652 }
653
654 qp->db.db[0] = 0;
655 qp->db.db[1] = 0;
656
657 qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid), GFP_KERNEL);
658 qp->sq.wr_data = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data), GFP_KERNEL);
659 qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(*qp->rq.wrid), GFP_KERNEL);
660 qp->sq.w_list = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.w_list), GFP_KERNEL);
661 qp->sq.wqe_head = kmalloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head), GFP_KERNEL);
662
663 if (!qp->sq.wrid || !qp->sq.wr_data || !qp->rq.wrid ||
664 !qp->sq.w_list || !qp->sq.wqe_head) {
665 err = -ENOMEM;
666 goto err_wrid;
667 }
668 qp->create_type = MLX5_QP_KERNEL;
669
670 return 0;
671
672err_wrid:
673 mlx5_db_free(&dev->mdev, &qp->db);
674 kfree(qp->sq.wqe_head);
675 kfree(qp->sq.w_list);
676 kfree(qp->sq.wrid);
677 kfree(qp->sq.wr_data);
678 kfree(qp->rq.wrid);
679
680err_free:
681 mlx5_vfree(*in);
682
683err_buf:
684 mlx5_buf_free(&dev->mdev, &qp->buf);
685
686err_uuar:
687 free_uuar(&dev->mdev.priv.uuari, uuarn);
688 return err;
689}
690
691static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
692{
693 mlx5_db_free(&dev->mdev, &qp->db);
694 kfree(qp->sq.wqe_head);
695 kfree(qp->sq.w_list);
696 kfree(qp->sq.wrid);
697 kfree(qp->sq.wr_data);
698 kfree(qp->rq.wrid);
699 mlx5_buf_free(&dev->mdev, &qp->buf);
700 free_uuar(&dev->mdev.priv.uuari, qp->bf->uuarn);
701}
702
703static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
704{
705 if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
706 (attr->qp_type == IB_QPT_XRC_INI))
707 return cpu_to_be32(MLX5_SRQ_RQ);
708 else if (!qp->has_rq)
709 return cpu_to_be32(MLX5_ZERO_LEN_RQ);
710 else
711 return cpu_to_be32(MLX5_NON_ZERO_RQ);
712}
713
714static int is_connected(enum ib_qp_type qp_type)
715{
716 if (qp_type == IB_QPT_RC || qp_type == IB_QPT_UC)
717 return 1;
718
719 return 0;
720}
721
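/*
 * Common QP creation: size the queues, build the create mailbox either
 * from user parameters (create_user_qp) or from kernel allocations
 * (create_kernel_qp), fill in the QP context (PD, CQs, SRQ/XRCD,
 * scatter-to-CQE sizes), and hand it to the firmware through
 * mlx5_core_create_qp().
 */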
722static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
723 struct ib_qp_init_attr *init_attr,
724 struct ib_udata *udata, struct mlx5_ib_qp *qp)
725{
726 struct mlx5_ib_resources *devr = &dev->devr;
727 struct mlx5_ib_create_qp_resp resp;
728 struct mlx5_create_qp_mbox_in *in;
729 struct mlx5_ib_create_qp ucmd;
730 int inlen = sizeof(*in);
731 int err;
732
733 mutex_init(&qp->mutex);
734 spin_lock_init(&qp->sq.lock);
735 spin_lock_init(&qp->rq.lock);
736
737 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
738 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
739
740 if (pd && pd->uobject) {
741 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
742 mlx5_ib_dbg(dev, "copy failed\n");
743 return -EFAULT;
744 }
745
746 qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
747 qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
748 } else {
749 qp->wq_sig = !!wq_signature;
750 }
751
752 qp->has_rq = qp_has_rq(init_attr);
753 err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
754 qp, (pd && pd->uobject) ? &ucmd : NULL);
755 if (err) {
756 mlx5_ib_dbg(dev, "err %d\n", err);
757 return err;
758 }
759
760 if (pd) {
761 if (pd->uobject) {
762 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
763 if (ucmd.rq_wqe_shift != qp->rq.wqe_shift ||
764 ucmd.rq_wqe_count != qp->rq.wqe_cnt) {
765 mlx5_ib_dbg(dev, "invalid rq params\n");
766 return -EINVAL;
767 }
768 if (ucmd.sq_wqe_count > dev->mdev.caps.max_wqes) {
769 mlx5_ib_dbg(dev, "requested sq_wqe_count (%d) > max allowed (%d)\n",
770 ucmd.sq_wqe_count, dev->mdev.caps.max_wqes);
771 return -EINVAL;
772 }
773 err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
774 if (err)
775 mlx5_ib_dbg(dev, "err %d\n", err);
776 } else {
777 err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
778 if (err)
779 mlx5_ib_dbg(dev, "err %d\n", err);
780 else
781 qp->pa_lkey = to_mpd(pd)->pa_lkey;
782 }
783
784 if (err)
785 return err;
786 } else {
787 in = mlx5_vzalloc(sizeof(*in));
788 if (!in)
789 return -ENOMEM;
790
791 qp->create_type = MLX5_QP_EMPTY;
792 }
793
794 if (is_sqp(init_attr->qp_type))
795 qp->port = init_attr->port_num;
796
797 in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
798 MLX5_QP_PM_MIGRATED << 11);
799
800 if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
801 in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
802 else
803 in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
804
805 if (qp->wq_sig)
806 in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
807
808 if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
809 int rcqe_sz;
810 int scqe_sz;
811
812 rcqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->recv_cq);
813 scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
814
815 if (rcqe_sz == 128)
816 in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
817 else
818 in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
819
820 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
821 if (scqe_sz == 128)
822 in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
823 else
824 in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
825 }
826 }
827
828 if (qp->rq.wqe_cnt) {
829 in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
830 in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
831 }
832
833 in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
834
835 if (qp->sq.wqe_cnt)
836 in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
837 else
838 in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
839
840 /* Set default resources */
841 switch (init_attr->qp_type) {
842 case IB_QPT_XRC_TGT:
843 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
844 in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
845 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
846 in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
847 break;
848 case IB_QPT_XRC_INI:
849 in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
850 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
851 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
852 break;
853 default:
854 if (init_attr->srq) {
855 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
856 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
857 } else {
858 in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
859 in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
860 }
861 }
862
863 if (init_attr->send_cq)
864 in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
865
866 if (init_attr->recv_cq)
867 in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
868
869 in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
870
871 err = mlx5_core_create_qp(&dev->mdev, &qp->mqp, in, inlen);
872 if (err) {
873 mlx5_ib_dbg(dev, "create qp failed\n");
874 goto err_create;
875 }
876
877 mlx5_vfree(in);
878 /* Hardware wants QPN written in big-endian order (after
879 * shifting) for send doorbell. Precompute this value to save
880 * a little bit when posting sends.
881 */
882 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
883
884 qp->mqp.event = mlx5_ib_qp_event;
885
886 return 0;
887
888err_create:
889 if (qp->create_type == MLX5_QP_USER)
890 destroy_qp_user(pd, qp);
891 else if (qp->create_type == MLX5_QP_KERNEL)
892 destroy_qp_kernel(dev, qp);
893
894 mlx5_vfree(in);
895 return err;
896}
897
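/*
 * When both CQs must be held (e.g. while scrubbing them during QP
 * destruction), take the spinlocks in ascending CQN order so that two
 * contexts locking the same pair from opposite directions cannot
 * deadlock.  The __acquire/__release annotations keep sparse happy
 * when send and receive CQ are the same and only one lock is taken.
 */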
898static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
899 __acquires(&send_cq->lock) __acquires(&recv_cq->lock)
900{
901 if (send_cq) {
902 if (recv_cq) {
903 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
904 spin_lock_irq(&send_cq->lock);
905 spin_lock_nested(&recv_cq->lock,
906 SINGLE_DEPTH_NESTING);
907 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
908 spin_lock_irq(&send_cq->lock);
909 __acquire(&recv_cq->lock);
910 } else {
911 spin_lock_irq(&recv_cq->lock);
912 spin_lock_nested(&send_cq->lock,
913 SINGLE_DEPTH_NESTING);
914 }
915 } else {
916 spin_lock_irq(&send_cq->lock);
917 }
918 } else if (recv_cq) {
919 spin_lock_irq(&recv_cq->lock);
920 }
921}
922
923static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv_cq)
924 __releases(&send_cq->lock) __releases(&recv_cq->lock)
925{
926 if (send_cq) {
927 if (recv_cq) {
928 if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
929 spin_unlock(&recv_cq->lock);
930 spin_unlock_irq(&send_cq->lock);
931 } else if (send_cq->mcq.cqn == recv_cq->mcq.cqn) {
932 __release(&recv_cq->lock);
933 spin_unlock_irq(&send_cq->lock);
934 } else {
935 spin_unlock(&send_cq->lock);
936 spin_unlock_irq(&recv_cq->lock);
937 }
938 } else {
939 spin_unlock_irq(&send_cq->lock);
940 }
941 } else if (recv_cq) {
942 spin_unlock_irq(&recv_cq->lock);
943 }
944}
945
946static struct mlx5_ib_pd *get_pd(struct mlx5_ib_qp *qp)
947{
948 return to_mpd(qp->ibqp.pd);
949}
950
951static void get_cqs(struct mlx5_ib_qp *qp,
952 struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq)
953{
954 switch (qp->ibqp.qp_type) {
955 case IB_QPT_XRC_TGT:
956 *send_cq = NULL;
957 *recv_cq = NULL;
958 break;
959 case MLX5_IB_QPT_REG_UMR:
960 case IB_QPT_XRC_INI:
961 *send_cq = to_mcq(qp->ibqp.send_cq);
962 *recv_cq = NULL;
963 break;
964
965 case IB_QPT_SMI:
966 case IB_QPT_GSI:
967 case IB_QPT_RC:
968 case IB_QPT_UC:
969 case IB_QPT_UD:
970 case IB_QPT_RAW_IPV6:
971 case IB_QPT_RAW_ETHERTYPE:
972 *send_cq = to_mcq(qp->ibqp.send_cq);
973 *recv_cq = to_mcq(qp->ibqp.recv_cq);
974 break;
975
976 case IB_QPT_RAW_PACKET:
977 case IB_QPT_MAX:
978 default:
979 *send_cq = NULL;
980 *recv_cq = NULL;
981 break;
982 }
983}
984
985static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
986{
987 struct mlx5_ib_cq *send_cq, *recv_cq;
988 struct mlx5_modify_qp_mbox_in *in;
989 int err;
990
991 in = kzalloc(sizeof(*in), GFP_KERNEL);
992 if (!in)
993 return;
994 if (qp->state != IB_QPS_RESET)
995 if (mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(qp->state),
996 MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
997 mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
998 qp->mqp.qpn);
999
1000 get_cqs(qp, &send_cq, &recv_cq);
1001
1002 if (qp->create_type == MLX5_QP_KERNEL) {
1003 mlx5_ib_lock_cqs(send_cq, recv_cq);
1004 __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1005 qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
1006 if (send_cq != recv_cq)
1007 __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1008 mlx5_ib_unlock_cqs(send_cq, recv_cq);
1009 }
1010
1011 err = mlx5_core_destroy_qp(&dev->mdev, &qp->mqp);
1012 if (err)
1013 mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
1014 kfree(in);
1015
1016
1017 if (qp->create_type == MLX5_QP_KERNEL)
1018 destroy_qp_kernel(dev, qp);
1019 else if (qp->create_type == MLX5_QP_USER)
1020 destroy_qp_user(&get_pd(qp)->ibpd, qp);
1021}
1022
1023static const char *ib_qp_type_str(enum ib_qp_type type)
1024{
1025 switch (type) {
1026 case IB_QPT_SMI:
1027 return "IB_QPT_SMI";
1028 case IB_QPT_GSI:
1029 return "IB_QPT_GSI";
1030 case IB_QPT_RC:
1031 return "IB_QPT_RC";
1032 case IB_QPT_UC:
1033 return "IB_QPT_UC";
1034 case IB_QPT_UD:
1035 return "IB_QPT_UD";
1036 case IB_QPT_RAW_IPV6:
1037 return "IB_QPT_RAW_IPV6";
1038 case IB_QPT_RAW_ETHERTYPE:
1039 return "IB_QPT_RAW_ETHERTYPE";
1040 case IB_QPT_XRC_INI:
1041 return "IB_QPT_XRC_INI";
1042 case IB_QPT_XRC_TGT:
1043 return "IB_QPT_XRC_TGT";
1044 case IB_QPT_RAW_PACKET:
1045 return "IB_QPT_RAW_PACKET";
1046 case MLX5_IB_QPT_REG_UMR:
1047 return "MLX5_IB_QPT_REG_UMR";
1048 case IB_QPT_MAX:
1049 default:
1050 return "Invalid QP type";
1051 }
1052}
1053
1054struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
1055 struct ib_qp_init_attr *init_attr,
1056 struct ib_udata *udata)
1057{
1058 struct mlx5_ib_dev *dev;
1059 struct mlx5_ib_qp *qp;
1060 u16 xrcdn = 0;
1061 int err;
1062
1063 if (pd) {
1064 dev = to_mdev(pd->device);
1065 } else {
1066 /* being cautious here */
1067 if (init_attr->qp_type != IB_QPT_XRC_TGT &&
1068 init_attr->qp_type != MLX5_IB_QPT_REG_UMR) {
1069 pr_warn("%s: no PD for transport %s\n", __func__,
1070 ib_qp_type_str(init_attr->qp_type));
1071 return ERR_PTR(-EINVAL);
1072 }
1073 dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
1074 }
1075
1076 switch (init_attr->qp_type) {
1077 case IB_QPT_XRC_TGT:
1078 case IB_QPT_XRC_INI:
1079 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC)) {
1080 mlx5_ib_dbg(dev, "XRC not supported\n");
1081 return ERR_PTR(-ENOSYS);
1082 }
1083 init_attr->recv_cq = NULL;
1084 if (init_attr->qp_type == IB_QPT_XRC_TGT) {
1085 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1086 init_attr->send_cq = NULL;
1087 }
1088
1089 /* fall through */
1090 case IB_QPT_RC:
1091 case IB_QPT_UC:
1092 case IB_QPT_UD:
1093 case IB_QPT_SMI:
1094 case IB_QPT_GSI:
1095 case MLX5_IB_QPT_REG_UMR:
1096 qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1097 if (!qp)
1098 return ERR_PTR(-ENOMEM);
1099
1100 err = create_qp_common(dev, pd, init_attr, udata, qp);
1101 if (err) {
1102 mlx5_ib_dbg(dev, "create_qp_common failed\n");
1103 kfree(qp);
1104 return ERR_PTR(err);
1105 }
1106
1107 if (is_qp0(init_attr->qp_type))
1108 qp->ibqp.qp_num = 0;
1109 else if (is_qp1(init_attr->qp_type))
1110 qp->ibqp.qp_num = 1;
1111 else
1112 qp->ibqp.qp_num = qp->mqp.qpn;
1113
1114 mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
1115 qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
1116 to_mcq(init_attr->send_cq)->mcq.cqn);
1117
1118 qp->xrcdn = xrcdn;
1119
1120 break;
1121
1122 case IB_QPT_RAW_IPV6:
1123 case IB_QPT_RAW_ETHERTYPE:
1124 case IB_QPT_RAW_PACKET:
1125 case IB_QPT_MAX:
1126 default:
1127 mlx5_ib_dbg(dev, "unsupported qp type %d\n",
1128 init_attr->qp_type);
1129 /* Don't support raw QPs */
1130 return ERR_PTR(-EINVAL);
1131 }
1132
1133 return &qp->ibqp;
1134}
1135
1136int mlx5_ib_destroy_qp(struct ib_qp *qp)
1137{
1138 struct mlx5_ib_dev *dev = to_mdev(qp->device);
1139 struct mlx5_ib_qp *mqp = to_mqp(qp);
1140
1141 destroy_qp_common(dev, mqp);
1142
1143 kfree(mqp);
1144
1145 return 0;
1146}
1147
1148static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
1149 int attr_mask)
1150{
1151 u32 hw_access_flags = 0;
1152 u8 dest_rd_atomic;
1153 u32 access_flags;
1154
1155 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1156 dest_rd_atomic = attr->max_dest_rd_atomic;
1157 else
1158 dest_rd_atomic = qp->resp_depth;
1159
1160 if (attr_mask & IB_QP_ACCESS_FLAGS)
1161 access_flags = attr->qp_access_flags;
1162 else
1163 access_flags = qp->atomic_rd_en;
1164
1165 if (!dest_rd_atomic)
1166 access_flags &= IB_ACCESS_REMOTE_WRITE;
1167
1168 if (access_flags & IB_ACCESS_REMOTE_READ)
1169 hw_access_flags |= MLX5_QP_BIT_RRE;
1170 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
1171 hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
1172 if (access_flags & IB_ACCESS_REMOTE_WRITE)
1173 hw_access_flags |= MLX5_QP_BIT_RWE;
1174
1175 return cpu_to_be32(hw_access_flags);
1176}
1177
1178enum {
1179 MLX5_PATH_FLAG_FL = 1 << 0,
1180 MLX5_PATH_FLAG_FREE_AR = 1 << 1,
1181 MLX5_PATH_FLAG_COUNTER = 1 << 2,
1182};
1183
1184static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
1185{
1186 if (rate == IB_RATE_PORT_CURRENT) {
1187 return 0;
1188 } else if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) {
1189 return -EINVAL;
1190 } else {
1191 while (rate != IB_RATE_2_5_GBPS &&
1192 !(1 << (rate + MLX5_STAT_RATE_OFFSET) &
1193 dev->mdev.caps.stat_rate_support))
1194 --rate;
1195 }
1196
1197 return rate + MLX5_STAT_RATE_OFFSET;
1198}
1199
1200static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
1201 struct mlx5_qp_path *path, u8 port, int attr_mask,
1202 u32 path_flags, const struct ib_qp_attr *attr)
1203{
1204 int err;
1205
1206 path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
1207 path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
1208
1209 if (attr_mask & IB_QP_PKEY_INDEX)
1210 path->pkey_index = attr->pkey_index;
1211
1212 path->grh_mlid = ah->src_path_bits & 0x7f;
1213 path->rlid = cpu_to_be16(ah->dlid);
1214
1215 if (ah->ah_flags & IB_AH_GRH) {
1216 path->grh_mlid |= 1 << 7;
1217 path->mgid_index = ah->grh.sgid_index;
1218 path->hop_limit = ah->grh.hop_limit;
1219 path->tclass_flowlabel =
1220 cpu_to_be32((ah->grh.traffic_class << 20) |
1221 (ah->grh.flow_label));
1222 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1223 }
1224
1225 err = ib_rate_to_mlx5(dev, ah->static_rate);
1226 if (err < 0)
1227 return err;
1228 path->static_rate = err;
1229 path->port = port;
1230
1231 if (ah->ah_flags & IB_AH_GRH) {
1232 if (ah->grh.sgid_index >= dev->mdev.caps.port[port - 1].gid_table_len) {
1233			pr_err("sgid_index (%u) too large. max is %d\n",
1234 ah->grh.sgid_index, dev->mdev.caps.port[port - 1].gid_table_len);
1235 return -EINVAL;
1236 }
1237
1238 path->grh_mlid |= 1 << 7;
1239 path->mgid_index = ah->grh.sgid_index;
1240 path->hop_limit = ah->grh.hop_limit;
1241 path->tclass_flowlabel =
1242 cpu_to_be32((ah->grh.traffic_class << 20) |
1243 (ah->grh.flow_label));
1244 memcpy(path->rgid, ah->grh.dgid.raw, 16);
1245 }
1246
1247 if (attr_mask & IB_QP_TIMEOUT)
1248 path->ackto_lt = attr->timeout << 3;
1249
1250 path->sl = ah->sl & 0xf;
1251
1252 return 0;
1253}
1254
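/*
 * For every (current state, new state, service type) transition this
 * table lists the optional parameters the firmware accepts; the mask
 * derived from the caller's attr_mask is ANDed with the matching entry
 * before being written to the modify mailbox.
 */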
1255static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_QP_ST_MAX] = {
1256 [MLX5_QP_STATE_INIT] = {
1257 [MLX5_QP_STATE_INIT] = {
1258 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1259 MLX5_QP_OPTPAR_RAE |
1260 MLX5_QP_OPTPAR_RWE |
1261 MLX5_QP_OPTPAR_PKEY_INDEX |
1262 MLX5_QP_OPTPAR_PRI_PORT,
1263 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1264 MLX5_QP_OPTPAR_PKEY_INDEX |
1265 MLX5_QP_OPTPAR_PRI_PORT,
1266 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1267 MLX5_QP_OPTPAR_Q_KEY |
1268 MLX5_QP_OPTPAR_PRI_PORT,
1269 },
1270 [MLX5_QP_STATE_RTR] = {
1271 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1272 MLX5_QP_OPTPAR_RRE |
1273 MLX5_QP_OPTPAR_RAE |
1274 MLX5_QP_OPTPAR_RWE |
1275 MLX5_QP_OPTPAR_PKEY_INDEX,
1276 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1277 MLX5_QP_OPTPAR_RWE |
1278 MLX5_QP_OPTPAR_PKEY_INDEX,
1279 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
1280 MLX5_QP_OPTPAR_Q_KEY,
1281 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
1282 MLX5_QP_OPTPAR_Q_KEY,
1283 },
1284 },
1285 [MLX5_QP_STATE_RTR] = {
1286 [MLX5_QP_STATE_RTS] = {
1287 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1288 MLX5_QP_OPTPAR_RRE |
1289 MLX5_QP_OPTPAR_RAE |
1290 MLX5_QP_OPTPAR_RWE |
1291 MLX5_QP_OPTPAR_PM_STATE |
1292 MLX5_QP_OPTPAR_RNR_TIMEOUT,
1293 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
1294 MLX5_QP_OPTPAR_RWE |
1295 MLX5_QP_OPTPAR_PM_STATE,
1296 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1297 },
1298 },
1299 [MLX5_QP_STATE_RTS] = {
1300 [MLX5_QP_STATE_RTS] = {
1301 [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_RRE |
1302 MLX5_QP_OPTPAR_RAE |
1303 MLX5_QP_OPTPAR_RWE |
1304 MLX5_QP_OPTPAR_RNR_TIMEOUT |
1305 MLX5_QP_OPTPAR_PM_STATE,
1306 [MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
1307 MLX5_QP_OPTPAR_PM_STATE,
1308 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY |
1309 MLX5_QP_OPTPAR_SRQN |
1310 MLX5_QP_OPTPAR_CQN_RCV,
1311 },
1312 },
1313 [MLX5_QP_STATE_SQER] = {
1314 [MLX5_QP_STATE_RTS] = {
1315 [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY,
1316 [MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_Q_KEY,
1317 },
1318 },
1319};
1320
1321static int ib_nr_to_mlx5_nr(int ib_mask)
1322{
1323 switch (ib_mask) {
1324 case IB_QP_STATE:
1325 return 0;
1326 case IB_QP_CUR_STATE:
1327 return 0;
1328 case IB_QP_EN_SQD_ASYNC_NOTIFY:
1329 return 0;
1330 case IB_QP_ACCESS_FLAGS:
1331 return MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RRE |
1332 MLX5_QP_OPTPAR_RAE;
1333 case IB_QP_PKEY_INDEX:
1334 return MLX5_QP_OPTPAR_PKEY_INDEX;
1335 case IB_QP_PORT:
1336 return MLX5_QP_OPTPAR_PRI_PORT;
1337 case IB_QP_QKEY:
1338 return MLX5_QP_OPTPAR_Q_KEY;
1339 case IB_QP_AV:
1340 return MLX5_QP_OPTPAR_PRIMARY_ADDR_PATH |
1341 MLX5_QP_OPTPAR_PRI_PORT;
1342 case IB_QP_PATH_MTU:
1343 return 0;
1344 case IB_QP_TIMEOUT:
1345 return MLX5_QP_OPTPAR_ACK_TIMEOUT;
1346 case IB_QP_RETRY_CNT:
1347 return MLX5_QP_OPTPAR_RETRY_COUNT;
1348 case IB_QP_RNR_RETRY:
1349 return MLX5_QP_OPTPAR_RNR_RETRY;
1350 case IB_QP_RQ_PSN:
1351 return 0;
1352 case IB_QP_MAX_QP_RD_ATOMIC:
1353 return MLX5_QP_OPTPAR_SRA_MAX;
1354 case IB_QP_ALT_PATH:
1355 return MLX5_QP_OPTPAR_ALT_ADDR_PATH;
1356 case IB_QP_MIN_RNR_TIMER:
1357 return MLX5_QP_OPTPAR_RNR_TIMEOUT;
1358 case IB_QP_SQ_PSN:
1359 return 0;
1360 case IB_QP_MAX_DEST_RD_ATOMIC:
1361 return MLX5_QP_OPTPAR_RRA_MAX | MLX5_QP_OPTPAR_RWE |
1362 MLX5_QP_OPTPAR_RRE | MLX5_QP_OPTPAR_RAE;
1363 case IB_QP_PATH_MIG_STATE:
1364 return MLX5_QP_OPTPAR_PM_STATE;
1365 case IB_QP_CAP:
1366 return 0;
1367 case IB_QP_DEST_QPN:
1368 return 0;
1369 }
1370 return 0;
1371}
1372
1373static int ib_mask_to_mlx5_opt(int ib_mask)
1374{
1375 int result = 0;
1376 int i;
1377
1378 for (i = 0; i < 8 * sizeof(int); i++) {
1379 if ((1 << i) & ib_mask)
1380 result |= ib_nr_to_mlx5_nr(1 << i);
1381 }
1382
1383 return result;
1384}
1385
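/*
 * Translate an ib_qp_attr/attr_mask pair into an mlx5 QP context and
 * issue the state transition command.  When a kernel QP is moved back
 * to RESET, the CQs are scrubbed and the ring indices and doorbell
 * records are rewound so the QP can be reused.
 */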
1386static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
1387 const struct ib_qp_attr *attr, int attr_mask,
1388 enum ib_qp_state cur_state, enum ib_qp_state new_state)
1389{
1390 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1391 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1392 struct mlx5_ib_cq *send_cq, *recv_cq;
1393 struct mlx5_qp_context *context;
1394 struct mlx5_modify_qp_mbox_in *in;
1395 struct mlx5_ib_pd *pd;
1396 enum mlx5_qp_state mlx5_cur, mlx5_new;
1397 enum mlx5_qp_optpar optpar;
1398 int sqd_event;
1399 int mlx5_st;
1400 int err;
1401
1402 in = kzalloc(sizeof(*in), GFP_KERNEL);
1403 if (!in)
1404 return -ENOMEM;
1405
1406 context = &in->ctx;
1407 err = to_mlx5_st(ibqp->qp_type);
1408 if (err < 0)
1409 goto out;
1410
1411 context->flags = cpu_to_be32(err << 16);
1412
1413 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) {
1414 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1415 } else {
1416 switch (attr->path_mig_state) {
1417 case IB_MIG_MIGRATED:
1418 context->flags |= cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
1419 break;
1420 case IB_MIG_REARM:
1421 context->flags |= cpu_to_be32(MLX5_QP_PM_REARM << 11);
1422 break;
1423 case IB_MIG_ARMED:
1424 context->flags |= cpu_to_be32(MLX5_QP_PM_ARMED << 11);
1425 break;
1426 }
1427 }
1428
1429 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
1430 context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
1431 } else if (ibqp->qp_type == IB_QPT_UD ||
1432 ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
1433 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1434 } else if (attr_mask & IB_QP_PATH_MTU) {
1435 if (attr->path_mtu < IB_MTU_256 ||
1436 attr->path_mtu > IB_MTU_4096) {
1437 mlx5_ib_warn(dev, "invalid mtu %d\n", attr->path_mtu);
1438 err = -EINVAL;
1439 goto out;
1440 }
1441 context->mtu_msgmax = (attr->path_mtu << 5) | dev->mdev.caps.log_max_msg;
1442 }
1443
1444 if (attr_mask & IB_QP_DEST_QPN)
1445 context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
1446
1447 if (attr_mask & IB_QP_PKEY_INDEX)
1448 context->pri_path.pkey_index = attr->pkey_index;
1449
1450 /* todo implement counter_index functionality */
1451
1452 if (is_sqp(ibqp->qp_type))
1453 context->pri_path.port = qp->port;
1454
1455 if (attr_mask & IB_QP_PORT)
1456 context->pri_path.port = attr->port_num;
1457
1458 if (attr_mask & IB_QP_AV) {
1459 err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
1460 attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
1461 attr_mask, 0, attr);
1462 if (err)
1463 goto out;
1464 }
1465
1466 if (attr_mask & IB_QP_TIMEOUT)
1467 context->pri_path.ackto_lt |= attr->timeout << 3;
1468
1469 if (attr_mask & IB_QP_ALT_PATH) {
1470 err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
1471 attr->alt_port_num, attr_mask, 0, attr);
1472 if (err)
1473 goto out;
1474 }
1475
1476 pd = get_pd(qp);
1477 get_cqs(qp, &send_cq, &recv_cq);
1478
1479 context->flags_pd = cpu_to_be32(pd ? pd->pdn : to_mpd(dev->devr.p0)->pdn);
1480 context->cqn_send = send_cq ? cpu_to_be32(send_cq->mcq.cqn) : 0;
1481 context->cqn_recv = recv_cq ? cpu_to_be32(recv_cq->mcq.cqn) : 0;
1482 context->params1 = cpu_to_be32(MLX5_IB_ACK_REQ_FREQ << 28);
1483
1484 if (attr_mask & IB_QP_RNR_RETRY)
1485 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
1486
1487 if (attr_mask & IB_QP_RETRY_CNT)
1488 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
1489
1490 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
1491 if (attr->max_rd_atomic)
1492 context->params1 |=
1493 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
1494 }
1495
1496 if (attr_mask & IB_QP_SQ_PSN)
1497 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1498
1499 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1500 if (attr->max_dest_rd_atomic)
1501 context->params2 |=
1502 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
1503 }
1504
1505 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
1506 context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
1507
1508 if (attr_mask & IB_QP_MIN_RNR_TIMER)
1509 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
1510
1511 if (attr_mask & IB_QP_RQ_PSN)
1512 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1513
1514 if (attr_mask & IB_QP_QKEY)
1515 context->qkey = cpu_to_be32(attr->qkey);
1516
1517 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1518 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1519
1520 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
1521 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
1522 sqd_event = 1;
1523 else
1524 sqd_event = 0;
1525
1526 if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1527 context->sq_crq_size |= cpu_to_be16(1 << 4);
1528
1529
1530 mlx5_cur = to_mlx5_state(cur_state);
1531 mlx5_new = to_mlx5_state(new_state);
1532 mlx5_st = to_mlx5_st(ibqp->qp_type);
1533	if (mlx5_cur < 0 || mlx5_new < 0 || mlx5_st < 0) {
		err = -EINVAL;
1534		goto out;
	}
1535
1536 optpar = ib_mask_to_mlx5_opt(attr_mask);
1537 optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
1538 in->optparam = cpu_to_be32(optpar);
1539 err = mlx5_core_qp_modify(&dev->mdev, to_mlx5_state(cur_state),
1540 to_mlx5_state(new_state), in, sqd_event,
1541 &qp->mqp);
1542 if (err)
1543 goto out;
1544
1545 qp->state = new_state;
1546
1547 if (attr_mask & IB_QP_ACCESS_FLAGS)
1548 qp->atomic_rd_en = attr->qp_access_flags;
1549 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1550 qp->resp_depth = attr->max_dest_rd_atomic;
1551 if (attr_mask & IB_QP_PORT)
1552 qp->port = attr->port_num;
1553 if (attr_mask & IB_QP_ALT_PATH)
1554 qp->alt_port = attr->alt_port_num;
1555
1556 /*
1557 * If we moved a kernel QP to RESET, clean up all old CQ
1558 * entries and reinitialize the QP.
1559 */
1560 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1561 mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
1562 ibqp->srq ? to_msrq(ibqp->srq) : NULL);
1563 if (send_cq != recv_cq)
1564 mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
1565
1566 qp->rq.head = 0;
1567 qp->rq.tail = 0;
1568 qp->sq.head = 0;
1569 qp->sq.tail = 0;
1570 qp->sq.cur_post = 0;
1571 qp->sq.last_poll = 0;
1572 qp->db.db[MLX5_RCV_DBR] = 0;
1573 qp->db.db[MLX5_SND_DBR] = 0;
1574 }
1575
1576out:
1577 kfree(in);
1578 return err;
1579}
1580
1581int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1582 int attr_mask, struct ib_udata *udata)
1583{
1584 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1585 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1586 enum ib_qp_state cur_state, new_state;
1587 int err = -EINVAL;
1588 int port;
1589
1590 mutex_lock(&qp->mutex);
1591
1592 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1593 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1594
1595 if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
1596 !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
1597 goto out;
1598
1599 if ((attr_mask & IB_QP_PORT) &&
1600 (attr->port_num == 0 || attr->port_num > dev->mdev.caps.num_ports))
1601 goto out;
1602
1603 if (attr_mask & IB_QP_PKEY_INDEX) {
1604 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1605 if (attr->pkey_index >= dev->mdev.caps.port[port - 1].pkey_table_len)
1606 goto out;
1607 }
1608
1609 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1610 attr->max_rd_atomic > dev->mdev.caps.max_ra_res_qp)
1611 goto out;
1612
1613 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1614 attr->max_dest_rd_atomic > dev->mdev.caps.max_ra_req_qp)
1615 goto out;
1616
1617 if (cur_state == new_state && cur_state == IB_QPS_RESET) {
1618 err = 0;
1619 goto out;
1620 }
1621
1622 err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
1623
1624out:
1625 mutex_unlock(&qp->mutex);
1626 return err;
1627}
1628
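/*
 * Ring overflow check: head and tail are free-running counters, so
 * head - tail is the number of outstanding WQEs even after wrap.  The
 * CQ lock is taken only on the slow path to re-read a stable tail
 * (presumably advanced from the completion-polling side).
 */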
1629static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1630{
1631 struct mlx5_ib_cq *cq;
1632 unsigned cur;
1633
1634 cur = wq->head - wq->tail;
1635 if (likely(cur + nreq < wq->max_post))
1636 return 0;
1637
1638 cq = to_mcq(ib_cq);
1639 spin_lock(&cq->lock);
1640 cur = wq->head - wq->tail;
1641 spin_unlock(&cq->lock);
1642
1643 return cur + nreq >= wq->max_post;
1644}
1645
1646static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
1647 u64 remote_addr, u32 rkey)
1648{
1649 rseg->raddr = cpu_to_be64(remote_addr);
1650 rseg->rkey = cpu_to_be32(rkey);
1651 rseg->reserved = 0;
1652}
1653
1654static void set_atomic_seg(struct mlx5_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
1655{
1656 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1657 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1658 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1659 } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
1660 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1661 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1662 } else {
1663 aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
1664 aseg->compare = 0;
1665 }
1666}
1667
1668static void set_masked_atomic_seg(struct mlx5_wqe_masked_atomic_seg *aseg,
1669 struct ib_send_wr *wr)
1670{
1671 aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
1672 aseg->swap_add_mask = cpu_to_be64(wr->wr.atomic.swap_mask);
1673 aseg->compare = cpu_to_be64(wr->wr.atomic.compare_add);
1674 aseg->compare_mask = cpu_to_be64(wr->wr.atomic.compare_add_mask);
1675}
1676
1677static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
1678 struct ib_send_wr *wr)
1679{
1680 memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
1681 dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
1682 dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
1683}
1684
1685static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
1686{
1687 dseg->byte_count = cpu_to_be32(sg->length);
1688 dseg->lkey = cpu_to_be32(sg->lkey);
1689 dseg->addr = cpu_to_be64(sg->addr);
1690}
1691
1692static __be16 get_klm_octo(int npages)
1693{
1694 return cpu_to_be16(ALIGN(npages, 8) / 2);
1695}
1696
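/*
 * Fast-register / UMR helpers.  get_klm_octo() above converts a page
 * count into the 16-byte "octoword" units the hardware expects (two
 * 8-byte entries each), rounded up to a multiple of four octowords;
 * the mkey mask below selects which mkey fields a fast-register WQE is
 * allowed to update.
 */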
1697static __be64 frwr_mkey_mask(void)
1698{
1699 u64 result;
1700
1701 result = MLX5_MKEY_MASK_LEN |
1702 MLX5_MKEY_MASK_PAGE_SIZE |
1703 MLX5_MKEY_MASK_START_ADDR |
1704 MLX5_MKEY_MASK_EN_RINVAL |
1705 MLX5_MKEY_MASK_KEY |
1706 MLX5_MKEY_MASK_LR |
1707 MLX5_MKEY_MASK_LW |
1708 MLX5_MKEY_MASK_RR |
1709 MLX5_MKEY_MASK_RW |
1710 MLX5_MKEY_MASK_A |
1711 MLX5_MKEY_MASK_SMALL_FENCE |
1712 MLX5_MKEY_MASK_FREE;
1713
1714 return cpu_to_be64(result);
1715}
1716
1717static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1718 struct ib_send_wr *wr, int li)
1719{
1720 memset(umr, 0, sizeof(*umr));
1721
1722 if (li) {
1723 umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
1724 umr->flags = 1 << 7;
1725 return;
1726 }
1727
1728 umr->flags = (1 << 5); /* fail if not free */
1729 umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
1730 umr->mkey_mask = frwr_mkey_mask();
1731}
1732
1733static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
1734 struct ib_send_wr *wr)
1735{
1736 struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
1737 u64 mask;
1738
1739 memset(umr, 0, sizeof(*umr));
1740
1741 if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
1742 umr->flags = 1 << 5; /* fail if not free */
1743 umr->klm_octowords = get_klm_octo(umrwr->npages);
1744 mask = MLX5_MKEY_MASK_LEN |
1745 MLX5_MKEY_MASK_PAGE_SIZE |
1746 MLX5_MKEY_MASK_START_ADDR |
1747 MLX5_MKEY_MASK_PD |
1748 MLX5_MKEY_MASK_LR |
1749 MLX5_MKEY_MASK_LW |
1750 MLX5_MKEY_MASK_RR |
1751 MLX5_MKEY_MASK_RW |
1752 MLX5_MKEY_MASK_A |
1753 MLX5_MKEY_MASK_FREE;
1754 umr->mkey_mask = cpu_to_be64(mask);
1755 } else {
1756 umr->flags = 2 << 5; /* fail if free */
1757 mask = MLX5_MKEY_MASK_FREE;
1758 umr->mkey_mask = cpu_to_be64(mask);
1759 }
1760
1761 if (!wr->num_sge)
1762 umr->flags |= (1 << 7); /* inline */
1763}
1764
1765static u8 get_umr_flags(int acc)
1766{
1767 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
1768 (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) |
1769 (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) |
1770 (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) |
1771 MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
1772}
1773
1774static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
1775 int li, int *writ)
1776{
1777 memset(seg, 0, sizeof(*seg));
1778 if (li) {
1779 seg->status = 1 << 6;
1780 return;
1781 }
1782
1783 seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags);
1784 *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
1785 seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
1786 seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
1787 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1788 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1789 seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
1790 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1791}
1792
1793static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
1794{
1795 memset(seg, 0, sizeof(*seg));
1796 if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
1797 seg->status = 1 << 6;
1798 return;
1799 }
1800
1801 seg->flags = convert_access(wr->wr.fast_reg.access_flags);
1802 seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
1803 seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
1804 seg->len = cpu_to_be64(wr->wr.fast_reg.length);
1805 seg->log2_page_size = wr->wr.fast_reg.page_shift;
1806 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1807}
1808
1809static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
1810 struct ib_send_wr *wr,
1811 struct mlx5_core_dev *mdev,
1812 struct mlx5_ib_pd *pd,
1813 int writ)
1814{
1815 struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
1816 u64 *page_list = wr->wr.fast_reg.page_list->page_list;
1817 u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
1818 int i;
1819
1820 for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
1821 mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
1822 dseg->addr = cpu_to_be64(mfrpl->map);
1823 dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
1824 dseg->lkey = cpu_to_be32(pd->pa_lkey);
1825}
1826
1827static __be32 send_ieth(struct ib_send_wr *wr)
1828{
1829 switch (wr->opcode) {
1830 case IB_WR_SEND_WITH_IMM:
1831 case IB_WR_RDMA_WRITE_WITH_IMM:
1832 return wr->ex.imm_data;
1833
1834 case IB_WR_SEND_WITH_INV:
1835 return cpu_to_be32(wr->ex.invalidate_rkey);
1836
1837 default:
1838 return 0;
1839 }
1840}
1841
1842static u8 calc_sig(void *wqe, int size)
1843{
1844 u8 *p = wqe;
1845 u8 res = 0;
1846 int i;
1847
1848 for (i = 0; i < size; i++)
1849 res ^= p[i];
1850
1851 return ~res;
1852}
1853
1854static u8 wq_sig(void *wqe)
1855{
1856 return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
1857}
1858
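/*
 * Copy the caller's SGEs into the WQE as inline data, wrapping at the
 * end of the send queue buffer.  The size of the resulting segment
 * (header plus data) is returned through *sz in 16-byte units so the
 * caller can account for it in the WQE size.
 */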
1859static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
1860 void *wqe, int *sz)
1861{
1862 struct mlx5_wqe_inline_seg *seg;
1863 void *qend = qp->sq.qend;
1864 void *addr;
1865 int inl = 0;
1866 int copy;
1867 int len;
1868 int i;
1869
1870 seg = wqe;
1871 wqe += sizeof(*seg);
1872 for (i = 0; i < wr->num_sge; i++) {
1873 addr = (void *)(unsigned long)(wr->sg_list[i].addr);
1874 len = wr->sg_list[i].length;
1875 inl += len;
1876
1877 if (unlikely(inl > qp->max_inline_data))
1878 return -ENOMEM;
1879
1880 if (unlikely(wqe + len > qend)) {
1881 copy = qend - wqe;
1882 memcpy(wqe, addr, copy);
1883 addr += copy;
1884 len -= copy;
1885 wqe = mlx5_get_send_wqe(qp, 0);
1886 }
1887 memcpy(wqe, addr, len);
1888 wqe += len;
1889 }
1890
1891 seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
1892
1893 *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
1894
1895 return 0;
1896}
1897
1898static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
1899 struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
1900{
1901 int writ = 0;
1902 int li;
1903
1904 li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
1905 if (unlikely(wr->send_flags & IB_SEND_INLINE))
1906 return -EINVAL;
1907
1908 set_frwr_umr_segment(*seg, wr, li);
1909 *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
1910 *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
1911 if (unlikely((*seg == qp->sq.qend)))
1912 *seg = mlx5_get_send_wqe(qp, 0);
1913 set_mkey_segment(*seg, wr, li, &writ);
1914 *seg += sizeof(struct mlx5_mkey_seg);
1915 *size += sizeof(struct mlx5_mkey_seg) / 16;
1916 if (unlikely((*seg == qp->sq.qend)))
1917 *seg = mlx5_get_send_wqe(qp, 0);
1918 if (!li) {
1919 set_frwr_pages(*seg, wr, mdev, pd, writ);
1920 *seg += sizeof(struct mlx5_wqe_data_seg);
1921 *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
1922 }
1923 return 0;
1924}
1925
1926static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
1927{
1928 __be32 *p = NULL;
1929 int tidx = idx;
1930 int i, j;
1931
1932 pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
1933 for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
1934 if ((i & 0xf) == 0) {
1935 void *buf = mlx5_get_send_wqe(qp, tidx);
1936 tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
1937 p = buf;
1938 j = 0;
1939 }
1940 pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
1941 be32_to_cpu(p[j + 1]), be32_to_cpu(p[j + 2]),
1942 be32_to_cpu(p[j + 3]));
1943 }
1944}
1945
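/*
 * Copy a WQE into the BlueFlame register 64 bytes at a time via MMIO,
 * wrapping back to the start of the send queue buffer when the WQE
 * straddles its end.
 */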
1946static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
1947 unsigned bytecnt, struct mlx5_ib_qp *qp)
1948{
1949 while (bytecnt > 0) {
1950 __iowrite64_copy(dst++, src++, 8);
1951 __iowrite64_copy(dst++, src++, 8);
1952 __iowrite64_copy(dst++, src++, 8);
1953 __iowrite64_copy(dst++, src++, 8);
1954 __iowrite64_copy(dst++, src++, 8);
1955 __iowrite64_copy(dst++, src++, 8);
1956 __iowrite64_copy(dst++, src++, 8);
1957 __iowrite64_copy(dst++, src++, 8);
1958 bytecnt -= 64;
1959 if (unlikely(src == qp->sq.qend))
1960 src = mlx5_get_send_wqe(qp, 0);
1961 }
1962}
1963
1964static u8 get_fence(u8 fence, struct ib_send_wr *wr)
1965{
1966 if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
1967 wr->send_flags & IB_SEND_FENCE))
1968 return MLX5_FENCE_MODE_STRONG_ORDERING;
1969
1970 if (unlikely(fence)) {
1971 if (wr->send_flags & IB_SEND_FENCE)
1972 return MLX5_FENCE_MODE_SMALL_AND_FENCE;
1973 else
1974 return fence;
1975
1976 } else {
1977 return 0;
1978 }
1979}
1980
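/*
 * Post-send path: for each work request, claim the next send WQE slot,
 * lay down the control segment, then the transport- and opcode-
 * specific segments, then either inline data or the gather list, and
 * finally publish the whole chain with a single doorbell record update
 * followed by one BlueFlame copy or 64-bit doorbell write.
 */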
1981int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1982 struct ib_send_wr **bad_wr)
1983{
1984 struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
1985 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1986 struct mlx5_core_dev *mdev = &dev->mdev;
1987 struct mlx5_ib_qp *qp = to_mqp(ibqp);
1988 struct mlx5_wqe_data_seg *dpseg;
1989 struct mlx5_wqe_xrc_seg *xrc;
1990 struct mlx5_bf *bf = qp->bf;
1991 int uninitialized_var(size);
1992 void *qend = qp->sq.qend;
1993 unsigned long flags;
1994 u32 mlx5_opcode;
1995 unsigned idx;
1996 int err = 0;
1997 int inl = 0;
1998 int num_sge;
1999 void *seg;
2000 int nreq;
2001 int i;
2002 u8 next_fence = 0;
2003 u8 opmod = 0;
2004 u8 fence;
2005
2006 spin_lock_irqsave(&qp->sq.lock, flags);
2007
2008 for (nreq = 0; wr; nreq++, wr = wr->next) {
2009 if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) {
2010			mlx5_ib_warn(dev, "invalid send opcode %d\n", wr->opcode);
2011 err = -EINVAL;
2012 *bad_wr = wr;
2013 goto out;
2014 }
2015
2016 if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
2017			mlx5_ib_warn(dev, "send queue overflow\n");
2018 err = -ENOMEM;
2019 *bad_wr = wr;
2020 goto out;
2021 }
2022
2023 fence = qp->fm_cache;
2024 num_sge = wr->num_sge;
2025 if (unlikely(num_sge > qp->sq.max_gs)) {
2026			mlx5_ib_warn(dev, "num_sge %d > max_gs %d\n", num_sge, qp->sq.max_gs);
2027 err = -ENOMEM;
2028 *bad_wr = wr;
2029 goto out;
2030 }
2031
2032 idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
2033 seg = mlx5_get_send_wqe(qp, idx);
2034 ctrl = seg;
2035 *(uint32_t *)(seg + 8) = 0;
2036 ctrl->imm = send_ieth(wr);
2037 ctrl->fm_ce_se = qp->sq_signal_bits |
2038 (wr->send_flags & IB_SEND_SIGNALED ?
2039 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
2040 (wr->send_flags & IB_SEND_SOLICITED ?
2041 MLX5_WQE_CTRL_SOLICITED : 0);
2042
2043 seg += sizeof(*ctrl);
2044 size = sizeof(*ctrl) / 16;
2045
2046 switch (ibqp->qp_type) {
2047 case IB_QPT_XRC_INI:
2048 xrc = seg;
2049 xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
2050 seg += sizeof(*xrc);
2051 size += sizeof(*xrc) / 16;
2052 /* fall through */
2053 case IB_QPT_RC:
2054 switch (wr->opcode) {
2055 case IB_WR_RDMA_READ:
2056 case IB_WR_RDMA_WRITE:
2057 case IB_WR_RDMA_WRITE_WITH_IMM:
2058 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2059 wr->wr.rdma.rkey);
2060 seg += sizeof(struct mlx5_wqe_raddr_seg);
2061 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2062 break;
2063
2064 case IB_WR_ATOMIC_CMP_AND_SWP:
2065 case IB_WR_ATOMIC_FETCH_AND_ADD:
2066 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2067 wr->wr.atomic.rkey);
2068 seg += sizeof(struct mlx5_wqe_raddr_seg);
2069
2070 set_atomic_seg(seg, wr);
2071 seg += sizeof(struct mlx5_wqe_atomic_seg);
2072
2073 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2074 sizeof(struct mlx5_wqe_atomic_seg)) / 16;
2075 break;
2076
2077 case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
2078 set_raddr_seg(seg, wr->wr.atomic.remote_addr,
2079 wr->wr.atomic.rkey);
2080 seg += sizeof(struct mlx5_wqe_raddr_seg);
2081
2082 set_masked_atomic_seg(seg, wr);
2083 seg += sizeof(struct mlx5_wqe_masked_atomic_seg);
2084
2085 size += (sizeof(struct mlx5_wqe_raddr_seg) +
2086 sizeof(struct mlx5_wqe_masked_atomic_seg)) / 16;
2087 break;
2088
2089 case IB_WR_LOCAL_INV:
2090 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2091 qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
2092 ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
2093 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2094 if (err) {
2095					mlx5_ib_warn(dev, "local invalidate failed, err %d\n", err);
2096 *bad_wr = wr;
2097 goto out;
2098 }
2099 num_sge = 0;
2100 break;
2101
2102 case IB_WR_FAST_REG_MR:
2103 next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
2104 qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
2105 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2106 err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
2107 if (err) {
2108					mlx5_ib_warn(dev, "fast reg failed, err %d\n", err);
2109 *bad_wr = wr;
2110 goto out;
2111 }
2112 num_sge = 0;
2113 break;
2114
2115 default:
2116 break;
2117 }
2118 break;
2119
2120 case IB_QPT_UC:
2121 switch (wr->opcode) {
2122 case IB_WR_RDMA_WRITE:
2123 case IB_WR_RDMA_WRITE_WITH_IMM:
2124 set_raddr_seg(seg, wr->wr.rdma.remote_addr,
2125 wr->wr.rdma.rkey);
2126 seg += sizeof(struct mlx5_wqe_raddr_seg);
2127 size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
2128 break;
2129
2130 default:
2131 break;
2132 }
2133 break;
2134
2135 case IB_QPT_UD:
2136 case IB_QPT_SMI:
2137 case IB_QPT_GSI:
2138 set_datagram_seg(seg, wr);
2139 seg += sizeof(struct mlx5_wqe_datagram_seg);
2140 size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
2141 if (unlikely((seg == qend)))
2142 seg = mlx5_get_send_wqe(qp, 0);
2143 break;
2144
2145 case MLX5_IB_QPT_REG_UMR:
2146 if (wr->opcode != MLX5_IB_WR_UMR) {
2147 err = -EINVAL;
2148 mlx5_ib_warn(dev, "bad opcode\n");
2149 goto out;
2150 }
2151 qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
2152 ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
2153 set_reg_umr_segment(seg, wr);
2154 seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
2155 size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
2156 if (unlikely((seg == qend)))
2157 seg = mlx5_get_send_wqe(qp, 0);
2158 set_reg_mkey_segment(seg, wr);
2159 seg += sizeof(struct mlx5_mkey_seg);
2160 size += sizeof(struct mlx5_mkey_seg) / 16;
2161 if (unlikely((seg == qend)))
2162 seg = mlx5_get_send_wqe(qp, 0);
2163 break;
2164
2165 default:
2166 break;
2167 }
2168
2169 if (wr->send_flags & IB_SEND_INLINE && num_sge) {
2170 int uninitialized_var(sz);
2171
2172 err = set_data_inl_seg(qp, wr, seg, &sz);
2173 if (unlikely(err)) {
2174				mlx5_ib_warn(dev, "failed to copy inline data, err %d\n", err);
2175 *bad_wr = wr;
2176 goto out;
2177 }
2178 inl = 1;
2179 size += sz;
2180 } else {
2181 dpseg = seg;
2182 for (i = 0; i < num_sge; i++) {
2183 if (unlikely(dpseg == qend)) {
2184 seg = mlx5_get_send_wqe(qp, 0);
2185 dpseg = seg;
2186 }
2187 if (likely(wr->sg_list[i].length)) {
2188 set_data_ptr_seg(dpseg, wr->sg_list + i);
2189 size += sizeof(struct mlx5_wqe_data_seg) / 16;
2190 dpseg++;
2191 }
2192 }
2193 }
2194
2195 mlx5_opcode = mlx5_ib_opcode[wr->opcode];
2196 ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
2197 mlx5_opcode |
2198 ((u32)opmod << 24));
2199 ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
2200 ctrl->fm_ce_se |= get_fence(fence, wr);
2201 qp->fm_cache = next_fence;
2202 if (unlikely(qp->wq_sig))
2203 ctrl->signature = wq_sig(ctrl);
2204
2205 qp->sq.wrid[idx] = wr->wr_id;
2206 qp->sq.w_list[idx].opcode = mlx5_opcode;
2207 qp->sq.wqe_head[idx] = qp->sq.head + nreq;
2208 qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
2209 qp->sq.w_list[idx].next = qp->sq.cur_post;
2210
2211 if (0)
2212 dump_wqe(qp, idx, size);
2213 }
2214
2215out:
2216 if (likely(nreq)) {
2217 qp->sq.head += nreq;
2218
2219 /* Make sure that descriptors are written before
2220 * updating doorbell record and ringing the doorbell
2221 */
2222 wmb();
2223
2224 qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
2225
2226 if (bf->need_lock)
2227 spin_lock(&bf->lock);
2228
2229 /* TBD enable WC */
2230 if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
2231 mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
2232 /* wc_wmb(); */
2233 } else {
2234			mlx5_write64((__be32 *)ctrl, bf->reg + bf->offset,
2235 MLX5_GET_DOORBELL_LOCK(&bf->lock32));
2236 /* Make sure doorbells don't leak out of SQ spinlock
2237 * and reach the HCA out of order.
2238 */
2239 mmiowb();
2240 }
2241 bf->offset ^= bf->buf_size;
2242 if (bf->need_lock)
2243 spin_unlock(&bf->lock);
2244 }
2245
2246 spin_unlock_irqrestore(&qp->sq.lock, flags);
2247
2248 return err;
2249}
2250
2251static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
2252{
2253 sig->signature = calc_sig(sig, size);
2254}
2255
2256int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2257 struct ib_recv_wr **bad_wr)
2258{
2259 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2260 struct mlx5_wqe_data_seg *scat;
2261 struct mlx5_rwqe_sig *sig;
2262 unsigned long flags;
2263 int err = 0;
2264 int nreq;
2265 int ind;
2266 int i;
2267
2268 spin_lock_irqsave(&qp->rq.lock, flags);
2269
2270 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
2271
2272 for (nreq = 0; wr; nreq++, wr = wr->next) {
2273 if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
2274 err = -ENOMEM;
2275 *bad_wr = wr;
2276 goto out;
2277 }
2278
2279 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
2280 err = -EINVAL;
2281 *bad_wr = wr;
2282 goto out;
2283 }
2284
2285 scat = get_recv_wqe(qp, ind);
2286 if (qp->wq_sig)
2287 scat++;
2288
2289 for (i = 0; i < wr->num_sge; i++)
2290 set_data_ptr_seg(scat + i, wr->sg_list + i);
2291
2292 if (i < qp->rq.max_gs) {
2293 scat[i].byte_count = 0;
2294 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
2295 scat[i].addr = 0;
2296 }
2297
2298 if (qp->wq_sig) {
2299 sig = (struct mlx5_rwqe_sig *)scat;
2300 set_sig_seg(sig, (qp->rq.max_gs + 1) << 2);
2301 }
2302
2303 qp->rq.wrid[ind] = wr->wr_id;
2304
2305 ind = (ind + 1) & (qp->rq.wqe_cnt - 1);
2306 }
2307
2308out:
2309 if (likely(nreq)) {
2310 qp->rq.head += nreq;
2311
2312 /* Make sure that descriptors are written before
2313 * doorbell record.
2314 */
2315 wmb();
2316
2317 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
2318 }
2319
2320 spin_unlock_irqrestore(&qp->rq.lock, flags);
2321
2322 return err;
2323}
2324
2325static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
2326{
2327 switch (mlx5_state) {
2328 case MLX5_QP_STATE_RST: return IB_QPS_RESET;
2329 case MLX5_QP_STATE_INIT: return IB_QPS_INIT;
2330 case MLX5_QP_STATE_RTR: return IB_QPS_RTR;
2331 case MLX5_QP_STATE_RTS: return IB_QPS_RTS;
2332 case MLX5_QP_STATE_SQ_DRAINING:
2333 case MLX5_QP_STATE_SQD: return IB_QPS_SQD;
2334 case MLX5_QP_STATE_SQER: return IB_QPS_SQE;
2335 case MLX5_QP_STATE_ERR: return IB_QPS_ERR;
2336 default: return -1;
2337 }
2338}
2339
2340static inline enum ib_mig_state to_ib_mig_state(int mlx5_mig_state)
2341{
2342 switch (mlx5_mig_state) {
2343 case MLX5_QP_PM_ARMED: return IB_MIG_ARMED;
2344 case MLX5_QP_PM_REARM: return IB_MIG_REARM;
2345 case MLX5_QP_PM_MIGRATED: return IB_MIG_MIGRATED;
2346 default: return -1;
2347 }
2348}
2349
2350static int to_ib_qp_access_flags(int mlx5_flags)
2351{
2352 int ib_flags = 0;
2353
2354 if (mlx5_flags & MLX5_QP_BIT_RRE)
2355 ib_flags |= IB_ACCESS_REMOTE_READ;
2356 if (mlx5_flags & MLX5_QP_BIT_RWE)
2357 ib_flags |= IB_ACCESS_REMOTE_WRITE;
2358 if (mlx5_flags & MLX5_QP_BIT_RAE)
2359 ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
2360
2361 return ib_flags;
2362}
2363
2364static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_attr,
2365 struct mlx5_qp_path *path)
2366{
2367 struct mlx5_core_dev *dev = &ibdev->mdev;
2368
2369 memset(ib_ah_attr, 0, sizeof(*ib_ah_attr));
2370 ib_ah_attr->port_num = path->port;
2371
2372 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
2373 return;
2374
2375 ib_ah_attr->sl = path->sl & 0xf;
2376
2377 ib_ah_attr->dlid = be16_to_cpu(path->rlid);
2378 ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
2379 ib_ah_attr->static_rate = path->static_rate ? path->static_rate - 5 : 0;
2380 ib_ah_attr->ah_flags = (path->grh_mlid & (1 << 7)) ? IB_AH_GRH : 0;
2381 if (ib_ah_attr->ah_flags) {
2382 ib_ah_attr->grh.sgid_index = path->mgid_index;
2383 ib_ah_attr->grh.hop_limit = path->hop_limit;
2384 ib_ah_attr->grh.traffic_class =
2385 (be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
2386 ib_ah_attr->grh.flow_label =
2387 be32_to_cpu(path->tclass_flowlabel) & 0xfffff;
2388 memcpy(ib_ah_attr->grh.dgid.raw,
2389 path->rgid, sizeof(ib_ah_attr->grh.dgid.raw));
2390 }
2391}
2392
2393int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
2394 struct ib_qp_init_attr *qp_init_attr)
2395{
2396 struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
2397 struct mlx5_ib_qp *qp = to_mqp(ibqp);
2398 struct mlx5_query_qp_mbox_out *outb;
2399 struct mlx5_qp_context *context;
2400 int mlx5_state;
2401 int err = 0;
2402
2403 mutex_lock(&qp->mutex);
2404 outb = kzalloc(sizeof(*outb), GFP_KERNEL);
2405 if (!outb) {
2406 err = -ENOMEM;
2407 goto out;
2408 }
2409 context = &outb->ctx;
2410 err = mlx5_core_qp_query(&dev->mdev, &qp->mqp, outb, sizeof(*outb));
2411 if (err)
2412 goto out_free;
2413
2414 mlx5_state = be32_to_cpu(context->flags) >> 28;
2415
2416 qp->state = to_ib_qp_state(mlx5_state);
2417 qp_attr->qp_state = qp->state;
2418 qp_attr->path_mtu = context->mtu_msgmax >> 5;
2419 qp_attr->path_mig_state =
2420 to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
2421 qp_attr->qkey = be32_to_cpu(context->qkey);
2422 qp_attr->rq_psn = be32_to_cpu(context->rnr_nextrecvpsn) & 0xffffff;
2423 qp_attr->sq_psn = be32_to_cpu(context->next_send_psn) & 0xffffff;
2424 qp_attr->dest_qp_num = be32_to_cpu(context->log_pg_sz_remote_qpn) & 0xffffff;
2425 qp_attr->qp_access_flags =
2426 to_ib_qp_access_flags(be32_to_cpu(context->params2));
2427
2428 if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
2429 to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
2430 to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
2431 qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
2432 qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num;
2433 }
2434
2435 qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
2436 qp_attr->port_num = context->pri_path.port;
2437
2438 /* qp_attr->en_sqd_async_notify is only applicable in modify qp */
2439 qp_attr->sq_draining = mlx5_state == MLX5_QP_STATE_SQ_DRAINING;
2440
2441 qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context->params1) >> 21) & 0x7);
2442
2443 qp_attr->max_dest_rd_atomic =
2444 1 << ((be32_to_cpu(context->params2) >> 21) & 0x7);
2445 qp_attr->min_rnr_timer =
2446 (be32_to_cpu(context->rnr_nextrecvpsn) >> 24) & 0x1f;
2447 qp_attr->timeout = context->pri_path.ackto_lt >> 3;
2448 qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
2449 qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
2450 qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
2451 qp_attr->cur_qp_state = qp_attr->qp_state;
2452 qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
2453 qp_attr->cap.max_recv_sge = qp->rq.max_gs;
2454
2455 if (!ibqp->uobject) {
2456 qp_attr->cap.max_send_wr = qp->sq.wqe_cnt;
2457 qp_attr->cap.max_send_sge = qp->sq.max_gs;
2458 } else {
2459 qp_attr->cap.max_send_wr = 0;
2460 qp_attr->cap.max_send_sge = 0;
2461 }
2462
2463 /* We don't support inline sends for kernel QPs (yet), and we
2464 * don't know what userspace's value should be.
2465 */
2466 qp_attr->cap.max_inline_data = 0;
2467
2468 qp_init_attr->cap = qp_attr->cap;
2469
2470 qp_init_attr->create_flags = 0;
2471 if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
2472 qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
2473
2474 qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
2475 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2476
2477out_free:
2478 kfree(outb);
2479
2480out:
2481 mutex_unlock(&qp->mutex);
2482 return err;
2483}
2484
2485struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
2486 struct ib_ucontext *context,
2487 struct ib_udata *udata)
2488{
2489 struct mlx5_ib_dev *dev = to_mdev(ibdev);
2490 struct mlx5_ib_xrcd *xrcd;
2491 int err;
2492
2493 if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_XRC))
2494 return ERR_PTR(-ENOSYS);
2495
2496 xrcd = kmalloc(sizeof(*xrcd), GFP_KERNEL);
2497 if (!xrcd)
2498 return ERR_PTR(-ENOMEM);
2499
2500 err = mlx5_core_xrcd_alloc(&dev->mdev, &xrcd->xrcdn);
2501 if (err) {
2502 kfree(xrcd);
2503 return ERR_PTR(-ENOMEM);
2504 }
2505
2506 return &xrcd->ibxrcd;
2507}
2508
2509int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
2510{
2511 struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
2512 u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
2513 int err;
2514
2515 err = mlx5_core_xrcd_dealloc(&dev->mdev, xrcdn);
2516 if (err) {
2517 mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
2518 return err;
2519 }
2520
2521 kfree(xrcd);
2522
2523 return 0;
2524}
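The post-send tail above accounts for work-request size in 16-byte units: each segment adds sizeof(seg)/16 to size, the total is folded into ctrl->qpn_ds next to the QP number, and cur_post advances by the WQE footprint in 64-byte basic blocks (MLX5_SEND_WQE_BB). A minimal stand-alone sketch of that arithmetic, assuming the usual 16-byte control/raddr/data segment layouts and a hypothetical two-SGE RDMA write; the QP number and SGE count are made up for illustration and the constants are redefined locally so the sketch compiles on its own:

/*
 * Illustrative sketch only: the segment size and MLX5_SEND_WQE_BB value
 * mirror the driver code above but are local copies, not kernel headers.
 */
#include <stdio.h>

#define SEG_16B            16   /* ctrl, raddr and data segments are 16 bytes each */
#define MLX5_SEND_WQE_BB   64   /* basic-block size used by the cur_post accounting */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int qpn = 0x1234;     /* hypothetical QP number */
	unsigned int num_sge = 2;      /* hypothetical RDMA write with two SGEs */

	/* size is accumulated in 16-byte units, as in the post_send loop */
	unsigned int size = SEG_16B / 16                 /* ctrl segment (set up earlier in the function) */
			  + SEG_16B / 16                 /* raddr segment */
			  + num_sge * (SEG_16B / 16);    /* one data segment per SGE */

	unsigned int qpn_ds = size | (qpn << 8);         /* packed as in ctrl->qpn_ds, before cpu_to_be32() */
	unsigned int bb = DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); /* cur_post increment */

	printf("ds=%u (WQE is %u bytes), qpn_ds=0x%x, cur_post advances by %u BB\n",
	       size, size * 16, qpn_ds, bb);
	return 0;
}

For this example the WQE is 4 units (64 bytes), so cur_post advances by exactly one basic block.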
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
new file mode 100644
index 000000000000..84d297afd6a9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -0,0 +1,473 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/mlx5/qp.h>
35#include <linux/mlx5/srq.h>
36#include <linux/slab.h>
37#include <rdma/ib_umem.h>
38
39#include "mlx5_ib.h"
40#include "user.h"
41
42/* not supported currently */
43static int srq_signature;
44
45static void *get_wqe(struct mlx5_ib_srq *srq, int n)
46{
47 return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
48}
49
50static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
51{
52 struct ib_event event;
53 struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
54
55 if (ibsrq->event_handler) {
56 event.device = ibsrq->device;
57 event.element.srq = ibsrq;
58 switch (type) {
59 case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
60 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
61 break;
62 case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
63 event.event = IB_EVENT_SRQ_ERR;
64 break;
65 default:
66 pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
67 type, srq->srqn);
68 return;
69 }
70
71 ibsrq->event_handler(&event, ibsrq->srq_context);
72 }
73}
74
75static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
76 struct mlx5_create_srq_mbox_in **in,
77 struct ib_udata *udata, int buf_size, int *inlen)
78{
79 struct mlx5_ib_dev *dev = to_mdev(pd->device);
80 struct mlx5_ib_create_srq ucmd;
81 int err;
82 int npages;
83 int page_shift;
84 int ncont;
85 u32 offset;
86
87 if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
88 mlx5_ib_dbg(dev, "failed copy udata\n");
89 return -EFAULT;
90 }
91 srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
92
93 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
94 0, 0);
95 if (IS_ERR(srq->umem)) {
96 mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
97 err = PTR_ERR(srq->umem);
98 return err;
99 }
100
101 mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
102 &page_shift, &ncont, NULL);
103 err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
104 &offset);
105 if (err) {
106 mlx5_ib_warn(dev, "bad offset\n");
107 goto err_umem;
108 }
109
110 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
111 *in = mlx5_vzalloc(*inlen);
112 if (!(*in)) {
113 err = -ENOMEM;
114 goto err_umem;
115 }
116
117 mlx5_ib_populate_pas(dev, srq->umem, page_shift, (*in)->pas, 0);
118
119 err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
120 ucmd.db_addr, &srq->db);
121 if (err) {
122 mlx5_ib_dbg(dev, "map doorbell failed\n");
123 goto err_in;
124 }
125
126 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
127 (*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
128
129 return 0;
130
131err_in:
132 mlx5_vfree(*in);
133
134err_umem:
135 ib_umem_release(srq->umem);
136
137 return err;
138}
139
140static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
141 struct mlx5_create_srq_mbox_in **in, int buf_size,
142 int *inlen)
143{
144 int err;
145 int i;
146 struct mlx5_wqe_srq_next_seg *next;
147 int page_shift;
148 int npages;
149
150 err = mlx5_db_alloc(&dev->mdev, &srq->db);
151 if (err) {
152 mlx5_ib_warn(dev, "alloc dbell rec failed\n");
153 return err;
154 }
155
156 *srq->db.db = 0;
157
158 if (mlx5_buf_alloc(&dev->mdev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
159 mlx5_ib_dbg(dev, "buf alloc failed\n");
160 err = -ENOMEM;
161 goto err_db;
162 }
163 page_shift = srq->buf.page_shift;
164
165 srq->head = 0;
166 srq->tail = srq->msrq.max - 1;
167 srq->wqe_ctr = 0;
168
169 for (i = 0; i < srq->msrq.max; i++) {
170 next = get_wqe(srq, i);
171 next->next_wqe_index =
172 cpu_to_be16((i + 1) & (srq->msrq.max - 1));
173 }
174
175 npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
176 mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
177 buf_size, page_shift, srq->buf.npages, npages);
178 *inlen = sizeof(**in) + sizeof(*(*in)->pas) * npages;
179 *in = mlx5_vzalloc(*inlen);
180 if (!*in) {
181 err = -ENOMEM;
182 goto err_buf;
183 }
184 mlx5_fill_page_array(&srq->buf, (*in)->pas);
185
186 srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
187 if (!srq->wrid) {
188 mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
189 (unsigned long)(srq->msrq.max * sizeof(u64)));
190 err = -ENOMEM;
191 goto err_in;
192 }
193 srq->wq_sig = !!srq_signature;
194
195 (*in)->ctx.log_pg_sz = page_shift - PAGE_SHIFT;
196
197 return 0;
198
199err_in:
200 mlx5_vfree(*in);
201
202err_buf:
203 mlx5_buf_free(&dev->mdev, &srq->buf);
204
205err_db:
206 mlx5_db_free(&dev->mdev, &srq->db);
207 return err;
208}
209
210static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
211{
212 mlx5_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
213 ib_umem_release(srq->umem);
214}
215
216
217static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
218{
219 kfree(srq->wrid);
220 mlx5_buf_free(&dev->mdev, &srq->buf);
221 mlx5_db_free(&dev->mdev, &srq->db);
222}
223
224struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
225 struct ib_srq_init_attr *init_attr,
226 struct ib_udata *udata)
227{
228 struct mlx5_ib_dev *dev = to_mdev(pd->device);
229 struct mlx5_ib_srq *srq;
230 int desc_size;
231 int buf_size;
232 int err;
233 struct mlx5_create_srq_mbox_in *uninitialized_var(in);
234 int uninitialized_var(inlen);
235 int is_xrc;
236 u32 flgs, xrcdn;
237
238 /* Sanity check SRQ size before proceeding */
239 if (init_attr->attr.max_wr >= dev->mdev.caps.max_srq_wqes) {
240 mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
241 init_attr->attr.max_wr,
242 dev->mdev.caps.max_srq_wqes);
243 return ERR_PTR(-EINVAL);
244 }
245
246 srq = kmalloc(sizeof(*srq), GFP_KERNEL);
247 if (!srq)
248 return ERR_PTR(-ENOMEM);
249
250 mutex_init(&srq->mutex);
251 spin_lock_init(&srq->lock);
252 srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
253 srq->msrq.max_gs = init_attr->attr.max_sge;
254
255 desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
256 srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
257 desc_size = roundup_pow_of_two(desc_size);
258 desc_size = max_t(int, 32, desc_size);
259 srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
260 sizeof(struct mlx5_wqe_data_seg);
261 srq->msrq.wqe_shift = ilog2(desc_size);
262 buf_size = srq->msrq.max * desc_size;
263 mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
264 desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
265 srq->msrq.max_avail_gather);
266
267 if (pd->uobject)
268 err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
269 else
270 err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
271
272 if (err) {
273 mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
274 pd->uobject ? "user" : "kernel", err);
275 goto err_srq;
276 }
277
278 is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
279 in->ctx.state_log_sz = ilog2(srq->msrq.max);
280 flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
281 xrcdn = 0;
282 if (is_xrc) {
283 xrcdn = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
284 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(init_attr->ext.xrc.cq)->mcq.cqn);
285 } else if (init_attr->srq_type == IB_SRQT_BASIC) {
286 xrcdn = to_mxrcd(dev->devr.x0)->xrcdn;
287 in->ctx.pgoff_cqn |= cpu_to_be32(to_mcq(dev->devr.c0)->mcq.cqn);
288 }
289
290 in->ctx.flags_xrcd = cpu_to_be32((flgs & 0xFF000000) | (xrcdn & 0xFFFFFF));
291
292 in->ctx.pd = cpu_to_be32(to_mpd(pd)->pdn);
293 in->ctx.db_record = cpu_to_be64(srq->db.dma);
294 err = mlx5_core_create_srq(&dev->mdev, &srq->msrq, in, inlen);
295 mlx5_vfree(in);
296 if (err) {
297 mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
298 goto err_srq;
299 }
300
301 mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);
302
303 srq->msrq.event = mlx5_ib_srq_event;
304 srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
305
306 if (pd->uobject)
307 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
308 mlx5_ib_dbg(dev, "copy to user failed\n");
309 err = -EFAULT;
310 goto err_core;
311 }
312
313 init_attr->attr.max_wr = srq->msrq.max - 1;
314
315 return &srq->ibsrq;
316
317err_core:
318 mlx5_core_destroy_srq(&dev->mdev, &srq->msrq);
319 if (pd->uobject)
320 destroy_srq_user(pd, srq);
321 else
322 destroy_srq_kernel(dev, srq);
323
324err_srq:
325 kfree(srq);
326
327 return ERR_PTR(err);
328}
329
330int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
331 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
332{
333 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
334 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
335 int ret;
336
337 /* We don't support resizing SRQs yet */
338 if (attr_mask & IB_SRQ_MAX_WR)
339 return -EINVAL;
340
341 if (attr_mask & IB_SRQ_LIMIT) {
342 if (attr->srq_limit >= srq->msrq.max)
343 return -EINVAL;
344
345 mutex_lock(&srq->mutex);
346 ret = mlx5_core_arm_srq(&dev->mdev, &srq->msrq, attr->srq_limit, 1);
347 mutex_unlock(&srq->mutex);
348
349 if (ret)
350 return ret;
351 }
352
353 return 0;
354}
355
356int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
357{
358 struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
359 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
360 int ret;
361 struct mlx5_query_srq_mbox_out *out;
362
363 out = kzalloc(sizeof(*out), GFP_KERNEL);
364 if (!out)
365 return -ENOMEM;
366
367 ret = mlx5_core_query_srq(&dev->mdev, &srq->msrq, out);
368 if (ret)
369 goto out_box;
370
371 srq_attr->srq_limit = be16_to_cpu(out->ctx.lwm);
372 srq_attr->max_wr = srq->msrq.max - 1;
373 srq_attr->max_sge = srq->msrq.max_gs;
374
375out_box:
376 kfree(out);
377 return ret;
378}
379
380int mlx5_ib_destroy_srq(struct ib_srq *srq)
381{
382 struct mlx5_ib_dev *dev = to_mdev(srq->device);
383 struct mlx5_ib_srq *msrq = to_msrq(srq);
384
385 mlx5_core_destroy_srq(&dev->mdev, &msrq->msrq);
386
387 if (srq->uobject) {
388 mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
389 ib_umem_release(msrq->umem);
390 } else {
391 kfree(msrq->wrid);
392 mlx5_buf_free(&dev->mdev, &msrq->buf);
393 mlx5_db_free(&dev->mdev, &msrq->db);
394 }
395
396 kfree(srq);
397 return 0;
398}
399
400void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
401{
402 struct mlx5_wqe_srq_next_seg *next;
403
404 /* always called with interrupts disabled. */
405 spin_lock(&srq->lock);
406
407 next = get_wqe(srq, srq->tail);
408 next->next_wqe_index = cpu_to_be16(wqe_index);
409 srq->tail = wqe_index;
410
411 spin_unlock(&srq->lock);
412}
413
414int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
415 struct ib_recv_wr **bad_wr)
416{
417 struct mlx5_ib_srq *srq = to_msrq(ibsrq);
418 struct mlx5_wqe_srq_next_seg *next;
419 struct mlx5_wqe_data_seg *scat;
420 unsigned long flags;
421 int err = 0;
422 int nreq;
423 int i;
424
425 spin_lock_irqsave(&srq->lock, flags);
426
427 for (nreq = 0; wr; nreq++, wr = wr->next) {
428 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
429 err = -EINVAL;
430 *bad_wr = wr;
431 break;
432 }
433
434 if (unlikely(srq->head == srq->tail)) {
435 err = -ENOMEM;
436 *bad_wr = wr;
437 break;
438 }
439
440 srq->wrid[srq->head] = wr->wr_id;
441
442 next = get_wqe(srq, srq->head);
443 srq->head = be16_to_cpu(next->next_wqe_index);
444 scat = (struct mlx5_wqe_data_seg *)(next + 1);
445
446 for (i = 0; i < wr->num_sge; i++) {
447 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
448 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
449 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
450 }
451
452 if (i < srq->msrq.max_avail_gather) {
453 scat[i].byte_count = 0;
454 scat[i].lkey = cpu_to_be32(MLX5_INVALID_LKEY);
455 scat[i].addr = 0;
456 }
457 }
458
459 if (likely(nreq)) {
460 srq->wqe_ctr += nreq;
461
462 /* Make sure that descriptors are written before
463 * doorbell record.
464 */
465 wmb();
466
467 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
468 }
469
470 spin_unlock_irqrestore(&srq->lock, flags);
471
472 return err;
473}
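srq.c above keeps the shared receive queue as a linked free list (next_wqe_index chains free WQEs, head/tail track them) and exposes posting through mlx5_ib_post_srq_recv() for kernel consumers. For context, a consumer-side sketch using the standard libibverbs entry point; the helper name and buffer handling are illustrative and not part of this patch:

/* Minimal consumer-side sketch using the standard libibverbs API; assumes
 * the SRQ, a registered MR and its buffer were created elsewhere. */
#include <stdio.h>
#include <stdint.h>
#include <infiniband/verbs.h>

static int post_one_srq_recv(struct ibv_srq *srq, struct ibv_mr *mr,
			     void *buf, uint32_t len, uint64_t wr_id)
{
	struct ibv_sge sge = {
		.addr   = (uintptr_t)buf,   /* buffer address travels as a 64-bit integer */
		.length = len,
		.lkey   = mr->lkey,
	};
	struct ibv_recv_wr wr = {
		.wr_id   = wr_id,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr = NULL;

	/* The kernel-side handler for kernel consumers is mlx5_ib_post_srq_recv()
	 * above; userspace providers typically post to the SRQ buffer directly. */
	int err = ibv_post_srq_recv(srq, &wr, &bad_wr);
	if (err)
		fprintf(stderr, "ibv_post_srq_recv failed: %d (bad_wr=%p)\n",
			err, (void *)bad_wr);
	return err;
}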
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
new file mode 100644
index 000000000000..a886de3e593c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -0,0 +1,121 @@
1/*
2 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX5_IB_USER_H
34#define MLX5_IB_USER_H
35
36#include <linux/types.h>
37
38enum {
39 MLX5_QP_FLAG_SIGNATURE = 1 << 0,
40 MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
41};
42
43enum {
44 MLX5_SRQ_FLAG_SIGNATURE = 1 << 0,
45};
46
47
48/* Increment this value if any changes that break userspace ABI
49 * compatibility are made.
50 */
51#define MLX5_IB_UVERBS_ABI_VERSION 1
52
53/* Make sure that all structs defined in this file remain laid out so
54 * that they pack the same way on 32-bit and 64-bit architectures (to
55 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
56 * In particular do not use pointer types -- pass pointers in __u64
57 * instead.
58 */
59
60struct mlx5_ib_alloc_ucontext_req {
61 __u32 total_num_uuars;
62 __u32 num_low_latency_uuars;
63};
64
65struct mlx5_ib_alloc_ucontext_resp {
66 __u32 qp_tab_size;
67 __u32 bf_reg_size;
68 __u32 tot_uuars;
69 __u32 cache_line_size;
70 __u16 max_sq_desc_sz;
71 __u16 max_rq_desc_sz;
72 __u32 max_send_wqebb;
73 __u32 max_recv_wr;
74 __u32 max_srq_recv_wr;
75 __u16 num_ports;
76 __u16 reserved;
77};
78
79struct mlx5_ib_alloc_pd_resp {
80 __u32 pdn;
81};
82
83struct mlx5_ib_create_cq {
84 __u64 buf_addr;
85 __u64 db_addr;
86 __u32 cqe_size;
87};
88
89struct mlx5_ib_create_cq_resp {
90 __u32 cqn;
91 __u32 reserved;
92};
93
94struct mlx5_ib_resize_cq {
95 __u64 buf_addr;
96};
97
98struct mlx5_ib_create_srq {
99 __u64 buf_addr;
100 __u64 db_addr;
101 __u32 flags;
102};
103
104struct mlx5_ib_create_srq_resp {
105 __u32 srqn;
106 __u32 reserved;
107};
108
109struct mlx5_ib_create_qp {
110 __u64 buf_addr;
111 __u64 db_addr;
112 __u32 sq_wqe_count;
113 __u32 rq_wqe_count;
114 __u32 rq_wqe_shift;
115 __u32 flags;
116};
117
118struct mlx5_ib_create_qp_resp {
119 __u32 uuar_index;
120};
121#endif /* MLX5_IB_USER_H */
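The comment at the top of user.h requires every ABI struct to lay out identically for 32-bit and 64-bit userspace, with pointers passed as __u64. A short sketch of how a userspace provider might populate the SRQ creation command accordingly; the field names match user.h above, while the local struct copy, helper name and flag choice are only for illustration:

/* Illustrative only: populate an mlx5_ib_create_srq command following the
 * "pass pointers in __u64" rule stated in the header above. */
#include <stdint.h>
#include <string.h>

struct mlx5_ib_create_srq_cmd {   /* local copy of the ABI struct above */
	uint64_t buf_addr;
	uint64_t db_addr;
	uint32_t flags;
};

static void fill_create_srq_cmd(struct mlx5_ib_create_srq_cmd *cmd,
				void *buf, uint32_t *db, int want_wq_sig)
{
	memset(cmd, 0, sizeof(*cmd));
	/* Cast through uintptr_t so the same code is correct for 32-bit
	 * userspace talking to a 64-bit kernel. */
	cmd->buf_addr = (uint64_t)(uintptr_t)buf;
	cmd->db_addr  = (uint64_t)(uintptr_t)db;
	if (want_wq_sig)
		cmd->flags |= 1u << 0;   /* MLX5_SRQ_FLAG_SIGNATURE */
}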
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 48970af23679..d540180a8e42 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -42,8 +42,6 @@
42#define OCRDMA_ROCE_DEV_VERSION "1.0.0" 42#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
43#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" 43#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
44 44
45#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg)
46
47#define OCRDMA_MAX_AH 512 45#define OCRDMA_MAX_AH 512
48 46
49#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) 47#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
@@ -97,7 +95,6 @@ struct ocrdma_queue_info {
97 u16 id; /* qid, where to ring the doorbell. */ 95 u16 id; /* qid, where to ring the doorbell. */
98 u16 head, tail; 96 u16 head, tail;
99 bool created; 97 bool created;
100 atomic_t used; /* Number of valid elements in the queue */
101}; 98};
102 99
103struct ocrdma_eq { 100struct ocrdma_eq {
@@ -198,7 +195,6 @@ struct ocrdma_cq {
198 struct ocrdma_ucontext *ucontext; 195 struct ocrdma_ucontext *ucontext;
199 dma_addr_t pa; 196 dma_addr_t pa;
200 u32 len; 197 u32 len;
201 atomic_t use_cnt;
202 198
203 /* head of all qp's sq and rq for which cqes need to be flushed 199 /* head of all qp's sq and rq for which cqes need to be flushed
204 * by the software. 200 * by the software.
@@ -210,7 +206,6 @@ struct ocrdma_pd {
210 struct ib_pd ibpd; 206 struct ib_pd ibpd;
211 struct ocrdma_dev *dev; 207 struct ocrdma_dev *dev;
212 struct ocrdma_ucontext *uctx; 208 struct ocrdma_ucontext *uctx;
213 atomic_t use_cnt;
214 u32 id; 209 u32 id;
215 int num_dpp_qp; 210 int num_dpp_qp;
216 u32 dpp_page; 211 u32 dpp_page;
@@ -241,16 +236,16 @@ struct ocrdma_srq {
241 struct ib_srq ibsrq; 236 struct ib_srq ibsrq;
242 struct ocrdma_dev *dev; 237 struct ocrdma_dev *dev;
243 u8 __iomem *db; 238 u8 __iomem *db;
239 struct ocrdma_qp_hwq_info rq;
240 u64 *rqe_wr_id_tbl;
241 u32 *idx_bit_fields;
242 u32 bit_fields_len;
243
244 /* provide synchronization to multiple context(s) posting rqe */ 244 /* provide synchronization to multiple context(s) posting rqe */
245 spinlock_t q_lock ____cacheline_aligned; 245 spinlock_t q_lock ____cacheline_aligned;
246 246
247 struct ocrdma_qp_hwq_info rq;
248 struct ocrdma_pd *pd; 247 struct ocrdma_pd *pd;
249 atomic_t use_cnt;
250 u32 id; 248 u32 id;
251 u64 *rqe_wr_id_tbl;
252 u32 *idx_bit_fields;
253 u32 bit_fields_len;
254}; 249};
255 250
256struct ocrdma_qp { 251struct ocrdma_qp {
@@ -258,8 +253,6 @@ struct ocrdma_qp {
258 struct ocrdma_dev *dev; 253 struct ocrdma_dev *dev;
259 254
260 u8 __iomem *sq_db; 255 u8 __iomem *sq_db;
261 /* provide synchronization to multiple context(s) posting wqe, rqe */
262 spinlock_t q_lock ____cacheline_aligned;
263 struct ocrdma_qp_hwq_info sq; 256 struct ocrdma_qp_hwq_info sq;
264 struct { 257 struct {
265 uint64_t wrid; 258 uint64_t wrid;
@@ -269,6 +262,9 @@ struct ocrdma_qp {
269 uint8_t rsvd[3]; 262 uint8_t rsvd[3];
270 } *wqe_wr_id_tbl; 263 } *wqe_wr_id_tbl;
271 u32 max_inline_data; 264 u32 max_inline_data;
265
266 /* provide synchronization to multiple context(s) posting wqe, rqe */
267 spinlock_t q_lock ____cacheline_aligned;
272 struct ocrdma_cq *sq_cq; 268 struct ocrdma_cq *sq_cq;
273 /* list maintained per CQ to flush SQ errors */ 269 /* list maintained per CQ to flush SQ errors */
274 struct list_head sq_entry; 270 struct list_head sq_entry;
@@ -296,10 +292,6 @@ struct ocrdma_qp {
296 u8 *ird_q_va; 292 u8 *ird_q_va;
297}; 293};
298 294
299#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \
300 (((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \
301 (qp->id < 64)) ? 24 : 16)
302
303struct ocrdma_hw_mr { 295struct ocrdma_hw_mr {
304 struct ocrdma_dev *dev; 296 struct ocrdma_dev *dev;
305 u32 lkey; 297 u32 lkey;
@@ -390,4 +382,43 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
390 return container_of(ibsrq, struct ocrdma_srq, ibsrq); 382 return container_of(ibsrq, struct ocrdma_srq, ibsrq);
391} 383}
392 384
385
386static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp)
387{
388 return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY &&
389 qp->id < 64) ? 24 : 16);
390}
391
392static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe)
393{
394 int cqe_valid;
395 cqe_valid = le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID;
396 return ((cqe_valid == cq->phase) ? 1 : 0);
397}
398
399static inline int is_cqe_for_sq(struct ocrdma_cqe *cqe)
400{
401 return (le32_to_cpu(cqe->flags_status_srcqpn) &
402 OCRDMA_CQE_QTYPE) ? 0 : 1;
403}
404
405static inline int is_cqe_invalidated(struct ocrdma_cqe *cqe)
406{
407 return (le32_to_cpu(cqe->flags_status_srcqpn) &
408 OCRDMA_CQE_INVALIDATE) ? 1 : 0;
409}
410
411static inline int is_cqe_imm(struct ocrdma_cqe *cqe)
412{
413 return (le32_to_cpu(cqe->flags_status_srcqpn) &
414 OCRDMA_CQE_IMM) ? 1 : 0;
415}
416
417static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
418{
419 return (le32_to_cpu(cqe->flags_status_srcqpn) &
420 OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
421}
422
423
393#endif 424#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 71942af4fce9..0965278dd2ed 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -128,7 +128,6 @@ static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
128static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev) 128static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
129{ 129{
130 dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1); 130 dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
131 atomic_inc(&dev->mq.sq.used);
132} 131}
133 132
134static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev) 133static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
@@ -564,32 +563,19 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
564 memset(cmd, 0, sizeof(*cmd)); 563 memset(cmd, 0, sizeof(*cmd));
565 num_pages = PAGES_4K_SPANNED(mq->va, mq->size); 564 num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
566 565
567 if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { 566 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
568 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ, 567 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
569 OCRDMA_SUBSYS_COMMON, sizeof(*cmd)); 568 cmd->req.rsvd_version = 1;
570 cmd->v0.pages = num_pages; 569 cmd->cqid_pages = num_pages;
571 cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID; 570 cmd->cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
572 cmd->v0.async_cqid_valid = (cq->id << 1); 571 cmd->async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
573 cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << 572 cmd->async_event_bitmap = Bit(20);
574 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT); 573 cmd->async_cqid_ringsize = cq->id;
575 cmd->v0.cqid_ringsize |= 574 cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
576 (cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT); 575 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
577 cmd->v0.valid = OCRDMA_CREATE_MQ_VALID; 576 cmd->valid = OCRDMA_CREATE_MQ_VALID;
578 pa = &cmd->v0.pa[0]; 577 pa = &cmd->pa[0];
579 } else { 578
580 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
581 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
582 cmd->req.rsvd_version = 1;
583 cmd->v1.cqid_pages = num_pages;
584 cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
585 cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
586 cmd->v1.async_event_bitmap = Bit(20);
587 cmd->v1.async_cqid_ringsize = cq->id;
588 cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
589 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
590 cmd->v1.valid = OCRDMA_CREATE_MQ_VALID;
591 pa = &cmd->v1.pa[0];
592 }
593 ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K); 579 ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
594 status = be_roce_mcc_cmd(dev->nic_info.netdev, 580 status = be_roce_mcc_cmd(dev->nic_info.netdev,
595 cmd, sizeof(*cmd), NULL, NULL); 581 cmd, sizeof(*cmd), NULL, NULL);
@@ -745,7 +731,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
745 qp_event = 0; 731 qp_event = 0;
746 srq_event = 0; 732 srq_event = 0;
747 dev_event = 0; 733 dev_event = 0;
748 ocrdma_err("%s() unknown type=0x%x\n", __func__, type); 734 pr_err("%s() unknown type=0x%x\n", __func__, type);
749 break; 735 break;
750 } 736 }
751 737
@@ -775,8 +761,8 @@ static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
775 if (evt_code == OCRDMA_ASYNC_EVE_CODE) 761 if (evt_code == OCRDMA_ASYNC_EVE_CODE)
776 ocrdma_dispatch_ibevent(dev, cqe); 762 ocrdma_dispatch_ibevent(dev, cqe);
777 else 763 else
778 ocrdma_err("%s(%d) invalid evt code=0x%x\n", 764 pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
779 __func__, dev->id, evt_code); 765 dev->id, evt_code);
780} 766}
781 767
782static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) 768static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -790,8 +776,8 @@ static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
790 dev->mqe_ctx.cmd_done = true; 776 dev->mqe_ctx.cmd_done = true;
791 wake_up(&dev->mqe_ctx.cmd_wait); 777 wake_up(&dev->mqe_ctx.cmd_wait);
792 } else 778 } else
793 ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n", 779 pr_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
794 __func__, cqe->tag_lo, dev->mqe_ctx.tag); 780 __func__, cqe->tag_lo, dev->mqe_ctx.tag);
795} 781}
796 782
797static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) 783static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
@@ -810,7 +796,7 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
810 else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) 796 else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
811 ocrdma_process_mcqe(dev, cqe); 797 ocrdma_process_mcqe(dev, cqe);
812 else 798 else
813 ocrdma_err("%s() cqe->compl is not set.\n", __func__); 799 pr_err("%s() cqe->compl is not set.\n", __func__);
814 memset(cqe, 0, sizeof(struct ocrdma_mcqe)); 800 memset(cqe, 0, sizeof(struct ocrdma_mcqe));
815 ocrdma_mcq_inc_tail(dev); 801 ocrdma_mcq_inc_tail(dev);
816 } 802 }
@@ -869,7 +855,7 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
869 855
870 cq = dev->cq_tbl[cq_idx]; 856 cq = dev->cq_tbl[cq_idx];
871 if (cq == NULL) { 857 if (cq == NULL) {
872 ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); 858 pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
873 return; 859 return;
874 } 860 }
875 spin_lock_irqsave(&cq->cq_lock, flags); 861 spin_lock_irqsave(&cq->cq_lock, flags);
@@ -971,7 +957,7 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
971 rsp = ocrdma_get_mqe_rsp(dev); 957 rsp = ocrdma_get_mqe_rsp(dev);
972 ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); 958 ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
973 if (cqe_status || ext_status) { 959 if (cqe_status || ext_status) {
974 ocrdma_err 960 pr_err
975 ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", 961 ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
976 __func__, 962 __func__,
977 (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> 963 (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
@@ -1353,8 +1339,8 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1353 if (dpp_cq) 1339 if (dpp_cq)
1354 return -EINVAL; 1340 return -EINVAL;
1355 if (entries > dev->attr.max_cqe) { 1341 if (entries > dev->attr.max_cqe) {
1356 ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n", 1342 pr_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
1357 __func__, dev->id, dev->attr.max_cqe, entries); 1343 __func__, dev->id, dev->attr.max_cqe, entries);
1358 return -EINVAL; 1344 return -EINVAL;
1359 } 1345 }
1360 if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) 1346 if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
@@ -1621,7 +1607,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
1621 status = ocrdma_mbx_reg_mr(dev, hwmr, pdid, 1607 status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
1622 cur_pbl_cnt, hwmr->pbe_size, last); 1608 cur_pbl_cnt, hwmr->pbe_size, last);
1623 if (status) { 1609 if (status) {
1624 ocrdma_err("%s() status=%d\n", __func__, status); 1610 pr_err("%s() status=%d\n", __func__, status);
1625 return status; 1611 return status;
1626 } 1612 }
1627 /* if there is no more pbls to register then exit. */ 1613 /* if there is no more pbls to register then exit. */
@@ -1644,7 +1630,7 @@ int ocrdma_reg_mr(struct ocrdma_dev *dev,
1644 break; 1630 break;
1645 } 1631 }
1646 if (status) 1632 if (status)
1647 ocrdma_err("%s() err. status=%d\n", __func__, status); 1633 pr_err("%s() err. status=%d\n", __func__, status);
1648 1634
1649 return status; 1635 return status;
1650} 1636}
@@ -1841,8 +1827,8 @@ static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1841 status = ocrdma_build_q_conf(&max_wqe_allocated, 1827 status = ocrdma_build_q_conf(&max_wqe_allocated,
1842 dev->attr.wqe_size, &hw_pages, &hw_page_size); 1828 dev->attr.wqe_size, &hw_pages, &hw_page_size);
1843 if (status) { 1829 if (status) {
1844 ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__, 1830 pr_err("%s() req. max_send_wr=0x%x\n", __func__,
1845 max_wqe_allocated); 1831 max_wqe_allocated);
1846 return -EINVAL; 1832 return -EINVAL;
1847 } 1833 }
1848 qp->sq.max_cnt = max_wqe_allocated; 1834 qp->sq.max_cnt = max_wqe_allocated;
@@ -1891,8 +1877,8 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
1891 status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size, 1877 status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
1892 &hw_pages, &hw_page_size); 1878 &hw_pages, &hw_page_size);
1893 if (status) { 1879 if (status) {
1894 ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__, 1880 pr_err("%s() req. max_recv_wr=0x%x\n", __func__,
1895 attrs->cap.max_recv_wr + 1); 1881 attrs->cap.max_recv_wr + 1);
1896 return status; 1882 return status;
1897 } 1883 }
1898 qp->rq.max_cnt = max_rqe_allocated; 1884 qp->rq.max_cnt = max_rqe_allocated;
@@ -1900,7 +1886,7 @@ static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
1900 1886
1901 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL); 1887 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
1902 if (!qp->rq.va) 1888 if (!qp->rq.va)
1903 return status; 1889 return -ENOMEM;
1904 memset(qp->rq.va, 0, len); 1890 memset(qp->rq.va, 0, len);
1905 qp->rq.pa = pa; 1891 qp->rq.pa = pa;
1906 qp->rq.len = len; 1892 qp->rq.len = len;
@@ -2087,10 +2073,10 @@ mbx_err:
2087 if (qp->rq.va) 2073 if (qp->rq.va)
2088 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa); 2074 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
2089rq_err: 2075rq_err:
2090 ocrdma_err("%s(%d) rq_err\n", __func__, dev->id); 2076 pr_err("%s(%d) rq_err\n", __func__, dev->id);
2091 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa); 2077 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
2092sq_err: 2078sq_err:
2093 ocrdma_err("%s(%d) sq_err\n", __func__, dev->id); 2079 pr_err("%s(%d) sq_err\n", __func__, dev->id);
2094 kfree(cmd); 2080 kfree(cmd);
2095 return status; 2081 return status;
2096} 2082}
@@ -2127,7 +2113,7 @@ int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
2127 else if (rdma_link_local_addr(&in6)) 2113 else if (rdma_link_local_addr(&in6))
2128 rdma_get_ll_mac(&in6, mac_addr); 2114 rdma_get_ll_mac(&in6, mac_addr);
2129 else { 2115 else {
2130 ocrdma_err("%s() fail to resolve mac_addr.\n", __func__); 2116 pr_err("%s() fail to resolve mac_addr.\n", __func__);
2131 return -EINVAL; 2117 return -EINVAL;
2132 } 2118 }
2133 return 0; 2119 return 0;
@@ -2362,8 +2348,8 @@ int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
2362 dev->attr.rqe_size, 2348 dev->attr.rqe_size,
2363 &hw_pages, &hw_page_size); 2349 &hw_pages, &hw_page_size);
2364 if (status) { 2350 if (status) {
2365 ocrdma_err("%s() req. max_wr=0x%x\n", __func__, 2351 pr_err("%s() req. max_wr=0x%x\n", __func__,
2366 srq_attr->attr.max_wr); 2352 srq_attr->attr.max_wr);
2367 status = -EINVAL; 2353 status = -EINVAL;
2368 goto ret; 2354 goto ret;
2369 } 2355 }
@@ -2614,7 +2600,7 @@ mq_err:
2614 ocrdma_destroy_qp_eqs(dev); 2600 ocrdma_destroy_qp_eqs(dev);
2615qpeq_err: 2601qpeq_err:
2616 ocrdma_destroy_eq(dev, &dev->meq); 2602 ocrdma_destroy_eq(dev, &dev->meq);
2617 ocrdma_err("%s() status=%d\n", __func__, status); 2603 pr_err("%s() status=%d\n", __func__, status);
2618 return status; 2604 return status;
2619} 2605}
2620 2606
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 48928c8e7774..ded416f1adea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -378,7 +378,7 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
378 spin_lock_init(&dev->flush_q_lock); 378 spin_lock_init(&dev->flush_q_lock);
379 return 0; 379 return 0;
380alloc_err: 380alloc_err:
381 ocrdma_err("%s(%d) error.\n", __func__, dev->id); 381 pr_err("%s(%d) error.\n", __func__, dev->id);
382 return -ENOMEM; 382 return -ENOMEM;
383} 383}
384 384
@@ -396,7 +396,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
396 396
397 dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); 397 dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
398 if (!dev) { 398 if (!dev) {
399 ocrdma_err("Unable to allocate ib device\n"); 399 pr_err("Unable to allocate ib device\n");
400 return NULL; 400 return NULL;
401 } 401 }
402 dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL); 402 dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
@@ -437,7 +437,7 @@ init_err:
437idr_err: 437idr_err:
438 kfree(dev->mbx_cmd); 438 kfree(dev->mbx_cmd);
439 ib_dealloc_device(&dev->ibdev); 439 ib_dealloc_device(&dev->ibdev);
440 ocrdma_err("%s() leaving. ret=%d\n", __func__, status); 440 pr_err("%s() leaving. ret=%d\n", __func__, status);
441 return NULL; 441 return NULL;
442} 442}
443 443
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index c75cbdfa87e7..36b062da2aea 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -608,16 +608,8 @@ enum {
608 OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0) 608 OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
609}; 609};
610 610
611struct ocrdma_create_mq_v0 { 611struct ocrdma_create_mq_req {
612 u32 pages; 612 struct ocrdma_mbx_hdr req;
613 u32 cqid_ringsize;
614 u32 valid;
615 u32 async_cqid_valid;
616 u32 rsvd;
617 struct ocrdma_pa pa[8];
618} __packed;
619
620struct ocrdma_create_mq_v1 {
621 u32 cqid_pages; 613 u32 cqid_pages;
622 u32 async_event_bitmap; 614 u32 async_event_bitmap;
623 u32 async_cqid_ringsize; 615 u32 async_cqid_ringsize;
@@ -627,14 +619,6 @@ struct ocrdma_create_mq_v1 {
627 struct ocrdma_pa pa[8]; 619 struct ocrdma_pa pa[8];
628} __packed; 620} __packed;
629 621
630struct ocrdma_create_mq_req {
631 struct ocrdma_mbx_hdr req;
632 union {
633 struct ocrdma_create_mq_v0 v0;
634 struct ocrdma_create_mq_v1 v1;
635 };
636} __packed;
637
638struct ocrdma_create_mq_rsp { 622struct ocrdma_create_mq_rsp {
639 struct ocrdma_mbx_rsp rsp; 623 struct ocrdma_mbx_rsp rsp;
640 u32 id; 624 u32 id;
@@ -1550,21 +1534,6 @@ struct ocrdma_cqe {
1550 u32 flags_status_srcqpn; /* w3 */ 1534 u32 flags_status_srcqpn; /* w3 */
1551} __packed; 1535} __packed;
1552 1536
1553#define is_cqe_valid(cq, cqe) \
1554 (((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_VALID)\
1555 == cq->phase) ? 1 : 0)
1556#define is_cqe_for_sq(cqe) \
1557 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1)
1558#define is_cqe_for_rq(cqe) \
1559 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0)
1560#define is_cqe_invalidated(cqe) \
1561 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \
1562 1 : 0)
1563#define is_cqe_imm(cqe) \
1564 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0)
1565#define is_cqe_wr_imm(cqe) \
1566 ((le32_to_cpu(cqe->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0)
1567
1568struct ocrdma_sge { 1537struct ocrdma_sge {
1569 u32 addr_hi; 1538 u32 addr_hi;
1570 u32 addr_lo; 1539 u32 addr_lo;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b29a4246ef41..dcfbab177faa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -114,8 +114,8 @@ int ocrdma_query_port(struct ib_device *ibdev,
114 114
115 dev = get_ocrdma_dev(ibdev); 115 dev = get_ocrdma_dev(ibdev);
116 if (port > 1) { 116 if (port > 1) {
117 ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, 117 pr_err("%s(%d) invalid_port=0x%x\n", __func__,
118 dev->id, port); 118 dev->id, port);
119 return -EINVAL; 119 return -EINVAL;
120 } 120 }
121 netdev = dev->nic_info.netdev; 121 netdev = dev->nic_info.netdev;
@@ -155,8 +155,7 @@ int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
155 155
156 dev = get_ocrdma_dev(ibdev); 156 dev = get_ocrdma_dev(ibdev);
157 if (port > 1) { 157 if (port > 1) {
158 ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__, 158 pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
159 dev->id, port);
160 return -EINVAL; 159 return -EINVAL;
161 } 160 }
162 return 0; 161 return 0;
@@ -398,7 +397,6 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
398 kfree(pd); 397 kfree(pd);
399 return ERR_PTR(status); 398 return ERR_PTR(status);
400 } 399 }
401 atomic_set(&pd->use_cnt, 0);
402 400
403 if (udata && context) { 401 if (udata && context) {
404 status = ocrdma_copy_pd_uresp(pd, context, udata); 402 status = ocrdma_copy_pd_uresp(pd, context, udata);
@@ -419,12 +417,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
419 int status; 417 int status;
420 u64 usr_db; 418 u64 usr_db;
421 419
422 if (atomic_read(&pd->use_cnt)) {
423 ocrdma_err("%s(%d) pd=0x%x is in use.\n",
424 __func__, dev->id, pd->id);
425 status = -EFAULT;
426 goto dealloc_err;
427 }
428 status = ocrdma_mbx_dealloc_pd(dev, pd); 420 status = ocrdma_mbx_dealloc_pd(dev, pd);
429 if (pd->uctx) { 421 if (pd->uctx) {
430 u64 dpp_db = dev->nic_info.dpp_unmapped_addr + 422 u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
@@ -436,7 +428,6 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
436 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size); 428 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
437 } 429 }
438 kfree(pd); 430 kfree(pd);
439dealloc_err:
440 return status; 431 return status;
441} 432}
442 433
@@ -450,8 +441,8 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
450 struct ocrdma_dev *dev = pd->dev; 441 struct ocrdma_dev *dev = pd->dev;
451 442
452 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) { 443 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
453 ocrdma_err("%s(%d) leaving err, invalid access rights\n", 444 pr_err("%s(%d) leaving err, invalid access rights\n",
454 __func__, dev->id); 445 __func__, dev->id);
455 return ERR_PTR(-EINVAL); 446 return ERR_PTR(-EINVAL);
456 } 447 }
457 448
@@ -474,7 +465,6 @@ static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
474 return ERR_PTR(-ENOMEM); 465 return ERR_PTR(-ENOMEM);
475 } 466 }
476 mr->pd = pd; 467 mr->pd = pd;
477 atomic_inc(&pd->use_cnt);
478 mr->ibmr.lkey = mr->hwmr.lkey; 468 mr->ibmr.lkey = mr->hwmr.lkey;
479 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) 469 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
480 mr->ibmr.rkey = mr->hwmr.lkey; 470 mr->ibmr.rkey = mr->hwmr.lkey;
@@ -664,7 +654,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
664 if (status) 654 if (status)
665 goto mbx_err; 655 goto mbx_err;
666 mr->pd = pd; 656 mr->pd = pd;
667 atomic_inc(&pd->use_cnt);
668 mr->ibmr.lkey = mr->hwmr.lkey; 657 mr->ibmr.lkey = mr->hwmr.lkey;
669 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd) 658 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
670 mr->ibmr.rkey = mr->hwmr.lkey; 659 mr->ibmr.rkey = mr->hwmr.lkey;
@@ -689,7 +678,6 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
689 if (mr->hwmr.fr_mr == 0) 678 if (mr->hwmr.fr_mr == 0)
690 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); 679 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
691 680
692 atomic_dec(&mr->pd->use_cnt);
693 /* it could be user registered memory. */ 681 /* it could be user registered memory. */
694 if (mr->umem) 682 if (mr->umem)
695 ib_umem_release(mr->umem); 683 ib_umem_release(mr->umem);
@@ -714,8 +702,8 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
714 uresp.phase_change = cq->phase_change ? 1 : 0; 702 uresp.phase_change = cq->phase_change ? 1 : 0;
715 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 703 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
716 if (status) { 704 if (status) {
717 ocrdma_err("%s(%d) copy error cqid=0x%x.\n", 705 pr_err("%s(%d) copy error cqid=0x%x.\n",
718 __func__, cq->dev->id, cq->id); 706 __func__, cq->dev->id, cq->id);
719 goto err; 707 goto err;
720 } 708 }
721 uctx = get_ocrdma_ucontext(ib_ctx); 709 uctx = get_ocrdma_ucontext(ib_ctx);
@@ -752,7 +740,6 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
752 740
753 spin_lock_init(&cq->cq_lock); 741 spin_lock_init(&cq->cq_lock);
754 spin_lock_init(&cq->comp_handler_lock); 742 spin_lock_init(&cq->comp_handler_lock);
755 atomic_set(&cq->use_cnt, 0);
756 INIT_LIST_HEAD(&cq->sq_head); 743 INIT_LIST_HEAD(&cq->sq_head);
757 INIT_LIST_HEAD(&cq->rq_head); 744 INIT_LIST_HEAD(&cq->rq_head);
758 cq->dev = dev; 745 cq->dev = dev;
@@ -799,9 +786,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
799 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); 786 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
800 struct ocrdma_dev *dev = cq->dev; 787 struct ocrdma_dev *dev = cq->dev;
801 788
802 if (atomic_read(&cq->use_cnt))
803 return -EINVAL;
804
805 status = ocrdma_mbx_destroy_cq(dev, cq); 789 status = ocrdma_mbx_destroy_cq(dev, cq);
806 790
807 if (cq->ucontext) { 791 if (cq->ucontext) {
@@ -837,57 +821,56 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
837 if (attrs->qp_type != IB_QPT_GSI && 821 if (attrs->qp_type != IB_QPT_GSI &&
838 attrs->qp_type != IB_QPT_RC && 822 attrs->qp_type != IB_QPT_RC &&
839 attrs->qp_type != IB_QPT_UD) { 823 attrs->qp_type != IB_QPT_UD) {
840 ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n", 824 pr_err("%s(%d) unsupported qp type=0x%x requested\n",
841 __func__, dev->id, attrs->qp_type); 825 __func__, dev->id, attrs->qp_type);
842 return -EINVAL; 826 return -EINVAL;
843 } 827 }
844 if (attrs->cap.max_send_wr > dev->attr.max_wqe) { 828 if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
845 ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n", 829 pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
846 __func__, dev->id, attrs->cap.max_send_wr); 830 __func__, dev->id, attrs->cap.max_send_wr);
847 ocrdma_err("%s(%d) supported send_wr=0x%x\n", 831 pr_err("%s(%d) supported send_wr=0x%x\n",
848 __func__, dev->id, dev->attr.max_wqe); 832 __func__, dev->id, dev->attr.max_wqe);
849 return -EINVAL; 833 return -EINVAL;
850 } 834 }
851 if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) { 835 if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
852 ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n", 836 pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
853 __func__, dev->id, attrs->cap.max_recv_wr); 837 __func__, dev->id, attrs->cap.max_recv_wr);
854 ocrdma_err("%s(%d) supported recv_wr=0x%x\n", 838 pr_err("%s(%d) supported recv_wr=0x%x\n",
855 __func__, dev->id, dev->attr.max_rqe); 839 __func__, dev->id, dev->attr.max_rqe);
856 return -EINVAL; 840 return -EINVAL;
857 } 841 }
858 if (attrs->cap.max_inline_data > dev->attr.max_inline_data) { 842 if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
859 ocrdma_err("%s(%d) unsupported inline data size=0x%x" 843 pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
860 " requested\n", __func__, dev->id, 844 __func__, dev->id, attrs->cap.max_inline_data);
861 attrs->cap.max_inline_data); 845 pr_err("%s(%d) supported inline data size=0x%x\n",
862 ocrdma_err("%s(%d) supported inline data size=0x%x\n", 846 __func__, dev->id, dev->attr.max_inline_data);
863 __func__, dev->id, dev->attr.max_inline_data);
864 return -EINVAL; 847 return -EINVAL;
865 } 848 }
866 if (attrs->cap.max_send_sge > dev->attr.max_send_sge) { 849 if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
867 ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n", 850 pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
868 __func__, dev->id, attrs->cap.max_send_sge); 851 __func__, dev->id, attrs->cap.max_send_sge);
869 ocrdma_err("%s(%d) supported send_sge=0x%x\n", 852 pr_err("%s(%d) supported send_sge=0x%x\n",
870 __func__, dev->id, dev->attr.max_send_sge); 853 __func__, dev->id, dev->attr.max_send_sge);
871 return -EINVAL; 854 return -EINVAL;
872 } 855 }
873 if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) { 856 if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
874 ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n", 857 pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
875 __func__, dev->id, attrs->cap.max_recv_sge); 858 __func__, dev->id, attrs->cap.max_recv_sge);
876 ocrdma_err("%s(%d) supported recv_sge=0x%x\n", 859 pr_err("%s(%d) supported recv_sge=0x%x\n",
877 __func__, dev->id, dev->attr.max_recv_sge); 860 __func__, dev->id, dev->attr.max_recv_sge);
878 return -EINVAL; 861 return -EINVAL;
879 } 862 }
880 /* unprivileged user space cannot create special QP */ 863 /* unprivileged user space cannot create special QP */
881 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { 864 if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
882 ocrdma_err 865 pr_err
883 ("%s(%d) Userspace can't create special QPs of type=0x%x\n", 866 ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
884 __func__, dev->id, attrs->qp_type); 867 __func__, dev->id, attrs->qp_type);
885 return -EINVAL; 868 return -EINVAL;
886 } 869 }
887 /* allow creating only one GSI type of QP */ 870 /* allow creating only one GSI type of QP */
888 if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) { 871 if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
889 ocrdma_err("%s(%d) GSI special QPs already created.\n", 872 pr_err("%s(%d) GSI special QPs already created.\n",
890 __func__, dev->id); 873 __func__, dev->id);
891 return -EINVAL; 874 return -EINVAL;
892 } 875 }
893 /* verify consumer QPs are not trying to use GSI QP's CQ */ 876 /* verify consumer QPs are not trying to use GSI QP's CQ */
@@ -896,8 +879,8 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
896 (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) || 879 (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
897 (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) || 880 (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
898 (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) { 881 (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
899 ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n", 882 pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
900 __func__, dev->id); 883 __func__, dev->id);
901 return -EINVAL; 884 return -EINVAL;
902 } 885 }
903 } 886 }
@@ -949,7 +932,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
949 } 932 }
950 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); 933 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
951 if (status) { 934 if (status) {
952 ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id); 935 pr_err("%s(%d) user copy error.\n", __func__, dev->id);
953 goto err; 936 goto err;
954 } 937 }
955 status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0], 938 status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
@@ -1023,15 +1006,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1023 qp->state = OCRDMA_QPS_RST; 1006 qp->state = OCRDMA_QPS_RST;
1024} 1007}
1025 1008
1026static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd)
1027{
1028 atomic_inc(&pd->use_cnt);
1029 atomic_inc(&qp->sq_cq->use_cnt);
1030 atomic_inc(&qp->rq_cq->use_cnt);
1031 if (qp->srq)
1032 atomic_inc(&qp->srq->use_cnt);
1033 qp->ibqp.qp_num = qp->id;
1034}
1035 1009
1036static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, 1010static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1037 struct ib_qp_init_attr *attrs) 1011 struct ib_qp_init_attr *attrs)
@@ -1099,7 +1073,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1099 goto cpy_err; 1073 goto cpy_err;
1100 } 1074 }
1101 ocrdma_store_gsi_qp_cq(dev, attrs); 1075 ocrdma_store_gsi_qp_cq(dev, attrs);
1102 ocrdma_set_qp_use_cnt(qp, pd); 1076 qp->ibqp.qp_num = qp->id;
1103 mutex_unlock(&dev->dev_lock); 1077 mutex_unlock(&dev->dev_lock);
1104 return &qp->ibqp; 1078 return &qp->ibqp;
1105 1079
@@ -1112,7 +1086,7 @@ mbx_err:
1112 kfree(qp->wqe_wr_id_tbl); 1086 kfree(qp->wqe_wr_id_tbl);
1113 kfree(qp->rqe_wr_id_tbl); 1087 kfree(qp->rqe_wr_id_tbl);
1114 kfree(qp); 1088 kfree(qp);
1115 ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status); 1089 pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1116gen_err: 1090gen_err:
1117 return ERR_PTR(status); 1091 return ERR_PTR(status);
1118} 1092}
@@ -1162,10 +1136,10 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1162 spin_unlock_irqrestore(&qp->q_lock, flags); 1136 spin_unlock_irqrestore(&qp->q_lock, flags);
1163 1137
1164 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) { 1138 if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1165 ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for " 1139 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1166 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n", 1140 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1167 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type, 1141 __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1168 old_qps, new_qps); 1142 old_qps, new_qps);
1169 goto param_err; 1143 goto param_err;
1170 } 1144 }
1171 1145
@@ -1475,11 +1449,6 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
1475 1449
1476 ocrdma_del_flush_qp(qp); 1450 ocrdma_del_flush_qp(qp);
1477 1451
1478 atomic_dec(&qp->pd->use_cnt);
1479 atomic_dec(&qp->sq_cq->use_cnt);
1480 atomic_dec(&qp->rq_cq->use_cnt);
1481 if (qp->srq)
1482 atomic_dec(&qp->srq->use_cnt);
1483 kfree(qp->wqe_wr_id_tbl); 1452 kfree(qp->wqe_wr_id_tbl);
1484 kfree(qp->rqe_wr_id_tbl); 1453 kfree(qp->rqe_wr_id_tbl);
1485 kfree(qp); 1454 kfree(qp);
@@ -1565,14 +1534,12 @@ struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1565 goto arm_err; 1534 goto arm_err;
1566 } 1535 }
1567 1536
1568 atomic_set(&srq->use_cnt, 0);
1569 if (udata) { 1537 if (udata) {
1570 status = ocrdma_copy_srq_uresp(srq, udata); 1538 status = ocrdma_copy_srq_uresp(srq, udata);
1571 if (status) 1539 if (status)
1572 goto arm_err; 1540 goto arm_err;
1573 } 1541 }
1574 1542
1575 atomic_inc(&pd->use_cnt);
1576 return &srq->ibsrq; 1543 return &srq->ibsrq;
1577 1544
1578arm_err: 1545arm_err:
@@ -1618,18 +1585,12 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1618 1585
1619 srq = get_ocrdma_srq(ibsrq); 1586 srq = get_ocrdma_srq(ibsrq);
1620 dev = srq->dev; 1587 dev = srq->dev;
1621 if (atomic_read(&srq->use_cnt)) {
1622 ocrdma_err("%s(%d) err, srq=0x%x in use\n",
1623 __func__, dev->id, srq->id);
1624 return -EAGAIN;
1625 }
1626 1588
1627 status = ocrdma_mbx_destroy_srq(dev, srq); 1589 status = ocrdma_mbx_destroy_srq(dev, srq);
1628 1590
1629 if (srq->pd->uctx) 1591 if (srq->pd->uctx)
1630 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len); 1592 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
1631 1593
1632 atomic_dec(&srq->pd->use_cnt);
1633 kfree(srq->idx_bit_fields); 1594 kfree(srq->idx_bit_fields);
1634 kfree(srq->rqe_wr_id_tbl); 1595 kfree(srq->rqe_wr_id_tbl);
1635 kfree(srq); 1596 kfree(srq);
@@ -1677,9 +1638,9 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1677{ 1638{
1678 if (wr->send_flags & IB_SEND_INLINE) { 1639 if (wr->send_flags & IB_SEND_INLINE) {
1679 if (wr->sg_list[0].length > qp->max_inline_data) { 1640 if (wr->sg_list[0].length > qp->max_inline_data) {
1680 ocrdma_err("%s() supported_len=0x%x," 1641 pr_err("%s() supported_len=0x%x,\n"
1681 " unspported len req=0x%x\n", __func__, 1642 " unspported len req=0x%x\n", __func__,
1682 qp->max_inline_data, wr->sg_list[0].length); 1643 qp->max_inline_data, wr->sg_list[0].length);
1683 return -EINVAL; 1644 return -EINVAL;
1684 } 1645 }
1685 memcpy(sge, 1646 memcpy(sge,
@@ -1773,12 +1734,14 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1773 spin_lock_irqsave(&qp->q_lock, flags); 1734 spin_lock_irqsave(&qp->q_lock, flags);
1774 if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) { 1735 if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
1775 spin_unlock_irqrestore(&qp->q_lock, flags); 1736 spin_unlock_irqrestore(&qp->q_lock, flags);
1737 *bad_wr = wr;
1776 return -EINVAL; 1738 return -EINVAL;
1777 } 1739 }
1778 1740
1779 while (wr) { 1741 while (wr) {
1780 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || 1742 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
1781 wr->num_sge > qp->sq.max_sges) { 1743 wr->num_sge > qp->sq.max_sges) {
1744 *bad_wr = wr;
1782 status = -ENOMEM; 1745 status = -ENOMEM;
1783 break; 1746 break;
1784 } 1747 }
@@ -1856,7 +1819,7 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1856 1819
1857static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) 1820static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
1858{ 1821{
1859 u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp)); 1822 u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp));
1860 1823
1861 iowrite32(val, qp->rq_db); 1824 iowrite32(val, qp->rq_db);
1862} 1825}
@@ -2094,8 +2057,8 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2094 break; 2057 break;
2095 default: 2058 default:
2096 ibwc->status = IB_WC_GENERAL_ERR; 2059 ibwc->status = IB_WC_GENERAL_ERR;
2097 ocrdma_err("%s() invalid opcode received = 0x%x\n", 2060 pr_err("%s() invalid opcode received = 0x%x\n",
2098 __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK); 2061 __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
2099 break; 2062 break;
2100 }; 2063 };
2101} 2064}
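Aside: the ocrdma_verbs.c hunks above replace the driver-private ocrdma_err() macro with plain pr_err(). A minimal sketch of that generic idiom follows; the "ocrdma: " pr_fmt prefix and the demo function are illustrative assumptions only, not code from this commit.

	/* Illustrative only -- generic pr_fmt()/pr_err() usage, not ocrdma code. */
	#define pr_fmt(fmt) "ocrdma: " fmt	/* must be defined before printk.h is pulled in */

	#include <linux/printk.h>
	#include <linux/types.h>

	static void demo_report_sge_limit(int dev_id, u32 requested, u32 supported)
	{
		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
		       __func__, dev_id, requested);
		pr_err("%s(%d) supported send_sge=0x%x\n",
		       __func__, dev_id, supported);
	}

With pr_fmt defined, every pr_err() line is automatically prefixed, which is why a per-driver wrapper macro becomes unnecessary.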
diff --git a/drivers/infiniband/hw/qib/Kconfig b/drivers/infiniband/hw/qib/Kconfig
index 1e603a375069..d03ca4c1ff25 100644
--- a/drivers/infiniband/hw/qib/Kconfig
+++ b/drivers/infiniband/hw/qib/Kconfig
@@ -5,3 +5,11 @@ config INFINIBAND_QIB
5 This is a low-level driver for Intel PCIe QLE InfiniBand host 5 This is a low-level driver for Intel PCIe QLE InfiniBand host
6 channel adapters. This driver does not support the Intel 6 channel adapters. This driver does not support the Intel
7 HyperTransport card (model QHT7140). 7 HyperTransport card (model QHT7140).
8
9config INFINIBAND_QIB_DCA
10 bool "QIB DCA support"
11 depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m)
12 default y
13 ---help---
14 Setting this enables DCA support on some Intel chip sets
15 with the iba7322 HCA.
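For reference, one .config fragment that satisfies the dependency expression above, on top of an existing InfiniBand-enabled configuration (an illustrative combination; GENERIC_HARDIRQS is normally selected by the architecture, and the only excluded mix is INFINIBAND_QIB=y together with DCA=m):

	CONFIG_SMP=y
	CONFIG_DCA=y
	CONFIG_INFINIBAND_QIB=m
	CONFIG_INFINIBAND_QIB_DCA=y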
diff --git a/drivers/infiniband/hw/qib/Makefile b/drivers/infiniband/hw/qib/Makefile
index f12d7bb8b39f..57f8103e51f8 100644
--- a/drivers/infiniband/hw/qib/Makefile
+++ b/drivers/infiniband/hw/qib/Makefile
@@ -13,3 +13,4 @@ ib_qib-$(CONFIG_PCI_MSI) += qib_iba6120.o
13 13
14ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o 14ib_qib-$(CONFIG_X86_64) += qib_wc_x86_64.o
15ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o 15ib_qib-$(CONFIG_PPC64) += qib_wc_ppc64.o
16ib_qib-$(CONFIG_DEBUG_FS) += qib_debugfs.o
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4d11575c2010..4a9af795b88f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
1#ifndef _QIB_KERNEL_H 1#ifndef _QIB_KERNEL_H
2#define _QIB_KERNEL_H 2#define _QIB_KERNEL_H
3/* 3/*
4 * Copyright (c) 2012 Intel Corporation. All rights reserved. 4 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
5 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 5 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
7 * 7 *
@@ -51,6 +51,7 @@
51#include <linux/completion.h> 51#include <linux/completion.h>
52#include <linux/kref.h> 52#include <linux/kref.h>
53#include <linux/sched.h> 53#include <linux/sched.h>
54#include <linux/kthread.h>
54 55
55#include "qib_common.h" 56#include "qib_common.h"
56#include "qib_verbs.h" 57#include "qib_verbs.h"
@@ -114,6 +115,11 @@ struct qib_eep_log_mask {
114/* 115/*
115 * Below contains all data related to a single context (formerly called port). 116 * Below contains all data related to a single context (formerly called port).
116 */ 117 */
118
119#ifdef CONFIG_DEBUG_FS
120struct qib_opcode_stats_perctx;
121#endif
122
117struct qib_ctxtdata { 123struct qib_ctxtdata {
118 void **rcvegrbuf; 124 void **rcvegrbuf;
119 dma_addr_t *rcvegrbuf_phys; 125 dma_addr_t *rcvegrbuf_phys;
@@ -154,6 +160,8 @@ struct qib_ctxtdata {
154 */ 160 */
155 /* instead of calculating it */ 161 /* instead of calculating it */
156 unsigned ctxt; 162 unsigned ctxt;
163 /* local node of context */
164 int node_id;
157 /* non-zero if ctxt is being shared. */ 165 /* non-zero if ctxt is being shared. */
158 u16 subctxt_cnt; 166 u16 subctxt_cnt;
159 /* non-zero if ctxt is being shared. */ 167 /* non-zero if ctxt is being shared. */
@@ -222,12 +230,15 @@ struct qib_ctxtdata {
222 u8 redirect_seq_cnt; 230 u8 redirect_seq_cnt;
223 /* ctxt rcvhdrq head offset */ 231 /* ctxt rcvhdrq head offset */
224 u32 head; 232 u32 head;
225 u32 pkt_count;
226 /* lookaside fields */ 233 /* lookaside fields */
227 struct qib_qp *lookaside_qp; 234 struct qib_qp *lookaside_qp;
228 u32 lookaside_qpn; 235 u32 lookaside_qpn;
229 /* QPs waiting for context processing */ 236 /* QPs waiting for context processing */
230 struct list_head qp_wait_list; 237 struct list_head qp_wait_list;
238#ifdef CONFIG_DEBUG_FS
239 /* verbs stats per CTX */
240 struct qib_opcode_stats_perctx *opstats;
241#endif
231}; 242};
232 243
233struct qib_sge_state; 244struct qib_sge_state;
@@ -428,9 +439,19 @@ struct qib_verbs_txreq {
428#define ACTIVITY_TIMER 5 439#define ACTIVITY_TIMER 5
429 440
430#define MAX_NAME_SIZE 64 441#define MAX_NAME_SIZE 64
442
443#ifdef CONFIG_INFINIBAND_QIB_DCA
444struct qib_irq_notify;
445#endif
446
431struct qib_msix_entry { 447struct qib_msix_entry {
432 struct msix_entry msix; 448 struct msix_entry msix;
433 void *arg; 449 void *arg;
450#ifdef CONFIG_INFINIBAND_QIB_DCA
451 int dca;
452 int rcv;
453 struct qib_irq_notify *notifier;
454#endif
434 char name[MAX_NAME_SIZE]; 455 char name[MAX_NAME_SIZE];
435 cpumask_var_t mask; 456 cpumask_var_t mask;
436}; 457};
@@ -828,6 +849,9 @@ struct qib_devdata {
828 struct qib_ctxtdata *); 849 struct qib_ctxtdata *);
829 void (*f_writescratch)(struct qib_devdata *, u32); 850 void (*f_writescratch)(struct qib_devdata *, u32);
830 int (*f_tempsense_rd)(struct qib_devdata *, int regnum); 851 int (*f_tempsense_rd)(struct qib_devdata *, int regnum);
852#ifdef CONFIG_INFINIBAND_QIB_DCA
853 int (*f_notify_dca)(struct qib_devdata *, unsigned long event);
854#endif
831 855
832 char *boardname; /* human readable board info */ 856 char *boardname; /* human readable board info */
833 857
@@ -1075,6 +1099,10 @@ struct qib_devdata {
1075 u16 psxmitwait_check_rate; 1099 u16 psxmitwait_check_rate;
1076 /* high volume overflow errors defered to tasklet */ 1100 /* high volume overflow errors defered to tasklet */
1077 struct tasklet_struct error_tasklet; 1101 struct tasklet_struct error_tasklet;
1102 /* per device cq worker */
1103 struct kthread_worker *worker;
1104
1105 int assigned_node_id; /* NUMA node closest to HCA */
1078}; 1106};
1079 1107
1080/* hol_state values */ 1108/* hol_state values */
@@ -1154,7 +1182,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
1154int qib_setup_eagerbufs(struct qib_ctxtdata *); 1182int qib_setup_eagerbufs(struct qib_ctxtdata *);
1155void qib_set_ctxtcnt(struct qib_devdata *); 1183void qib_set_ctxtcnt(struct qib_devdata *);
1156int qib_create_ctxts(struct qib_devdata *dd); 1184int qib_create_ctxts(struct qib_devdata *dd);
1157struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); 1185struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
1158void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); 1186void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
1159void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); 1187void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
1160 1188
@@ -1320,7 +1348,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
1320 return ppd->sdma_state.current_state == qib_sdma_state_s99_running; 1348 return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
1321} 1349}
1322int qib_sdma_running(struct qib_pportdata *); 1350int qib_sdma_running(struct qib_pportdata *);
1323 1351void dump_sdma_state(struct qib_pportdata *ppd);
1324void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); 1352void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
1325void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events); 1353void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
1326 1354
@@ -1445,6 +1473,7 @@ extern unsigned qib_n_krcv_queues;
1445extern unsigned qib_sdma_fetch_arb; 1473extern unsigned qib_sdma_fetch_arb;
1446extern unsigned qib_compat_ddr_negotiate; 1474extern unsigned qib_compat_ddr_negotiate;
1447extern int qib_special_trigger; 1475extern int qib_special_trigger;
1476extern unsigned qib_numa_aware;
1448 1477
1449extern struct mutex qib_mutex; 1478extern struct mutex qib_mutex;
1450 1479
@@ -1474,27 +1503,23 @@ extern struct mutex qib_mutex;
1474 * first to avoid possible serial port delays from printk. 1503 * first to avoid possible serial port delays from printk.
1475 */ 1504 */
1476#define qib_early_err(dev, fmt, ...) \ 1505#define qib_early_err(dev, fmt, ...) \
1477 do { \ 1506 dev_err(dev, fmt, ##__VA_ARGS__)
1478 dev_err(dev, fmt, ##__VA_ARGS__); \
1479 } while (0)
1480 1507
1481#define qib_dev_err(dd, fmt, ...) \ 1508#define qib_dev_err(dd, fmt, ...) \
1482 do { \ 1509 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \
1483 dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ 1510 qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
1484 qib_get_unit_name((dd)->unit), ##__VA_ARGS__); \ 1511
1485 } while (0) 1512#define qib_dev_warn(dd, fmt, ...) \
1513 dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \
1514 qib_get_unit_name((dd)->unit), ##__VA_ARGS__)
1486 1515
1487#define qib_dev_porterr(dd, port, fmt, ...) \ 1516#define qib_dev_porterr(dd, port, fmt, ...) \
1488 do { \ 1517 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \
1489 dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ 1518 qib_get_unit_name((dd)->unit), (dd)->unit, (port), \
1490 qib_get_unit_name((dd)->unit), (dd)->unit, (port), \ 1519 ##__VA_ARGS__)
1491 ##__VA_ARGS__); \
1492 } while (0)
1493 1520
1494#define qib_devinfo(pcidev, fmt, ...) \ 1521#define qib_devinfo(pcidev, fmt, ...) \
1495 do { \ 1522 dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__)
1496 dev_info(&(pcidev)->dev, fmt, ##__VA_ARGS__); \
1497 } while (0)
1498 1523
1499/* 1524/*
1500 * this is used for formatting hw error messages... 1525 * this is used for formatting hw error messages...
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index d39e0183ff82..4f255b723ffd 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -279,7 +279,7 @@ struct qib_base_info {
279 * may not be implemented; the user code must deal with this if it 279 * may not be implemented; the user code must deal with this if it
280 * cares, or it must abort after initialization reports the difference. 280 * cares, or it must abort after initialization reports the difference.
281 */ 281 */
282#define QIB_USER_SWMINOR 11 282#define QIB_USER_SWMINOR 12
283 283
284#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR) 284#define QIB_USER_SWVERSION ((QIB_USER_SWMAJOR << 16) | QIB_USER_SWMINOR)
285 285
diff --git a/drivers/infiniband/hw/qib/qib_cq.c b/drivers/infiniband/hw/qib/qib_cq.c
index 5246aa486bbe..ab4e11cfab15 100644
--- a/drivers/infiniband/hw/qib/qib_cq.c
+++ b/drivers/infiniband/hw/qib/qib_cq.c
@@ -1,4 +1,5 @@
1/* 1/*
2 * Copyright (c) 2013 Intel Corporation. All rights reserved.
2 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 5 *
@@ -34,8 +35,10 @@
34#include <linux/err.h> 35#include <linux/err.h>
35#include <linux/slab.h> 36#include <linux/slab.h>
36#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
38#include <linux/kthread.h>
37 39
38#include "qib_verbs.h" 40#include "qib_verbs.h"
41#include "qib.h"
39 42
40/** 43/**
41 * qib_cq_enter - add a new entry to the completion queue 44 * qib_cq_enter - add a new entry to the completion queue
@@ -102,13 +105,18 @@ void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
102 if (cq->notify == IB_CQ_NEXT_COMP || 105 if (cq->notify == IB_CQ_NEXT_COMP ||
103 (cq->notify == IB_CQ_SOLICITED && 106 (cq->notify == IB_CQ_SOLICITED &&
104 (solicited || entry->status != IB_WC_SUCCESS))) { 107 (solicited || entry->status != IB_WC_SUCCESS))) {
105 cq->notify = IB_CQ_NONE; 108 struct kthread_worker *worker;
106 cq->triggered++;
107 /* 109 /*
108 * This will cause send_complete() to be called in 110 * This will cause send_complete() to be called in
109 * another thread. 111 * another thread.
110 */ 112 */
111 queue_work(qib_cq_wq, &cq->comptask); 113 smp_rmb();
114 worker = cq->dd->worker;
115 if (likely(worker)) {
116 cq->notify = IB_CQ_NONE;
117 cq->triggered++;
118 queue_kthread_work(worker, &cq->comptask);
119 }
112 } 120 }
113 121
114 spin_unlock_irqrestore(&cq->lock, flags); 122 spin_unlock_irqrestore(&cq->lock, flags);
@@ -163,7 +171,7 @@ bail:
163 return npolled; 171 return npolled;
164} 172}
165 173
166static void send_complete(struct work_struct *work) 174static void send_complete(struct kthread_work *work)
167{ 175{
168 struct qib_cq *cq = container_of(work, struct qib_cq, comptask); 176 struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
169 177
@@ -287,11 +295,12 @@ struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
287 * The number of entries should be >= the number requested or return 295 * The number of entries should be >= the number requested or return
288 * an error. 296 * an error.
289 */ 297 */
298 cq->dd = dd_from_dev(dev);
290 cq->ibcq.cqe = entries; 299 cq->ibcq.cqe = entries;
291 cq->notify = IB_CQ_NONE; 300 cq->notify = IB_CQ_NONE;
292 cq->triggered = 0; 301 cq->triggered = 0;
293 spin_lock_init(&cq->lock); 302 spin_lock_init(&cq->lock);
294 INIT_WORK(&cq->comptask, send_complete); 303 init_kthread_work(&cq->comptask, send_complete);
295 wc->head = 0; 304 wc->head = 0;
296 wc->tail = 0; 305 wc->tail = 0;
297 cq->queue = wc; 306 cq->queue = wc;
@@ -323,7 +332,7 @@ int qib_destroy_cq(struct ib_cq *ibcq)
323 struct qib_ibdev *dev = to_idev(ibcq->device); 332 struct qib_ibdev *dev = to_idev(ibcq->device);
324 struct qib_cq *cq = to_icq(ibcq); 333 struct qib_cq *cq = to_icq(ibcq);
325 334
326 flush_work(&cq->comptask); 335 flush_kthread_work(&cq->comptask);
327 spin_lock(&dev->n_cqs_lock); 336 spin_lock(&dev->n_cqs_lock);
328 dev->n_cqs_allocated--; 337 dev->n_cqs_allocated--;
329 spin_unlock(&dev->n_cqs_lock); 338 spin_unlock(&dev->n_cqs_lock);
@@ -483,3 +492,49 @@ bail_free:
483bail: 492bail:
484 return ret; 493 return ret;
485} 494}
495
496int qib_cq_init(struct qib_devdata *dd)
497{
498 int ret = 0;
499 int cpu;
500 struct task_struct *task;
501
502 if (dd->worker)
503 return 0;
504 dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
505 if (!dd->worker)
506 return -ENOMEM;
507 init_kthread_worker(dd->worker);
508 task = kthread_create_on_node(
509 kthread_worker_fn,
510 dd->worker,
511 dd->assigned_node_id,
512 "qib_cq%d", dd->unit);
513 if (IS_ERR(task))
514 goto task_fail;
515 cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
516 kthread_bind(task, cpu);
517 wake_up_process(task);
518out:
519 return ret;
520task_fail:
521 ret = PTR_ERR(task);
522 kfree(dd->worker);
523 dd->worker = NULL;
524 goto out;
525}
526
527void qib_cq_exit(struct qib_devdata *dd)
528{
529 struct kthread_worker *worker;
530
531 worker = dd->worker;
532 if (!worker)
533 return;
534 /* blocks future queuing from send_complete() */
535 dd->worker = NULL;
536 smp_wmb();
537 flush_kthread_worker(worker);
538 kthread_stop(worker->task);
539 kfree(worker);
540}
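The qib_cq.c changes above move completion callbacks from a shared workqueue onto a per-device kthread_worker whose thread is created on, and bound to, the HCA's NUMA node. A condensed sketch of that pattern, using the same pre-rename kthread API the driver calls; the struct names and the demo work handler are placeholders, not qib code.

	#include <linux/kthread.h>
	#include <linux/kernel.h>
	#include <linux/sched.h>
	#include <linux/slab.h>
	#include <linux/err.h>

	struct demo_cq {
		struct kthread_work comptask;	/* queued from the completion path */
	};

	static void demo_send_complete(struct kthread_work *work)
	{
		struct demo_cq *cq = container_of(work, struct demo_cq, comptask);
		/* completions run here, in the worker thread's process context */
		(void)cq;
	}

	static struct kthread_worker *demo_worker_start(int node, int unit)
	{
		struct kthread_worker *worker = kzalloc(sizeof(*worker), GFP_KERNEL);
		struct task_struct *task;

		if (!worker)
			return NULL;
		init_kthread_worker(worker);
		task = kthread_create_on_node(kthread_worker_fn, worker, node,
					      "demo_cq%d", unit);
		if (IS_ERR(task)) {
			kfree(worker);
			return NULL;
		}
		wake_up_process(task);
		return worker;
	}

	/*
	 * Producer side: init_kthread_work(&cq->comptask, demo_send_complete)
	 * once, then queue_kthread_work(worker, &cq->comptask) per event.
	 * Teardown mirrors qib_cq_exit(): flush_kthread_worker(worker),
	 * kthread_stop(worker->task), kfree(worker).
	 */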
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
new file mode 100644
index 000000000000..799a0c3bffc4
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -0,0 +1,283 @@
1#ifdef CONFIG_DEBUG_FS
2/*
3 * Copyright (c) 2013 Intel Corporation. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/debugfs.h>
34#include <linux/seq_file.h>
35#include <linux/kernel.h>
36#include <linux/export.h>
37
38#include "qib.h"
39#include "qib_verbs.h"
40#include "qib_debugfs.h"
41
42static struct dentry *qib_dbg_root;
43
44#define DEBUGFS_FILE(name) \
45static const struct seq_operations _##name##_seq_ops = { \
46 .start = _##name##_seq_start, \
47 .next = _##name##_seq_next, \
48 .stop = _##name##_seq_stop, \
49 .show = _##name##_seq_show \
50}; \
51static int _##name##_open(struct inode *inode, struct file *s) \
52{ \
53 struct seq_file *seq; \
54 int ret; \
55 ret = seq_open(s, &_##name##_seq_ops); \
56 if (ret) \
57 return ret; \
58 seq = s->private_data; \
59 seq->private = inode->i_private; \
60 return 0; \
61} \
62static const struct file_operations _##name##_file_ops = { \
63 .owner = THIS_MODULE, \
64 .open = _##name##_open, \
65 .read = seq_read, \
66 .llseek = seq_lseek, \
67 .release = seq_release \
68};
69
70#define DEBUGFS_FILE_CREATE(name) \
71do { \
72 struct dentry *ent; \
73 ent = debugfs_create_file(#name , 0400, ibd->qib_ibdev_dbg, \
74 ibd, &_##name##_file_ops); \
75 if (!ent) \
76 pr_warn("create of " #name " failed\n"); \
77} while (0)
78
79static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
80{
81 struct qib_opcode_stats_perctx *opstats;
82
83 if (*pos >= ARRAY_SIZE(opstats->stats))
84 return NULL;
85 return pos;
86}
87
88static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
89{
90 struct qib_opcode_stats_perctx *opstats;
91
92 ++*pos;
93 if (*pos >= ARRAY_SIZE(opstats->stats))
94 return NULL;
95 return pos;
96}
97
98
99static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
100{
101 /* nothing allocated */
102}
103
104static int _opcode_stats_seq_show(struct seq_file *s, void *v)
105{
106 loff_t *spos = v;
107 loff_t i = *spos, j;
108 u64 n_packets = 0, n_bytes = 0;
109 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
110 struct qib_devdata *dd = dd_from_dev(ibd);
111
112 for (j = 0; j < dd->first_user_ctxt; j++) {
113 if (!dd->rcd[j])
114 continue;
115 n_packets += dd->rcd[j]->opstats->stats[i].n_packets;
116 n_bytes += dd->rcd[j]->opstats->stats[i].n_bytes;
117 }
118 if (!n_packets && !n_bytes)
119 return SEQ_SKIP;
120 seq_printf(s, "%02llx %llu/%llu\n", i,
121 (unsigned long long) n_packets,
122 (unsigned long long) n_bytes);
123
124 return 0;
125}
126
127DEBUGFS_FILE(opcode_stats)
128
129static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos)
130{
131 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
132 struct qib_devdata *dd = dd_from_dev(ibd);
133
134 if (!*pos)
135 return SEQ_START_TOKEN;
136 if (*pos >= dd->first_user_ctxt)
137 return NULL;
138 return pos;
139}
140
141static void *_ctx_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
142{
143 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
144 struct qib_devdata *dd = dd_from_dev(ibd);
145
146 if (v == SEQ_START_TOKEN)
147 return pos;
148
149 ++*pos;
150 if (*pos >= dd->first_user_ctxt)
151 return NULL;
152 return pos;
153}
154
155static void _ctx_stats_seq_stop(struct seq_file *s, void *v)
156{
157 /* nothing allocated */
158}
159
160static int _ctx_stats_seq_show(struct seq_file *s, void *v)
161{
162 loff_t *spos;
163 loff_t i, j;
164 u64 n_packets = 0;
165 struct qib_ibdev *ibd = (struct qib_ibdev *)s->private;
166 struct qib_devdata *dd = dd_from_dev(ibd);
167
168 if (v == SEQ_START_TOKEN) {
169 seq_puts(s, "Ctx:npkts\n");
170 return 0;
171 }
172
173 spos = v;
174 i = *spos;
175
176 if (!dd->rcd[i])
177 return SEQ_SKIP;
178
179 for (j = 0; j < ARRAY_SIZE(dd->rcd[i]->opstats->stats); j++)
180 n_packets += dd->rcd[i]->opstats->stats[j].n_packets;
181
182 if (!n_packets)
183 return SEQ_SKIP;
184
185 seq_printf(s, " %llu:%llu\n", i, n_packets);
186 return 0;
187}
188
189DEBUGFS_FILE(ctx_stats)
190
191static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
192{
193 struct qib_qp_iter *iter;
194 loff_t n = *pos;
195
196 iter = qib_qp_iter_init(s->private);
197 if (!iter)
198 return NULL;
199
200 while (n--) {
201 if (qib_qp_iter_next(iter)) {
202 kfree(iter);
203 return NULL;
204 }
205 }
206
207 return iter;
208}
209
210static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
211 loff_t *pos)
212{
213 struct qib_qp_iter *iter = iter_ptr;
214
215 (*pos)++;
216
217 if (qib_qp_iter_next(iter)) {
218 kfree(iter);
219 return NULL;
220 }
221
222 return iter;
223}
224
225static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
226{
227 /* nothing for now */
228}
229
230static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
231{
232 struct qib_qp_iter *iter = iter_ptr;
233
234 if (!iter)
235 return 0;
236
237 qib_qp_iter_print(s, iter);
238
239 return 0;
240}
241
242DEBUGFS_FILE(qp_stats)
243
244void qib_dbg_ibdev_init(struct qib_ibdev *ibd)
245{
246 char name[10];
247
248 snprintf(name, sizeof(name), "qib%d", dd_from_dev(ibd)->unit);
249 ibd->qib_ibdev_dbg = debugfs_create_dir(name, qib_dbg_root);
250 if (!ibd->qib_ibdev_dbg) {
251 pr_warn("create of %s failed\n", name);
252 return;
253 }
254 DEBUGFS_FILE_CREATE(opcode_stats);
255 DEBUGFS_FILE_CREATE(ctx_stats);
256 DEBUGFS_FILE_CREATE(qp_stats);
257 return;
258}
259
260void qib_dbg_ibdev_exit(struct qib_ibdev *ibd)
261{
262 if (!qib_dbg_root)
263 goto out;
264 debugfs_remove_recursive(ibd->qib_ibdev_dbg);
265out:
266 ibd->qib_ibdev_dbg = NULL;
267}
268
269void qib_dbg_init(void)
270{
271 qib_dbg_root = debugfs_create_dir(QIB_DRV_NAME, NULL);
272 if (!qib_dbg_root)
273 pr_warn("init of debugfs failed\n");
274}
275
276void qib_dbg_exit(void)
277{
278 debugfs_remove_recursive(qib_dbg_root);
279 qib_dbg_root = NULL;
280}
281
282#endif
283
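Assuming debugfs is mounted at the conventional /sys/kernel/debug and QIB_DRV_NAME expands to "ib_qib", the entries created above appear as /sys/kernel/debug/ib_qib/qib<unit>/{opcode_stats,ctx_stats,qp_stats}. They are created with mode 0400, so only root can read them, and each read walks the per-context opstats arrays through the seq_file iterators generated by the DEBUGFS_FILE() macro.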
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.h b/drivers/infiniband/hw/qib/qib_debugfs.h
new file mode 100644
index 000000000000..7ae983a91b8b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_debugfs.h
@@ -0,0 +1,45 @@
1#ifndef _QIB_DEBUGFS_H
2#define _QIB_DEBUGFS_H
3
4#ifdef CONFIG_DEBUG_FS
5/*
6 * Copyright (c) 2013 Intel Corporation. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses. You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 * Redistribution and use in source and binary forms, with or
15 * without modification, are permitted provided that the following
16 * conditions are met:
17 *
18 * - Redistributions of source code must retain the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer.
21 *
22 * - Redistributions in binary form must reproduce the above
23 * copyright notice, this list of conditions and the following
24 * disclaimer in the documentation and/or other materials
25 * provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37struct qib_ibdev;
38void qib_dbg_ibdev_init(struct qib_ibdev *ibd);
39void qib_dbg_ibdev_exit(struct qib_ibdev *ibd);
40void qib_dbg_init(void);
41void qib_dbg_exit(void);
42
43#endif
44
45#endif /* _QIB_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 216092477dfc..5bee08f16d74 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -558,7 +558,6 @@ move_along:
558 } 558 }
559 559
560 rcd->head = l; 560 rcd->head = l;
561 rcd->pkt_count += i;
562 561
563 /* 562 /*
564 * Iterate over all QPs waiting to respond. 563 * Iterate over all QPs waiting to respond.
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 9dd0bc89c3aa..b51a51486cb8 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -1155,6 +1155,49 @@ static unsigned int qib_poll(struct file *fp, struct poll_table_struct *pt)
1155 return pollflag; 1155 return pollflag;
1156} 1156}
1157 1157
1158static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd)
1159{
1160 struct qib_filedata *fd = fp->private_data;
1161 const unsigned int weight = cpumask_weight(&current->cpus_allowed);
1162 const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus);
1163 int local_cpu;
1164
1165 /*
1166 * If process has NOT already set it's affinity, select and
1167 * reserve a processor for it on the local NUMA node.
1168 */
1169 if ((weight >= qib_cpulist_count) &&
1170 (cpumask_weight(local_mask) <= qib_cpulist_count)) {
1171 for_each_cpu(local_cpu, local_mask)
1172 if (!test_and_set_bit(local_cpu, qib_cpulist)) {
1173 fd->rec_cpu_num = local_cpu;
1174 return;
1175 }
1176 }
1177
1178 /*
1179 * If process has NOT already set it's affinity, select and
1180 * reserve a processor for it, as a rendevous for all
1181 * users of the driver. If they don't actually later
1182 * set affinity to this cpu, or set it to some other cpu,
1183 * it just means that sooner or later we don't recommend
1184 * a cpu, and let the scheduler do it's best.
1185 */
1186 if (weight >= qib_cpulist_count) {
1187 int cpu;
1188 cpu = find_first_zero_bit(qib_cpulist,
1189 qib_cpulist_count);
1190 if (cpu == qib_cpulist_count)
1191 qib_dev_err(dd,
1192 "no cpus avail for affinity PID %u\n",
1193 current->pid);
1194 else {
1195 __set_bit(cpu, qib_cpulist);
1196 fd->rec_cpu_num = cpu;
1197 }
1198 }
1199}
1200
1158/* 1201/*
1159 * Check that userland and driver are compatible for subcontexts. 1202 * Check that userland and driver are compatible for subcontexts.
1160 */ 1203 */
@@ -1259,12 +1302,20 @@ bail:
1259static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, 1302static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1260 struct file *fp, const struct qib_user_info *uinfo) 1303 struct file *fp, const struct qib_user_info *uinfo)
1261{ 1304{
1305 struct qib_filedata *fd = fp->private_data;
1262 struct qib_devdata *dd = ppd->dd; 1306 struct qib_devdata *dd = ppd->dd;
1263 struct qib_ctxtdata *rcd; 1307 struct qib_ctxtdata *rcd;
1264 void *ptmp = NULL; 1308 void *ptmp = NULL;
1265 int ret; 1309 int ret;
1310 int numa_id;
1311
1312 assign_ctxt_affinity(fp, dd);
1266 1313
1267 rcd = qib_create_ctxtdata(ppd, ctxt); 1314 numa_id = qib_numa_aware ? ((fd->rec_cpu_num != -1) ?
1315 cpu_to_node(fd->rec_cpu_num) :
1316 numa_node_id()) : dd->assigned_node_id;
1317
1318 rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
1268 1319
1269 /* 1320 /*
1270 * Allocate memory for use in qib_tid_update() at open to 1321 * Allocate memory for use in qib_tid_update() at open to
@@ -1296,6 +1347,9 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1296 goto bail; 1347 goto bail;
1297 1348
1298bailerr: 1349bailerr:
1350 if (fd->rec_cpu_num != -1)
1351 __clear_bit(fd->rec_cpu_num, qib_cpulist);
1352
1299 dd->rcd[ctxt] = NULL; 1353 dd->rcd[ctxt] = NULL;
1300 kfree(rcd); 1354 kfree(rcd);
1301 kfree(ptmp); 1355 kfree(ptmp);
@@ -1485,6 +1539,57 @@ static int qib_open(struct inode *in, struct file *fp)
1485 return fp->private_data ? 0 : -ENOMEM; 1539 return fp->private_data ? 0 : -ENOMEM;
1486} 1540}
1487 1541
1542static int find_hca(unsigned int cpu, int *unit)
1543{
1544 int ret = 0, devmax, npresent, nup, ndev;
1545
1546 *unit = -1;
1547
1548 devmax = qib_count_units(&npresent, &nup);
1549 if (!npresent) {
1550 ret = -ENXIO;
1551 goto done;
1552 }
1553 if (!nup) {
1554 ret = -ENETDOWN;
1555 goto done;
1556 }
1557 for (ndev = 0; ndev < devmax; ndev++) {
1558 struct qib_devdata *dd = qib_lookup(ndev);
1559 if (dd) {
1560 if (pcibus_to_node(dd->pcidev->bus) < 0) {
1561 ret = -EINVAL;
1562 goto done;
1563 }
1564 if (cpu_to_node(cpu) ==
1565 pcibus_to_node(dd->pcidev->bus)) {
1566 *unit = ndev;
1567 goto done;
1568 }
1569 }
1570 }
1571done:
1572 return ret;
1573}
1574
1575static int do_qib_user_sdma_queue_create(struct file *fp)
1576{
1577 struct qib_filedata *fd = fp->private_data;
1578 struct qib_ctxtdata *rcd = fd->rcd;
1579 struct qib_devdata *dd = rcd->dd;
1580
1581 if (dd->flags & QIB_HAS_SEND_DMA)
1582
1583 fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
1584 dd->unit,
1585 rcd->ctxt,
1586 fd->subctxt);
1587 if (!fd->pq)
1588 return -ENOMEM;
1589
1590 return 0;
1591}
1592
1488/* 1593/*
1489 * Get ctxt early, so can set affinity prior to memory allocation. 1594 * Get ctxt early, so can set affinity prior to memory allocation.
1490 */ 1595 */
@@ -1517,61 +1622,36 @@ static int qib_assign_ctxt(struct file *fp, const struct qib_user_info *uinfo)
1517 if (qib_compatible_subctxts(swmajor, swminor) && 1622 if (qib_compatible_subctxts(swmajor, swminor) &&
1518 uinfo->spu_subctxt_cnt) { 1623 uinfo->spu_subctxt_cnt) {
1519 ret = find_shared_ctxt(fp, uinfo); 1624 ret = find_shared_ctxt(fp, uinfo);
1520 if (ret) { 1625 if (ret > 0) {
1521 if (ret > 0) 1626 ret = do_qib_user_sdma_queue_create(fp);
1522 ret = 0; 1627 if (!ret)
1523 goto done_chk_sdma; 1628 assign_ctxt_affinity(fp, (ctxt_fp(fp))->dd);
1629 goto done_ok;
1524 } 1630 }
1525 } 1631 }
1526 1632
1527 i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE; 1633 i_minor = iminor(file_inode(fp)) - QIB_USER_MINOR_BASE;
1528 if (i_minor) 1634 if (i_minor)
1529 ret = find_free_ctxt(i_minor - 1, fp, uinfo); 1635 ret = find_free_ctxt(i_minor - 1, fp, uinfo);
1530 else 1636 else {
1637 int unit;
1638 const unsigned int cpu = cpumask_first(&current->cpus_allowed);
1639 const unsigned int weight =
1640 cpumask_weight(&current->cpus_allowed);
1641
1642 if (weight == 1 && !test_bit(cpu, qib_cpulist))
1643 if (!find_hca(cpu, &unit) && unit >= 0)
1644 if (!find_free_ctxt(unit, fp, uinfo)) {
1645 ret = 0;
1646 goto done_chk_sdma;
1647 }
1531 ret = get_a_ctxt(fp, uinfo, alg); 1648 ret = get_a_ctxt(fp, uinfo, alg);
1532
1533done_chk_sdma:
1534 if (!ret) {
1535 struct qib_filedata *fd = fp->private_data;
1536 const struct qib_ctxtdata *rcd = fd->rcd;
1537 const struct qib_devdata *dd = rcd->dd;
1538 unsigned int weight;
1539
1540 if (dd->flags & QIB_HAS_SEND_DMA) {
1541 fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev,
1542 dd->unit,
1543 rcd->ctxt,
1544 fd->subctxt);
1545 if (!fd->pq)
1546 ret = -ENOMEM;
1547 }
1548
1549 /*
1550 * If process has NOT already set it's affinity, select and
1551 * reserve a processor for it, as a rendezvous for all
1552 * users of the driver. If they don't actually later
1553 * set affinity to this cpu, or set it to some other cpu,
1554 * it just means that sooner or later we don't recommend
1555 * a cpu, and let the scheduler do it's best.
1556 */
1557 weight = cpumask_weight(tsk_cpus_allowed(current));
1558 if (!ret && weight >= qib_cpulist_count) {
1559 int cpu;
1560 cpu = find_first_zero_bit(qib_cpulist,
1561 qib_cpulist_count);
1562 if (cpu != qib_cpulist_count) {
1563 __set_bit(cpu, qib_cpulist);
1564 fd->rec_cpu_num = cpu;
1565 }
1566 } else if (weight == 1 &&
1567 test_bit(cpumask_first(tsk_cpus_allowed(current)),
1568 qib_cpulist))
1569 qib_devinfo(dd->pcidev,
1570 "%s PID %u affinity set to cpu %d; already allocated\n",
1571 current->comm, current->pid,
1572 cpumask_first(tsk_cpus_allowed(current)));
1573 } 1649 }
1574 1650
1651done_chk_sdma:
1652 if (!ret)
1653 ret = do_qib_user_sdma_queue_create(fp);
1654done_ok:
1575 mutex_unlock(&qib_mutex); 1655 mutex_unlock(&qib_mutex);
1576 1656
1577done: 1657done:
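The qib_file_ops.c changes above compute a numa_id (from the reserved CPU when one was claimed, otherwise the device's assigned node) and pass it into qib_create_ctxtdata(). The point of plumbing a node id through is node-local allocation; a hedged sketch of that idiom follows, with placeholder names only, since the actual qib allocation paths are not shown in this hunk.

	#include <linux/slab.h>

	struct demo_ctxt {
		int node_id;		/* mirrors the new qib_ctxtdata field */
	};

	static struct demo_ctxt *demo_create_ctxt(int node)
	{
		/* keep per-context state on the NUMA node closest to the HCA/user */
		struct demo_ctxt *rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node);

		if (rcd)
			rcd->node_id = node;
		return rcd;
	}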
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 0232ae56b1fa..84e593d6007b 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3464,6 +3464,13 @@ static int qib_6120_tempsense_rd(struct qib_devdata *dd, int regnum)
3464 return -ENXIO; 3464 return -ENXIO;
3465} 3465}
3466 3466
3467#ifdef CONFIG_INFINIBAND_QIB_DCA
3468static int qib_6120_notify_dca(struct qib_devdata *dd, unsigned long event)
3469{
3470 return 0;
3471}
3472#endif
3473
3467/* Dummy function, as 6120 boards never disable EEPROM Write */ 3474/* Dummy function, as 6120 boards never disable EEPROM Write */
3468static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen) 3475static int qib_6120_eeprom_wen(struct qib_devdata *dd, int wen)
3469{ 3476{
@@ -3539,6 +3546,9 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
3539 dd->f_xgxs_reset = qib_6120_xgxs_reset; 3546 dd->f_xgxs_reset = qib_6120_xgxs_reset;
3540 dd->f_writescratch = writescratch; 3547 dd->f_writescratch = writescratch;
3541 dd->f_tempsense_rd = qib_6120_tempsense_rd; 3548 dd->f_tempsense_rd = qib_6120_tempsense_rd;
3549#ifdef CONFIG_INFINIBAND_QIB_DCA
3550 dd->f_notify_dca = qib_6120_notify_dca;
3551#endif
3542 /* 3552 /*
3543 * Do remaining pcie setup and save pcie values in dd. 3553 * Do remaining pcie setup and save pcie values in dd.
3544 * Any error printing is already done by the init code. 3554 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 64d0ecb90cdc..454c2e7668fe 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4513,6 +4513,13 @@ bail:
4513 return ret; 4513 return ret;
4514} 4514}
4515 4515
4516#ifdef CONFIG_INFINIBAND_QIB_DCA
4517static int qib_7220_notify_dca(struct qib_devdata *dd, unsigned long event)
4518{
4519 return 0;
4520}
4521#endif
4522
4516/* Dummy function, as 7220 boards never disable EEPROM Write */ 4523/* Dummy function, as 7220 boards never disable EEPROM Write */
4517static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) 4524static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen)
4518{ 4525{
@@ -4587,6 +4594,9 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
4587 dd->f_xgxs_reset = qib_7220_xgxs_reset; 4594 dd->f_xgxs_reset = qib_7220_xgxs_reset;
4588 dd->f_writescratch = writescratch; 4595 dd->f_writescratch = writescratch;
4589 dd->f_tempsense_rd = qib_7220_tempsense_rd; 4596 dd->f_tempsense_rd = qib_7220_tempsense_rd;
4597#ifdef CONFIG_INFINIBAND_QIB_DCA
4598 dd->f_notify_dca = qib_7220_notify_dca;
4599#endif
4590 /* 4600 /*
4591 * Do remaining pcie setup and save pcie values in dd. 4601 * Do remaining pcie setup and save pcie values in dd.
4592 * Any error printing is already done by the init code. 4602 * Any error printing is already done by the init code.
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 3f6b21e9dc11..21e8b09d4bf8 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -44,6 +44,9 @@
44#include <linux/module.h> 44#include <linux/module.h>
45#include <rdma/ib_verbs.h> 45#include <rdma/ib_verbs.h>
46#include <rdma/ib_smi.h> 46#include <rdma/ib_smi.h>
47#ifdef CONFIG_INFINIBAND_QIB_DCA
48#include <linux/dca.h>
49#endif
47 50
48#include "qib.h" 51#include "qib.h"
49#include "qib_7322_regs.h" 52#include "qib_7322_regs.h"
@@ -80,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
80static void serdes_7322_los_enable(struct qib_pportdata *, int); 83static void serdes_7322_los_enable(struct qib_pportdata *, int);
81static int serdes_7322_init_old(struct qib_pportdata *); 84static int serdes_7322_init_old(struct qib_pportdata *);
82static int serdes_7322_init_new(struct qib_pportdata *); 85static int serdes_7322_init_new(struct qib_pportdata *);
86static void dump_sdma_7322_state(struct qib_pportdata *);
83 87
84#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) 88#define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
85 89
@@ -519,6 +523,14 @@ static const u8 qib_7322_physportstate[0x20] = {
519 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN 523 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
520}; 524};
521 525
526#ifdef CONFIG_INFINIBAND_QIB_DCA
527struct qib_irq_notify {
528 int rcv;
529 void *arg;
530 struct irq_affinity_notify notify;
531};
532#endif
533
522struct qib_chip_specific { 534struct qib_chip_specific {
523 u64 __iomem *cregbase; 535 u64 __iomem *cregbase;
524 u64 *cntrs; 536 u64 *cntrs;
@@ -546,6 +558,12 @@ struct qib_chip_specific {
546 u32 lastbuf_for_pio; 558 u32 lastbuf_for_pio;
547 u32 stay_in_freeze; 559 u32 stay_in_freeze;
548 u32 recovery_ports_initted; 560 u32 recovery_ports_initted;
561#ifdef CONFIG_INFINIBAND_QIB_DCA
562 u32 dca_ctrl;
563 int rhdr_cpu[18];
564 int sdma_cpu[2];
565 u64 dca_rcvhdr_ctrl[5]; /* B, C, D, E, F */
566#endif
549 struct qib_msix_entry *msix_entries; 567 struct qib_msix_entry *msix_entries;
550 unsigned long *sendchkenable; 568 unsigned long *sendchkenable;
551 unsigned long *sendgrhchk; 569 unsigned long *sendgrhchk;
@@ -573,7 +591,7 @@ struct vendor_txdds_ent {
573static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); 591static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *);
574 592
575#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ 593#define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */
576#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ 594#define TXDDS_EXTRA_SZ 18 /* number of extra tx settings entries */
577#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */ 595#define TXDDS_MFG_SZ 2 /* number of mfg tx settings entries */
578#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ 596#define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */
579 597
@@ -635,6 +653,7 @@ struct qib_chippport_specific {
635 u8 ibmalfusesnap; 653 u8 ibmalfusesnap;
636 struct qib_qsfp_data qsfp_data; 654 struct qib_qsfp_data qsfp_data;
637 char epmsgbuf[192]; /* for port error interrupt msg buffer */ 655 char epmsgbuf[192]; /* for port error interrupt msg buffer */
656 char sdmamsgbuf[192]; /* for per-port sdma error messages */
638}; 657};
639 658
640static struct { 659static struct {
@@ -642,28 +661,76 @@ static struct {
642 irq_handler_t handler; 661 irq_handler_t handler;
643 int lsb; 662 int lsb;
644 int port; /* 0 if not port-specific, else port # */ 663 int port; /* 0 if not port-specific, else port # */
664 int dca;
645} irq_table[] = { 665} irq_table[] = {
646 { "", qib_7322intr, -1, 0 }, 666 { "", qib_7322intr, -1, 0, 0 },
647 { " (buf avail)", qib_7322bufavail, 667 { " (buf avail)", qib_7322bufavail,
648 SYM_LSB(IntStatus, SendBufAvail), 0 }, 668 SYM_LSB(IntStatus, SendBufAvail), 0, 0},
649 { " (sdma 0)", sdma_intr, 669 { " (sdma 0)", sdma_intr,
650 SYM_LSB(IntStatus, SDmaInt_0), 1 }, 670 SYM_LSB(IntStatus, SDmaInt_0), 1, 1 },
651 { " (sdma 1)", sdma_intr, 671 { " (sdma 1)", sdma_intr,
652 SYM_LSB(IntStatus, SDmaInt_1), 2 }, 672 SYM_LSB(IntStatus, SDmaInt_1), 2, 1 },
653 { " (sdmaI 0)", sdma_idle_intr, 673 { " (sdmaI 0)", sdma_idle_intr,
654 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, 674 SYM_LSB(IntStatus, SDmaIdleInt_0), 1, 1},
655 { " (sdmaI 1)", sdma_idle_intr, 675 { " (sdmaI 1)", sdma_idle_intr,
656 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, 676 SYM_LSB(IntStatus, SDmaIdleInt_1), 2, 1},
657 { " (sdmaP 0)", sdma_progress_intr, 677 { " (sdmaP 0)", sdma_progress_intr,
658 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, 678 SYM_LSB(IntStatus, SDmaProgressInt_0), 1, 1 },
659 { " (sdmaP 1)", sdma_progress_intr, 679 { " (sdmaP 1)", sdma_progress_intr,
660 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, 680 SYM_LSB(IntStatus, SDmaProgressInt_1), 2, 1 },
661 { " (sdmaC 0)", sdma_cleanup_intr, 681 { " (sdmaC 0)", sdma_cleanup_intr,
662 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, 682 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1, 0 },
663 { " (sdmaC 1)", sdma_cleanup_intr, 683 { " (sdmaC 1)", sdma_cleanup_intr,
664 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, 684 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 , 0},
665}; 685};
666 686
687#ifdef CONFIG_INFINIBAND_QIB_DCA
688
689static const struct dca_reg_map {
690 int shadow_inx;
691 int lsb;
692 u64 mask;
693 u16 regno;
694} dca_rcvhdr_reg_map[] = {
695 { 0, SYM_LSB(DCACtrlB, RcvHdrq0DCAOPH),
696 ~SYM_MASK(DCACtrlB, RcvHdrq0DCAOPH) , KREG_IDX(DCACtrlB) },
697 { 0, SYM_LSB(DCACtrlB, RcvHdrq1DCAOPH),
698 ~SYM_MASK(DCACtrlB, RcvHdrq1DCAOPH) , KREG_IDX(DCACtrlB) },
699 { 0, SYM_LSB(DCACtrlB, RcvHdrq2DCAOPH),
700 ~SYM_MASK(DCACtrlB, RcvHdrq2DCAOPH) , KREG_IDX(DCACtrlB) },
701 { 0, SYM_LSB(DCACtrlB, RcvHdrq3DCAOPH),
702 ~SYM_MASK(DCACtrlB, RcvHdrq3DCAOPH) , KREG_IDX(DCACtrlB) },
703 { 1, SYM_LSB(DCACtrlC, RcvHdrq4DCAOPH),
704 ~SYM_MASK(DCACtrlC, RcvHdrq4DCAOPH) , KREG_IDX(DCACtrlC) },
705 { 1, SYM_LSB(DCACtrlC, RcvHdrq5DCAOPH),
706 ~SYM_MASK(DCACtrlC, RcvHdrq5DCAOPH) , KREG_IDX(DCACtrlC) },
707 { 1, SYM_LSB(DCACtrlC, RcvHdrq6DCAOPH),
708 ~SYM_MASK(DCACtrlC, RcvHdrq6DCAOPH) , KREG_IDX(DCACtrlC) },
709 { 1, SYM_LSB(DCACtrlC, RcvHdrq7DCAOPH),
710 ~SYM_MASK(DCACtrlC, RcvHdrq7DCAOPH) , KREG_IDX(DCACtrlC) },
711 { 2, SYM_LSB(DCACtrlD, RcvHdrq8DCAOPH),
712 ~SYM_MASK(DCACtrlD, RcvHdrq8DCAOPH) , KREG_IDX(DCACtrlD) },
713 { 2, SYM_LSB(DCACtrlD, RcvHdrq9DCAOPH),
714 ~SYM_MASK(DCACtrlD, RcvHdrq9DCAOPH) , KREG_IDX(DCACtrlD) },
715 { 2, SYM_LSB(DCACtrlD, RcvHdrq10DCAOPH),
716 ~SYM_MASK(DCACtrlD, RcvHdrq10DCAOPH) , KREG_IDX(DCACtrlD) },
717 { 2, SYM_LSB(DCACtrlD, RcvHdrq11DCAOPH),
718 ~SYM_MASK(DCACtrlD, RcvHdrq11DCAOPH) , KREG_IDX(DCACtrlD) },
719 { 3, SYM_LSB(DCACtrlE, RcvHdrq12DCAOPH),
720 ~SYM_MASK(DCACtrlE, RcvHdrq12DCAOPH) , KREG_IDX(DCACtrlE) },
721 { 3, SYM_LSB(DCACtrlE, RcvHdrq13DCAOPH),
722 ~SYM_MASK(DCACtrlE, RcvHdrq13DCAOPH) , KREG_IDX(DCACtrlE) },
723 { 3, SYM_LSB(DCACtrlE, RcvHdrq14DCAOPH),
724 ~SYM_MASK(DCACtrlE, RcvHdrq14DCAOPH) , KREG_IDX(DCACtrlE) },
725 { 3, SYM_LSB(DCACtrlE, RcvHdrq15DCAOPH),
726 ~SYM_MASK(DCACtrlE, RcvHdrq15DCAOPH) , KREG_IDX(DCACtrlE) },
727 { 4, SYM_LSB(DCACtrlF, RcvHdrq16DCAOPH),
728 ~SYM_MASK(DCACtrlF, RcvHdrq16DCAOPH) , KREG_IDX(DCACtrlF) },
729 { 4, SYM_LSB(DCACtrlF, RcvHdrq17DCAOPH),
730 ~SYM_MASK(DCACtrlF, RcvHdrq17DCAOPH) , KREG_IDX(DCACtrlF) },
731};
732#endif
733
667/* ibcctrl bits */ 734/* ibcctrl bits */
668#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1 735#define QLOGIC_IB_IBCC_LINKINITCMD_DISABLE 1
669/* cycle through TS1/TS2 till OK */ 736/* cycle through TS1/TS2 till OK */
@@ -686,6 +753,13 @@ static void write_7322_init_portregs(struct qib_pportdata *);
686static void setup_7322_link_recovery(struct qib_pportdata *, u32); 753static void setup_7322_link_recovery(struct qib_pportdata *, u32);
687static void check_7322_rxe_status(struct qib_pportdata *); 754static void check_7322_rxe_status(struct qib_pportdata *);
688static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *); 755static u32 __iomem *qib_7322_getsendbuf(struct qib_pportdata *, u64, u32 *);
756#ifdef CONFIG_INFINIBAND_QIB_DCA
757static void qib_setup_dca(struct qib_devdata *dd);
758static void setup_dca_notifier(struct qib_devdata *dd,
759 struct qib_msix_entry *m);
760static void reset_dca_notifier(struct qib_devdata *dd,
761 struct qib_msix_entry *m);
762#endif
689 763
690/** 764/**
691 * qib_read_ureg32 - read 32-bit virtualized per-context register 765 * qib_read_ureg32 - read 32-bit virtualized per-context register
@@ -1529,6 +1603,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
1529 1603
1530 spin_lock_irqsave(&ppd->sdma_lock, flags); 1604 spin_lock_irqsave(&ppd->sdma_lock, flags);
1531 1605
1606 if (errs != QIB_E_P_SDMAHALT) {
1607 /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
1608 qib_dev_porterr(dd, ppd->port,
1609 "SDMA %s 0x%016llx %s\n",
1610 qib_sdma_state_names[ppd->sdma_state.current_state],
1611 errs, ppd->cpspec->sdmamsgbuf);
1612 dump_sdma_7322_state(ppd);
1613 }
1614
1532 switch (ppd->sdma_state.current_state) { 1615 switch (ppd->sdma_state.current_state) {
1533 case qib_sdma_state_s00_hw_down: 1616 case qib_sdma_state_s00_hw_down:
1534 break; 1617 break;
@@ -2084,6 +2167,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2084 2167
2085 qib_dev_err(dd, "%s hardware error\n", msg); 2168 qib_dev_err(dd, "%s hardware error\n", msg);
2086 2169
2170 if (hwerrs &
2171 (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
2172 SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
2173 int pidx = 0;
2174 int err;
2175 unsigned long flags;
2176 struct qib_pportdata *ppd = dd->pport;
2177 for (; pidx < dd->num_pports; ++pidx, ppd++) {
2178 err = 0;
2179 if (pidx == 0 && (hwerrs &
2180 SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
2181 err++;
2182 if (pidx == 1 && (hwerrs &
2183 SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
2184 err++;
2185 if (err) {
2186 spin_lock_irqsave(&ppd->sdma_lock, flags);
2187 dump_sdma_7322_state(ppd);
2188 spin_unlock_irqrestore(&ppd->sdma_lock, flags);
2189 }
2190 }
2191 }
2192
2087 if (isfatal && !dd->diag_client) { 2193 if (isfatal && !dd->diag_client) {
2088 qib_dev_err(dd, 2194 qib_dev_err(dd,
2089 "Fatal Hardware Error, no longer usable, SN %.16s\n", 2195 "Fatal Hardware Error, no longer usable, SN %.16s\n",
@@ -2558,6 +2664,162 @@ static void qib_setup_7322_setextled(struct qib_pportdata *ppd, u32 on)
2558 qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink); 2664 qib_write_kreg_port(ppd, krp_rcvpktledcnt, ledblink);
2559} 2665}
2560 2666
2667#ifdef CONFIG_INFINIBAND_QIB_DCA
2668
2669static int qib_7322_notify_dca(struct qib_devdata *dd, unsigned long event)
2670{
2671 switch (event) {
2672 case DCA_PROVIDER_ADD:
2673 if (dd->flags & QIB_DCA_ENABLED)
2674 break;
2675 if (!dca_add_requester(&dd->pcidev->dev)) {
2676 qib_devinfo(dd->pcidev, "DCA enabled\n");
2677 dd->flags |= QIB_DCA_ENABLED;
2678 qib_setup_dca(dd);
2679 }
2680 break;
2681 case DCA_PROVIDER_REMOVE:
2682 if (dd->flags & QIB_DCA_ENABLED) {
2683 dca_remove_requester(&dd->pcidev->dev);
2684 dd->flags &= ~QIB_DCA_ENABLED;
2685 dd->cspec->dca_ctrl = 0;
2686 qib_write_kreg(dd, KREG_IDX(DCACtrlA),
2687 dd->cspec->dca_ctrl);
2688 }
2689 break;
2690 }
2691 return 0;
2692}
2693
2694static void qib_update_rhdrq_dca(struct qib_ctxtdata *rcd, int cpu)
2695{
2696 struct qib_devdata *dd = rcd->dd;
2697 struct qib_chip_specific *cspec = dd->cspec;
2698
2699 if (!(dd->flags & QIB_DCA_ENABLED))
2700 return;
2701 if (cspec->rhdr_cpu[rcd->ctxt] != cpu) {
2702 const struct dca_reg_map *rmp;
2703
2704 cspec->rhdr_cpu[rcd->ctxt] = cpu;
2705 rmp = &dca_rcvhdr_reg_map[rcd->ctxt];
2706 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] &= rmp->mask;
2707 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx] |=
2708 (u64) dca3_get_tag(&dd->pcidev->dev, cpu) << rmp->lsb;
2709 qib_devinfo(dd->pcidev,
2710 "Ctxt %d cpu %d dca %llx\n", rcd->ctxt, cpu,
2711 (long long) cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
2712 qib_write_kreg(dd, rmp->regno,
2713 cspec->dca_rcvhdr_ctrl[rmp->shadow_inx]);
2714 cspec->dca_ctrl |= SYM_MASK(DCACtrlA, RcvHdrqDCAEnable);
2715 qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
2716 }
2717}
2718
2719static void qib_update_sdma_dca(struct qib_pportdata *ppd, int cpu)
2720{
2721 struct qib_devdata *dd = ppd->dd;
2722 struct qib_chip_specific *cspec = dd->cspec;
2723 unsigned pidx = ppd->port - 1;
2724
2725 if (!(dd->flags & QIB_DCA_ENABLED))
2726 return;
2727 if (cspec->sdma_cpu[pidx] != cpu) {
2728 cspec->sdma_cpu[pidx] = cpu;
2729 cspec->dca_rcvhdr_ctrl[4] &= ~(ppd->hw_pidx ?
2730 SYM_MASK(DCACtrlF, SendDma1DCAOPH) :
2731 SYM_MASK(DCACtrlF, SendDma0DCAOPH));
2732 cspec->dca_rcvhdr_ctrl[4] |=
2733 (u64) dca3_get_tag(&dd->pcidev->dev, cpu) <<
2734 (ppd->hw_pidx ?
2735 SYM_LSB(DCACtrlF, SendDma1DCAOPH) :
2736 SYM_LSB(DCACtrlF, SendDma0DCAOPH));
2737 qib_devinfo(dd->pcidev,
2738 "sdma %d cpu %d dca %llx\n", ppd->hw_pidx, cpu,
2739 (long long) cspec->dca_rcvhdr_ctrl[4]);
2740 qib_write_kreg(dd, KREG_IDX(DCACtrlF),
2741 cspec->dca_rcvhdr_ctrl[4]);
2742 cspec->dca_ctrl |= ppd->hw_pidx ?
2743 SYM_MASK(DCACtrlA, SendDMAHead1DCAEnable) :
2744 SYM_MASK(DCACtrlA, SendDMAHead0DCAEnable);
2745 qib_write_kreg(dd, KREG_IDX(DCACtrlA), cspec->dca_ctrl);
2746 }
2747}
2748
2749static void qib_setup_dca(struct qib_devdata *dd)
2750{
2751 struct qib_chip_specific *cspec = dd->cspec;
2752 int i;
2753
2754 for (i = 0; i < ARRAY_SIZE(cspec->rhdr_cpu); i++)
2755 cspec->rhdr_cpu[i] = -1;
2756 for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
2757 cspec->sdma_cpu[i] = -1;
2758 cspec->dca_rcvhdr_ctrl[0] =
2759 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq0DCAXfrCnt)) |
2760 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq1DCAXfrCnt)) |
2761 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq2DCAXfrCnt)) |
2762 (1ULL << SYM_LSB(DCACtrlB, RcvHdrq3DCAXfrCnt));
2763 cspec->dca_rcvhdr_ctrl[1] =
2764 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq4DCAXfrCnt)) |
2765 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq5DCAXfrCnt)) |
2766 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq6DCAXfrCnt)) |
2767 (1ULL << SYM_LSB(DCACtrlC, RcvHdrq7DCAXfrCnt));
2768 cspec->dca_rcvhdr_ctrl[2] =
2769 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq8DCAXfrCnt)) |
2770 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq9DCAXfrCnt)) |
2771 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq10DCAXfrCnt)) |
2772 (1ULL << SYM_LSB(DCACtrlD, RcvHdrq11DCAXfrCnt));
2773 cspec->dca_rcvhdr_ctrl[3] =
2774 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq12DCAXfrCnt)) |
2775 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq13DCAXfrCnt)) |
2776 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq14DCAXfrCnt)) |
2777 (1ULL << SYM_LSB(DCACtrlE, RcvHdrq15DCAXfrCnt));
2778 cspec->dca_rcvhdr_ctrl[4] =
2779 (1ULL << SYM_LSB(DCACtrlF, RcvHdrq16DCAXfrCnt)) |
2780 (1ULL << SYM_LSB(DCACtrlF, RcvHdrq17DCAXfrCnt));
2781 for (i = 0; i < ARRAY_SIZE(cspec->sdma_cpu); i++)
2782 qib_write_kreg(dd, KREG_IDX(DCACtrlB) + i,
2783 cspec->dca_rcvhdr_ctrl[i]);
2784 for (i = 0; i < cspec->num_msix_entries; i++)
2785 setup_dca_notifier(dd, &cspec->msix_entries[i]);
2786}
2787
2788static void qib_irq_notifier_notify(struct irq_affinity_notify *notify,
2789 const cpumask_t *mask)
2790{
2791 struct qib_irq_notify *n =
2792 container_of(notify, struct qib_irq_notify, notify);
2793 int cpu = cpumask_first(mask);
2794
2795 if (n->rcv) {
2796 struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
2797 qib_update_rhdrq_dca(rcd, cpu);
2798 } else {
2799 struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
2800 qib_update_sdma_dca(ppd, cpu);
2801 }
2802}
2803
2804static void qib_irq_notifier_release(struct kref *ref)
2805{
2806 struct qib_irq_notify *n =
2807 container_of(ref, struct qib_irq_notify, notify.kref);
2808 struct qib_devdata *dd;
2809
2810 if (n->rcv) {
2811 struct qib_ctxtdata *rcd = (struct qib_ctxtdata *)n->arg;
2812 dd = rcd->dd;
2813 } else {
2814 struct qib_pportdata *ppd = (struct qib_pportdata *)n->arg;
2815 dd = ppd->dd;
2816 }
2817 qib_devinfo(dd->pcidev,
2818 "release on HCA notify 0x%p n 0x%p\n", ref, n);
2819 kfree(n);
2820}
2821#endif
2822
2561/* 2823/*
2562 * Disable MSIx interrupt if enabled, call generic MSIx code 2824 * Disable MSIx interrupt if enabled, call generic MSIx code
2563 * to cleanup, and clear pending MSIx interrupts. 2825 * to cleanup, and clear pending MSIx interrupts.
@@ -2575,6 +2837,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
2575 2837
2576 dd->cspec->num_msix_entries = 0; 2838 dd->cspec->num_msix_entries = 0;
2577 for (i = 0; i < n; i++) { 2839 for (i = 0; i < n; i++) {
2840#ifdef CONFIG_INFINIBAND_QIB_DCA
2841 reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
2842#endif
2578 irq_set_affinity_hint( 2843 irq_set_affinity_hint(
2579 dd->cspec->msix_entries[i].msix.vector, NULL); 2844 dd->cspec->msix_entries[i].msix.vector, NULL);
2580 free_cpumask_var(dd->cspec->msix_entries[i].mask); 2845 free_cpumask_var(dd->cspec->msix_entries[i].mask);
@@ -2602,6 +2867,15 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
2602{ 2867{
2603 int i; 2868 int i;
2604 2869
2870#ifdef CONFIG_INFINIBAND_QIB_DCA
2871 if (dd->flags & QIB_DCA_ENABLED) {
2872 dca_remove_requester(&dd->pcidev->dev);
2873 dd->flags &= ~QIB_DCA_ENABLED;
2874 dd->cspec->dca_ctrl = 0;
2875 qib_write_kreg(dd, KREG_IDX(DCACtrlA), dd->cspec->dca_ctrl);
2876 }
2877#endif
2878
2605 qib_7322_free_irq(dd); 2879 qib_7322_free_irq(dd);
2606 kfree(dd->cspec->cntrs); 2880 kfree(dd->cspec->cntrs);
2607 kfree(dd->cspec->sendchkenable); 2881 kfree(dd->cspec->sendchkenable);
@@ -3068,6 +3342,53 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data)
3068 return IRQ_HANDLED; 3342 return IRQ_HANDLED;
3069} 3343}
3070 3344
3345#ifdef CONFIG_INFINIBAND_QIB_DCA
3346
3347static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
3348{
3349 if (!m->dca)
3350 return;
3351 qib_devinfo(dd->pcidev,
3352 "Disabling notifier on HCA %d irq %d\n",
3353 dd->unit,
3354 m->msix.vector);
3355 irq_set_affinity_notifier(
3356 m->msix.vector,
3357 NULL);
3358 m->notifier = NULL;
3359}
3360
3361static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
3362{
3363 struct qib_irq_notify *n;
3364
3365 if (!m->dca)
3366 return;
3367 n = kzalloc(sizeof(*n), GFP_KERNEL);
3368 if (n) {
3369 int ret;
3370
3371 m->notifier = n;
3372 n->notify.irq = m->msix.vector;
3373 n->notify.notify = qib_irq_notifier_notify;
3374 n->notify.release = qib_irq_notifier_release;
3375 n->arg = m->arg;
3376 n->rcv = m->rcv;
3377 qib_devinfo(dd->pcidev,
3378 "set notifier irq %d rcv %d notify %p\n",
3379 n->notify.irq, n->rcv, &n->notify);
3380 ret = irq_set_affinity_notifier(
3381 n->notify.irq,
3382 &n->notify);
3383 if (ret) {
3384 m->notifier = NULL;
3385 kfree(n);
3386 }
3387 }
3388}
3389
3390#endif
3391
3071/* 3392/*
3072 * Set up our chip-specific interrupt handler. 3393 * Set up our chip-specific interrupt handler.
3073 * The interrupt type has already been setup, so 3394 * The interrupt type has already been setup, so
@@ -3149,6 +3470,9 @@ try_intx:
3149 void *arg; 3470 void *arg;
3150 u64 val; 3471 u64 val;
3151 int lsb, reg, sh; 3472 int lsb, reg, sh;
3473#ifdef CONFIG_INFINIBAND_QIB_DCA
3474 int dca = 0;
3475#endif
3152 3476
3153 dd->cspec->msix_entries[msixnum]. 3477 dd->cspec->msix_entries[msixnum].
3154 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] 3478 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
@@ -3161,6 +3485,9 @@ try_intx:
3161 arg = dd->pport + irq_table[i].port - 1; 3485 arg = dd->pport + irq_table[i].port - 1;
3162 } else 3486 } else
3163 arg = dd; 3487 arg = dd;
3488#ifdef CONFIG_INFINIBAND_QIB_DCA
3489 dca = irq_table[i].dca;
3490#endif
3164 lsb = irq_table[i].lsb; 3491 lsb = irq_table[i].lsb;
3165 handler = irq_table[i].handler; 3492 handler = irq_table[i].handler;
3166 snprintf(dd->cspec->msix_entries[msixnum].name, 3493 snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3178,6 +3505,9 @@ try_intx:
3178 continue; 3505 continue;
3179 if (qib_krcvq01_no_msi && ctxt < 2) 3506 if (qib_krcvq01_no_msi && ctxt < 2)
3180 continue; 3507 continue;
3508#ifdef CONFIG_INFINIBAND_QIB_DCA
3509 dca = 1;
3510#endif
3181 lsb = QIB_I_RCVAVAIL_LSB + ctxt; 3511 lsb = QIB_I_RCVAVAIL_LSB + ctxt;
3182 handler = qib_7322pintr; 3512 handler = qib_7322pintr;
3183 snprintf(dd->cspec->msix_entries[msixnum].name, 3513 snprintf(dd->cspec->msix_entries[msixnum].name,
@@ -3203,6 +3533,11 @@ try_intx:
3203 goto try_intx; 3533 goto try_intx;
3204 } 3534 }
3205 dd->cspec->msix_entries[msixnum].arg = arg; 3535 dd->cspec->msix_entries[msixnum].arg = arg;
3536#ifdef CONFIG_INFINIBAND_QIB_DCA
3537 dd->cspec->msix_entries[msixnum].dca = dca;
3538 dd->cspec->msix_entries[msixnum].rcv =
3539 handler == qib_7322pintr;
3540#endif
3206 if (lsb >= 0) { 3541 if (lsb >= 0) {
3207 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; 3542 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
3208 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * 3543 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -6452,6 +6787,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
6452 qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt); 6787 qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
6453} 6788}
6454 6789
6790/*
6791 * sdma_lock must be held by the caller of this routine
6792 */
6793static void dump_sdma_7322_state(struct qib_pportdata *ppd)
6794{
6795 u64 reg, reg1, reg2;
6796
6797 reg = qib_read_kreg_port(ppd, krp_senddmastatus);
6798 qib_dev_porterr(ppd->dd, ppd->port,
6799 "SDMA senddmastatus: 0x%016llx\n", reg);
6800
6801 reg = qib_read_kreg_port(ppd, krp_sendctrl);
6802 qib_dev_porterr(ppd->dd, ppd->port,
6803 "SDMA sendctrl: 0x%016llx\n", reg);
6804
6805 reg = qib_read_kreg_port(ppd, krp_senddmabase);
6806 qib_dev_porterr(ppd->dd, ppd->port,
6807 "SDMA senddmabase: 0x%016llx\n", reg);
6808
6809 reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
6810 reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
6811 reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
6812 qib_dev_porterr(ppd->dd, ppd->port,
6813 "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
6814 reg, reg1, reg2);
6815
6816 /* get bufuse bits, clear them, and print them again if non-zero */
6817 reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
6818 qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
6819 reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
6820	qib_write_kreg_port(ppd, krp_senddmabuf_use1, reg1);
6821 reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
6822	qib_write_kreg_port(ppd, krp_senddmabuf_use2, reg2);
6823 /* 0 and 1 should always be zero, so print as short form */
6824 qib_dev_porterr(ppd->dd, ppd->port,
6825 "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
6826 reg, reg1, reg2);
6827 reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
6828 reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
6829 reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
6830 /* 0 and 1 should always be zero, so print as short form */
6831 qib_dev_porterr(ppd->dd, ppd->port,
6832 "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
6833 reg, reg1, reg2);
6834
6835 reg = qib_read_kreg_port(ppd, krp_senddmatail);
6836 qib_dev_porterr(ppd->dd, ppd->port,
6837 "SDMA senddmatail: 0x%016llx\n", reg);
6838
6839 reg = qib_read_kreg_port(ppd, krp_senddmahead);
6840 qib_dev_porterr(ppd->dd, ppd->port,
6841 "SDMA senddmahead: 0x%016llx\n", reg);
6842
6843 reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
6844 qib_dev_porterr(ppd->dd, ppd->port,
6845 "SDMA senddmaheadaddr: 0x%016llx\n", reg);
6846
6847 reg = qib_read_kreg_port(ppd, krp_senddmalengen);
6848 qib_dev_porterr(ppd->dd, ppd->port,
6849 "SDMA senddmalengen: 0x%016llx\n", reg);
6850
6851 reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
6852 qib_dev_porterr(ppd->dd, ppd->port,
6853 "SDMA senddmadesccnt: 0x%016llx\n", reg);
6854
6855 reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
6856 qib_dev_porterr(ppd->dd, ppd->port,
6857 "SDMA senddmaidlecnt: 0x%016llx\n", reg);
6858
6859 reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
6860 qib_dev_porterr(ppd->dd, ppd->port,
6861	"SDMA senddmaprioritythld: 0x%016llx\n", reg);
6862
6863 reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
6864 qib_dev_porterr(ppd->dd, ppd->port,
6865 "SDMA senddmareloadcnt: 0x%016llx\n", reg);
6866
6867 dump_sdma_state(ppd);
6868}
6869
6455static struct sdma_set_state_action sdma_7322_action_table[] = { 6870static struct sdma_set_state_action sdma_7322_action_table[] = {
6456 [qib_sdma_state_s00_hw_down] = { 6871 [qib_sdma_state_s00_hw_down] = {
6457 .go_s99_running_tofalse = 1, 6872 .go_s99_running_tofalse = 1,
@@ -6885,6 +7300,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6885 dd->f_sdma_init_early = qib_7322_sdma_init_early; 7300 dd->f_sdma_init_early = qib_7322_sdma_init_early;
6886 dd->f_writescratch = writescratch; 7301 dd->f_writescratch = writescratch;
6887 dd->f_tempsense_rd = qib_7322_tempsense_rd; 7302 dd->f_tempsense_rd = qib_7322_tempsense_rd;
7303#ifdef CONFIG_INFINIBAND_QIB_DCA
7304 dd->f_notify_dca = qib_7322_notify_dca;
7305#endif
6888 /* 7306 /*
6889 * Do remaining PCIe setup and save PCIe values in dd. 7307 * Do remaining PCIe setup and save PCIe values in dd.
6890 * Any error printing is already done by the init code. 7308 * Any error printing is already done by the init code.
@@ -6921,7 +7339,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6921 actual_cnt -= dd->num_pports; 7339 actual_cnt -= dd->num_pports;
6922 7340
6923 tabsize = actual_cnt; 7341 tabsize = actual_cnt;
6924 dd->cspec->msix_entries = kmalloc(tabsize * 7342 dd->cspec->msix_entries = kzalloc(tabsize *
6925 sizeof(struct qib_msix_entry), GFP_KERNEL); 7343 sizeof(struct qib_msix_entry), GFP_KERNEL);
6926 if (!dd->cspec->msix_entries) { 7344 if (!dd->cspec->msix_entries) {
6927 qib_dev_err(dd, "No memory for MSIx table\n"); 7345 qib_dev_err(dd, "No memory for MSIx table\n");
@@ -6941,7 +7359,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6941 7359
6942 /* clear diagctrl register, in case diags were running and crashed */ 7360 /* clear diagctrl register, in case diags were running and crashed */
6943 qib_write_kreg(dd, kr_hwdiagctrl, 0); 7361 qib_write_kreg(dd, kr_hwdiagctrl, 0);
6944 7362#ifdef CONFIG_INFINIBAND_QIB_DCA
7363 if (!dca_add_requester(&pdev->dev)) {
7364 qib_devinfo(dd->pcidev, "DCA enabled\n");
7365 dd->flags |= QIB_DCA_ENABLED;
7366 qib_setup_dca(dd);
7367 }
7368#endif
6945 goto bail; 7369 goto bail;
6946 7370
6947bail_cleanup: 7371bail_cleanup:
@@ -7156,15 +7580,20 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = {
7156 { 0, 0, 0, 1 }, /* QMH7342 backplane settings */ 7580 { 0, 0, 0, 1 }, /* QMH7342 backplane settings */
7157 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ 7581 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
7158 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */ 7582 { 0, 0, 0, 2 }, /* QMH7342 backplane settings */
7159 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7160 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7161 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7162 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7163 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7164 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7165 { 0, 0, 0, 11 }, /* QME7342 backplane settings */
7166 { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ 7583 { 0, 0, 0, 3 }, /* QMH7342 backplane settings */
7167 { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ 7584 { 0, 0, 0, 4 }, /* QMH7342 backplane settings */
7585 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7586 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7587 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7588 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7589 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7590 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7591 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7592 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7593 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7594 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7595 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7596 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7168}; 7597};
7169 7598
7170static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { 7599static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
@@ -7173,15 +7602,20 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = {
7173 { 0, 0, 0, 7 }, /* QMH7342 backplane settings */ 7602 { 0, 0, 0, 7 }, /* QMH7342 backplane settings */
7174 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ 7603 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
7175 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */ 7604 { 0, 0, 0, 8 }, /* QMH7342 backplane settings */
7176 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7177 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7178 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7179 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7180 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7181 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7182 { 0, 0, 0, 13 }, /* QME7342 backplane settings */
7183 { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ 7605 { 0, 0, 0, 9 }, /* QMH7342 backplane settings */
7184 { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ 7606 { 0, 0, 0, 10 }, /* QMH7342 backplane settings */
7607 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7608 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7609 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7610 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7611 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7612 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7613 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7614 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7615 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7616 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7617 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7618 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7185}; 7619};
7186 7620
7187static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { 7621static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
@@ -7190,15 +7624,20 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = {
7190 { 0, 1, 0, 5 }, /* QMH7342 backplane settings */ 7624 { 0, 1, 0, 5 }, /* QMH7342 backplane settings */
7191 { 0, 1, 0, 6 }, /* QMH7342 backplane settings */ 7625 { 0, 1, 0, 6 }, /* QMH7342 backplane settings */
7192 { 0, 1, 0, 8 }, /* QMH7342 backplane settings */ 7626 { 0, 1, 0, 8 }, /* QMH7342 backplane settings */
7193 { 0, 1, 12, 10 }, /* QME7342 backplane setting */
7194 { 0, 1, 12, 11 }, /* QME7342 backplane setting */
7195 { 0, 1, 12, 12 }, /* QME7342 backplane setting */
7196 { 0, 1, 12, 14 }, /* QME7342 backplane setting */
7197 { 0, 1, 12, 6 }, /* QME7342 backplane setting */
7198 { 0, 1, 12, 7 }, /* QME7342 backplane setting */
7199 { 0, 1, 12, 8 }, /* QME7342 backplane setting */
7200 { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ 7627 { 0, 1, 0, 10 }, /* QMH7342 backplane settings */
7201 { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ 7628 { 0, 1, 0, 12 }, /* QMH7342 backplane settings */
7629 { 0, 1, 4, 15 }, /* QME7342 backplane settings 1.0 */
7630 { 0, 1, 3, 15 }, /* QME7342 backplane settings 1.0 */
7631 { 0, 1, 0, 12 }, /* QME7342 backplane settings 1.0 */
7632 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.0 */
7633 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.0 */
7634 { 0, 1, 0, 14 }, /* QME7342 backplane settings 1.0 */
7635 { 0, 1, 2, 15 }, /* QME7342 backplane settings 1.0 */
7636 { 0, 1, 0, 11 }, /* QME7342 backplane settings 1.1 */
7637 { 0, 1, 0, 7 }, /* QME7342 backplane settings 1.1 */
7638 { 0, 1, 0, 9 }, /* QME7342 backplane settings 1.1 */
7639 { 0, 1, 0, 6 }, /* QME7342 backplane settings 1.1 */
7640 { 0, 1, 0, 8 }, /* QME7342 backplane settings 1.1 */
7202}; 7641};
7203 7642
7204static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = { 7643static const struct txdds_ent txdds_extra_mfg[TXDDS_MFG_SZ] = {
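
Note on the DCA hunks in qib_iba7322.c above: each MSI-X vector gets a struct irq_affinity_notify so that when user space (e.g. irqbalance) moves the vector to another CPU, the driver can re-program the matching receive-header/SDMA DCA tag via dca3_get_tag(). The sketch below shows only the generic affinity-notifier wiring, stripped of any qib specifics; it is illustrative only, not part of this commit, and the my_* names are invented.

	#include <linux/cpumask.h>
	#include <linux/interrupt.h>
	#include <linux/kref.h>
	#include <linux/printk.h>
	#include <linux/slab.h>

	struct my_irq_notify {
		struct irq_affinity_notify notify;	/* must stay embedded */
		void *cookie;				/* driver-private pointer */
	};

	/* Invoked (from a workqueue) whenever the vector's affinity mask changes. */
	static void my_affinity_notify(struct irq_affinity_notify *notify,
				       const cpumask_t *mask)
	{
		struct my_irq_notify *n =
			container_of(notify, struct my_irq_notify, notify);
		int cpu = cpumask_first(mask);

		/* Re-target per-CPU state (for qib: the DCA tag) at 'cpu'. */
		pr_info("irq %u moved to cpu %d (cookie %p)\n",
			notify->irq, cpu, n->cookie);
	}

	/* Final kref put after the notifier is detached; free the container. */
	static void my_affinity_release(struct kref *ref)
	{
		struct my_irq_notify *n =
			container_of(ref, struct my_irq_notify, notify.kref);

		kfree(n);
	}

	static int my_attach_affinity_notifier(unsigned int irq, void *cookie)
	{
		struct my_irq_notify *n = kzalloc(sizeof(*n), GFP_KERNEL);
		int ret;

		if (!n)
			return -ENOMEM;
		n->cookie = cookie;
		n->notify.irq = irq;
		n->notify.notify = my_affinity_notify;
		n->notify.release = my_affinity_release;
		ret = irq_set_affinity_notifier(irq, &n->notify);
		if (ret)
			kfree(n);
		return ret;
	}

	/* Passing NULL detaches the notifier; release() then frees it. */
	static void my_detach_affinity_notifier(unsigned int irq)
	{
		irq_set_affinity_notifier(irq, NULL);
	}
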
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 173f805790da..36e048e0e1d9 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -39,10 +39,17 @@
39#include <linux/idr.h> 39#include <linux/idr.h>
40#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/printk.h> 41#include <linux/printk.h>
42#ifdef CONFIG_INFINIBAND_QIB_DCA
43#include <linux/dca.h>
44#endif
42 45
43#include "qib.h" 46#include "qib.h"
44#include "qib_common.h" 47#include "qib_common.h"
45#include "qib_mad.h" 48#include "qib_mad.h"
49#ifdef CONFIG_DEBUG_FS
50#include "qib_debugfs.h"
51#include "qib_verbs.h"
52#endif
46 53
47#undef pr_fmt 54#undef pr_fmt
48#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt 55#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
@@ -64,6 +71,11 @@ ushort qib_cfgctxts;
64module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); 71module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
65MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); 72MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
66 73
74unsigned qib_numa_aware;
75module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
76MODULE_PARM_DESC(numa_aware,
77 "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
78
67/* 79/*
68 * If set, do not write to any regs if avoidable, hack to allow 80 * If set, do not write to any regs if avoidable, hack to allow
69 * check for deranged default register values. 81 * check for deranged default register values.
@@ -89,8 +101,6 @@ unsigned qib_wc_pat = 1; /* default (1) is to use PAT, not MTRR */
89module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO); 101module_param_named(wc_pat, qib_wc_pat, uint, S_IRUGO);
90MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism"); 102MODULE_PARM_DESC(wc_pat, "enable write-combining via PAT mechanism");
91 103
92struct workqueue_struct *qib_cq_wq;
93
94static void verify_interrupt(unsigned long); 104static void verify_interrupt(unsigned long);
95 105
96static struct idr qib_unit_table; 106static struct idr qib_unit_table;
@@ -121,6 +131,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
121{ 131{
122 unsigned i; 132 unsigned i;
123 int ret; 133 int ret;
134 int local_node_id = pcibus_to_node(dd->pcidev->bus);
135
136 if (local_node_id < 0)
137 local_node_id = numa_node_id();
138 dd->assigned_node_id = local_node_id;
124 139
125 /* 140 /*
126 * Allocate full ctxtcnt array, rather than just cfgctxts, because 141 * Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -143,7 +158,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
143 continue; 158 continue;
144 159
145 ppd = dd->pport + (i % dd->num_pports); 160 ppd = dd->pport + (i % dd->num_pports);
146 rcd = qib_create_ctxtdata(ppd, i); 161
162 rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
147 if (!rcd) { 163 if (!rcd) {
148 qib_dev_err(dd, 164 qib_dev_err(dd,
149 "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); 165 "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
@@ -161,20 +177,33 @@ done:
161/* 177/*
162 * Common code for user and kernel context setup. 178 * Common code for user and kernel context setup.
163 */ 179 */
164struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) 180struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
181 int node_id)
165{ 182{
166 struct qib_devdata *dd = ppd->dd; 183 struct qib_devdata *dd = ppd->dd;
167 struct qib_ctxtdata *rcd; 184 struct qib_ctxtdata *rcd;
168 185
169 rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); 186 rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
170 if (rcd) { 187 if (rcd) {
171 INIT_LIST_HEAD(&rcd->qp_wait_list); 188 INIT_LIST_HEAD(&rcd->qp_wait_list);
189 rcd->node_id = node_id;
172 rcd->ppd = ppd; 190 rcd->ppd = ppd;
173 rcd->dd = dd; 191 rcd->dd = dd;
174 rcd->cnt = 1; 192 rcd->cnt = 1;
175 rcd->ctxt = ctxt; 193 rcd->ctxt = ctxt;
176 dd->rcd[ctxt] = rcd; 194 dd->rcd[ctxt] = rcd;
177 195#ifdef CONFIG_DEBUG_FS
196 if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
197 rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
198 GFP_KERNEL, node_id);
199 if (!rcd->opstats) {
200 kfree(rcd);
201 qib_dev_err(dd,
202 "Unable to allocate per ctxt stats buffer\n");
203 return NULL;
204 }
205 }
206#endif
178 dd->f_init_ctxt(rcd); 207 dd->f_init_ctxt(rcd);
179 208
180 /* 209 /*
@@ -429,6 +458,7 @@ static int loadtime_init(struct qib_devdata *dd)
429 dd->intrchk_timer.function = verify_interrupt; 458 dd->intrchk_timer.function = verify_interrupt;
430 dd->intrchk_timer.data = (unsigned long) dd; 459 dd->intrchk_timer.data = (unsigned long) dd;
431 460
461 ret = qib_cq_init(dd);
432done: 462done:
433 return ret; 463 return ret;
434} 464}
@@ -944,6 +974,10 @@ void qib_free_ctxtdata(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
944 vfree(rcd->subctxt_uregbase); 974 vfree(rcd->subctxt_uregbase);
945 vfree(rcd->subctxt_rcvegrbuf); 975 vfree(rcd->subctxt_rcvegrbuf);
946 vfree(rcd->subctxt_rcvhdr_base); 976 vfree(rcd->subctxt_rcvhdr_base);
977#ifdef CONFIG_DEBUG_FS
978 kfree(rcd->opstats);
979 rcd->opstats = NULL;
980#endif
947 kfree(rcd); 981 kfree(rcd);
948} 982}
949 983
@@ -1033,7 +1067,6 @@ done:
1033 dd->f_set_armlaunch(dd, 1); 1067 dd->f_set_armlaunch(dd, 1);
1034} 1068}
1035 1069
1036
1037void qib_free_devdata(struct qib_devdata *dd) 1070void qib_free_devdata(struct qib_devdata *dd)
1038{ 1071{
1039 unsigned long flags; 1072 unsigned long flags;
@@ -1043,6 +1076,9 @@ void qib_free_devdata(struct qib_devdata *dd)
1043 list_del(&dd->list); 1076 list_del(&dd->list);
1044 spin_unlock_irqrestore(&qib_devs_lock, flags); 1077 spin_unlock_irqrestore(&qib_devs_lock, flags);
1045 1078
1079#ifdef CONFIG_DEBUG_FS
1080 qib_dbg_ibdev_exit(&dd->verbs_dev);
1081#endif
1046 ib_dealloc_device(&dd->verbs_dev.ibdev); 1082 ib_dealloc_device(&dd->verbs_dev.ibdev);
1047} 1083}
1048 1084
@@ -1066,6 +1102,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1066 goto bail; 1102 goto bail;
1067 } 1103 }
1068 1104
1105#ifdef CONFIG_DEBUG_FS
1106 qib_dbg_ibdev_init(&dd->verbs_dev);
1107#endif
1108
1069 idr_preload(GFP_KERNEL); 1109 idr_preload(GFP_KERNEL);
1070 spin_lock_irqsave(&qib_devs_lock, flags); 1110 spin_lock_irqsave(&qib_devs_lock, flags);
1071 1111
@@ -1081,6 +1121,9 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1081 if (ret < 0) { 1121 if (ret < 0) {
1082 qib_early_err(&pdev->dev, 1122 qib_early_err(&pdev->dev,
1083 "Could not allocate unit ID: error %d\n", -ret); 1123 "Could not allocate unit ID: error %d\n", -ret);
1124#ifdef CONFIG_DEBUG_FS
1125 qib_dbg_ibdev_exit(&dd->verbs_dev);
1126#endif
1084 ib_dealloc_device(&dd->verbs_dev.ibdev); 1127 ib_dealloc_device(&dd->verbs_dev.ibdev);
1085 dd = ERR_PTR(ret); 1128 dd = ERR_PTR(ret);
1086 goto bail; 1129 goto bail;
@@ -1158,6 +1201,35 @@ struct pci_driver qib_driver = {
1158 .err_handler = &qib_pci_err_handler, 1201 .err_handler = &qib_pci_err_handler,
1159}; 1202};
1160 1203
1204#ifdef CONFIG_INFINIBAND_QIB_DCA
1205
1206static int qib_notify_dca(struct notifier_block *, unsigned long, void *);
1207static struct notifier_block dca_notifier = {
1208 .notifier_call = qib_notify_dca,
1209 .next = NULL,
1210 .priority = 0
1211};
1212
1213static int qib_notify_dca_device(struct device *device, void *data)
1214{
1215 struct qib_devdata *dd = dev_get_drvdata(device);
1216 unsigned long event = *(unsigned long *)data;
1217
1218 return dd->f_notify_dca(dd, event);
1219}
1220
1221static int qib_notify_dca(struct notifier_block *nb, unsigned long event,
1222 void *p)
1223{
1224 int rval;
1225
1226 rval = driver_for_each_device(&qib_driver.driver, NULL,
1227 &event, qib_notify_dca_device);
1228 return rval ? NOTIFY_BAD : NOTIFY_DONE;
1229}
1230
1231#endif
1232
1161/* 1233/*
1162 * Do all the generic driver unit- and chip-independent memory 1234 * Do all the generic driver unit- and chip-independent memory
1163 * allocation and initialization. 1235 * allocation and initialization.
@@ -1170,22 +1242,22 @@ static int __init qlogic_ib_init(void)
1170 if (ret) 1242 if (ret)
1171 goto bail; 1243 goto bail;
1172 1244
1173 qib_cq_wq = create_singlethread_workqueue("qib_cq");
1174 if (!qib_cq_wq) {
1175 ret = -ENOMEM;
1176 goto bail_dev;
1177 }
1178
1179 /* 1245 /*
1180 * These must be called before the driver is registered with 1246 * These must be called before the driver is registered with
1181 * the PCI subsystem. 1247 * the PCI subsystem.
1182 */ 1248 */
1183 idr_init(&qib_unit_table); 1249 idr_init(&qib_unit_table);
1184 1250
1251#ifdef CONFIG_INFINIBAND_QIB_DCA
1252 dca_register_notify(&dca_notifier);
1253#endif
1254#ifdef CONFIG_DEBUG_FS
1255 qib_dbg_init();
1256#endif
1185 ret = pci_register_driver(&qib_driver); 1257 ret = pci_register_driver(&qib_driver);
1186 if (ret < 0) { 1258 if (ret < 0) {
1187 pr_err("Unable to register driver: error %d\n", -ret); 1259 pr_err("Unable to register driver: error %d\n", -ret);
1188 goto bail_unit; 1260 goto bail_dev;
1189 } 1261 }
1190 1262
1191 /* not fatal if it doesn't work */ 1263 /* not fatal if it doesn't work */
@@ -1193,10 +1265,14 @@ static int __init qlogic_ib_init(void)
1193 pr_err("Unable to register ipathfs\n"); 1265 pr_err("Unable to register ipathfs\n");
1194 goto bail; /* all OK */ 1266 goto bail; /* all OK */
1195 1267
1196bail_unit:
1197 idr_destroy(&qib_unit_table);
1198 destroy_workqueue(qib_cq_wq);
1199bail_dev: 1268bail_dev:
1269#ifdef CONFIG_INFINIBAND_QIB_DCA
1270 dca_unregister_notify(&dca_notifier);
1271#endif
1272#ifdef CONFIG_DEBUG_FS
1273 qib_dbg_exit();
1274#endif
1275 idr_destroy(&qib_unit_table);
1200 qib_dev_cleanup(); 1276 qib_dev_cleanup();
1201bail: 1277bail:
1202 return ret; 1278 return ret;
@@ -1217,9 +1293,13 @@ static void __exit qlogic_ib_cleanup(void)
1217 "Unable to cleanup counter filesystem: error %d\n", 1293 "Unable to cleanup counter filesystem: error %d\n",
1218 -ret); 1294 -ret);
1219 1295
1296#ifdef CONFIG_INFINIBAND_QIB_DCA
1297 dca_unregister_notify(&dca_notifier);
1298#endif
1220 pci_unregister_driver(&qib_driver); 1299 pci_unregister_driver(&qib_driver);
1221 1300#ifdef CONFIG_DEBUG_FS
1222 destroy_workqueue(qib_cq_wq); 1301 qib_dbg_exit();
1302#endif
1223 1303
1224 qib_cpulist_count = 0; 1304 qib_cpulist_count = 0;
1225 kfree(qib_cpulist); 1305 kfree(qib_cpulist);
@@ -1270,7 +1350,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
1270 if (dd->pageshadow) { 1350 if (dd->pageshadow) {
1271 struct page **tmpp = dd->pageshadow; 1351 struct page **tmpp = dd->pageshadow;
1272 dma_addr_t *tmpd = dd->physshadow; 1352 dma_addr_t *tmpd = dd->physshadow;
1273 int i, cnt = 0; 1353 int i;
1274 1354
1275 for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) { 1355 for (ctxt = 0; ctxt < dd->cfgctxts; ctxt++) {
1276 int ctxt_tidbase = ctxt * dd->rcvtidcnt; 1356 int ctxt_tidbase = ctxt * dd->rcvtidcnt;
@@ -1283,13 +1363,13 @@ static void cleanup_device_data(struct qib_devdata *dd)
1283 PAGE_SIZE, PCI_DMA_FROMDEVICE); 1363 PAGE_SIZE, PCI_DMA_FROMDEVICE);
1284 qib_release_user_pages(&tmpp[i], 1); 1364 qib_release_user_pages(&tmpp[i], 1);
1285 tmpp[i] = NULL; 1365 tmpp[i] = NULL;
1286 cnt++;
1287 } 1366 }
1288 } 1367 }
1289 1368
1290 tmpp = dd->pageshadow;
1291 dd->pageshadow = NULL; 1369 dd->pageshadow = NULL;
1292 vfree(tmpp); 1370 vfree(tmpp);
1371 dd->physshadow = NULL;
1372 vfree(tmpd);
1293 } 1373 }
1294 1374
1295 /* 1375 /*
@@ -1311,6 +1391,7 @@ static void cleanup_device_data(struct qib_devdata *dd)
1311 } 1391 }
1312 kfree(tmp); 1392 kfree(tmp);
1313 kfree(dd->boardname); 1393 kfree(dd->boardname);
1394 qib_cq_exit(dd);
1314} 1395}
1315 1396
1316/* 1397/*
@@ -1483,6 +1564,7 @@ static void qib_remove_one(struct pci_dev *pdev)
1483int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) 1564int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1484{ 1565{
1485 unsigned amt; 1566 unsigned amt;
1567 int old_node_id;
1486 1568
1487 if (!rcd->rcvhdrq) { 1569 if (!rcd->rcvhdrq) {
1488 dma_addr_t phys_hdrqtail; 1570 dma_addr_t phys_hdrqtail;
@@ -1492,9 +1574,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1492 sizeof(u32), PAGE_SIZE); 1574 sizeof(u32), PAGE_SIZE);
1493 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 1575 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
1494 GFP_USER : GFP_KERNEL; 1576 GFP_USER : GFP_KERNEL;
1577
1578 old_node_id = dev_to_node(&dd->pcidev->dev);
1579 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1495 rcd->rcvhdrq = dma_alloc_coherent( 1580 rcd->rcvhdrq = dma_alloc_coherent(
1496 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, 1581 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
1497 gfp_flags | __GFP_COMP); 1582 gfp_flags | __GFP_COMP);
1583 set_dev_node(&dd->pcidev->dev, old_node_id);
1498 1584
1499 if (!rcd->rcvhdrq) { 1585 if (!rcd->rcvhdrq) {
1500 qib_dev_err(dd, 1586 qib_dev_err(dd,
@@ -1510,9 +1596,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1510 } 1596 }
1511 1597
1512 if (!(dd->flags & QIB_NODMA_RTAIL)) { 1598 if (!(dd->flags & QIB_NODMA_RTAIL)) {
1599 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1513 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( 1600 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
1514 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1601 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
1515 gfp_flags); 1602 gfp_flags);
1603 set_dev_node(&dd->pcidev->dev, old_node_id);
1516 if (!rcd->rcvhdrtail_kvaddr) 1604 if (!rcd->rcvhdrtail_kvaddr)
1517 goto bail_free; 1605 goto bail_free;
1518 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; 1606 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1556,6 +1644,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1556 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; 1644 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
1557 size_t size; 1645 size_t size;
1558 gfp_t gfp_flags; 1646 gfp_t gfp_flags;
1647 int old_node_id;
1559 1648
1560 /* 1649 /*
1561 * GFP_USER, but without GFP_FS, so buffer cache can be 1650 * GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1574,25 +1663,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1574 size = rcd->rcvegrbuf_size; 1663 size = rcd->rcvegrbuf_size;
1575 if (!rcd->rcvegrbuf) { 1664 if (!rcd->rcvegrbuf) {
1576 rcd->rcvegrbuf = 1665 rcd->rcvegrbuf =
1577 kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), 1666 kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
1578 GFP_KERNEL); 1667 GFP_KERNEL, rcd->node_id);
1579 if (!rcd->rcvegrbuf) 1668 if (!rcd->rcvegrbuf)
1580 goto bail; 1669 goto bail;
1581 } 1670 }
1582 if (!rcd->rcvegrbuf_phys) { 1671 if (!rcd->rcvegrbuf_phys) {
1583 rcd->rcvegrbuf_phys = 1672 rcd->rcvegrbuf_phys =
1584 kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), 1673 kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
1585 GFP_KERNEL); 1674 GFP_KERNEL, rcd->node_id);
1586 if (!rcd->rcvegrbuf_phys) 1675 if (!rcd->rcvegrbuf_phys)
1587 goto bail_rcvegrbuf; 1676 goto bail_rcvegrbuf;
1588 } 1677 }
1589 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { 1678 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
1590 if (rcd->rcvegrbuf[e]) 1679 if (rcd->rcvegrbuf[e])
1591 continue; 1680 continue;
1681
1682 old_node_id = dev_to_node(&dd->pcidev->dev);
1683 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1592 rcd->rcvegrbuf[e] = 1684 rcd->rcvegrbuf[e] =
1593 dma_alloc_coherent(&dd->pcidev->dev, size, 1685 dma_alloc_coherent(&dd->pcidev->dev, size,
1594 &rcd->rcvegrbuf_phys[e], 1686 &rcd->rcvegrbuf_phys[e],
1595 gfp_flags); 1687 gfp_flags);
1688 set_dev_node(&dd->pcidev->dev, old_node_id);
1596 if (!rcd->rcvegrbuf[e]) 1689 if (!rcd->rcvegrbuf[e])
1597 goto bail_rcvegrbuf_phys; 1690 goto bail_rcvegrbuf_phys;
1598 } 1691 }
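
The qib_init.c hunks above make per-context allocations NUMA-aware: host memory moves to kzalloc_node()/kmalloc_node(), and the coherent DMA buffers (rcvhdrq, rcvhdrtail, eager buffers) are placed on the context's node by temporarily re-pointing the PCI device's node around dma_alloc_coherent(). A minimal sketch of that second trick, with invented helper names and no qib data structures:

	#include <linux/device.h>
	#include <linux/dma-mapping.h>
	#include <linux/pci.h>
	#include <linux/topology.h>

	/* Prefer the node the PCI bus hangs off; fall back to the caller's node. */
	static int pick_alloc_node(struct pci_dev *pdev)
	{
		int node = pcibus_to_node(pdev->bus);

		return node < 0 ? numa_node_id() : node;
	}

	/*
	 * dma_alloc_coherent() allocates on the device's node, so briefly point
	 * the device at the desired node and restore it afterwards.
	 */
	static void *alloc_coherent_on_node(struct pci_dev *pdev, size_t size,
					    dma_addr_t *dma_handle, int node)
	{
		struct device *dev = &pdev->dev;
		int old_node = dev_to_node(dev);
		void *buf;

		set_dev_node(dev, node);
		buf = dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
		set_dev_node(dev, old_node);	/* always restore */

		return buf;
	}
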
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index a6a2cc2ba260..3cca55b51e54 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -35,6 +35,9 @@
35#include <linux/err.h> 35#include <linux/err.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#ifdef CONFIG_DEBUG_FS
39#include <linux/seq_file.h>
40#endif
38 41
39#include "qib.h" 42#include "qib.h"
40 43
@@ -222,8 +225,8 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
222 unsigned long flags; 225 unsigned long flags;
223 unsigned n = qpn_hash(dev, qp->ibqp.qp_num); 226 unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
224 227
225 spin_lock_irqsave(&dev->qpt_lock, flags);
226 atomic_inc(&qp->refcount); 228 atomic_inc(&qp->refcount);
229 spin_lock_irqsave(&dev->qpt_lock, flags);
227 230
228 if (qp->ibqp.qp_num == 0) 231 if (qp->ibqp.qp_num == 0)
229 rcu_assign_pointer(ibp->qp0, qp); 232 rcu_assign_pointer(ibp->qp0, qp);
@@ -235,7 +238,6 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
235 } 238 }
236 239
237 spin_unlock_irqrestore(&dev->qpt_lock, flags); 240 spin_unlock_irqrestore(&dev->qpt_lock, flags);
238 synchronize_rcu();
239} 241}
240 242
241/* 243/*
@@ -247,36 +249,39 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
247 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 249 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
248 unsigned n = qpn_hash(dev, qp->ibqp.qp_num); 250 unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
249 unsigned long flags; 251 unsigned long flags;
252 int removed = 1;
250 253
251 spin_lock_irqsave(&dev->qpt_lock, flags); 254 spin_lock_irqsave(&dev->qpt_lock, flags);
252 255
253 if (rcu_dereference_protected(ibp->qp0, 256 if (rcu_dereference_protected(ibp->qp0,
254 lockdep_is_held(&dev->qpt_lock)) == qp) { 257 lockdep_is_held(&dev->qpt_lock)) == qp) {
255 atomic_dec(&qp->refcount);
256 rcu_assign_pointer(ibp->qp0, NULL); 258 rcu_assign_pointer(ibp->qp0, NULL);
257 } else if (rcu_dereference_protected(ibp->qp1, 259 } else if (rcu_dereference_protected(ibp->qp1,
258 lockdep_is_held(&dev->qpt_lock)) == qp) { 260 lockdep_is_held(&dev->qpt_lock)) == qp) {
259 atomic_dec(&qp->refcount);
260 rcu_assign_pointer(ibp->qp1, NULL); 261 rcu_assign_pointer(ibp->qp1, NULL);
261 } else { 262 } else {
262 struct qib_qp *q; 263 struct qib_qp *q;
263 struct qib_qp __rcu **qpp; 264 struct qib_qp __rcu **qpp;
264 265
266 removed = 0;
265 qpp = &dev->qp_table[n]; 267 qpp = &dev->qp_table[n];
266 for (; (q = rcu_dereference_protected(*qpp, 268 for (; (q = rcu_dereference_protected(*qpp,
267 lockdep_is_held(&dev->qpt_lock))) != NULL; 269 lockdep_is_held(&dev->qpt_lock))) != NULL;
268 qpp = &q->next) 270 qpp = &q->next)
269 if (q == qp) { 271 if (q == qp) {
270 atomic_dec(&qp->refcount);
271 rcu_assign_pointer(*qpp, 272 rcu_assign_pointer(*qpp,
272 rcu_dereference_protected(qp->next, 273 rcu_dereference_protected(qp->next,
273 lockdep_is_held(&dev->qpt_lock))); 274 lockdep_is_held(&dev->qpt_lock)));
275 removed = 1;
274 break; 276 break;
275 } 277 }
276 } 278 }
277 279
278 spin_unlock_irqrestore(&dev->qpt_lock, flags); 280 spin_unlock_irqrestore(&dev->qpt_lock, flags);
279 synchronize_rcu(); 281 if (removed) {
282 synchronize_rcu();
283 atomic_dec(&qp->refcount);
284 }
280} 285}
281 286
282/** 287/**
@@ -334,26 +339,25 @@ struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
334{ 339{
335 struct qib_qp *qp = NULL; 340 struct qib_qp *qp = NULL;
336 341
342 rcu_read_lock();
337 if (unlikely(qpn <= 1)) { 343 if (unlikely(qpn <= 1)) {
338 rcu_read_lock();
339 if (qpn == 0) 344 if (qpn == 0)
340 qp = rcu_dereference(ibp->qp0); 345 qp = rcu_dereference(ibp->qp0);
341 else 346 else
342 qp = rcu_dereference(ibp->qp1); 347 qp = rcu_dereference(ibp->qp1);
348 if (qp)
349 atomic_inc(&qp->refcount);
343 } else { 350 } else {
344 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; 351 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
345 unsigned n = qpn_hash(dev, qpn); 352 unsigned n = qpn_hash(dev, qpn);
346 353
347 rcu_read_lock();
348 for (qp = rcu_dereference(dev->qp_table[n]); qp; 354 for (qp = rcu_dereference(dev->qp_table[n]); qp;
349 qp = rcu_dereference(qp->next)) 355 qp = rcu_dereference(qp->next))
350 if (qp->ibqp.qp_num == qpn) 356 if (qp->ibqp.qp_num == qpn) {
357 atomic_inc(&qp->refcount);
351 break; 358 break;
359 }
352 } 360 }
353 if (qp)
354 if (unlikely(!atomic_inc_not_zero(&qp->refcount)))
355 qp = NULL;
356
357 rcu_read_unlock(); 361 rcu_read_unlock();
358 return qp; 362 return qp;
359} 363}
@@ -1286,3 +1290,94 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
1286 } 1290 }
1287 } 1291 }
1288} 1292}
1293
1294#ifdef CONFIG_DEBUG_FS
1295
1296struct qib_qp_iter {
1297 struct qib_ibdev *dev;
1298 struct qib_qp *qp;
1299 int n;
1300};
1301
1302struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev)
1303{
1304 struct qib_qp_iter *iter;
1305
1306 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1307 if (!iter)
1308 return NULL;
1309
1310 iter->dev = dev;
1311 if (qib_qp_iter_next(iter)) {
1312 kfree(iter);
1313 return NULL;
1314 }
1315
1316 return iter;
1317}
1318
1319int qib_qp_iter_next(struct qib_qp_iter *iter)
1320{
1321 struct qib_ibdev *dev = iter->dev;
1322 int n = iter->n;
1323 int ret = 1;
1324 struct qib_qp *pqp = iter->qp;
1325 struct qib_qp *qp;
1326
1327 rcu_read_lock();
1328 for (; n < dev->qp_table_size; n++) {
1329 if (pqp)
1330 qp = rcu_dereference(pqp->next);
1331 else
1332 qp = rcu_dereference(dev->qp_table[n]);
1333 pqp = qp;
1334 if (qp) {
1335 if (iter->qp)
1336 atomic_dec(&iter->qp->refcount);
1337 atomic_inc(&qp->refcount);
1338 rcu_read_unlock();
1339 iter->qp = qp;
1340 iter->n = n;
1341 return 0;
1342 }
1343 }
1344 rcu_read_unlock();
1345 if (iter->qp)
1346 atomic_dec(&iter->qp->refcount);
1347 return ret;
1348}
1349
1350static const char * const qp_type_str[] = {
1351 "SMI", "GSI", "RC", "UC", "UD",
1352};
1353
1354void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
1355{
1356 struct qib_swqe *wqe;
1357 struct qib_qp *qp = iter->qp;
1358
1359 wqe = get_swqe_ptr(qp, qp->s_last);
1360 seq_printf(s,
1361 "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
1362 iter->n,
1363 qp->ibqp.qp_num,
1364 qp_type_str[qp->ibqp.qp_type],
1365 qp->state,
1366 wqe->wr.opcode,
1367 qp->s_hdrwords,
1368 qp->s_flags,
1369 atomic_read(&qp->s_dma_busy),
1370 !list_empty(&qp->iowait),
1371 qp->timeout,
1372 wqe->ssn,
1373 qp->s_lsn,
1374 qp->s_last_psn,
1375 qp->s_psn, qp->s_next_psn,
1376 qp->s_sending_psn, qp->s_sending_hpsn,
1377 qp->s_last, qp->s_acked, qp->s_cur,
1378 qp->s_tail, qp->s_head, qp->s_size,
1379 qp->remote_qpn,
1380 qp->remote_ah_attr.dlid);
1381}
1382
1383#endif
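
The qib_qp.c changes above rework QP reference counting around RCU: the lookup now takes its reference while still inside the RCU read-side critical section (so the atomic_inc_not_zero() fallback is no longer needed), insert_qp() bumps the count before publishing, and remove_qp() drops the count only after synchronize_rcu() once the QP is really unlinked. Below is a generic sketch of that pattern using an invented object type, not qib code:

	#include <linux/atomic.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct obj {
		u32 id;
		atomic_t refcount;
		struct obj __rcu *next;
	};

	static struct obj __rcu *obj_table[256];
	static DEFINE_SPINLOCK(obj_lock);

	/* Reader: take the reference while still under rcu_read_lock(). */
	static struct obj *obj_lookup(u32 id)
	{
		struct obj *o;

		rcu_read_lock();
		for (o = rcu_dereference(obj_table[id & 255]); o;
		     o = rcu_dereference(o->next))
			if (o->id == id) {
				atomic_inc(&o->refcount);
				break;
			}
		rcu_read_unlock();
		return o;	/* caller does atomic_dec() when finished */
	}

	/* Writer: unlink under the lock, wait for readers, then drop the ref. */
	static void obj_remove(struct obj *victim)
	{
		struct obj __rcu **pp;
		struct obj *o;
		int removed = 0;

		spin_lock(&obj_lock);
		for (pp = &obj_table[victim->id & 255];
		     (o = rcu_dereference_protected(*pp, lockdep_is_held(&obj_lock)));
		     pp = &o->next)
			if (o == victim) {
				rcu_assign_pointer(*pp,
					rcu_dereference_protected(victim->next,
						lockdep_is_held(&obj_lock)));
				removed = 1;
				break;
			}
		spin_unlock(&obj_lock);

		if (removed) {
			synchronize_rcu();	/* no reader can still see 'victim' */
			atomic_dec(&victim->refcount);
		}
	}
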
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc514431212..32162d355370 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -708,6 +708,62 @@ unlock:
708 return ret; 708 return ret;
709} 709}
710 710
711/*
712 * sdma_lock must be held by the caller of this routine
713 */
714void dump_sdma_state(struct qib_pportdata *ppd)
715{
716 struct qib_sdma_desc *descq;
717 struct qib_sdma_txreq *txp, *txpnext;
718 __le64 *descqp;
719 u64 desc[2];
720 dma_addr_t addr;
721 u16 gen, dwlen, dwoffset;
722 u16 head, tail, cnt;
723
724 head = ppd->sdma_descq_head;
725 tail = ppd->sdma_descq_tail;
726 cnt = qib_sdma_descq_freecnt(ppd);
727 descq = ppd->sdma_descq;
728
729 qib_dev_porterr(ppd->dd, ppd->port,
730 "SDMA ppd->sdma_descq_head: %u\n", head);
731 qib_dev_porterr(ppd->dd, ppd->port,
732 "SDMA ppd->sdma_descq_tail: %u\n", tail);
733 qib_dev_porterr(ppd->dd, ppd->port,
734 "SDMA sdma_descq_freecnt: %u\n", cnt);
735
736 /* print info for each entry in the descriptor queue */
737 while (head != tail) {
738 char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
739
740 descqp = &descq[head].qw[0];
741 desc[0] = le64_to_cpu(descqp[0]);
742 desc[1] = le64_to_cpu(descqp[1]);
743 flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
744 flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
745 flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
746 flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
747 flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
748 addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
749 gen = (desc[0] >> 30) & 3ULL;
750 dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
751 dwoffset = (desc[0] & 0x7ffULL) << 2;
752 qib_dev_porterr(ppd->dd, ppd->port,
753 "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
754 head, flags, addr, gen, dwlen, dwoffset);
755 if (++head == ppd->sdma_descq_cnt)
756 head = 0;
757 }
758
759 /* print dma descriptor indices from the TX requests */
760 list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
761 list)
762 qib_dev_porterr(ppd->dd, ppd->port,
763 "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
764 txp->start_idx, txp->next_descq_idx);
765}
766
711void qib_sdma_process_event(struct qib_pportdata *ppd, 767void qib_sdma_process_event(struct qib_pportdata *ppd,
712 enum qib_sdma_events event) 768 enum qib_sdma_events event)
713{ 769{
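
For reference, the per-descriptor decode inside dump_sdma_state() above can be factored as below; the bit positions are copied verbatim from the patch, while the struct and function names are made up, so treat it purely as an illustration of pulling fields out of the two little-endian descriptor quadwords:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	struct sdma_desc_decoded {
		u64 addr;		/* DMA address, low two bits masked off */
		u16 gen;		/* generation bits [31:30] of qword 0 */
		u16 len_bytes;		/* length, as printed by the patch */
		u16 off_bytes;		/* dword offset * 4 */
		char flags[6];		/* same I/L/H/F/L notation the patch prints */
	};

	/* Decode one 16-byte descriptor made of two __le64 quadwords. */
	static void decode_sdma_desc(const __le64 *qw, struct sdma_desc_decoded *d)
	{
		u64 d0 = le64_to_cpu(qw[0]);
		u64 d1 = le64_to_cpu(qw[1]);

		d->flags[0] = (d0 & (1ULL << 15)) ? 'I' : '-';
		d->flags[1] = (d0 & (1ULL << 14)) ? 'L' : 'S';
		d->flags[2] = (d0 & (1ULL << 13)) ? 'H' : '-';
		d->flags[3] = (d0 & (1ULL << 12)) ? 'F' : '-';
		d->flags[4] = (d0 & (1ULL << 11)) ? 'L' : '-';
		d->flags[5] = '\0';
		d->addr      = (d1 << 32) | ((d0 >> 32) & 0xfffffffcULL);
		d->gen       = (d0 >> 30) & 3ULL;
		d->len_bytes = (d0 >> 14) & (0x7ffULL << 2);
		d->off_bytes = (d0 & 0x7ffULL) << 2;
	}
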
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 904c384aa361..092b0bb1bb78 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -645,9 +645,11 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
645 } else 645 } else
646 goto drop; 646 goto drop;
647 647
648 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 648 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
649 ibp->opstats[opcode & 0x7f].n_bytes += tlen; 649#ifdef CONFIG_DEBUG_FS
650 ibp->opstats[opcode & 0x7f].n_packets++; 650 rcd->opstats->stats[opcode].n_bytes += tlen;
651 rcd->opstats->stats[opcode].n_packets++;
652#endif
651 653
652 /* Get the destination QP number. */ 654 /* Get the destination QP number. */
653 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK; 655 qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index aff8b2c17886..012e2c7575ad 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
@@ -41,6 +41,7 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/kref.h> 42#include <linux/kref.h>
43#include <linux/workqueue.h> 43#include <linux/workqueue.h>
44#include <linux/kthread.h>
44#include <linux/completion.h> 45#include <linux/completion.h>
45#include <rdma/ib_pack.h> 46#include <rdma/ib_pack.h>
46#include <rdma/ib_user_verbs.h> 47#include <rdma/ib_user_verbs.h>
@@ -267,7 +268,8 @@ struct qib_cq_wc {
267 */ 268 */
268struct qib_cq { 269struct qib_cq {
269 struct ib_cq ibcq; 270 struct ib_cq ibcq;
270 struct work_struct comptask; 271 struct kthread_work comptask;
272 struct qib_devdata *dd;
271 spinlock_t lock; /* protect changes in this struct */ 273 spinlock_t lock; /* protect changes in this struct */
272 u8 notify; 274 u8 notify;
273 u8 triggered; 275 u8 triggered;
@@ -658,6 +660,10 @@ struct qib_opcode_stats {
658 u64 n_bytes; /* total number of bytes */ 660 u64 n_bytes; /* total number of bytes */
659}; 661};
660 662
663struct qib_opcode_stats_perctx {
664 struct qib_opcode_stats stats[128];
665};
666
661struct qib_ibport { 667struct qib_ibport {
662 struct qib_qp __rcu *qp0; 668 struct qib_qp __rcu *qp0;
663 struct qib_qp __rcu *qp1; 669 struct qib_qp __rcu *qp1;
@@ -724,7 +730,6 @@ struct qib_ibport {
724 u8 vl_high_limit; 730 u8 vl_high_limit;
725 u8 sl_to_vl[16]; 731 u8 sl_to_vl[16];
726 732
727 struct qib_opcode_stats opstats[128];
728}; 733};
729 734
730 735
@@ -768,6 +773,10 @@ struct qib_ibdev {
768 spinlock_t n_srqs_lock; 773 spinlock_t n_srqs_lock;
769 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ 774 u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
770 spinlock_t n_mcast_grps_lock; 775 spinlock_t n_mcast_grps_lock;
776#ifdef CONFIG_DEBUG_FS
777 /* per HCA debugfs */
778 struct dentry *qib_ibdev_dbg;
779#endif
771}; 780};
772 781
773struct qib_verbs_counters { 782struct qib_verbs_counters {
@@ -832,8 +841,6 @@ static inline int qib_send_ok(struct qib_qp *qp)
832 !(qp->s_flags & QIB_S_ANY_WAIT_SEND)); 841 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
833} 842}
834 843
835extern struct workqueue_struct *qib_cq_wq;
836
837/* 844/*
838 * This must be called with s_lock held. 845 * This must be called with s_lock held.
839 */ 846 */
@@ -910,6 +917,18 @@ void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
910 917
911void qib_free_qpn_table(struct qib_qpn_table *qpt); 918void qib_free_qpn_table(struct qib_qpn_table *qpt);
912 919
920#ifdef CONFIG_DEBUG_FS
921
922struct qib_qp_iter;
923
924struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev);
925
926int qib_qp_iter_next(struct qib_qp_iter *iter);
927
928void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
929
930#endif
931
913void qib_get_credit(struct qib_qp *qp, u32 aeth); 932void qib_get_credit(struct qib_qp *qp, u32 aeth);
914 933
915unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult); 934unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
@@ -972,6 +991,10 @@ int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
972 991
973int qib_destroy_srq(struct ib_srq *ibsrq); 992int qib_destroy_srq(struct ib_srq *ibsrq);
974 993
994int qib_cq_init(struct qib_devdata *dd);
995
996void qib_cq_exit(struct qib_devdata *dd);
997
975void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig); 998void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
976 999
977int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); 1000int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
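
The qib_verbs.h hunks above replace the driver-wide qib_cq_wq workqueue with a per-device kthread_work (struct qib_cq.comptask) plus qib_cq_init()/qib_cq_exit() hooks, so CQ completion callbacks can run in a dedicated kthread rather than a shared workqueue. The generic kthread_worker pattern looks roughly like the sketch below; it is illustrative only, uses the kthread API spellings of this kernel generation (init_kthread_worker()/queue_kthread_work(); later kernels renamed them kthread_init_worker()/kthread_queue_work()), and the my_* names are invented.

	#include <linux/err.h>
	#include <linux/kernel.h>
	#include <linux/kthread.h>
	#include <linux/sched.h>

	struct my_cq_dev {
		struct kthread_worker worker;
		struct task_struct *worker_task;
		struct kthread_work comp_work;
	};

	/* Runs in the dedicated worker thread instead of a shared workqueue. */
	static void my_comp_work_fn(struct kthread_work *work)
	{
		struct my_cq_dev *dev =
			container_of(work, struct my_cq_dev, comp_work);

		(void)dev;	/* deliver the device's completion callbacks here */
	}

	static int my_cq_worker_start(struct my_cq_dev *dev)
	{
		init_kthread_worker(&dev->worker);
		init_kthread_work(&dev->comp_work, my_comp_work_fn);
		dev->worker_task = kthread_run(kthread_worker_fn, &dev->worker,
					       "my_cq_worker");
		if (IS_ERR(dev->worker_task))
			return PTR_ERR(dev->worker_task);
		return 0;
	}

	/* Interrupt path: hand completion processing off to the worker. */
	static void my_cq_kick(struct my_cq_dev *dev)
	{
		queue_kthread_work(&dev->worker, &dev->comp_work);
	}

	static void my_cq_worker_stop(struct my_cq_dev *dev)
	{
		flush_kthread_worker(&dev->worker);
		kthread_stop(dev->worker_task);
	}
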
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7ccf3284dda3..f93baf8254c4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -53,8 +53,8 @@
53 53
54#define DRV_NAME "ib_srp" 54#define DRV_NAME "ib_srp"
55#define PFX DRV_NAME ": " 55#define PFX DRV_NAME ": "
56#define DRV_VERSION "0.2" 56#define DRV_VERSION "1.0"
57#define DRV_RELDATE "November 1, 2005" 57#define DRV_RELDATE "July 1, 2013"
58 58
59MODULE_AUTHOR("Roland Dreier"); 59MODULE_AUTHOR("Roland Dreier");
60MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " 60MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator "
@@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
231 return -ENOMEM; 231 return -ENOMEM;
232 232
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); 234 srp_recv_completion, NULL, target, SRP_RQ_SIZE,
235 target->comp_vector);
235 if (IS_ERR(recv_cq)) { 236 if (IS_ERR(recv_cq)) {
236 ret = PTR_ERR(recv_cq); 237 ret = PTR_ERR(recv_cq);
237 goto err; 238 goto err;
238 } 239 }
239 240
240 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 241 send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
241 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); 242 srp_send_completion, NULL, target, SRP_SQ_SIZE,
243 target->comp_vector);
242 if (IS_ERR(send_cq)) { 244 if (IS_ERR(send_cq)) {
243 ret = PTR_ERR(send_cq); 245 ret = PTR_ERR(send_cq);
244 goto err_recv_cq; 246 goto err_recv_cq;
@@ -542,11 +544,11 @@ static void srp_remove_work(struct work_struct *work)
542 544
543 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 545 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
544 546
547 srp_remove_target(target);
548
545 spin_lock(&target->srp_host->target_lock); 549 spin_lock(&target->srp_host->target_lock);
546 list_del(&target->list); 550 list_del(&target->list);
547 spin_unlock(&target->srp_host->target_lock); 551 spin_unlock(&target->srp_host->target_lock);
548
549 srp_remove_target(target);
550} 552}
551 553
552static void srp_rport_delete(struct srp_rport *rport) 554static void srp_rport_delete(struct srp_rport *rport)
@@ -1744,18 +1746,24 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1744{ 1746{
1745 struct srp_target_port *target = host_to_target(scmnd->device->host); 1747 struct srp_target_port *target = host_to_target(scmnd->device->host);
1746 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 1748 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
1749 int ret;
1747 1750
1748 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1751 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1749 1752
1750 if (!req || !srp_claim_req(target, req, scmnd)) 1753 if (!req || !srp_claim_req(target, req, scmnd))
1751 return FAILED; 1754 return FAILED;
1752 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, 1755 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1753 SRP_TSK_ABORT_TASK); 1756 SRP_TSK_ABORT_TASK) == 0)
1757 ret = SUCCESS;
1758 else if (target->transport_offline)
1759 ret = FAST_IO_FAIL;
1760 else
1761 ret = FAILED;
1754 srp_free_req(target, req, scmnd, 0); 1762 srp_free_req(target, req, scmnd, 0);
1755 scmnd->result = DID_ABORT << 16; 1763 scmnd->result = DID_ABORT << 16;
1756 scmnd->scsi_done(scmnd); 1764 scmnd->scsi_done(scmnd);
1757 1765
1758 return SUCCESS; 1766 return ret;
1759} 1767}
1760 1768
1761static int srp_reset_device(struct scsi_cmnd *scmnd) 1769static int srp_reset_device(struct scsi_cmnd *scmnd)
@@ -1891,6 +1899,14 @@ static ssize_t show_local_ib_device(struct device *dev,
1891 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 1899 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
1892} 1900}
1893 1901
1902static ssize_t show_comp_vector(struct device *dev,
1903 struct device_attribute *attr, char *buf)
1904{
1905 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1906
1907 return sprintf(buf, "%d\n", target->comp_vector);
1908}
1909
1894static ssize_t show_cmd_sg_entries(struct device *dev, 1910static ssize_t show_cmd_sg_entries(struct device *dev,
1895 struct device_attribute *attr, char *buf) 1911 struct device_attribute *attr, char *buf)
1896{ 1912{
@@ -1917,6 +1933,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
1917static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 1933static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
1918static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 1934static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
1919static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 1935static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
1936static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
1920static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 1937static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
1921static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 1938static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
1922 1939
@@ -1931,6 +1948,7 @@ static struct device_attribute *srp_host_attrs[] = {
1931 &dev_attr_zero_req_lim, 1948 &dev_attr_zero_req_lim,
1932 &dev_attr_local_ib_port, 1949 &dev_attr_local_ib_port,
1933 &dev_attr_local_ib_device, 1950 &dev_attr_local_ib_device,
1951 &dev_attr_comp_vector,
1934 &dev_attr_cmd_sg_entries, 1952 &dev_attr_cmd_sg_entries,
1935 &dev_attr_allow_ext_sg, 1953 &dev_attr_allow_ext_sg,
1936 NULL 1954 NULL
@@ -1946,6 +1964,7 @@ static struct scsi_host_template srp_template = {
1946 .eh_abort_handler = srp_abort, 1964 .eh_abort_handler = srp_abort,
1947 .eh_device_reset_handler = srp_reset_device, 1965 .eh_device_reset_handler = srp_reset_device,
1948 .eh_host_reset_handler = srp_reset_host, 1966 .eh_host_reset_handler = srp_reset_host,
1967 .skip_settle_delay = true,
1949 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 1968 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
1950 .can_queue = SRP_CMD_SQ_SIZE, 1969 .can_queue = SRP_CMD_SQ_SIZE,
1951 .this_id = -1, 1970 .this_id = -1,
@@ -2001,6 +2020,36 @@ static struct class srp_class = {
2001 .dev_release = srp_release_dev 2020 .dev_release = srp_release_dev
2002}; 2021};
2003 2022
2023/**
2024 * srp_conn_unique() - check whether the connection to a target is unique
2025 */
2026static bool srp_conn_unique(struct srp_host *host,
2027 struct srp_target_port *target)
2028{
2029 struct srp_target_port *t;
2030 bool ret = false;
2031
2032 if (target->state == SRP_TARGET_REMOVED)
2033 goto out;
2034
2035 ret = true;
2036
2037 spin_lock(&host->target_lock);
2038 list_for_each_entry(t, &host->target_list, list) {
2039 if (t != target &&
2040 target->id_ext == t->id_ext &&
2041 target->ioc_guid == t->ioc_guid &&
2042 target->initiator_ext == t->initiator_ext) {
2043 ret = false;
2044 break;
2045 }
2046 }
2047 spin_unlock(&host->target_lock);
2048
2049out:
2050 return ret;
2051}
2052
2004/* 2053/*
2005 * Target ports are added by writing 2054 * Target ports are added by writing
2006 * 2055 *
@@ -2023,6 +2072,7 @@ enum {
2023 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 2072 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2024 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 2073 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2025 SRP_OPT_SG_TABLESIZE = 1 << 11, 2074 SRP_OPT_SG_TABLESIZE = 1 << 11,
2075 SRP_OPT_COMP_VECTOR = 1 << 12,
2026 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 2076 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
2027 SRP_OPT_IOC_GUID | 2077 SRP_OPT_IOC_GUID |
2028 SRP_OPT_DGID | 2078 SRP_OPT_DGID |
@@ -2043,6 +2093,7 @@ static const match_table_t srp_opt_tokens = {
2043 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 2093 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
2044 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 2094 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
2045 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 2095 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
2096 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
2046 { SRP_OPT_ERR, NULL } 2097 { SRP_OPT_ERR, NULL }
2047}; 2098};
2048 2099
@@ -2198,6 +2249,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2198 target->sg_tablesize = token; 2249 target->sg_tablesize = token;
2199 break; 2250 break;
2200 2251
2252 case SRP_OPT_COMP_VECTOR:
2253 if (match_int(args, &token) || token < 0) {
2254 pr_warn("bad comp_vector parameter '%s'\n", p);
2255 goto out;
2256 }
2257 target->comp_vector = token;
2258 break;
2259
2201 default: 2260 default:
2202 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 2261 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
2203 p); 2262 p);
@@ -2257,6 +2316,16 @@ static ssize_t srp_create_target(struct device *dev,
2257 if (ret) 2316 if (ret)
2258 goto err; 2317 goto err;
2259 2318
2319 if (!srp_conn_unique(target->srp_host, target)) {
2320 shost_printk(KERN_INFO, target->scsi_host,
2321 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
2322 be64_to_cpu(target->id_ext),
2323 be64_to_cpu(target->ioc_guid),
2324 be64_to_cpu(target->initiator_ext));
2325 ret = -EEXIST;
2326 goto err;
2327 }
2328
2260 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && 2329 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
2261 target->cmd_sg_cnt < target->sg_tablesize) { 2330 target->cmd_sg_cnt < target->sg_tablesize) {
2262 pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 2331 pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
@@ -2507,6 +2576,8 @@ static void srp_remove_one(struct ib_device *device)
2507 struct srp_target_port *target; 2576 struct srp_target_port *target;
2508 2577
2509 srp_dev = ib_get_client_data(device, &srp_client); 2578 srp_dev = ib_get_client_data(device, &srp_client);
2579 if (!srp_dev)
2580 return;
2510 2581
2511 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { 2582 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
2512 device_unregister(&host->dev); 2583 device_unregister(&host->dev);
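
The ib_srp.c changes above add a comp_vector= option to the target-creation string and pass it straight into both ib_create_cq() calls, so a target's send and receive completions can be steered to a specific completion vector (and hence interrupt/CPU). A hedged sketch of that verbs call, using the six-argument signature of this kernel generation (later kernels moved the trailing cqe/comp_vector pair into struct ib_cq_init_attr); the helper name and the bounds check are illustrative, not part of the patch:

	#include <linux/err.h>
	#include <rdma/ib_verbs.h>

	/*
	 * Create a CQ whose completion events are delivered on 'comp_vector'.
	 * Valid vectors are 0 .. device->num_comp_vectors - 1.
	 */
	static struct ib_cq *create_cq_on_vector(struct ib_device *device,
						 ib_comp_handler comp_handler,
						 void *cq_context, int cqe,
						 int comp_vector)
	{
		if (comp_vector < 0 || comp_vector >= device->num_comp_vectors)
			return ERR_PTR(-EINVAL);

		return ib_create_cq(device, comp_handler, NULL /* event handler */,
				    cq_context, cqe, comp_vector);
	}
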
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 66fbedda4571..e641088c14dc 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -156,6 +156,7 @@ struct srp_target_port {
156 char target_name[32]; 156 char target_name[32];
157 unsigned int scsi_id; 157 unsigned int scsi_id;
158 unsigned int sg_tablesize; 158 unsigned int sg_tablesize;
159 int comp_vector;
159 160
160 struct ib_sa_path_rec path; 161 struct ib_sa_path_rec path;
161 __be16 orig_dgid[8]; 162 __be16 orig_dgid[8];