author     Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-02-14 00:16:39 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-02-14 00:16:39 -0500
commit     93bbad8fe13a25dcf7f3bc628a71d1a7642ae61b (patch)
tree       0f8fa56347979a3d2dae89f905d134d191d5c88a /drivers
parent     9468482bd4c3b89abe04a770848d5eaa1ea830b0 (diff)
parent     b2875d4c39759a732203db32f245cc6d8bbdd7cf (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
  IB/mthca: Always fill MTTs from CPU
  IB/mthca: Merge MR and FMR space on 64-bit systems
  IB/mthca: Fix access to MTT and MPT tables on non-cache-coherent CPUs
  IB/mthca: Give reserved MTTs a separate cache line
  IB/mthca: Fix reserved MTTs calculation on mem-free HCAs
  RDMA/cxgb3: Add driver for Chelsio T3 RNIC
  IB: Remove redundant "_wq" from workqueue names
  RDMA/cma: Increment port number after close to avoid re-use
  IB/ehca: Fix memleak on module unloading
  IB/mthca: Work around gcc bug on sparc64
  IPoIB: Connected mode experimental support
  IB/core: Use ARRAY_SIZE macro for mandatory_table
  IB/mthca: Use correct structure size in call to memset()
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/infiniband/Kconfig                       |    1
-rw-r--r--  drivers/infiniband/Makefile                      |    1
-rw-r--r--  drivers/infiniband/core/addr.c                   |    2
-rw-r--r--  drivers/infiniband/core/cma.c                    |   68
-rw-r--r--  drivers/infiniband/core/device.c                 |    3
-rw-r--r--  drivers/infiniband/hw/cxgb3/Kconfig              |   27
-rw-r--r--  drivers/infiniband/hw/cxgb3/Makefile             |   12
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_dbg.c           |  207
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.c           | 1280
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_hal.h           |  201
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.c      |  331
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_resource.h      |   70
-rw-r--r--  drivers/infiniband/hw/cxgb3/cxio_wr.h            |  685
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.c               |  189
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch.h               |  177
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.c            | 2081
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cm.h            |  223
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_cq.c            |  225
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_ev.c            |  231
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_mem.c           |  172
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.c      | 1203
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_provider.h      |  367
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_qp.c            | 1007
-rw-r--r--  drivers/infiniband/hw/cxgb3/iwch_user.h          |   67
-rw-r--r--  drivers/infiniband/hw/cxgb3/tcb.h                |  632
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c            |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c          |    6
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_dev.h          |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c         |   40
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.c      |  127
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h      |    9
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c           |  110
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_profile.c      |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c     |   14
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.h     |    1
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_qp.c           |    2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_srq.c          |    9
-rw-r--r--  drivers/infiniband/ulp/ipoib/Kconfig             |   16
-rw-r--r--  drivers/infiniband/ulp/ipoib/Makefile            |    1
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib.h             |  215
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_cm.c          | 1237
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_ib.c          |   29
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_main.c        |   63
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_multicast.c   |    4
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_verbs.c       |   40
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_vlan.c        |    2
46 files changed, 11279 insertions, 114 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 9edfacee7d84..66b36de9fa6f 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -38,6 +38,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
38source "drivers/infiniband/hw/ipath/Kconfig" 38source "drivers/infiniband/hw/ipath/Kconfig"
39source "drivers/infiniband/hw/ehca/Kconfig" 39source "drivers/infiniband/hw/ehca/Kconfig"
40source "drivers/infiniband/hw/amso1100/Kconfig" 40source "drivers/infiniband/hw/amso1100/Kconfig"
41source "drivers/infiniband/hw/cxgb3/Kconfig"
41 42
42source "drivers/infiniband/ulp/ipoib/Kconfig" 43source "drivers/infiniband/ulp/ipoib/Kconfig"
43 44
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index 2b5d1098ef45..da2066c4f22c 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
 obj-$(CONFIG_INFINIBAND_IPATH)    += hw/ipath/
 obj-$(CONFIG_INFINIBAND_EHCA)     += hw/ehca/
 obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
+obj-$(CONFIG_INFINIBAND_CXGB3)    += hw/cxgb3/
 obj-$(CONFIG_INFINIBAND_IPOIB)    += ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)      += ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)     += ulp/iser/
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index d2bb5a9a303f..a91001c59b69 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -373,7 +373,7 @@ static struct notifier_block nb = {
 
 static int addr_init(void)
 {
-        addr_wq = create_singlethread_workqueue("ib_addr_wq");
+        addr_wq = create_singlethread_workqueue("ib_addr");
         if (!addr_wq)
                 return -ENOMEM;
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 9e0ab048c878..db88e609bf42 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -71,6 +71,7 @@ static struct workqueue_struct *cma_wq;
 static DEFINE_IDR(sdp_ps);
 static DEFINE_IDR(tcp_ps);
 static DEFINE_IDR(udp_ps);
+static int next_port;
 
 struct cma_device {
         struct list_head list;
@@ -1722,33 +1723,74 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,
                           unsigned short snum)
 {
         struct rdma_bind_list *bind_list;
-        int port, start, ret;
+        int port, ret;
 
         bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
         if (!bind_list)
                 return -ENOMEM;
 
-        start = snum ? snum : sysctl_local_port_range[0];
+        do {
+                ret = idr_get_new_above(ps, bind_list, snum, &port);
+        } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
+
+        if (ret)
+                goto err1;
+
+        if (port != snum) {
+                ret = -EADDRNOTAVAIL;
+                goto err2;
+        }
+
+        bind_list->ps = ps;
+        bind_list->port = (unsigned short) port;
+        cma_bind_port(bind_list, id_priv);
+        return 0;
+err2:
+        idr_remove(ps, port);
+err1:
+        kfree(bind_list);
+        return ret;
+}
 
+static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)
+{
+        struct rdma_bind_list *bind_list;
+        int port, ret;
+
+        bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);
+        if (!bind_list)
+                return -ENOMEM;
+
+retry:
         do {
-                ret = idr_get_new_above(ps, bind_list, start, &port);
+                ret = idr_get_new_above(ps, bind_list, next_port, &port);
         } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL));
 
         if (ret)
-                goto err;
+                goto err1;
 
-        if ((snum && port != snum) ||
-            (!snum && port > sysctl_local_port_range[1])) {
-                idr_remove(ps, port);
+        if (port > sysctl_local_port_range[1]) {
+                if (next_port != sysctl_local_port_range[0]) {
+                        idr_remove(ps, port);
+                        next_port = sysctl_local_port_range[0];
+                        goto retry;
+                }
                 ret = -EADDRNOTAVAIL;
-                goto err;
+                goto err2;
         }
 
+        if (port == sysctl_local_port_range[1])
+                next_port = sysctl_local_port_range[0];
+        else
+                next_port = port + 1;
+
         bind_list->ps = ps;
         bind_list->port = (unsigned short) port;
         cma_bind_port(bind_list, id_priv);
         return 0;
-err:
+err2:
+        idr_remove(ps, port);
+err1:
         kfree(bind_list);
         return ret;
 }
@@ -1811,7 +1853,7 @@ static int cma_get_port(struct rdma_id_private *id_priv)
 
         mutex_lock(&lock);
         if (cma_any_port(&id_priv->id.route.addr.src_addr))
-                ret = cma_alloc_port(ps, id_priv, 0);
+                ret = cma_alloc_any_port(ps, id_priv);
         else
                 ret = cma_use_port(ps, id_priv);
         mutex_unlock(&lock);
@@ -2448,7 +2490,11 @@ static int cma_init(void)
 {
         int ret;
 
-        cma_wq = create_singlethread_workqueue("rdma_cm_wq");
+        get_random_bytes(&next_port, sizeof next_port);
+        next_port = (next_port % (sysctl_local_port_range[1] -
+                                  sysctl_local_port_range[0])) +
+                    sysctl_local_port_range[0];
+        cma_wq = create_singlethread_workqueue("rdma_cm");
         if (!cma_wq)
                 return -ENOMEM;
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 63d2a39fb82c..7fabb425b033 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -36,6 +36,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
@@ -93,7 +94,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
         };
         int i;
 
-        for (i = 0; i < sizeof mandatory_table / sizeof mandatory_table[0]; ++i) {
+        for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
                 if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
                         printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
                                device->name, mandatory_table[i].name);
diff --git a/drivers/infiniband/hw/cxgb3/Kconfig b/drivers/infiniband/hw/cxgb3/Kconfig
new file mode 100644
index 000000000000..77977f55dca3
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Kconfig
@@ -0,0 +1,27 @@
1config INFINIBAND_CXGB3
2 tristate "Chelsio RDMA Driver"
3 depends on CHELSIO_T3 && INFINIBAND && INET
4 select GENERIC_ALLOCATOR
5 ---help---
6 This is an iWARP/RDMA driver for the Chelsio T3 1GbE and
7 10GbE adapters.
8
9 For general information about Chelsio and our products, visit
10 our website at <http://www.chelsio.com>.
11
12 For customer support, please visit our customer support page at
13 <http://www.chelsio.com/support.htm>.
14
15 Please send feedback to <linux-bugs@chelsio.com>.
16
17 To compile this driver as a module, choose M here: the module
18 will be called iw_cxgb3.
19
20config INFINIBAND_CXGB3_DEBUG
21 bool "Verbose debugging output"
22 depends on INFINIBAND_CXGB3
23 default n
24 ---help---
25 This option causes the Chelsio RDMA driver to produce copious
26 amounts of debug messages. Select this if you are developing
27 the driver or trying to diagnose a problem.
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
new file mode 100644
index 000000000000..0e110f32f128
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -0,0 +1,12 @@
1EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/cxgb3 \
2 -I$(TOPDIR)/drivers/infiniband/hw/cxgb3/core
3
4obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
5
6iw_cxgb3-y := iwch_cm.o iwch_ev.o iwch_cq.o iwch_qp.o iwch_mem.o \
7 iwch_provider.o iwch.o cxio_hal.o cxio_resource.o
8
9ifdef CONFIG_INFINIBAND_CXGB3_DEBUG
10EXTRA_CFLAGS += -DDEBUG
11iw_cxgb3-y += cxio_dbg.o
12endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
new file mode 100644
index 000000000000..5a7306f5efae
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -0,0 +1,207 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifdef DEBUG
34#include <linux/types.h>
35#include "common.h"
36#include "cxgb3_ioctl.h"
37#include "cxio_hal.h"
38#include "cxio_wr.h"
39
40void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
41{
42 struct ch_mem_range *m;
43 u64 *data;
44 int rc;
45 int size = 32;
46
47 m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
48 if (!m) {
49 PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
50 return;
51 }
52 m->mem_id = MEM_PMRX;
53 m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
54 m->len = size;
55 PDBG("%s TPT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
56 rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
57 if (rc) {
58 PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
59 kfree(m);
60 return;
61 }
62
63 data = (u64 *)m->buf;
64 while (size > 0) {
65 PDBG("TPT %08x: %016llx\n", m->addr, (unsigned long long) *data);
66 size -= 8;
67 data++;
68 m->addr += 8;
69 }
70 kfree(m);
71}
72
73void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
74{
75 struct ch_mem_range *m;
76 u64 *data;
77 int rc;
78 int size, npages;
79
80 shift += 12;
81 npages = (len + (1ULL << shift) - 1) >> shift;
82 size = npages * sizeof(u64);
83
84 m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
85 if (!m) {
86 PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
87 return;
88 }
89 m->mem_id = MEM_PMRX;
90 m->addr = pbl_addr;
91 m->len = size;
92 PDBG("%s PBL addr 0x%x len %d depth %d\n",
93 __FUNCTION__, m->addr, m->len, npages);
94 rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
95 if (rc) {
96 PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
97 kfree(m);
98 return;
99 }
100
101 data = (u64 *)m->buf;
102 while (size > 0) {
103 PDBG("PBL %08x: %016llx\n", m->addr, (unsigned long long) *data);
104 size -= 8;
105 data++;
106 m->addr += 8;
107 }
108 kfree(m);
109}
110
111void cxio_dump_wqe(union t3_wr *wqe)
112{
113 __be64 *data = (__be64 *)wqe;
114 uint size = (uint)(be64_to_cpu(*data) & 0xff);
115
116 if (size == 0)
117 size = 8;
118 while (size > 0) {
119 PDBG("WQE %p: %016llx\n", data,
120 (unsigned long long) be64_to_cpu(*data));
121 size--;
122 data++;
123 }
124}
125
126void cxio_dump_wce(struct t3_cqe *wce)
127{
128 __be64 *data = (__be64 *)wce;
129 int size = sizeof(*wce);
130
131 while (size > 0) {
132 PDBG("WCE %p: %016llx\n", data,
133 (unsigned long long) be64_to_cpu(*data));
134 size -= 8;
135 data++;
136 }
137}
138
139void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
140{
141 struct ch_mem_range *m;
142 int size = nents * 64;
143 u64 *data;
144 int rc;
145
146 m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
147 if (!m) {
148 PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
149 return;
150 }
151 m->mem_id = MEM_PMRX;
152 m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
153 m->len = size;
154 PDBG("%s RQT addr 0x%x len %d\n", __FUNCTION__, m->addr, m->len);
155 rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
156 if (rc) {
157 PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
158 kfree(m);
159 return;
160 }
161
162 data = (u64 *)m->buf;
163 while (size > 0) {
164 PDBG("RQT %08x: %016llx\n", m->addr, (unsigned long long) *data);
165 size -= 8;
166 data++;
167 m->addr += 8;
168 }
169 kfree(m);
170}
171
172void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
173{
174 struct ch_mem_range *m;
175 int size = TCB_SIZE;
176 u32 *data;
177 int rc;
178
179 m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
180 if (!m) {
181 PDBG("%s couldn't allocate memory.\n", __FUNCTION__);
182 return;
183 }
184 m->mem_id = MEM_CM;
185 m->addr = hwtid * size;
186 m->len = size;
187 PDBG("%s TCB %d len %d\n", __FUNCTION__, m->addr, m->len);
188 rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m);
189 if (rc) {
190 PDBG("%s toectl returned error %d\n", __FUNCTION__, rc);
191 kfree(m);
192 return;
193 }
194
195 data = (u32 *)m->buf;
196 while (size > 0) {
197 printk("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n",
198 m->addr,
199 *(data+2), *(data+3), *(data),*(data+1),
200 *(data+6), *(data+7), *(data+4), *(data+5));
201 size -= 32;
202 data += 8;
203 m->addr += 32;
204 }
205 kfree(m);
206}
207#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
new file mode 100644
index 000000000000..82fa72041989
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -0,0 +1,1280 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <asm/delay.h>
34
35#include <linux/mutex.h>
36#include <linux/netdevice.h>
37#include <linux/sched.h>
38#include <linux/spinlock.h>
39#include <linux/pci.h>
40
41#include "cxio_resource.h"
42#include "cxio_hal.h"
43#include "cxgb3_offload.h"
44#include "sge_defs.h"
45
46static LIST_HEAD(rdev_list);
47static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL;
48
49static inline struct cxio_rdev *cxio_hal_find_rdev_by_name(char *dev_name)
50{
51 struct cxio_rdev *rdev;
52
53 list_for_each_entry(rdev, &rdev_list, entry)
54 if (!strcmp(rdev->dev_name, dev_name))
55 return rdev;
56 return NULL;
57}
58
59static inline struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev
60 *tdev)
61{
62 struct cxio_rdev *rdev;
63
64 list_for_each_entry(rdev, &rdev_list, entry)
65 if (rdev->t3cdev_p == tdev)
66 return rdev;
67 return NULL;
68}
69
70int cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq,
71 enum t3_cq_opcode op, u32 credit)
72{
73 int ret;
74 struct t3_cqe *cqe;
75 u32 rptr;
76
77 struct rdma_cq_op setup;
78 setup.id = cq->cqid;
79 setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0;
80 setup.op = op;
81 ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup);
82
83 if ((ret < 0) || (op == CQ_CREDIT_UPDATE))
84 return ret;
85
86 /*
87 * If the rearm returned an index other than our current index,
88 * then there might be CQE's in flight (being DMA'd). We must wait
89 * here for them to complete or the consumer can miss a notification.
90 */
91 if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) {
92 int i=0;
93
94 rptr = cq->rptr;
95
96 /*
97 * Keep the generation correct by bumping rptr until it
98 * matches the index returned by the rearm - 1.
99 */
100 while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret)
101 rptr++;
102
103 /*
104 * Now rptr is the index for the (last) cqe that was
105 * in-flight at the time the HW rearmed the CQ. We
106 * spin until that CQE is valid.
107 */
108 cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2);
109 while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
110 udelay(1);
111 if (i++ > 1000000) {
112 BUG_ON(1);
113 printk(KERN_ERR "%s: stalled rnic\n",
114 rdev_p->dev_name);
115 return -EIO;
116 }
117 }
118 }
119 return 0;
120}
121
122static inline int cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid)
123{
124 struct rdma_cq_setup setup;
125 setup.id = cqid;
126 setup.base_addr = 0; /* NULL address */
 127 setup.size = 0; /* disable the CQ */
128 setup.credits = 0;
129 setup.credit_thres = 0;
130 setup.ovfl_mode = 0;
131 return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
132}
133
134int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
135{
136 u64 sge_cmd;
137 struct t3_modify_qp_wr *wqe;
138 struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
139 if (!skb) {
140 PDBG("%s alloc_skb failed\n", __FUNCTION__);
141 return -ENOMEM;
142 }
143 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
144 memset(wqe, 0, sizeof(*wqe));
145 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
146 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
147 sge_cmd = qpid << 8 | 3;
148 wqe->sge_cmd = cpu_to_be64(sge_cmd);
149 skb->priority = CPL_PRIORITY_CONTROL;
150 return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
151}
152
153int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
154{
155 struct rdma_cq_setup setup;
156 int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
157
158 cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
159 if (!cq->cqid)
160 return -ENOMEM;
161 cq->sw_queue = kzalloc(size, GFP_KERNEL);
162 if (!cq->sw_queue)
163 return -ENOMEM;
164 cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
165 (1UL << (cq->size_log2)) *
166 sizeof(struct t3_cqe),
167 &(cq->dma_addr), GFP_KERNEL);
168 if (!cq->queue) {
169 kfree(cq->sw_queue);
170 return -ENOMEM;
171 }
172 pci_unmap_addr_set(cq, mapping, cq->dma_addr);
173 memset(cq->queue, 0, size);
174 setup.id = cq->cqid;
175 setup.base_addr = (u64) (cq->dma_addr);
176 setup.size = 1UL << cq->size_log2;
177 setup.credits = 65535;
178 setup.credit_thres = 1;
179 if (rdev_p->t3cdev_p->type == T3B)
180 setup.ovfl_mode = 0;
181 else
182 setup.ovfl_mode = 1;
183 return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
184}
185
186int cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
187{
188 struct rdma_cq_setup setup;
189 setup.id = cq->cqid;
190 setup.base_addr = (u64) (cq->dma_addr);
191 setup.size = 1UL << cq->size_log2;
192 setup.credits = setup.size;
193 setup.credit_thres = setup.size; /* TBD: overflow recovery */
194 setup.ovfl_mode = 1;
195 return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
196}
197
198static u32 get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
199{
200 struct cxio_qpid_list *entry;
201 u32 qpid;
202 int i;
203
204 mutex_lock(&uctx->lock);
205 if (!list_empty(&uctx->qpids)) {
206 entry = list_entry(uctx->qpids.next, struct cxio_qpid_list,
207 entry);
208 list_del(&entry->entry);
209 qpid = entry->qpid;
210 kfree(entry);
211 } else {
212 qpid = cxio_hal_get_qpid(rdev_p->rscp);
213 if (!qpid)
214 goto out;
215 for (i = qpid+1; i & rdev_p->qpmask; i++) {
216 entry = kmalloc(sizeof *entry, GFP_KERNEL);
217 if (!entry)
218 break;
219 entry->qpid = i;
220 list_add_tail(&entry->entry, &uctx->qpids);
221 }
222 }
223out:
224 mutex_unlock(&uctx->lock);
225 PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
226 return qpid;
227}
228
229static void put_qpid(struct cxio_rdev *rdev_p, u32 qpid,
230 struct cxio_ucontext *uctx)
231{
232 struct cxio_qpid_list *entry;
233
234 entry = kmalloc(sizeof *entry, GFP_KERNEL);
235 if (!entry)
236 return;
237 PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
238 entry->qpid = qpid;
239 mutex_lock(&uctx->lock);
240 list_add_tail(&entry->entry, &uctx->qpids);
241 mutex_unlock(&uctx->lock);
242}
243
244void cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
245{
246 struct list_head *pos, *nxt;
247 struct cxio_qpid_list *entry;
248
249 mutex_lock(&uctx->lock);
250 list_for_each_safe(pos, nxt, &uctx->qpids) {
251 entry = list_entry(pos, struct cxio_qpid_list, entry);
252 list_del_init(&entry->entry);
253 if (!(entry->qpid & rdev_p->qpmask))
254 cxio_hal_put_qpid(rdev_p->rscp, entry->qpid);
255 kfree(entry);
256 }
257 mutex_unlock(&uctx->lock);
258}
259
260void cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx)
261{
262 INIT_LIST_HEAD(&uctx->qpids);
263 mutex_init(&uctx->lock);
264}
265
266int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
267 struct t3_wq *wq, struct cxio_ucontext *uctx)
268{
269 int depth = 1UL << wq->size_log2;
270 int rqsize = 1UL << wq->rq_size_log2;
271
272 wq->qpid = get_qpid(rdev_p, uctx);
273 if (!wq->qpid)
274 return -ENOMEM;
275
276 wq->rq = kzalloc(depth * sizeof(u64), GFP_KERNEL);
277 if (!wq->rq)
278 goto err1;
279
280 wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize);
281 if (!wq->rq_addr)
282 goto err2;
283
284 wq->sq = kzalloc(depth * sizeof(struct t3_swsq), GFP_KERNEL);
285 if (!wq->sq)
286 goto err3;
287
288 wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
289 depth * sizeof(union t3_wr),
290 &(wq->dma_addr), GFP_KERNEL);
291 if (!wq->queue)
292 goto err4;
293
294 memset(wq->queue, 0, depth * sizeof(union t3_wr));
295 pci_unmap_addr_set(wq, mapping, wq->dma_addr);
296 wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
297 if (!kernel_domain)
298 wq->udb = (u64)rdev_p->rnic_info.udbell_physbase +
299 (wq->qpid << rdev_p->qpshift);
300 PDBG("%s qpid 0x%x doorbell 0x%p udb 0x%llx\n", __FUNCTION__,
301 wq->qpid, wq->doorbell, (unsigned long long) wq->udb);
302 return 0;
303err4:
304 kfree(wq->sq);
305err3:
306 cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize);
307err2:
308 kfree(wq->rq);
309err1:
310 put_qpid(rdev_p, wq->qpid, uctx);
311 return -ENOMEM;
312}
313
314int cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
315{
316 int err;
317 err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid);
318 kfree(cq->sw_queue);
319 dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
320 (1UL << (cq->size_log2))
321 * sizeof(struct t3_cqe), cq->queue,
322 pci_unmap_addr(cq, mapping));
323 cxio_hal_put_cqid(rdev_p->rscp, cq->cqid);
324 return err;
325}
326
327int cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq,
328 struct cxio_ucontext *uctx)
329{
330 dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
331 (1UL << (wq->size_log2))
332 * sizeof(union t3_wr), wq->queue,
333 pci_unmap_addr(wq, mapping));
334 kfree(wq->sq);
335 cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2));
336 kfree(wq->rq);
337 put_qpid(rdev_p, wq->qpid, uctx);
338 return 0;
339}
340
341static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
342{
343 struct t3_cqe cqe;
344
345 PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
346 wq, cq, cq->sw_rptr, cq->sw_wptr);
347 memset(&cqe, 0, sizeof(cqe));
348 cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
349 V_CQE_OPCODE(T3_SEND) |
350 V_CQE_TYPE(0) |
351 V_CQE_SWCQE(1) |
352 V_CQE_QPID(wq->qpid) |
353 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
354 cq->size_log2)));
355 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
356 cq->sw_wptr++;
357}
358
359void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
360{
361 u32 ptr;
362
363 PDBG("%s wq %p cq %p\n", __FUNCTION__, wq, cq);
364
365 /* flush RQ */
366 PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __FUNCTION__,
367 wq->rq_rptr, wq->rq_wptr, count);
368 ptr = wq->rq_rptr + count;
369 while (ptr++ != wq->rq_wptr)
370 insert_recv_cqe(wq, cq);
371}
372
373static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
374 struct t3_swsq *sqp)
375{
376 struct t3_cqe cqe;
377
378 PDBG("%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x\n", __FUNCTION__,
379 wq, cq, cq->sw_rptr, cq->sw_wptr);
380 memset(&cqe, 0, sizeof(cqe));
381 cqe.header = cpu_to_be32(V_CQE_STATUS(TPT_ERR_SWFLUSH) |
382 V_CQE_OPCODE(sqp->opcode) |
383 V_CQE_TYPE(1) |
384 V_CQE_SWCQE(1) |
385 V_CQE_QPID(wq->qpid) |
386 V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr,
387 cq->size_log2)));
388 cqe.u.scqe.wrid_hi = sqp->sq_wptr;
389
390 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe;
391 cq->sw_wptr++;
392}
393
394void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
395{
396 __u32 ptr;
397 struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
398
399 ptr = wq->sq_rptr + count;
400 sqp += count;
401 while (ptr != wq->sq_wptr) {
402 insert_sq_cqe(wq, cq, sqp);
403 sqp++;
404 ptr++;
405 }
406}
407
408/*
409 * Move all CQEs from the HWCQ into the SWCQ.
410 */
411void cxio_flush_hw_cq(struct t3_cq *cq)
412{
413 struct t3_cqe *cqe, *swcqe;
414
415 PDBG("%s cq %p cqid 0x%x\n", __FUNCTION__, cq, cq->cqid);
416 cqe = cxio_next_hw_cqe(cq);
417 while (cqe) {
418 PDBG("%s flushing hwcq rptr 0x%x to swcq wptr 0x%x\n",
419 __FUNCTION__, cq->rptr, cq->sw_wptr);
420 swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2);
421 *swcqe = *cqe;
422 swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
423 cq->sw_wptr++;
424 cq->rptr++;
425 cqe = cxio_next_hw_cqe(cq);
426 }
427}
428
429static inline int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq)
430{
431 if (CQE_OPCODE(*cqe) == T3_TERMINATE)
432 return 0;
433
434 if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe))
435 return 0;
436
437 if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe))
438 return 0;
439
440 if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) &&
441 Q_EMPTY(wq->rq_rptr, wq->rq_wptr))
442 return 0;
443
444 return 1;
445}
446
447void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
448{
449 struct t3_cqe *cqe;
450 u32 ptr;
451
452 *count = 0;
453 ptr = cq->sw_rptr;
454 while (!Q_EMPTY(ptr, cq->sw_wptr)) {
455 cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
456 if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
457 (CQE_QPID(*cqe) == wq->qpid))
458 (*count)++;
459 ptr++;
460 }
461 PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
462}
463
464void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
465{
466 struct t3_cqe *cqe;
467 u32 ptr;
468
469 *count = 0;
470 PDBG("%s count zero %d\n", __FUNCTION__, *count);
471 ptr = cq->sw_rptr;
472 while (!Q_EMPTY(ptr, cq->sw_wptr)) {
473 cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
474 if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) &&
475 (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq))
476 (*count)++;
477 ptr++;
478 }
479 PDBG("%s cq %p count %d\n", __FUNCTION__, cq, *count);
480}
481
482static int cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p)
483{
484 struct rdma_cq_setup setup;
485 setup.id = 0;
486 setup.base_addr = 0; /* NULL address */
487 setup.size = 1; /* enable the CQ */
488 setup.credits = 0;
489
490 /* force SGE to redirect to RspQ and interrupt */
491 setup.credit_thres = 0;
492 setup.ovfl_mode = 1;
493 return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup));
494}
495
496static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
497{
498 int err;
499 u64 sge_cmd, ctx0, ctx1;
500 u64 base_addr;
501 struct t3_modify_qp_wr *wqe;
502 struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_KERNEL);
503
504
505 if (!skb) {
506 PDBG("%s alloc_skb failed\n", __FUNCTION__);
507 return -ENOMEM;
508 }
509 err = cxio_hal_init_ctrl_cq(rdev_p);
510 if (err) {
511 PDBG("%s err %d initializing ctrl_cq\n", __FUNCTION__, err);
512 return err;
513 }
514 rdev_p->ctrl_qp.workq = dma_alloc_coherent(
515 &(rdev_p->rnic_info.pdev->dev),
516 (1 << T3_CTRL_QP_SIZE_LOG2) *
517 sizeof(union t3_wr),
518 &(rdev_p->ctrl_qp.dma_addr),
519 GFP_KERNEL);
520 if (!rdev_p->ctrl_qp.workq) {
521 PDBG("%s dma_alloc_coherent failed\n", __FUNCTION__);
522 return -ENOMEM;
523 }
524 pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping,
525 rdev_p->ctrl_qp.dma_addr);
526 rdev_p->ctrl_qp.doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
527 memset(rdev_p->ctrl_qp.workq, 0,
528 (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr));
529
530 mutex_init(&rdev_p->ctrl_qp.lock);
531 init_waitqueue_head(&rdev_p->ctrl_qp.waitq);
532
533 /* update HW Ctrl QP context */
534 base_addr = rdev_p->ctrl_qp.dma_addr;
535 base_addr >>= 12;
536 ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) |
537 V_EC_BASE_LO((u32) base_addr & 0xffff));
538 ctx0 <<= 32;
539 ctx0 |= V_EC_CREDITS(FW_WR_NUM);
540 base_addr >>= 16;
541 ctx1 = (u32) base_addr;
542 base_addr >>= 32;
543 ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) |
544 V_EC_TYPE(0) | V_EC_GEN(1) |
545 V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
546 wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
547 memset(wqe, 0, sizeof(*wqe));
548 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
549 T3_CTL_QP_TID, 7);
550 wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
551 sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
552 wqe->sge_cmd = cpu_to_be64(sge_cmd);
553 wqe->ctx1 = cpu_to_be64(ctx1);
554 wqe->ctx0 = cpu_to_be64(ctx0);
555 PDBG("CtrlQP dma_addr 0x%llx workq %p size %d\n",
556 (unsigned long long) rdev_p->ctrl_qp.dma_addr,
557 rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
558 skb->priority = CPL_PRIORITY_CONTROL;
559 return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
560}
561
562static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
563{
564 dma_free_coherent(&(rdev_p->rnic_info.pdev->dev),
565 (1UL << T3_CTRL_QP_SIZE_LOG2)
566 * sizeof(union t3_wr), rdev_p->ctrl_qp.workq,
567 pci_unmap_addr(&rdev_p->ctrl_qp, mapping));
568 return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID);
569}
570
571/* write len bytes of data into addr (32B aligned address)
 572 * If data is NULL, clear len bytes of memory to zero.
 573 * caller acquires the ctrl_qp lock before the call
574 */
575static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
576 u32 len, void *data, int completion)
577{
578 u32 i, nr_wqe, copy_len;
579 u8 *copy_data;
 580 u8 wr_len, utx_len; /* length in 8 byte flit */
581 enum t3_wr_flags flag;
582 __be64 *wqe;
583 u64 utx_cmd;
584 addr &= 0x7FFFFFF;
585 nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */
586 PDBG("%s wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x\n",
587 __FUNCTION__, rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len,
588 nr_wqe, data, addr);
589 utx_len = 3; /* in 32B unit */
590 for (i = 0; i < nr_wqe; i++) {
591 if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr,
592 T3_CTRL_QP_SIZE_LOG2)) {
 593 PDBG("%s ctrl_qp full wptr 0x%0x rptr 0x%0x, "
594 "wait for more space i %d\n", __FUNCTION__,
595 rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i);
596 if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
597 !Q_FULL(rdev_p->ctrl_qp.rptr,
598 rdev_p->ctrl_qp.wptr,
599 T3_CTRL_QP_SIZE_LOG2))) {
600 PDBG("%s ctrl_qp workq interrupted\n",
601 __FUNCTION__);
602 return -ERESTARTSYS;
603 }
604 PDBG("%s ctrl_qp wakeup, continue posting work request "
605 "i %d\n", __FUNCTION__, i);
606 }
607 wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
608 (1 << T3_CTRL_QP_SIZE_LOG2)));
609 flag = 0;
610 if (i == (nr_wqe - 1)) {
611 /* last WQE */
612 flag = completion ? T3_COMPLETION_FLAG : 0;
613 if (len % 32)
614 utx_len = len / 32 + 1;
615 else
616 utx_len = len / 32;
617 }
618
619 /*
620 * Force a CQE to return the credit to the workq in case
621 * we posted more than half the max QP size of WRs
622 */
623 if ((i != 0) &&
624 (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) {
625 flag = T3_COMPLETION_FLAG;
626 PDBG("%s force completion at i %d\n", __FUNCTION__, i);
627 }
628
629 /* build the utx mem command */
630 wqe += (sizeof(struct t3_bypass_wr) >> 3);
631 utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3);
632 utx_cmd <<= 32;
633 utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1);
634 *wqe = cpu_to_be64(utx_cmd);
635 wqe++;
636 copy_data = (u8 *) data + i * 96;
637 copy_len = len > 96 ? 96 : len;
638
639 /* clear memory content if data is NULL */
640 if (data)
641 memcpy(wqe, copy_data, copy_len);
642 else
643 memset(wqe, 0, copy_len);
644 if (copy_len % 32)
645 memset(((u8 *) wqe) + copy_len, 0,
646 32 - (copy_len % 32));
647 wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 +
648 (utx_len << 2);
649 wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr %
650 (1 << T3_CTRL_QP_SIZE_LOG2)));
651
652 /* wptr in the WRID[31:0] */
653 ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr;
654
655 /*
656 * This must be the last write with a memory barrier
657 * for the genbit
658 */
659 build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag,
660 Q_GENBIT(rdev_p->ctrl_qp.wptr,
661 T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID,
662 wr_len);
663 if (flag == T3_COMPLETION_FLAG)
664 ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID);
665 len -= 96;
666 rdev_p->ctrl_qp.wptr++;
667 }
668 return 0;
669}
670
671/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
672 * OUT: stag index, actual pbl_size, pbl_addr allocated.
673 * TBD: shared memory region support
674 */
675static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
676 u32 *stag, u8 stag_state, u32 pdid,
677 enum tpt_mem_type type, enum tpt_mem_perm perm,
678 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
679 u32 *pbl_size, u32 *pbl_addr)
680{
681 int err;
682 struct tpt_entry tpt;
683 u32 stag_idx;
684 u32 wptr;
685 int rereg = (*stag != T3_STAG_UNSET);
686
687 stag_state = stag_state > 0;
688 stag_idx = (*stag) >> 8;
689
690 if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) {
691 stag_idx = cxio_hal_get_stag(rdev_p->rscp);
692 if (!stag_idx)
693 return -ENOMEM;
694 *stag = (stag_idx << 8) | ((*stag) & 0xFF);
695 }
696 PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
697 __FUNCTION__, stag_state, type, pdid, stag_idx);
698
699 if (reset_tpt_entry)
700 cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
701 else if (!rereg) {
702 *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
703 if (!*pbl_addr) {
704 return -ENOMEM;
705 }
706 }
707
708 mutex_lock(&rdev_p->ctrl_qp.lock);
709
710 /* write PBL first if any - update pbl only if pbl list exist */
711 if (pbl) {
712
713 PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
714 __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base,
715 *pbl_size);
716 err = cxio_hal_ctrl_qp_write_mem(rdev_p,
717 (*pbl_addr >> 5),
718 (*pbl_size << 3), pbl, 0);
719 if (err)
720 goto ret;
721 }
722
723 /* write TPT entry */
724 if (reset_tpt_entry)
725 memset(&tpt, 0, sizeof(tpt));
726 else {
727 tpt.valid_stag_pdid = cpu_to_be32(F_TPT_VALID |
728 V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) |
729 V_TPT_STAG_STATE(stag_state) |
730 V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid));
731 BUG_ON(page_size >= 28);
732 tpt.flags_pagesize_qpid = cpu_to_be32(V_TPT_PERM(perm) |
733 F_TPT_MW_BIND_ENABLE |
734 V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
735 V_TPT_PAGE_SIZE(page_size));
736 tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
737 cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
738 tpt.len = cpu_to_be32(len);
739 tpt.va_hi = cpu_to_be32((u32) (to >> 32));
740 tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
741 tpt.rsvd_bind_cnt_or_pstag = 0;
742 tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
743 cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
744 }
745 err = cxio_hal_ctrl_qp_write_mem(rdev_p,
746 stag_idx +
747 (rdev_p->rnic_info.tpt_base >> 5),
748 sizeof(tpt), &tpt, 1);
749
750 /* release the stag index to free pool */
751 if (reset_tpt_entry)
752 cxio_hal_put_stag(rdev_p->rscp, stag_idx);
753ret:
754 wptr = rdev_p->ctrl_qp.wptr;
755 mutex_unlock(&rdev_p->ctrl_qp.lock);
756 if (!err)
757 if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
758 SEQ32_GE(rdev_p->ctrl_qp.rptr,
759 wptr)))
760 return -ERESTARTSYS;
761 return err;
762}
763
764/* IN : stag key, pdid, pbl_size
 765 * Out: stag index, actual pbl_size, and pbl_addr allocated.
766 */
767int cxio_allocate_stag(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid,
768 enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr)
769{
770 *stag = T3_STAG_UNSET;
771 return (__cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_NON_SHARED_MR,
772 perm, 0, 0ULL, 0, 0, NULL, pbl_size, pbl_addr));
773}
774
775int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
776 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
777 u8 page_size, __be64 *pbl, u32 *pbl_size,
778 u32 *pbl_addr)
779{
780 *stag = T3_STAG_UNSET;
781 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
782 zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
783}
784
785int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
786 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
787 u8 page_size, __be64 *pbl, u32 *pbl_size,
788 u32 *pbl_addr)
789{
790 return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
791 zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
792}
793
794int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
795 u32 pbl_addr)
796{
797 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
798 &pbl_size, &pbl_addr);
799}
800
801int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
802{
803 u32 pbl_size = 0;
804 *stag = T3_STAG_UNSET;
805 return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
806 NULL, &pbl_size, NULL);
807}
808
809int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
810{
811 return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
812 NULL, NULL);
813}
814
815int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
816{
817 struct t3_rdma_init_wr *wqe;
818 struct sk_buff *skb = alloc_skb(sizeof(*wqe), GFP_ATOMIC);
819 if (!skb)
820 return -ENOMEM;
821 PDBG("%s rdev_p %p\n", __FUNCTION__, rdev_p);
822 wqe = (struct t3_rdma_init_wr *) __skb_put(skb, sizeof(*wqe));
823 wqe->wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_INIT));
824 wqe->wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(attr->tid) |
825 V_FW_RIWR_LEN(sizeof(*wqe) >> 3));
826 wqe->wrid.id1 = 0;
827 wqe->qpid = cpu_to_be32(attr->qpid);
828 wqe->pdid = cpu_to_be32(attr->pdid);
829 wqe->scqid = cpu_to_be32(attr->scqid);
830 wqe->rcqid = cpu_to_be32(attr->rcqid);
831 wqe->rq_addr = cpu_to_be32(attr->rq_addr - rdev_p->rnic_info.rqt_base);
832 wqe->rq_size = cpu_to_be32(attr->rq_size);
833 wqe->mpaattrs = attr->mpaattrs;
834 wqe->qpcaps = attr->qpcaps;
835 wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
836 wqe->flags = cpu_to_be32(attr->flags);
837 wqe->ord = cpu_to_be32(attr->ord);
838 wqe->ird = cpu_to_be32(attr->ird);
839 wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
840 wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
841 wqe->rsvd = 0;
842 skb->priority = 0; /* 0=>ToeQ; 1=>CtrlQ */
843 return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
844}
845
846void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
847{
848 cxio_ev_cb = ev_cb;
849}
850
851void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
852{
853 cxio_ev_cb = NULL;
854}
855
856static int cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct sk_buff *skb)
857{
858 static int cnt;
859 struct cxio_rdev *rdev_p = NULL;
860 struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
861 PDBG("%d: %s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x"
862 " se %0x notify %0x cqbranch %0x creditth %0x\n",
863 cnt, __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg),
864 RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg),
865 RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg),
866 RSPQ_CREDIT_THRESH(rsp_msg));
867 PDBG("CQE: QPID 0x%0x genbit %0x type 0x%0x status 0x%0x opcode %d "
868 "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
869 CQE_QPID(rsp_msg->cqe), CQE_GENBIT(rsp_msg->cqe),
870 CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
871 CQE_OPCODE(rsp_msg->cqe), CQE_LEN(rsp_msg->cqe),
872 CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
873 rdev_p = (struct cxio_rdev *)t3cdev_p->ulp;
874 if (!rdev_p) {
875 PDBG("%s called by t3cdev %p with null ulp\n", __FUNCTION__,
876 t3cdev_p);
877 return 0;
878 }
879 if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) {
880 rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1;
881 wake_up_interruptible(&rdev_p->ctrl_qp.waitq);
882 dev_kfree_skb_irq(skb);
883 } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8)
884 dev_kfree_skb_irq(skb);
885 else if (cxio_ev_cb)
886 (*cxio_ev_cb) (rdev_p, skb);
887 else
888 dev_kfree_skb_irq(skb);
889 cnt++;
890 return 0;
891}
892
893/* Caller takes care of locking if needed */
894int cxio_rdev_open(struct cxio_rdev *rdev_p)
895{
896 struct net_device *netdev_p = NULL;
897 int err = 0;
898 if (strlen(rdev_p->dev_name)) {
899 if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) {
900 return -EBUSY;
901 }
902 netdev_p = dev_get_by_name(rdev_p->dev_name);
903 if (!netdev_p) {
904 return -EINVAL;
905 }
906 dev_put(netdev_p);
907 } else if (rdev_p->t3cdev_p) {
908 if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) {
909 return -EBUSY;
910 }
911 netdev_p = rdev_p->t3cdev_p->lldev;
912 strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name,
913 T3_MAX_DEV_NAME_LEN);
914 } else {
915 PDBG("%s t3cdev_p or dev_name must be set\n", __FUNCTION__);
916 return -EINVAL;
917 }
918
919 list_add_tail(&rdev_p->entry, &rdev_list);
920
921 PDBG("%s opening rnic dev %s\n", __FUNCTION__, rdev_p->dev_name);
922 memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp));
923 if (!rdev_p->t3cdev_p)
924 rdev_p->t3cdev_p = T3CDEV(netdev_p);
925 rdev_p->t3cdev_p->ulp = (void *) rdev_p;
926 err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS,
927 &(rdev_p->rnic_info));
928 if (err) {
929 printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
930 __FUNCTION__, rdev_p->t3cdev_p, err);
931 goto err1;
932 }
933 err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS,
934 &(rdev_p->port_info));
935 if (err) {
936 printk(KERN_ERR "%s t3cdev_p(%p)->ctl returned error %d.\n",
937 __FUNCTION__, rdev_p->t3cdev_p, err);
938 goto err1;
939 }
940
941 /*
942 * qpshift is the number of bits to shift the qpid left in order
943 * to get the correct address of the doorbell for that qp.
944 */
945 cxio_init_ucontext(rdev_p, &rdev_p->uctx);
946 rdev_p->qpshift = PAGE_SHIFT -
947 ilog2(65536 >>
948 ilog2(rdev_p->rnic_info.udbell_len >>
949 PAGE_SHIFT));
950 rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT;
951 rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1;
952 PDBG("%s rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d "
953 "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x\n",
954 __FUNCTION__, rdev_p->dev_name, rdev_p->rnic_info.tpt_base,
955 rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p),
956 rdev_p->rnic_info.pbl_base,
957 rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base,
958 rdev_p->rnic_info.rqt_top);
959 PDBG("udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu "
960 "qpnr %d qpmask 0x%x\n",
961 rdev_p->rnic_info.udbell_len,
962 rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr,
963 rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask);
964
965 err = cxio_hal_init_ctrl_qp(rdev_p);
966 if (err) {
967 printk(KERN_ERR "%s error %d initializing ctrl_qp.\n",
968 __FUNCTION__, err);
969 goto err1;
970 }
971 err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0,
972 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ,
973 T3_MAX_NUM_PD);
974 if (err) {
975 printk(KERN_ERR "%s error %d initializing hal resources.\n",
976 __FUNCTION__, err);
977 goto err2;
978 }
979 err = cxio_hal_pblpool_create(rdev_p);
980 if (err) {
981 printk(KERN_ERR "%s error %d initializing pbl mem pool.\n",
982 __FUNCTION__, err);
983 goto err3;
984 }
985 err = cxio_hal_rqtpool_create(rdev_p);
986 if (err) {
987 printk(KERN_ERR "%s error %d initializing rqt mem pool.\n",
988 __FUNCTION__, err);
989 goto err4;
990 }
991 return 0;
992err4:
993 cxio_hal_pblpool_destroy(rdev_p);
994err3:
995 cxio_hal_destroy_resource(rdev_p->rscp);
996err2:
997 cxio_hal_destroy_ctrl_qp(rdev_p);
998err1:
999 list_del(&rdev_p->entry);
1000 return err;
1001}
1002
1003void cxio_rdev_close(struct cxio_rdev *rdev_p)
1004{
1005 if (rdev_p) {
1006 cxio_hal_pblpool_destroy(rdev_p);
1007 cxio_hal_rqtpool_destroy(rdev_p);
1008 list_del(&rdev_p->entry);
1009 rdev_p->t3cdev_p->ulp = NULL;
1010 cxio_hal_destroy_ctrl_qp(rdev_p);
1011 cxio_hal_destroy_resource(rdev_p->rscp);
1012 }
1013}
1014
1015int __init cxio_hal_init(void)
1016{
1017 if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI))
1018 return -ENOMEM;
1019 t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler);
1020 return 0;
1021}
1022
1023void __exit cxio_hal_exit(void)
1024{
1025 struct cxio_rdev *rdev, *tmp;
1026
1027 t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL);
1028 list_for_each_entry_safe(rdev, tmp, &rdev_list, entry)
1029 cxio_rdev_close(rdev);
1030 cxio_hal_destroy_rhdl_resource();
1031}
1032
1033static inline void flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq)
1034{
1035 struct t3_swsq *sqp;
1036 __u32 ptr = wq->sq_rptr;
1037 int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr);
1038
1039 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
1040 while (count--)
1041 if (!sqp->signaled) {
1042 ptr++;
1043 sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2);
1044 } else if (sqp->complete) {
1045
1046 /*
1047 * Insert this completed cqe into the swcq.
1048 */
1049 PDBG("%s moving cqe into swcq sq idx %ld cq idx %ld\n",
1050 __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2),
1051 Q_PTR2IDX(cq->sw_wptr, cq->size_log2));
1052 sqp->cqe.header |= htonl(V_CQE_SWCQE(1));
1053 *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2))
1054 = sqp->cqe;
1055 cq->sw_wptr++;
1056 sqp->signaled = 0;
1057 break;
1058 } else
1059 break;
1060}
1061
1062static inline void create_read_req_cqe(struct t3_wq *wq,
1063 struct t3_cqe *hw_cqe,
1064 struct t3_cqe *read_cqe)
1065{
1066 read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr;
1067 read_cqe->len = wq->oldest_read->read_len;
1068 read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) |
1069 V_CQE_SWCQE(SW_CQE(*hw_cqe)) |
1070 V_CQE_OPCODE(T3_READ_REQ) |
1071 V_CQE_TYPE(1));
1072}
1073
1074/*
1075 * Return a ptr to the next read wr in the SWSQ or NULL.
1076 */
1077static inline void advance_oldest_read(struct t3_wq *wq)
1078{
1079
1080 u32 rptr = wq->oldest_read - wq->sq + 1;
1081 u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2);
1082
1083 while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) {
1084 wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2);
1085
1086 if (wq->oldest_read->opcode == T3_READ_REQ)
1087 return;
1088 rptr++;
1089 }
1090 wq->oldest_read = NULL;
1091}
1092
1093/*
1094 * cxio_poll_cq
1095 *
1096 * Caller must:
1097 * check the validity of the first CQE,
 1098 * supply the wq associated with the qpid.
1099 *
1100 * credit: cq credit to return to sge.
1101 * cqe_flushed: 1 iff the CQE is flushed.
1102 * cqe: copy of the polled CQE.
1103 *
1104 * return value:
1105 * 0 CQE returned,
1106 * -1 CQE skipped, try again.
1107 */
1108int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
1109 u8 *cqe_flushed, u64 *cookie, u32 *credit)
1110{
1111 int ret = 0;
1112 struct t3_cqe *hw_cqe, read_cqe;
1113
1114 *cqe_flushed = 0;
1115 *credit = 0;
1116 hw_cqe = cxio_next_cqe(cq);
1117
1118 PDBG("%s CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x"
1119 " opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x\n",
1120 __FUNCTION__, CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe),
1121 CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe),
1122 CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe),
1123 CQE_WRID_LOW(*hw_cqe));
1124
1125 /*
1126 * skip cqe's not affiliated with a QP.
1127 */
1128 if (wq == NULL) {
1129 ret = -1;
1130 goto skip_cqe;
1131 }
1132
1133 /*
1134 * Gotta tweak READ completions:
1135 * 1) the cqe doesn't contain the sq_wptr from the wr.
1136 * 2) opcode not reflected from the wr.
1137 * 3) read_len not reflected from the wr.
1138 * 4) cq_type is RQ_TYPE not SQ_TYPE.
1139 */
1140 if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
1141
1142 /*
1143 * Don't write to the HWCQ, so create a new read req CQE
1144 * in local memory.
1145 */
1146 create_read_req_cqe(wq, hw_cqe, &read_cqe);
1147 hw_cqe = &read_cqe;
1148 advance_oldest_read(wq);
1149 }
1150
1151 /*
1152 * T3A: Discard TERMINATE CQEs.
1153 */
1154 if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) {
1155 ret = -1;
1156 wq->error = 1;
1157 goto skip_cqe;
1158 }
1159
1160 if (CQE_STATUS(*hw_cqe) || wq->error) {
1161 *cqe_flushed = wq->error;
1162 wq->error = 1;
1163
1164 /*
1165 * T3A inserts errors into the CQE. We cannot return
1166 * these as work completions.
1167 */
1168 /* incoming write failures */
1169 if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE)
1170 && RQ_TYPE(*hw_cqe)) {
1171 ret = -1;
1172 goto skip_cqe;
1173 }
1174 /* incoming read request failures */
1175 if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) {
1176 ret = -1;
1177 goto skip_cqe;
1178 }
1179
1180 /* incoming SEND with no receive posted failures */
1181 if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) &&
1182 Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) {
1183 ret = -1;
1184 goto skip_cqe;
1185 }
1186 goto proc_cqe;
1187 }
1188
1189 /*
1190 * RECV completion.
1191 */
1192 if (RQ_TYPE(*hw_cqe)) {
1193
1194 /*
1195 * HW only validates 4 bits of MSN. So we must validate that
 1196 * the MSN in the SEND is the next expected MSN. If it's not,
1197 * then we complete this with TPT_ERR_MSN and mark the wq in
1198 * error.
1199 */
1200 if (unlikely((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) {
1201 wq->error = 1;
1202 hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN));
1203 goto proc_cqe;
1204 }
1205 goto proc_cqe;
1206 }
1207
1208 /*
1209 * If we get here its a send completion.
1210 *
1211 * Handle out of order completion. These get stuffed
1212 * in the SW SQ. Then the SW SQ is walked to move any
1213 * now in-order completions into the SW CQ. This handles
1214 * 2 cases:
1215 * 1) reaping unsignaled WRs when the first subsequent
1216 * signaled WR is completed.
1217 * 2) out of order read completions.
1218 */
1219 if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) {
1220 struct t3_swsq *sqp;
1221
1222 PDBG("%s out of order completion going in swsq at idx %ld\n",
1223 __FUNCTION__,
1224 Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2));
1225 sqp = wq->sq +
1226 Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2);
1227 sqp->cqe = *hw_cqe;
1228 sqp->complete = 1;
1229 ret = -1;
1230 goto flush_wq;
1231 }
1232
1233proc_cqe:
1234 *cqe = *hw_cqe;
1235
1236 /*
1237 * Reap the associated WR(s) that are freed up with this
1238 * completion.
1239 */
1240 if (SQ_TYPE(*hw_cqe)) {
1241 wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe);
1242 PDBG("%s completing sq idx %ld\n", __FUNCTION__,
1243 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2));
1244 *cookie = (wq->sq +
1245 Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id;
1246 wq->sq_rptr++;
1247 } else {
1248 PDBG("%s completing rq idx %ld\n", __FUNCTION__,
1249 Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
1250 *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2));
1251 wq->rq_rptr++;
1252 }
1253
1254flush_wq:
1255 /*
1256 * Flush any completed cqes that are now in-order.
1257 */
1258 flush_completed_wrs(wq, cq);
1259
1260skip_cqe:
1261 if (SW_CQE(*hw_cqe)) {
1262 PDBG("%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x\n",
1263 __FUNCTION__, cq, cq->cqid, cq->sw_rptr);
1264 ++cq->sw_rptr;
1265 } else {
1266 PDBG("%s cq %p cqid 0x%x skip hw cqe rptr 0x%x\n",
1267 __FUNCTION__, cq, cq->cqid, cq->rptr);
1268 ++cq->rptr;
1269
1270 /*
1271 * T3A: compute credits.
1272 */
1273 if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1)))
1274 || ((cq->rptr - cq->wptr) >= 128)) {
1275 *credit = cq->rptr - cq->wptr;
1276 cq->wptr = cq->rptr;
1277 }
1278 }
1279 return ret;
1280}
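A minimal sketch of how a consumer could drive this poll path, assuming a hypothetical lookup_wq() that maps CQE_QPID() to the owning work queue (or NULL) and a hypothetical handle_completion() upcall; credits accumulated while polling are returned through cxio_hal_cq_op() with CQ_CREDIT_UPDATE, both declared in cxio_hal.h below.

static void example_drain_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
{
	struct t3_cqe cqe, *hw_cqe;
	struct t3_wq *wq;
	u8 flushed;
	u64 cookie;
	u32 credit, total = 0;

	while ((hw_cqe = cxio_next_cqe(cq)) != NULL) {
		/* lookup_wq() is a hypothetical helper mapping a QPID to
		 * its WQ, or NULL for completions on unknown QPs. */
		wq = lookup_wq(CQE_QPID(*hw_cqe));
		if (!cxio_poll_cq(wq, cq, &cqe, &flushed, &cookie, &credit))
			handle_completion(&cqe, cookie, flushed); /* hypothetical */
		total += credit;
	}
	if (total)
		cxio_hal_cq_op(rdev_p, cq, CQ_CREDIT_UPDATE, total);
}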
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
new file mode 100644
index 000000000000..1b97e80b8780
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -0,0 +1,201 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __CXIO_HAL_H__
34#define __CXIO_HAL_H__
35
36#include <linux/list.h>
37#include <linux/mutex.h>
38
39#include "t3_cpl.h"
40#include "t3cdev.h"
41#include "cxgb3_ctl_defs.h"
42#include "cxio_wr.h"
43
44#define T3_CTRL_QP_ID FW_RI_SGEEC_START
45#define T3_CTL_QP_TID FW_RI_TID_START
46#define T3_CTRL_QP_SIZE_LOG2 8
47#define T3_CTRL_CQ_ID 0
48
49/* TBD */
50#define T3_MAX_NUM_RI (1<<15)
51#define T3_MAX_NUM_QP (1<<15)
52#define T3_MAX_NUM_CQ (1<<15)
53#define T3_MAX_NUM_PD (1<<15)
54#define T3_MAX_PBL_SIZE 256
55#define T3_MAX_RQ_SIZE 1024
56#define T3_MAX_NUM_STAG (1<<15)
57
58#define T3_STAG_UNSET 0xffffffff
59
60#define T3_MAX_DEV_NAME_LEN 32
61
62struct cxio_hal_ctrl_qp {
63 u32 wptr;
64 u32 rptr;
65	struct mutex lock;	/* for the wptr, can sleep */
66 wait_queue_head_t waitq;/* wait for RspQ/CQE msg */
67 union t3_wr *workq; /* the work request queue */
68 dma_addr_t dma_addr; /* pci bus address of the workq */
69 DECLARE_PCI_UNMAP_ADDR(mapping)
70 void __iomem *doorbell;
71};
72
73struct cxio_hal_resource {
74 struct kfifo *tpt_fifo;
75 spinlock_t tpt_fifo_lock;
76 struct kfifo *qpid_fifo;
77 spinlock_t qpid_fifo_lock;
78 struct kfifo *cqid_fifo;
79 spinlock_t cqid_fifo_lock;
80 struct kfifo *pdid_fifo;
81 spinlock_t pdid_fifo_lock;
82};
83
84struct cxio_qpid_list {
85 struct list_head entry;
86 u32 qpid;
87};
88
89struct cxio_ucontext {
90 struct list_head qpids;
91 struct mutex lock;
92};
93
94struct cxio_rdev {
95 char dev_name[T3_MAX_DEV_NAME_LEN];
96 struct t3cdev *t3cdev_p;
97 struct rdma_info rnic_info;
98 struct adap_ports port_info;
99 struct cxio_hal_resource *rscp;
100 struct cxio_hal_ctrl_qp ctrl_qp;
101 void *ulp;
102 unsigned long qpshift;
103 u32 qpnr;
104 u32 qpmask;
105 struct cxio_ucontext uctx;
106 struct gen_pool *pbl_pool;
107 struct gen_pool *rqt_pool;
108 struct list_head entry;
109};
110
111static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
112{
113 return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
114}
115
116typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p,
117 struct sk_buff * skb);
118
119#define RSPQ_CQID(rsp) (be32_to_cpu(rsp->cq_ptrid) & 0xffff)
120#define RSPQ_CQPTR(rsp) ((be32_to_cpu(rsp->cq_ptrid) >> 16) & 0xffff)
121#define RSPQ_GENBIT(rsp) ((be32_to_cpu(rsp->flags) >> 16) & 1)
122#define RSPQ_OVERFLOW(rsp) ((be32_to_cpu(rsp->flags) >> 17) & 1)
123#define RSPQ_AN(rsp) ((be32_to_cpu(rsp->flags) >> 18) & 1)
124#define RSPQ_SE(rsp) ((be32_to_cpu(rsp->flags) >> 19) & 1)
125#define RSPQ_NOTIFY(rsp) ((be32_to_cpu(rsp->flags) >> 20) & 1)
126#define RSPQ_CQBRANCH(rsp) ((be32_to_cpu(rsp->flags) >> 21) & 1)
127#define RSPQ_CREDIT_THRESH(rsp) ((be32_to_cpu(rsp->flags) >> 22) & 1)
128
129struct respQ_msg_t {
130 __be32 flags; /* flit 0 */
131 __be32 cq_ptrid;
132 __be64 rsvd; /* flit 1 */
133 struct t3_cqe cqe; /* flits 2-3 */
134};
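The RSPQ_* accessors above unpack the response-queue message the adapter delivers for CQ events. A hedged sketch of how a handler might inspect one (the function name and logging policy are assumptions, not taken from the driver):

static void example_inspect_rspq_msg(struct respQ_msg_t *rsp)
{
	u32 cqid = RSPQ_CQID(rsp);

	if (RSPQ_OVERFLOW(rsp)) {
		printk(KERN_ERR "iw_cxgb3: cq 0x%x overflowed\n", cqid);
		return;
	}
	if (RSPQ_AN(rsp) || RSPQ_SE(rsp))
		printk(KERN_DEBUG "iw_cxgb3: notify for cq 0x%x,"
		       " credit thresh %u\n", cqid, RSPQ_CREDIT_THRESH(rsp));
	/* rsp->cqe (flits 2-3) would then be handed to the CQ consumer. */
}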
135
136enum t3_cq_opcode {
137 CQ_ARM_AN = 0x2,
138 CQ_ARM_SE = 0x6,
139 CQ_FORCE_AN = 0x3,
140 CQ_CREDIT_UPDATE = 0x7
141};
142
143int cxio_rdev_open(struct cxio_rdev *rdev);
144void cxio_rdev_close(struct cxio_rdev *rdev);
145int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
146 enum t3_cq_opcode op, u32 credit);
147int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev, u32 qpid);
148int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
149int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
150int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
151void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
152void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
153int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
154 struct cxio_ucontext *uctx);
155int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
156 struct cxio_ucontext *uctx);
157int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
158int cxio_allocate_stag(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
159 enum tpt_mem_perm perm, u32 * pbl_size, u32 * pbl_addr);
160int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
161 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
162 u8 page_size, __be64 *pbl, u32 *pbl_size,
163 u32 *pbl_addr);
164int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
165 enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
166 u8 page_size, __be64 *pbl, u32 *pbl_size,
167 u32 *pbl_addr);
168int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
169 u32 pbl_addr);
170int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
171int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag);
172int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr);
173void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
174void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb);
175u32 cxio_hal_get_rhdl(void);
176void cxio_hal_put_rhdl(u32 rhdl);
177u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
178void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
179int __init cxio_hal_init(void);
180void __exit cxio_hal_exit(void);
181void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
182void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
183void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
184void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
185void cxio_flush_hw_cq(struct t3_cq *cq);
186int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
187 u8 *cqe_flushed, u64 *cookie, u32 *credit);
188
189#define MOD "iw_cxgb3: "
190#define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
191
192#ifdef DEBUG
193void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag);
194void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint len, u8 shift);
195void cxio_dump_wqe(union t3_wr *wqe);
196void cxio_dump_wce(struct t3_cqe *wce);
197void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents);
198void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid);
199#endif
200
201#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
new file mode 100644
index 000000000000..997aa32cbf07
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -0,0 +1,331 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33/* Crude resource management */
34#include <linux/kernel.h>
35#include <linux/random.h>
36#include <linux/slab.h>
37#include <linux/kfifo.h>
38#include <linux/spinlock.h>
39#include <linux/errno.h>
40#include "cxio_resource.h"
41#include "cxio_hal.h"
42
43static struct kfifo *rhdl_fifo;
44static spinlock_t rhdl_fifo_lock;
45
46#define RANDOM_SIZE 16
47
48static int __cxio_init_resource_fifo(struct kfifo **fifo,
49 spinlock_t *fifo_lock,
50 u32 nr, u32 skip_low,
51 u32 skip_high,
52 int random)
53{
54 u32 i, j, entry = 0, idx;
55 u32 random_bytes;
56 u32 rarray[16];
57 spin_lock_init(fifo_lock);
58
59 *fifo = kfifo_alloc(nr * sizeof(u32), GFP_KERNEL, fifo_lock);
60 if (IS_ERR(*fifo))
61 return -ENOMEM;
62
63 for (i = 0; i < skip_low + skip_high; i++)
64 __kfifo_put(*fifo, (unsigned char *) &entry, sizeof(u32));
65 if (random) {
66 j = 0;
67 random_bytes = random32();
68 for (i = 0; i < RANDOM_SIZE; i++)
69 rarray[i] = i + skip_low;
70 for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
71 if (j >= RANDOM_SIZE) {
72 j = 0;
73 random_bytes = random32();
74 }
75 idx = (random_bytes >> (j * 2)) & 0xF;
76 __kfifo_put(*fifo,
77 (unsigned char *) &rarray[idx],
78 sizeof(u32));
79 rarray[idx] = i;
80 j++;
81 }
82 for (i = 0; i < RANDOM_SIZE; i++)
83 __kfifo_put(*fifo,
84 (unsigned char *) &rarray[i],
85 sizeof(u32));
86 } else
87 for (i = skip_low; i < nr - skip_high; i++)
88 __kfifo_put(*fifo, (unsigned char *) &i, sizeof(u32));
89
90 for (i = 0; i < skip_low + skip_high; i++)
91 kfifo_get(*fifo, (unsigned char *) &entry, sizeof(u32));
92 return 0;
93}
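The random branch above is a 16-slot reservoir shuffle: IDs are parked in rarray[] and each incoming ID evicts a randomly chosen slot into the kfifo, so resource IDs such as STAGs are handed out in a hard-to-predict order. A stand-alone user-space illustration of the same idea (the names and the use of rand() are assumptions for the demo only):

#include <stdio.h>
#include <stdlib.h>

#define RESERVOIR 16	/* mirrors RANDOM_SIZE above */

int main(void)
{
	unsigned int pool[RESERVOIR];
	unsigned int i, slot, next;

	for (i = 0; i < RESERVOIR; i++)
		pool[i] = i;			/* park the first 16 IDs */

	for (next = RESERVOIR; next < 64; next++) {
		slot = rand() % RESERVOIR;	/* pick a victim slot */
		printf("emit %u\n", pool[slot]);/* driver: __kfifo_put() */
		pool[slot] = next;		/* park the incoming ID */
	}
	for (i = 0; i < RESERVOIR; i++)		/* drain what is left */
		printf("emit %u\n", pool[i]);
	return 0;
}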
94
95static int cxio_init_resource_fifo(struct kfifo **fifo, spinlock_t * fifo_lock,
96 u32 nr, u32 skip_low, u32 skip_high)
97{
98 return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
99 skip_high, 0));
100}
101
102static int cxio_init_resource_fifo_random(struct kfifo **fifo,
103 spinlock_t * fifo_lock,
104 u32 nr, u32 skip_low, u32 skip_high)
105{
106
107 return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
108 skip_high, 1));
109}
110
111static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
112{
113 u32 i;
114
115 spin_lock_init(&rdev_p->rscp->qpid_fifo_lock);
116
117 rdev_p->rscp->qpid_fifo = kfifo_alloc(T3_MAX_NUM_QP * sizeof(u32),
118 GFP_KERNEL,
119 &rdev_p->rscp->qpid_fifo_lock);
120 if (IS_ERR(rdev_p->rscp->qpid_fifo))
121 return -ENOMEM;
122
123 for (i = 16; i < T3_MAX_NUM_QP; i++)
124 if (!(i & rdev_p->qpmask))
125 __kfifo_put(rdev_p->rscp->qpid_fifo,
126 (unsigned char *) &i, sizeof(u32));
127 return 0;
128}
129
130int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
131{
132 return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1,
133 0);
134}
135
136void cxio_hal_destroy_rhdl_resource(void)
137{
138 kfifo_free(rhdl_fifo);
139}
140
141/* nr_* must be power of 2 */
142int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
143 u32 nr_tpt, u32 nr_pbl,
144 u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid)
145{
146 int err = 0;
147 struct cxio_hal_resource *rscp;
148
149 rscp = kmalloc(sizeof(*rscp), GFP_KERNEL);
150 if (!rscp)
151 return -ENOMEM;
152 rdev_p->rscp = rscp;
153 err = cxio_init_resource_fifo_random(&rscp->tpt_fifo,
154 &rscp->tpt_fifo_lock,
155 nr_tpt, 1, 0);
156 if (err)
157 goto tpt_err;
158 err = cxio_init_qpid_fifo(rdev_p);
159 if (err)
160 goto qpid_err;
161 err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock,
162 nr_cqid, 1, 0);
163 if (err)
164 goto cqid_err;
165 err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock,
166 nr_pdid, 1, 0);
167 if (err)
168 goto pdid_err;
169 return 0;
170pdid_err:
171 kfifo_free(rscp->cqid_fifo);
172cqid_err:
173 kfifo_free(rscp->qpid_fifo);
174qpid_err:
175 kfifo_free(rscp->tpt_fifo);
176tpt_err:
177 return -ENOMEM;
178}
179
180/*
181 * returns 0 if no resource available
182 */
183static inline u32 cxio_hal_get_resource(struct kfifo *fifo)
184{
185 u32 entry;
186 if (kfifo_get(fifo, (unsigned char *) &entry, sizeof(u32)))
187 return entry;
188 else
189		return 0;	/* fifo empty */
190}
191
192static inline void cxio_hal_put_resource(struct kfifo *fifo, u32 entry)
193{
194 BUG_ON(kfifo_put(fifo, (unsigned char *) &entry, sizeof(u32)) == 0);
195}
196
197u32 cxio_hal_get_rhdl(void)
198{
199 return cxio_hal_get_resource(rhdl_fifo);
200}
201
202void cxio_hal_put_rhdl(u32 rhdl)
203{
204 cxio_hal_put_resource(rhdl_fifo, rhdl);
205}
206
207u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp)
208{
209 return cxio_hal_get_resource(rscp->tpt_fifo);
210}
211
212void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag)
213{
214 cxio_hal_put_resource(rscp->tpt_fifo, stag);
215}
216
217u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp)
218{
219 u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo);
220 PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
221 return qpid;
222}
223
224void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid)
225{
226 PDBG("%s qpid 0x%x\n", __FUNCTION__, qpid);
227 cxio_hal_put_resource(rscp->qpid_fifo, qpid);
228}
229
230u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp)
231{
232 return cxio_hal_get_resource(rscp->cqid_fifo);
233}
234
235void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid)
236{
237 cxio_hal_put_resource(rscp->cqid_fifo, cqid);
238}
239
240u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp)
241{
242 return cxio_hal_get_resource(rscp->pdid_fifo);
243}
244
245void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
246{
247 cxio_hal_put_resource(rscp->pdid_fifo, pdid);
248}
249
250void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
251{
252 kfifo_free(rscp->tpt_fifo);
253 kfifo_free(rscp->cqid_fifo);
254 kfifo_free(rscp->qpid_fifo);
255 kfifo_free(rscp->pdid_fifo);
256 kfree(rscp);
257}
258
259/*
260 * PBL Memory Manager. Uses Linux generic allocator.
261 */
262
263#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
264#define PBL_CHUNK 2*1024*1024
265
266u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
267{
268 unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size);
269 PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size);
270 return (u32)addr;
271}
272
273void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
274{
275 PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size);
276 gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size);
277}
278
279int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
280{
281 unsigned long i;
282 rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
283 if (rdev_p->pbl_pool)
284 for (i = rdev_p->rnic_info.pbl_base;
285 i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
286 i += PBL_CHUNK)
287 gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
288 return rdev_p->pbl_pool ? 0 : -ENOMEM;
289}
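cxio_hal_pblpool_create() hands the adapter's PBL region to a gen_pool allocator in 2 MB chunks with 256-byte granularity. A hedged sketch of the allocate/use/free cycle a memory-registration path might follow (the 64-entry size and the elided control-QP write are illustrative only):

static int example_alloc_pbl(struct cxio_rdev *rdev_p)
{
	u32 pbl_addr;
	int pbl_bytes = 64 * sizeof(__be64);	/* room for 64 page entries */

	pbl_addr = cxio_hal_pblpool_alloc(rdev_p, pbl_bytes);
	if (!pbl_addr)
		return -ENOMEM;			/* pool exhausted */
	/* ... write the page list through the control QP and build the
	 * TPT entry that references pbl_addr ... */
	cxio_hal_pblpool_free(rdev_p, pbl_addr, pbl_bytes);
	return 0;
}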
290
291void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
292{
293 gen_pool_destroy(rdev_p->pbl_pool);
294}
295
296/*
297 * RQT Memory Manager. Uses Linux generic allocator.
298 */
299
300#define MIN_RQT_SHIFT 10     /* 1KB == min RQT size (16 entries) */
301#define RQT_CHUNK 2*1024*1024
302
303u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size)
304{
305 unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6);
306 PDBG("%s addr 0x%x size %d\n", __FUNCTION__, (u32)addr, size << 6);
307 return (u32)addr;
308}
309
310void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
311{
312 PDBG("%s addr 0x%x size %d\n", __FUNCTION__, addr, size << 6);
313 gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6);
314}
315
316int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p)
317{
318 unsigned long i;
319 rdev_p->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
320 if (rdev_p->rqt_pool)
321 for (i = rdev_p->rnic_info.rqt_base;
322 i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1;
323 i += RQT_CHUNK)
324 gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1);
325 return rdev_p->rqt_pool ? 0 : -ENOMEM;
326}
327
328void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p)
329{
330 gen_pool_destroy(rdev_p->rqt_pool);
331}
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.h b/drivers/infiniband/hw/cxgb3/cxio_resource.h
new file mode 100644
index 000000000000..a6bbe8370d81
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.h
@@ -0,0 +1,70 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __CXIO_RESOURCE_H__
34#define __CXIO_RESOURCE_H__
35
36#include <linux/kernel.h>
37#include <linux/random.h>
38#include <linux/slab.h>
39#include <linux/kfifo.h>
40#include <linux/spinlock.h>
41#include <linux/errno.h>
42#include <linux/genalloc.h>
43#include "cxio_hal.h"
44
45extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl);
46extern void cxio_hal_destroy_rhdl_resource(void);
47extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
48 u32 nr_tpt, u32 nr_pbl,
49 u32 nr_rqt, u32 nr_qpid, u32 nr_cqid,
50 u32 nr_pdid);
51extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp);
52extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag);
53extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp);
54extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid);
55extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp);
56extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid);
57extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp);
58
59#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base )
60extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p);
61extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p);
62extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size);
63extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
64
65#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base )
66extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p);
67extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p);
68extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size);
69extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size);
70#endif
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
new file mode 100644
index 000000000000..103fc42d6976
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -0,0 +1,685 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __CXIO_WR_H__
34#define __CXIO_WR_H__
35
36#include <asm/io.h>
37#include <linux/pci.h>
38#include <linux/timer.h>
39#include "firmware_exports.h"
40
41#define T3_MAX_SGE 4
42
43#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr))
44#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \
45 ((rptr)!=(wptr)) )
46#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1))
47#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr)))
48#define Q_COUNT(rptr,wptr) ((wptr)-(rptr))
49#define Q_PTR2IDX(ptr,size_log2) ((ptr) & ((1UL<<(size_log2))-1))
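The Q_* macros above treat rptr/wptr as free-running 32-bit counters: only the low size_log2 bits index the ring, and the next bit up supplies the generation bit used to recognize valid entries. A small stand-alone user-space demonstration of that arithmetic (a toy program, not driver code):

#include <stdio.h>
#include <stdint.h>

#define EX_SIZE_LOG2	3	/* 8-entry ring for the demo */
#define EX_PTR2IDX(p)	((p) & ((1U << EX_SIZE_LOG2) - 1))
#define EX_GENBIT(p)	(!(((p) >> EX_SIZE_LOG2) & 0x1))

int main(void)
{
	uint32_t wptr;

	for (wptr = 0; wptr < 20; wptr++)
		printf("wptr %2u -> idx %u genbit %d\n",
		       wptr, EX_PTR2IDX(wptr), EX_GENBIT(wptr));
	/* pointers 0..7 report genbit 1, 8..15 genbit 0, 16..19 genbit 1 */
	return 0;
}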
50
51static inline void ring_doorbell(void __iomem *doorbell, u32 qpid)
52{
53 writel(((1<<31) | qpid), doorbell);
54}
55
56#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 ))
57
58enum t3_wr_flags {
59 T3_COMPLETION_FLAG = 0x01,
60 T3_NOTIFY_FLAG = 0x02,
61 T3_SOLICITED_EVENT_FLAG = 0x04,
62 T3_READ_FENCE_FLAG = 0x08,
63 T3_LOCAL_FENCE_FLAG = 0x10
64} __attribute__ ((packed));
65
66enum t3_wr_opcode {
67 T3_WR_BP = FW_WROPCODE_RI_BYPASS,
68 T3_WR_SEND = FW_WROPCODE_RI_SEND,
69 T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE,
70 T3_WR_READ = FW_WROPCODE_RI_RDMA_READ,
71 T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV,
72 T3_WR_BIND = FW_WROPCODE_RI_BIND_MW,
73 T3_WR_RCV = FW_WROPCODE_RI_RECEIVE,
74 T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT,
75 T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP
76} __attribute__ ((packed));
77
78enum t3_rdma_opcode {
79 T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */
80 T3_READ_REQ,
81 T3_READ_RESP,
82 T3_SEND,
83 T3_SEND_WITH_INV,
84 T3_SEND_WITH_SE,
85 T3_SEND_WITH_SE_INV,
86 T3_TERMINATE,
87 T3_RDMA_INIT, /* CHELSIO RI specific ... */
88 T3_BIND_MW,
89 T3_FAST_REGISTER,
90 T3_LOCAL_INV,
91 T3_QP_MOD,
92 T3_BYPASS
93} __attribute__ ((packed));
94
95static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop)
96{
97 switch (wrop) {
98 case T3_WR_BP: return T3_BYPASS;
99 case T3_WR_SEND: return T3_SEND;
100 case T3_WR_WRITE: return T3_RDMA_WRITE;
101 case T3_WR_READ: return T3_READ_REQ;
102 case T3_WR_INV_STAG: return T3_LOCAL_INV;
103 case T3_WR_BIND: return T3_BIND_MW;
104 case T3_WR_INIT: return T3_RDMA_INIT;
105 case T3_WR_QP_MOD: return T3_QP_MOD;
106 default: break;
107 }
108 return -1;
109}
110
111
112/* Work request id */
113union t3_wrid {
114 struct {
115 u32 hi;
116 u32 low;
117 } id0;
118 u64 id1;
119};
120
121#define WRID(wrid) (wrid.id1)
122#define WRID_GEN(wrid) (wrid.id0.wr_gen)
123#define WRID_IDX(wrid) (wrid.id0.wr_idx)
124#define WRID_LO(wrid) (wrid.id0.wr_lo)
125
126struct fw_riwrh {
127 __be32 op_seop_flags;
128 __be32 gen_tid_len;
129};
130
131#define S_FW_RIWR_OP 24
132#define M_FW_RIWR_OP 0xff
133#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP)
134#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP)
135
136#define S_FW_RIWR_SOPEOP 22
137#define M_FW_RIWR_SOPEOP 0x3
138#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP)
139
140#define S_FW_RIWR_FLAGS 8
141#define M_FW_RIWR_FLAGS 0x3fffff
142#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS)
143#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS)
144
145#define S_FW_RIWR_TID 8
146#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID)
147
148#define S_FW_RIWR_LEN 0
149#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN)
150
151#define S_FW_RIWR_GEN 31
152#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN)
153
154struct t3_sge {
155 __be32 stag;
156 __be32 len;
157 __be64 to;
158};
159
160/* If num_sgle is zero, flit 5+ contains immediate data.*/
161struct t3_send_wr {
162 struct fw_riwrh wrh; /* 0 */
163 union t3_wrid wrid; /* 1 */
164
165 u8 rdmaop; /* 2 */
166 u8 reserved[3];
167 __be32 rem_stag;
168 __be32 plen; /* 3 */
169 __be32 num_sgle;
170 struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */
171};
172
173struct t3_local_inv_wr {
174 struct fw_riwrh wrh; /* 0 */
175 union t3_wrid wrid; /* 1 */
176 __be32 stag; /* 2 */
177 __be32 reserved3;
178};
179
180struct t3_rdma_write_wr {
181 struct fw_riwrh wrh; /* 0 */
182 union t3_wrid wrid; /* 1 */
183 u8 rdmaop; /* 2 */
184 u8 reserved[3];
185 __be32 stag_sink;
186 __be64 to_sink; /* 3 */
187 __be32 plen; /* 4 */
188 __be32 num_sgle;
189 struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */
190};
191
192struct t3_rdma_read_wr {
193 struct fw_riwrh wrh; /* 0 */
194 union t3_wrid wrid; /* 1 */
195 u8 rdmaop; /* 2 */
196 u8 reserved[3];
197 __be32 rem_stag;
198 __be64 rem_to; /* 3 */
199 __be32 local_stag; /* 4 */
200 __be32 local_len;
201 __be64 local_to; /* 5 */
202};
203
204enum t3_addr_type {
205 T3_VA_BASED_TO = 0x0,
206 T3_ZERO_BASED_TO = 0x1
207} __attribute__ ((packed));
208
209enum t3_mem_perms {
210 T3_MEM_ACCESS_LOCAL_READ = 0x1,
211 T3_MEM_ACCESS_LOCAL_WRITE = 0x2,
212 T3_MEM_ACCESS_REM_READ = 0x4,
213 T3_MEM_ACCESS_REM_WRITE = 0x8
214} __attribute__ ((packed));
215
216struct t3_bind_mw_wr {
217 struct fw_riwrh wrh; /* 0 */
218 union t3_wrid wrid; /* 1 */
219 u16 reserved; /* 2 */
220 u8 type;
221 u8 perms;
222 __be32 mr_stag;
223 __be32 mw_stag; /* 3 */
224 __be32 mw_len;
225 __be64 mw_va; /* 4 */
226 __be32 mr_pbl_addr; /* 5 */
227 u8 reserved2[3];
228 u8 mr_pagesz;
229};
230
231struct t3_receive_wr {
232 struct fw_riwrh wrh; /* 0 */
233 union t3_wrid wrid; /* 1 */
234 u8 pagesz[T3_MAX_SGE];
235 __be32 num_sgle; /* 2 */
236 struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */
237 __be32 pbl_addr[T3_MAX_SGE];
238};
239
240struct t3_bypass_wr {
241 struct fw_riwrh wrh;
242 union t3_wrid wrid; /* 1 */
243};
244
245struct t3_modify_qp_wr {
246 struct fw_riwrh wrh; /* 0 */
247 union t3_wrid wrid; /* 1 */
248 __be32 flags; /* 2 */
249 __be32 quiesce; /* 2 */
250 __be32 max_ird; /* 3 */
251 __be32 max_ord; /* 3 */
252 __be64 sge_cmd; /* 4 */
253 __be64 ctx1; /* 5 */
254 __be64 ctx0; /* 6 */
255};
256
257enum t3_modify_qp_flags {
258 MODQP_QUIESCE = 0x01,
259 MODQP_MAX_IRD = 0x02,
260 MODQP_MAX_ORD = 0x04,
261 MODQP_WRITE_EC = 0x08,
262 MODQP_READ_EC = 0x10,
263};
264
265
266enum t3_mpa_attrs {
267 uP_RI_MPA_RX_MARKER_ENABLE = 0x1,
268 uP_RI_MPA_TX_MARKER_ENABLE = 0x2,
269 uP_RI_MPA_CRC_ENABLE = 0x4,
270 uP_RI_MPA_IETF_ENABLE = 0x8
271} __attribute__ ((packed));
272
273enum t3_qp_caps {
274 uP_RI_QP_RDMA_READ_ENABLE = 0x01,
275 uP_RI_QP_RDMA_WRITE_ENABLE = 0x02,
276 uP_RI_QP_BIND_ENABLE = 0x04,
277 uP_RI_QP_FAST_REGISTER_ENABLE = 0x08,
278 uP_RI_QP_STAG0_ENABLE = 0x10
279} __attribute__ ((packed));
280
281struct t3_rdma_init_attr {
282 u32 tid;
283 u32 qpid;
284 u32 pdid;
285 u32 scqid;
286 u32 rcqid;
287 u32 rq_addr;
288 u32 rq_size;
289 enum t3_mpa_attrs mpaattrs;
290 enum t3_qp_caps qpcaps;
291 u16 tcp_emss;
292 u32 ord;
293 u32 ird;
294 u64 qp_dma_addr;
295 u32 qp_dma_size;
296 u32 flags;
297};
298
299struct t3_rdma_init_wr {
300 struct fw_riwrh wrh; /* 0 */
301 union t3_wrid wrid; /* 1 */
302 __be32 qpid; /* 2 */
303 __be32 pdid;
304 __be32 scqid; /* 3 */
305 __be32 rcqid;
306 __be32 rq_addr; /* 4 */
307 __be32 rq_size;
308 u8 mpaattrs; /* 5 */
309 u8 qpcaps;
310 __be16 ulpdu_size;
311	__be32 flags;		/* bits 31-1 - reserved */
312 /* bit 0 - set if RECV posted */
313 __be32 ord; /* 6 */
314 __be32 ird;
315 __be64 qp_dma_addr; /* 7 */
316 __be32 qp_dma_size; /* 8 */
317 u32 rsvd;
318};
319
320struct t3_genbit {
321 u64 flit[15];
322 __be64 genbit;
323};
324
325enum rdma_init_wr_flags {
326 RECVS_POSTED = 1,
327};
328
329union t3_wr {
330 struct t3_send_wr send;
331 struct t3_rdma_write_wr write;
332 struct t3_rdma_read_wr read;
333 struct t3_receive_wr recv;
334 struct t3_local_inv_wr local_inv;
335 struct t3_bind_mw_wr bind;
336 struct t3_bypass_wr bypass;
337 struct t3_rdma_init_wr init;
338 struct t3_modify_qp_wr qp_mod;
339 struct t3_genbit genbit;
340 u64 flit[16];
341};
342
343#define T3_SQ_CQE_FLIT 13
344#define T3_SQ_COOKIE_FLIT 14
345
346#define T3_RQ_COOKIE_FLIT 13
347#define T3_RQ_CQE_FLIT 14
348
349static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe)
350{
351 return G_FW_RIWR_OP(be32_to_cpu(wqe->op_seop_flags));
352}
353
354static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op,
355 enum t3_wr_flags flags, u8 genbit, u32 tid,
356 u8 len)
357{
358 wqe->op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(op) |
359 V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) |
360 V_FW_RIWR_FLAGS(flags));
361 wmb();
362 wqe->gen_tid_len = cpu_to_be32(V_FW_RIWR_GEN(genbit) |
363 V_FW_RIWR_TID(tid) |
364 V_FW_RIWR_LEN(len));
365 /* 2nd gen bit... */
366 ((union t3_wr *)wqe)->genbit.genbit = cpu_to_be64(genbit);
367}
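A compressed sketch of stamping a send WR header with build_fw_riwrh(); the flit length and flag choice here are hypothetical, and a real posting path would also fill the opcode, SGL and length fields of the WR body first:

static void example_stamp_send_hdr(union t3_wr *wqe, u32 tid, u8 genbit)
{
	u8 len = 5;	/* hypothetical WR length in 8-byte flits */

	build_fw_riwrh(&wqe->send.wrh, T3_WR_SEND,
		       T3_COMPLETION_FLAG | T3_SOLICITED_EVENT_FLAG,
		       genbit, tid, len);
}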
368
369/*
370 * T3 ULP2_TX commands
371 */
372enum t3_utx_mem_op {
373 T3_UTX_MEM_READ = 2,
374 T3_UTX_MEM_WRITE = 3
375};
376
377/* T3 MC7 RDMA TPT entry format */
378
379enum tpt_mem_type {
380 TPT_NON_SHARED_MR = 0x0,
381 TPT_SHARED_MR = 0x1,
382 TPT_MW = 0x2,
383 TPT_MW_RELAXED_PROTECTION = 0x3
384};
385
386enum tpt_addr_type {
387 TPT_ZBTO = 0,
388 TPT_VATO = 1
389};
390
391enum tpt_mem_perm {
392 TPT_LOCAL_READ = 0x8,
393 TPT_LOCAL_WRITE = 0x4,
394 TPT_REMOTE_READ = 0x2,
395 TPT_REMOTE_WRITE = 0x1
396};
397
398struct tpt_entry {
399 __be32 valid_stag_pdid;
400 __be32 flags_pagesize_qpid;
401
402 __be32 rsvd_pbl_addr;
403 __be32 len;
404 __be32 va_hi;
405 __be32 va_low_or_fbo;
406
407 __be32 rsvd_bind_cnt_or_pstag;
408 __be32 rsvd_pbl_size;
409};
410
411#define S_TPT_VALID 31
412#define V_TPT_VALID(x) ((x) << S_TPT_VALID)
413#define F_TPT_VALID V_TPT_VALID(1U)
414
415#define S_TPT_STAG_KEY 23
416#define M_TPT_STAG_KEY 0xFF
417#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY)
418#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY)
419
420#define S_TPT_STAG_STATE 22
421#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE)
422#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U)
423
424#define S_TPT_STAG_TYPE 20
425#define M_TPT_STAG_TYPE 0x3
426#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE)
427#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE)
428
429#define S_TPT_PDID 0
430#define M_TPT_PDID 0xFFFFF
431#define V_TPT_PDID(x) ((x) << S_TPT_PDID)
432#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID)
433
434#define S_TPT_PERM 28
435#define M_TPT_PERM 0xF
436#define V_TPT_PERM(x) ((x) << S_TPT_PERM)
437#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM)
438
439#define S_TPT_REM_INV_DIS 27
440#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS)
441#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U)
442
443#define S_TPT_ADDR_TYPE 26
444#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE)
445#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U)
446
447#define S_TPT_MW_BIND_ENABLE 25
448#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE)
449#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U)
450
451#define S_TPT_PAGE_SIZE 20
452#define M_TPT_PAGE_SIZE 0x1F
453#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE)
454#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE)
455
456#define S_TPT_PBL_ADDR 0
457#define M_TPT_PBL_ADDR 0x1FFFFFFF
458#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR)
459#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR)
460
461#define S_TPT_QPID 0
462#define M_TPT_QPID 0xFFFFF
463#define V_TPT_QPID(x) ((x) << S_TPT_QPID)
464#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID)
465
466#define S_TPT_PSTAG 0
467#define M_TPT_PSTAG 0xFFFFFF
468#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG)
469#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG)
470
471#define S_TPT_PBL_SIZE 0
472#define M_TPT_PBL_SIZE 0xFFFFF
473#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE)
474#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & M_TPT_PBL_SIZE)
475
476/*
477 * CQE defs
478 */
479struct t3_cqe {
480 __be32 header;
481 __be32 len;
482 union {
483 struct {
484 __be32 stag;
485 __be32 msn;
486 } rcqe;
487 struct {
488 u32 wrid_hi;
489 u32 wrid_low;
490 } scqe;
491 } u;
492};
493
494#define S_CQE_OOO 31
495#define M_CQE_OOO 0x1
496#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
497#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
498
499#define S_CQE_QPID 12
500#define M_CQE_QPID 0x7FFFF
501#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
502#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
503
504#define S_CQE_SWCQE 11
505#define M_CQE_SWCQE 0x1
506#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
507#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
508
509#define S_CQE_GENBIT 10
510#define M_CQE_GENBIT 0x1
511#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
512#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
513
514#define S_CQE_STATUS 5
515#define M_CQE_STATUS 0x1F
516#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
517#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
518
519#define S_CQE_TYPE 4
520#define M_CQE_TYPE 0x1
521#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
522#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
523
524#define S_CQE_OPCODE 0
525#define M_CQE_OPCODE 0xF
526#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
527#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
528
529#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x).header)))
530#define CQE_OOO(x) (G_CQE_OOO(be32_to_cpu((x).header)))
531#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x).header)))
532#define CQE_GENBIT(x) (G_CQE_GENBIT(be32_to_cpu((x).header)))
533#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x).header)))
534#define SQ_TYPE(x) (CQE_TYPE((x)))
535#define RQ_TYPE(x) (!CQE_TYPE((x)))
536#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x).header)))
537#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x).header)))
538
539#define CQE_LEN(x) (be32_to_cpu((x).len))
540
541/* used for RQ completion processing */
542#define CQE_WRID_STAG(x) (be32_to_cpu((x).u.rcqe.stag))
543#define CQE_WRID_MSN(x) (be32_to_cpu((x).u.rcqe.msn))
544
545/* used for SQ completion processing */
546#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
547#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
548
549/* generic accessor macros */
550#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
551#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
552
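Taken together, these accessors let a consumer classify a CQE without knowing the raw bit layout. A hedged sketch (the function and its logging are illustrative only):

static void example_classify_cqe(const struct t3_cqe *cqe)
{
	if (SQ_TYPE(*cqe))
		printk(KERN_DEBUG "sq completion: opcode %u sq_wptr 0x%x\n",
		       CQE_OPCODE(*cqe), CQE_WRID_SQ_WPTR(*cqe));
	else
		printk(KERN_DEBUG "rq completion: msn %u stag 0x%x\n",
		       CQE_WRID_MSN(*cqe), CQE_WRID_STAG(*cqe));

	if (CQE_STATUS(*cqe))
		printk(KERN_DEBUG "completed in error, status 0x%x\n",
		       CQE_STATUS(*cqe));
}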
553#define TPT_ERR_SUCCESS 0x0
554#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
555					/* STAG is out of range, is 0, */
556					/* or the STAG_key mismatches */
557#define TPT_ERR_PDID 0x2 /* PDID mismatch */
558#define TPT_ERR_QPID 0x3 /* QPID mismatch */
559#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
560#define TPT_ERR_WRAP 0x5 /* Wrap error */
561#define TPT_ERR_BOUND                     0x6	 /* base and bounds violation */
562#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
563 /* shared memory region */
564#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
565						 /* MR with a MW still bound */
566#define TPT_ERR_ECC 0x9 /* ECC error detected */
567#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
568 /* reading PSTAG for a MW */
569 /* Invalidate */
570#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
571 /* software error */
572#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
573#define TPT_ERR_CRC 0x10 /* CRC error */
574#define TPT_ERR_MARKER 0x11 /* Marker error */
575#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
576#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
577#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
578#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
579#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
580#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
581#define TPT_ERR_MSN 0x18 /* MSN error */
582#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
583#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
584 /* or READ_REQ */
585#define TPT_ERR_MSN_GAP 0x1B
586#define TPT_ERR_MSN_RANGE 0x1C
587#define TPT_ERR_IRD_OVERFLOW 0x1D
588#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
589 /* software error */
590#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
591 /* mismatch) */
592
593struct t3_swsq {
594 __u64 wr_id;
595 struct t3_cqe cqe;
596 __u32 sq_wptr;
597 __be32 read_len;
598 int opcode;
599 int complete;
600 int signaled;
601};
602
603/*
604 * A T3 WQ implements both the SQ and RQ.
605 */
606struct t3_wq {
607	union t3_wr *queue;		/* DMA accessible memory */
608 dma_addr_t dma_addr; /* DMA address for HW */
609	DECLARE_PCI_UNMAP_ADDR(mapping)	/* unmap cruft */
610 u32 error; /* 1 once we go to ERROR */
611 u32 qpid;
612 u32 wptr; /* idx to next available WR slot */
613 u32 size_log2; /* total wq size */
614 struct t3_swsq *sq; /* SW SQ */
615 struct t3_swsq *oldest_read; /* tracks oldest pending read */
616 u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
617 u32 sq_rptr; /* pending wrs */
618 u32 sq_size_log2; /* sq size */
619	u64 *rq;			/* SW RQ (holds consumer wr_ids) */
620 u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
621 u32 rq_rptr; /* pending wrs */
622 u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
623 u32 rq_size_log2; /* rq size */
624 u32 rq_addr; /* rq adapter address */
625 void __iomem *doorbell; /* kernel db */
626 u64 udb; /* user db if any */
627};
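A compressed sketch of how a producer might combine this structure with Q_PTR2IDX() and ring_doorbell() above; it assumes the caller has already stamped the header and generation bit with build_fw_riwrh(), and a real posting path would additionally check Q_FULL() and serialize producers:

static void example_post_one_wr(struct t3_wq *wq, const union t3_wr *src,
				u8 len_flits)
{
	union t3_wr *dst = wq->queue + Q_PTR2IDX(wq->wptr, wq->size_log2);

	memcpy(dst, src, len_flits * sizeof(u64));	/* len is in flits */
	wq->wptr++;					/* free-running counter */
	ring_doorbell(wq->doorbell, wq->qpid);
}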
628
629struct t3_cq {
630 u32 cqid;
631 u32 rptr;
632 u32 wptr;
633 u32 size_log2;
634 dma_addr_t dma_addr;
635 DECLARE_PCI_UNMAP_ADDR(mapping)
636 struct t3_cqe *queue;
637 struct t3_cqe *sw_queue;
638 u32 sw_rptr;
639 u32 sw_wptr;
640};
641
642#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
643 CQE_GENBIT(*cqe))
644
645static inline void cxio_set_wq_in_error(struct t3_wq *wq)
646{
647 wq->queue->flit[13] = 1;
648}
649
650static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
651{
652 struct t3_cqe *cqe;
653
654 cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
655 if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
656 return cqe;
657 return NULL;
658}
659
660static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
661{
662 struct t3_cqe *cqe;
663
664 if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
665 cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
666 return cqe;
667 }
668 return NULL;
669}
670
671static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
672{
673 struct t3_cqe *cqe;
674
675 if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
676 cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
677 return cqe;
678 }
679 cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
680 if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
681 return cqe;
682 return NULL;
683}
684
685#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
new file mode 100644
index 000000000000..4611afa52220
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -0,0 +1,189 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35
36#include <rdma/ib_verbs.h>
37
38#include "cxgb3_offload.h"
39#include "iwch_provider.h"
40#include "iwch_user.h"
41#include "iwch.h"
42#include "iwch_cm.h"
43
44#define DRV_VERSION "1.1"
45
46MODULE_AUTHOR("Boyd Faulkner, Steve Wise");
47MODULE_DESCRIPTION("Chelsio T3 RDMA Driver");
48MODULE_LICENSE("Dual BSD/GPL");
49MODULE_VERSION(DRV_VERSION);
50
51cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
52
53static void open_rnic_dev(struct t3cdev *);
54static void close_rnic_dev(struct t3cdev *);
55
56struct cxgb3_client t3c_client = {
57 .name = "iw_cxgb3",
58 .add = open_rnic_dev,
59 .remove = close_rnic_dev,
60 .handlers = t3c_handlers,
61 .redirect = iwch_ep_redirect
62};
63
64static LIST_HEAD(dev_list);
65static DEFINE_MUTEX(dev_mutex);
66
67static void rnic_init(struct iwch_dev *rnicp)
68{
69 PDBG("%s iwch_dev %p\n", __FUNCTION__, rnicp);
70 idr_init(&rnicp->cqidr);
71 idr_init(&rnicp->qpidr);
72 idr_init(&rnicp->mmidr);
73 spin_lock_init(&rnicp->lock);
74
75 rnicp->attr.vendor_id = 0x168;
76 rnicp->attr.vendor_part_id = 7;
77 rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
78 rnicp->attr.max_wrs = (1UL << 24) - 1;
79 rnicp->attr.max_sge_per_wr = T3_MAX_SGE;
80 rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE;
81 rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1;
82 rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1;
83 rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev);
84 rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
85 rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
86 rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */
87 rnicp->attr.can_resize_wq = 0;
88 rnicp->attr.max_rdma_reads_per_qp = 8;
89 rnicp->attr.max_rdma_read_resources =
90 rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps;
91 rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */
92 rnicp->attr.max_rdma_read_depth =
93 rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps;
94 rnicp->attr.rq_overflow_handled = 0;
95 rnicp->attr.can_modify_ird = 0;
96 rnicp->attr.can_modify_ord = 0;
97 rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1;
98 rnicp->attr.stag0_value = 1;
99 rnicp->attr.zbva_support = 1;
100 rnicp->attr.local_invalidate_fence = 1;
101 rnicp->attr.cq_overflow_detection = 1;
102 return;
103}
104
105static void open_rnic_dev(struct t3cdev *tdev)
106{
107 struct iwch_dev *rnicp;
108 static int vers_printed;
109
110 PDBG("%s t3cdev %p\n", __FUNCTION__, tdev);
111 if (!vers_printed++)
112 printk(KERN_INFO MOD "Chelsio T3 RDMA Driver - version %s\n",
113 DRV_VERSION);
114 rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
115 if (!rnicp) {
116 printk(KERN_ERR MOD "Cannot allocate ib device\n");
117 return;
118 }
119 rnicp->rdev.ulp = rnicp;
120 rnicp->rdev.t3cdev_p = tdev;
121
122 mutex_lock(&dev_mutex);
123
124 if (cxio_rdev_open(&rnicp->rdev)) {
125 mutex_unlock(&dev_mutex);
126 printk(KERN_ERR MOD "Unable to open CXIO rdev\n");
127 ib_dealloc_device(&rnicp->ibdev);
128 return;
129 }
130
131 rnic_init(rnicp);
132
133 list_add_tail(&rnicp->entry, &dev_list);
134 mutex_unlock(&dev_mutex);
135
136	if (iwch_register_device(rnicp)) {
137		printk(KERN_ERR MOD "Unable to register device\n");
138		close_rnic_dev(tdev);
139		return;
140	}
141	printk(KERN_INFO MOD "Initialized device %s\n",
142	       pci_name(rnicp->rdev.rnic_info.pdev));
143}
144
145static void close_rnic_dev(struct t3cdev *tdev)
146{
147 struct iwch_dev *dev, *tmp;
148 PDBG("%s t3cdev %p\n", __FUNCTION__, tdev);
149 mutex_lock(&dev_mutex);
150 list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
151 if (dev->rdev.t3cdev_p == tdev) {
152 list_del(&dev->entry);
153 iwch_unregister_device(dev);
154 cxio_rdev_close(&dev->rdev);
155 idr_destroy(&dev->cqidr);
156 idr_destroy(&dev->qpidr);
157 idr_destroy(&dev->mmidr);
158 ib_dealloc_device(&dev->ibdev);
159 break;
160 }
161 }
162 mutex_unlock(&dev_mutex);
163}
164
165static int __init iwch_init_module(void)
166{
167 int err;
168
169 err = cxio_hal_init();
170 if (err)
171 return err;
172 err = iwch_cm_init();
173 if (err)
174 return err;
175 cxio_register_ev_cb(iwch_ev_dispatch);
176 cxgb3_register_client(&t3c_client);
177 return 0;
178}
179
180static void __exit iwch_exit_module(void)
181{
182 cxgb3_unregister_client(&t3c_client);
183 cxio_unregister_ev_cb(iwch_ev_dispatch);
184 iwch_cm_term();
185 cxio_hal_exit();
186}
187
188module_init(iwch_init_module);
189module_exit(iwch_exit_module);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
new file mode 100644
index 000000000000..6517ef85026f
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -0,0 +1,177 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __IWCH_H__
34#define __IWCH_H__
35
36#include <linux/mutex.h>
37#include <linux/list.h>
38#include <linux/spinlock.h>
39#include <linux/idr.h>
40
41#include <rdma/ib_verbs.h>
42
43#include "cxio_hal.h"
44#include "cxgb3_offload.h"
45
46struct iwch_pd;
47struct iwch_cq;
48struct iwch_qp;
49struct iwch_mr;
50
51struct iwch_rnic_attributes {
52 u32 vendor_id;
53 u32 vendor_part_id;
54 u32 max_qps;
55 u32 max_wrs; /* Max for any SQ/RQ */
56 u32 max_sge_per_wr;
57 u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */
58 u32 max_cqs;
59 u32 max_cqes_per_cq;
60 u32 max_mem_regs;
61 u32 max_phys_buf_entries; /* for phys buf list */
62 u32 max_pds;
63
64 /*
65 * The memory page sizes supported by this RNIC.
66 * Bit position i in bitmap indicates page of
67 * size (4k)^i. Phys block list mode unsupported.
68 */
69 u32 mem_pgsizes_bitmask;
70 u8 can_resize_wq;
71
72 /*
73 * The maximum number of RDMA Reads that can be outstanding
74 * per QP with this RNIC as the target.
75 */
76 u32 max_rdma_reads_per_qp;
77
78 /*
79	 * The maximum number of resources used for RDMA Reads
80	 * across all QPs, with this RNIC as the target.
81 */
82 u32 max_rdma_read_resources;
83
84 /*
85 * The max depth per QP for initiation of RDMA Read
86 * by this RNIC.
87 */
88 u32 max_rdma_read_qp_depth;
89
90 /*
91 * The maximum depth for initiation of RDMA Read
92 * operations by this RNIC on all QPs
93 */
94 u32 max_rdma_read_depth;
95 u8 rq_overflow_handled;
96 u32 can_modify_ird;
97 u32 can_modify_ord;
98 u32 max_mem_windows;
99 u32 stag0_value;
100 u8 zbva_support;
101 u8 local_invalidate_fence;
102 u32 cq_overflow_detection;
103};
104
105struct iwch_dev {
106 struct ib_device ibdev;
107 struct cxio_rdev rdev;
108 u32 device_cap_flags;
109 struct iwch_rnic_attributes attr;
110 struct idr cqidr;
111 struct idr qpidr;
112 struct idr mmidr;
113 spinlock_t lock;
114 struct list_head entry;
115};
116
117static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
118{
119 return container_of(ibdev, struct iwch_dev, ibdev);
120}
121
122static inline int t3b_device(const struct iwch_dev *rhp)
123{
124 return rhp->rdev.t3cdev_p->type == T3B;
125}
126
127static inline int t3a_device(const struct iwch_dev *rhp)
128{
129 return rhp->rdev.t3cdev_p->type == T3A;
130}
131
132static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid)
133{
134 return idr_find(&rhp->cqidr, cqid);
135}
136
137static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid)
138{
139 return idr_find(&rhp->qpidr, qpid);
140}
141
142static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid)
143{
144 return idr_find(&rhp->mmidr, mmid);
145}
146
147static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
148 void *handle, u32 id)
149{
150 int ret;
151 u32 newid;
152
153 do {
154 if (!idr_pre_get(idr, GFP_KERNEL)) {
155 return -ENOMEM;
156 }
157 spin_lock_irq(&rhp->lock);
158 ret = idr_get_new_above(idr, handle, id, &newid);
159 BUG_ON(newid != id);
160 spin_unlock_irq(&rhp->lock);
161 } while (ret == -EAGAIN);
162
163 return ret;
164}
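A hedged sketch of how a provider object such as a CQ could be published and later withdrawn with these helpers; the function is hypothetical and the real callers live elsewhere in the driver:

static int example_publish_cq(struct iwch_dev *rhp, struct iwch_cq *chp,
			      u32 cqid)
{
	int ret = insert_handle(rhp, &rhp->cqidr, chp, cqid);

	if (ret)
		return ret;
	/* ... the CQ is now visible to get_chp() ... */
	remove_handle(rhp, &rhp->cqidr, cqid);	/* on teardown */
	return 0;
}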
165
166static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
167{
168 spin_lock_irq(&rhp->lock);
169 idr_remove(idr, id);
170 spin_unlock_irq(&rhp->lock);
171}
172
173extern struct cxgb3_client t3c_client;
174extern cxgb3_cpl_handler_func t3c_handlers[NUM_CPL_CMDS];
175extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb);
176
177#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
new file mode 100644
index 000000000000..a522b1baa3b4
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -0,0 +1,2081 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/module.h>
34#include <linux/list.h>
35#include <linux/workqueue.h>
36#include <linux/skbuff.h>
37#include <linux/timer.h>
38#include <linux/notifier.h>
39
40#include <net/neighbour.h>
41#include <net/netevent.h>
42#include <net/route.h>
43
44#include "tcb.h"
45#include "cxgb3_offload.h"
46#include "iwch.h"
47#include "iwch_provider.h"
48#include "iwch_cm.h"
49
50static char *states[] = {
51 "idle",
52 "listen",
53 "connecting",
54 "mpa_wait_req",
55 "mpa_req_sent",
56 "mpa_req_rcvd",
57 "mpa_rep_sent",
58 "fpdu_mode",
59 "aborting",
60 "closing",
61 "moribund",
62 "dead",
63 NULL,
64};
65
66static int ep_timeout_secs = 10;
67module_param(ep_timeout_secs, int, 0444);
68MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
69 "in seconds (default=10)");
70
71static int mpa_rev = 1;
72module_param(mpa_rev, int, 0444);
73MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
74 "1 is spec compliant. (default=1)");
75
76static int markers_enabled = 0;
77module_param(markers_enabled, int, 0444);
78MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
79
80static int crc_enabled = 1;
81module_param(crc_enabled, int, 0444);
82MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
83
84static int rcv_win = 256 * 1024;
85module_param(rcv_win, int, 0444);
86MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
87
88static int snd_win = 32 * 1024;
89module_param(snd_win, int, 0444);
90MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=32KB)");
91
92static unsigned int nocong = 0;
93module_param(nocong, uint, 0444);
94MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
95
96static unsigned int cong_flavor = 1;
97module_param(cong_flavor, uint, 0444);
98MODULE_PARM_DESC(cong_flavor, "TCP Congestion control flavor (default=1)");
99
100static void process_work(struct work_struct *work);
101static struct workqueue_struct *workq;
102static DECLARE_WORK(skb_work, process_work);
103
104static struct sk_buff_head rxq;
105static cxgb3_cpl_handler_func work_handlers[NUM_CPL_CMDS];
106
107static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
108static void ep_timeout(unsigned long arg);
109static void connect_reply_upcall(struct iwch_ep *ep, int status);
110
111static void start_ep_timer(struct iwch_ep *ep)
112{
113 PDBG("%s ep %p\n", __FUNCTION__, ep);
114 if (timer_pending(&ep->timer)) {
115 PDBG("%s stopped / restarted timer ep %p\n", __FUNCTION__, ep);
116 del_timer_sync(&ep->timer);
117 } else
118 get_ep(&ep->com);
119 ep->timer.expires = jiffies + ep_timeout_secs * HZ;
120 ep->timer.data = (unsigned long)ep;
121 ep->timer.function = ep_timeout;
122 add_timer(&ep->timer);
123}
124
125static void stop_ep_timer(struct iwch_ep *ep)
126{
127 PDBG("%s ep %p\n", __FUNCTION__, ep);
128 del_timer_sync(&ep->timer);
129 put_ep(&ep->com);
130}
131
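start_ep_timer() takes an extra endpoint reference only when it arms a timer that was not already pending, and stop_ep_timer() drops that reference after del_timer_sync(), so an armed timer always owns exactly one reference. A userspace sketch of that pairing invariant, with a plain counter standing in for the kref and a flag for timer_pending() (all names hypothetical):

#include <assert.h>
#include <stdio.h>

struct fake_ep {
	int refcnt;		/* stands in for the kref */
	int timer_pending;	/* stands in for timer_pending() */
};

static void fake_start_timer(struct fake_ep *ep)
{
	if (!ep->timer_pending)
		ep->refcnt++;	/* get_ep(): the timer now owns a reference */
	/* the driver does del_timer_sync() + add_timer(); just re-arm here */
	ep->timer_pending = 1;
}

static void fake_stop_timer(struct fake_ep *ep)
{
	ep->timer_pending = 0;	/* del_timer_sync() */
	ep->refcnt--;		/* put_ep() */
}

int main(void)
{
	struct fake_ep ep = { .refcnt = 1, .timer_pending = 0 };

	fake_start_timer(&ep);	/* arms and takes a reference */
	fake_start_timer(&ep);	/* restart: no extra reference */
	fake_stop_timer(&ep);	/* disarms and drops the reference */
	assert(ep.refcnt == 1);
	printf("refcnt back to %d\n", ep.refcnt);
	return 0;
}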
132static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
133{
134 struct cpl_tid_release *req;
135
136 skb = get_skb(skb, sizeof *req, GFP_KERNEL);
137 if (!skb)
138 return;
139 req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
140 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
141 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
142 skb->priority = CPL_PRIORITY_SETUP;
143 tdev->send(tdev, skb);
144 return;
145}
146
147int iwch_quiesce_tid(struct iwch_ep *ep)
148{
149 struct cpl_set_tcb_field *req;
150 struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
151
152 if (!skb)
153 return -ENOMEM;
154 req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
155 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
156 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
157 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
158 req->reply = 0;
159 req->cpu_idx = 0;
160 req->word = htons(W_TCB_RX_QUIESCE);
161 req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
162 req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
163
164 skb->priority = CPL_PRIORITY_DATA;
165 ep->com.tdev->send(ep->com.tdev, skb);
166 return 0;
167}
168
169int iwch_resume_tid(struct iwch_ep *ep)
170{
171 struct cpl_set_tcb_field *req;
172 struct sk_buff *skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
173
174 if (!skb)
175 return -ENOMEM;
176 req = (struct cpl_set_tcb_field *) skb_put(skb, sizeof(*req));
177 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
178 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
179 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid));
180 req->reply = 0;
181 req->cpu_idx = 0;
182 req->word = htons(W_TCB_RX_QUIESCE);
183 req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE);
184 req->val = 0;
185
186 skb->priority = CPL_PRIORITY_DATA;
187 ep->com.tdev->send(ep->com.tdev, skb);
188 return 0;
189}
190
191static void set_emss(struct iwch_ep *ep, u16 opt)
192{
193 PDBG("%s ep %p opt %u\n", __FUNCTION__, ep, opt);
194 ep->emss = T3C_DATA(ep->com.tdev)->mtus[G_TCPOPT_MSS(opt)] - 40;
195 if (G_TCPOPT_TSTAMP(opt))
196 ep->emss -= 12;
197 if (ep->emss < 128)
198 ep->emss = 128;
199 PDBG("emss=%d\n", ep->emss);
200}
201
202static enum iwch_ep_state state_read(struct iwch_ep_common *epc)
203{
204 unsigned long flags;
205 enum iwch_ep_state state;
206
207 spin_lock_irqsave(&epc->lock, flags);
208 state = epc->state;
209 spin_unlock_irqrestore(&epc->lock, flags);
210 return state;
211}
212
213static inline void __state_set(struct iwch_ep_common *epc,
214 enum iwch_ep_state new)
215{
216 epc->state = new;
217}
218
219static void state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
220{
221 unsigned long flags;
222
223 spin_lock_irqsave(&epc->lock, flags);
224 PDBG("%s - %s -> %s\n", __FUNCTION__, states[epc->state], states[new]);
225 __state_set(epc, new);
226 spin_unlock_irqrestore(&epc->lock, flags);
227 return;
228}
229
230static void *alloc_ep(int size, gfp_t gfp)
231{
232 struct iwch_ep_common *epc;
233
234 epc = kmalloc(size, gfp);
235 if (epc) {
236 memset(epc, 0, size);
237 kref_init(&epc->kref);
238 spin_lock_init(&epc->lock);
239 init_waitqueue_head(&epc->waitq);
240 }
241 PDBG("%s alloc ep %p\n", __FUNCTION__, epc);
242 return epc;
243}
244
245void __free_ep(struct kref *kref)
246{
247 struct iwch_ep_common *epc;
248 epc = container_of(kref, struct iwch_ep_common, kref);
249 PDBG("%s ep %p state %s\n", __FUNCTION__, epc, states[state_read(epc)]);
250 kfree(epc);
251}
252
253static void release_ep_resources(struct iwch_ep *ep)
254{
255 PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
256 cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
257 dst_release(ep->dst);
258 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
259 if (ep->com.tdev->type == T3B)
260 release_tid(ep->com.tdev, ep->hwtid, NULL);
261 put_ep(&ep->com);
262}
263
264static void process_work(struct work_struct *work)
265{
266 struct sk_buff *skb = NULL;
267 void *ep;
268 struct t3cdev *tdev;
269 int ret;
270
271 while ((skb = skb_dequeue(&rxq))) {
272 ep = *((void **) (skb->cb));
273 tdev = *((struct t3cdev **) (skb->cb + sizeof(void *)));
274 ret = work_handlers[G_OPCODE(ntohl((__force __be32)skb->csum))](tdev, skb, ep);
275 if (ret & CPL_RET_BUF_DONE)
276 kfree_skb(skb);
277
278 /*
279 * ep was referenced in sched(), and is freed here.
280 */
281 put_ep((struct iwch_ep_common *)ep);
282 }
283}
284
285static int status2errno(int status)
286{
287 switch (status) {
288 case CPL_ERR_NONE:
289 return 0;
290 case CPL_ERR_CONN_RESET:
291 return -ECONNRESET;
292 case CPL_ERR_ARP_MISS:
293 return -EHOSTUNREACH;
294 case CPL_ERR_CONN_TIMEDOUT:
295 return -ETIMEDOUT;
296 case CPL_ERR_TCAM_FULL:
297 return -ENOMEM;
298 case CPL_ERR_CONN_EXIST:
299 return -EADDRINUSE;
300 default:
301 return -EIO;
302 }
303}
304
305/*
306 * Try and reuse skbs already allocated...
307 */
308static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
309{
310 if (skb) {
311 BUG_ON(skb_cloned(skb));
312 skb_trim(skb, 0);
313 skb_get(skb);
314 } else {
315 skb = alloc_skb(len, gfp);
316 }
317 return skb;
318}
319
320static struct rtable *find_route(struct t3cdev *dev, __be32 local_ip,
321 __be32 peer_ip, __be16 local_port,
322 __be16 peer_port, u8 tos)
323{
324 struct rtable *rt;
325 struct flowi fl = {
326 .oif = 0,
327 .nl_u = {
328 .ip4_u = {
329 .daddr = peer_ip,
330 .saddr = local_ip,
331 .tos = tos}
332 },
333 .proto = IPPROTO_TCP,
334 .uli_u = {
335 .ports = {
336 .sport = local_port,
337 .dport = peer_port}
338 }
339 };
340
341 if (ip_route_output_flow(&rt, &fl, NULL, 0))
342 return NULL;
343 return rt;
344}
345
346static unsigned int find_best_mtu(const struct t3c_data *d, unsigned short mtu)
347{
348 int i = 0;
349
350 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu)
351 ++i;
352 return i;
353}
354
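find_best_mtu() above returns the index of the largest entry in the adapter's ascending MTU table that does not exceed the path MTU; send_connect() and accept_cr() then feed that index into V_MSS_IDX(). A standalone demo of the same walk over a made-up table (the real table comes from the cxgb3 core at runtime):

#include <stdio.h>

/* hypothetical table, ascending like the adapter's MTU table */
static const unsigned short mtus[] = {
	88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 4096, 9000,
};
static const int nmtus = sizeof(mtus) / sizeof(mtus[0]);

/* same walk as find_best_mtu(): index of the largest entry <= mtu */
static unsigned int best_mtu_idx(unsigned short mtu)
{
	int i = 0;

	while (i < nmtus - 1 && mtus[i + 1] <= mtu)
		++i;
	return i;
}

int main(void)
{
	unsigned short pmtu = 1500;
	unsigned int idx = best_mtu_idx(pmtu);

	printf("path mtu %u -> index %u (table mtu %u)\n",
	       pmtu, idx, mtus[idx]);
	return 0;
}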
355static void arp_failure_discard(struct t3cdev *dev, struct sk_buff *skb)
356{
357 PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
358 kfree_skb(skb);
359}
360
361/*
362 * Handle an ARP failure for an active open.
363 */
364static void act_open_req_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
365{
366	printk(KERN_ERR MOD "ARP failure during connect\n");
367 kfree_skb(skb);
368}
369
370/*
371 * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant
372 * and send it along.
373 */
374static void abort_arp_failure(struct t3cdev *dev, struct sk_buff *skb)
375{
376 struct cpl_abort_req *req = cplhdr(skb);
377
378 PDBG("%s t3cdev %p\n", __FUNCTION__, dev);
379 req->cmd = CPL_ABORT_NO_RST;
380 cxgb3_ofld_send(dev, skb);
381}
382
383static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
384{
385 struct cpl_close_con_req *req;
386 struct sk_buff *skb;
387
388 PDBG("%s ep %p\n", __FUNCTION__, ep);
389 skb = get_skb(NULL, sizeof(*req), gfp);
390 if (!skb) {
391 printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
392 return -ENOMEM;
393 }
394 skb->priority = CPL_PRIORITY_DATA;
395 set_arp_failure_handler(skb, arp_failure_discard);
396 req = (struct cpl_close_con_req *) skb_put(skb, sizeof(*req));
397 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
398 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
399 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
400 l2t_send(ep->com.tdev, skb, ep->l2t);
401 return 0;
402}
403
404static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
405{
406 struct cpl_abort_req *req;
407
408 PDBG("%s ep %p\n", __FUNCTION__, ep);
409 skb = get_skb(skb, sizeof(*req), gfp);
410 if (!skb) {
411 printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
412 __FUNCTION__);
413 return -ENOMEM;
414 }
415 skb->priority = CPL_PRIORITY_DATA;
416 set_arp_failure_handler(skb, abort_arp_failure);
417 req = (struct cpl_abort_req *) skb_put(skb, sizeof(*req));
418 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
419 req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
420 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
421 req->cmd = CPL_ABORT_SEND_RST;
422 l2t_send(ep->com.tdev, skb, ep->l2t);
423 return 0;
424}
425
426static int send_connect(struct iwch_ep *ep)
427{
428 struct cpl_act_open_req *req;
429 struct sk_buff *skb;
430 u32 opt0h, opt0l, opt2;
431 unsigned int mtu_idx;
432 int wscale;
433
434 PDBG("%s ep %p\n", __FUNCTION__, ep);
435
436 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
437 if (!skb) {
438 printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
439 __FUNCTION__);
440 return -ENOMEM;
441 }
442 mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
443 wscale = compute_wscale(rcv_win);
444 opt0h = V_NAGLE(0) |
445 V_NO_CONG(nocong) |
446 V_KEEP_ALIVE(1) |
447 F_TCAM_BYPASS |
448 V_WND_SCALE(wscale) |
449 V_MSS_IDX(mtu_idx) |
450 V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
451 opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
452 opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
453 skb->priority = CPL_PRIORITY_SETUP;
454 set_arp_failure_handler(skb, act_open_req_arp_failure);
455
456 req = (struct cpl_act_open_req *) skb_put(skb, sizeof(*req));
457 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
458 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ep->atid));
459 req->local_port = ep->com.local_addr.sin_port;
460 req->peer_port = ep->com.remote_addr.sin_port;
461 req->local_ip = ep->com.local_addr.sin_addr.s_addr;
462 req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
463 req->opt0h = htonl(opt0h);
464 req->opt0l = htonl(opt0l);
465 req->params = 0;
466 req->opt2 = htonl(opt2);
467 l2t_send(ep->com.tdev, skb, ep->l2t);
468 return 0;
469}
470
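send_connect() derives the TCP window-scale shift for opt0h from the module's rcv_win (256KB by default) via compute_wscale(), which is defined elsewhere in the driver. A plausible sketch of that computation, assuming the usual rule of the smallest shift that makes the window fit in the 16-bit TCP window field, capped at 14 per RFC 1323 (an assumption, not necessarily the driver's exact code):

#include <stdio.h>

/* smallest shift so the window fits in 16 bits, capped at 14 */
static int sketch_compute_wscale(int win)
{
	int wscale = 0;

	while (wscale < 14 && (65535 << wscale) < win)
		wscale++;
	return wscale;
}

int main(void)
{
	int rcv_win = 256 * 1024;	/* matches the module default */

	printf("rcv_win %d -> wscale %d\n",
	       rcv_win, sketch_compute_wscale(rcv_win));
	return 0;
}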
471static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
472{
473 int mpalen;
474 struct tx_data_wr *req;
475 struct mpa_message *mpa;
476 int len;
477
478 PDBG("%s ep %p pd_len %d\n", __FUNCTION__, ep, ep->plen);
479
480 BUG_ON(skb_cloned(skb));
481
482 mpalen = sizeof(*mpa) + ep->plen;
483 if (skb->data + mpalen + sizeof(*req) > skb->end) {
484 kfree_skb(skb);
485		skb = alloc_skb(mpalen + sizeof(*req), GFP_KERNEL);
486 if (!skb) {
487 connect_reply_upcall(ep, -ENOMEM);
488 return;
489 }
490 }
491 skb_trim(skb, 0);
492 skb_reserve(skb, sizeof(*req));
493 skb_put(skb, mpalen);
494 skb->priority = CPL_PRIORITY_DATA;
495 mpa = (struct mpa_message *) skb->data;
496 memset(mpa, 0, sizeof(*mpa));
497 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
498 mpa->flags = (crc_enabled ? MPA_CRC : 0) |
499 (markers_enabled ? MPA_MARKERS : 0);
500 mpa->private_data_size = htons(ep->plen);
501 mpa->revision = mpa_rev;
502
503 if (ep->plen)
504 memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
505
506 /*
507 * Reference the mpa skb. This ensures the data area
508 * will remain in memory until the hw acks the tx.
509 * Function tx_ack() will deref it.
510 */
511 skb_get(skb);
512 set_arp_failure_handler(skb, arp_failure_discard);
513 skb->h.raw = skb->data;
514 len = skb->len;
515 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
516 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
517 req->wr_lo = htonl(V_WR_TID(ep->hwtid));
518 req->len = htonl(len);
519 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
520 V_TX_SNDBUF(snd_win>>15));
521 req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
522 req->sndseq = htonl(ep->snd_seq);
523 BUG_ON(ep->mpa_skb);
524 ep->mpa_skb = skb;
525 l2t_send(ep->com.tdev, skb, ep->l2t);
526 start_ep_timer(ep);
527 state_set(&ep->com, MPA_REQ_SENT);
528 return;
529}
530
531static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
532{
533 int mpalen;
534 struct tx_data_wr *req;
535 struct mpa_message *mpa;
536 struct sk_buff *skb;
537
538 PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
539
540 mpalen = sizeof(*mpa) + plen;
541
542 skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
543 if (!skb) {
544 printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
545 return -ENOMEM;
546 }
547 skb_reserve(skb, sizeof(*req));
548 mpa = (struct mpa_message *) skb_put(skb, mpalen);
549 memset(mpa, 0, sizeof(*mpa));
550 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
551 mpa->flags = MPA_REJECT;
552 mpa->revision = mpa_rev;
553 mpa->private_data_size = htons(plen);
554 if (plen)
555 memcpy(mpa->private_data, pdata, plen);
556
557 /*
558 * Reference the mpa skb again. This ensures the data area
559 * will remain in memory until the hw acks the tx.
560 * Function tx_ack() will deref it.
561 */
562 skb_get(skb);
563 skb->priority = CPL_PRIORITY_DATA;
564 set_arp_failure_handler(skb, arp_failure_discard);
565 skb->h.raw = skb->data;
566 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
567 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
568 req->wr_lo = htonl(V_WR_TID(ep->hwtid));
569 req->len = htonl(mpalen);
570 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
571 V_TX_SNDBUF(snd_win>>15));
572 req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
573 req->sndseq = htonl(ep->snd_seq);
574 BUG_ON(ep->mpa_skb);
575 ep->mpa_skb = skb;
576 l2t_send(ep->com.tdev, skb, ep->l2t);
577 return 0;
578}
579
580static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
581{
582 int mpalen;
583 struct tx_data_wr *req;
584 struct mpa_message *mpa;
585 int len;
586 struct sk_buff *skb;
587
588 PDBG("%s ep %p plen %d\n", __FUNCTION__, ep, plen);
589
590 mpalen = sizeof(*mpa) + plen;
591
592 skb = get_skb(NULL, mpalen + sizeof(*req), GFP_KERNEL);
593 if (!skb) {
594 printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __FUNCTION__);
595 return -ENOMEM;
596 }
597 skb->priority = CPL_PRIORITY_DATA;
598 skb_reserve(skb, sizeof(*req));
599 mpa = (struct mpa_message *) skb_put(skb, mpalen);
600 memset(mpa, 0, sizeof(*mpa));
601 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
602 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
603 (markers_enabled ? MPA_MARKERS : 0);
604 mpa->revision = mpa_rev;
605 mpa->private_data_size = htons(plen);
606 if (plen)
607 memcpy(mpa->private_data, pdata, plen);
608
609 /*
610 * Reference the mpa skb. This ensures the data area
611 * will remain in memory until the hw acks the tx.
612 * Function tx_ack() will deref it.
613 */
614 skb_get(skb);
615 set_arp_failure_handler(skb, arp_failure_discard);
616 skb->h.raw = skb->data;
617 len = skb->len;
618 req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
619 req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
620 req->wr_lo = htonl(V_WR_TID(ep->hwtid));
621 req->len = htonl(len);
622 req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
623 V_TX_SNDBUF(snd_win>>15));
624 req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT);
625 req->sndseq = htonl(ep->snd_seq);
626 ep->mpa_skb = skb;
627 state_set(&ep->com, MPA_REP_SENT);
628 l2t_send(ep->com.tdev, skb, ep->l2t);
629 return 0;
630}
631
632static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
633{
634 struct iwch_ep *ep = ctx;
635 struct cpl_act_establish *req = cplhdr(skb);
636 unsigned int tid = GET_TID(req);
637
638 PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, tid);
639
640 dst_confirm(ep->dst);
641
642 /* setup the hwtid for this connection */
643 ep->hwtid = tid;
644 cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);
645
646 ep->snd_seq = ntohl(req->snd_isn);
647
648 set_emss(ep, ntohs(req->tcp_opt));
649
650 /* dealloc the atid */
651 cxgb3_free_atid(ep->com.tdev, ep->atid);
652
653 /* start MPA negotiation */
654 send_mpa_req(ep, skb);
655
656 return 0;
657}
658
659static void abort_connection(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
660{
661	PDBG("%s ep %p\n", __FUNCTION__, ep);
662 state_set(&ep->com, ABORTING);
663 send_abort(ep, skb, gfp);
664}
665
666static void close_complete_upcall(struct iwch_ep *ep)
667{
668 struct iw_cm_event event;
669
670 PDBG("%s ep %p\n", __FUNCTION__, ep);
671 memset(&event, 0, sizeof(event));
672 event.event = IW_CM_EVENT_CLOSE;
673 if (ep->com.cm_id) {
674 PDBG("close complete delivered ep %p cm_id %p tid %d\n",
675 ep, ep->com.cm_id, ep->hwtid);
676 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
677 ep->com.cm_id->rem_ref(ep->com.cm_id);
678 ep->com.cm_id = NULL;
679 ep->com.qp = NULL;
680 }
681}
682
683static void peer_close_upcall(struct iwch_ep *ep)
684{
685 struct iw_cm_event event;
686
687 PDBG("%s ep %p\n", __FUNCTION__, ep);
688 memset(&event, 0, sizeof(event));
689 event.event = IW_CM_EVENT_DISCONNECT;
690 if (ep->com.cm_id) {
691 PDBG("peer close delivered ep %p cm_id %p tid %d\n",
692 ep, ep->com.cm_id, ep->hwtid);
693 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
694 }
695}
696
697static void peer_abort_upcall(struct iwch_ep *ep)
698{
699 struct iw_cm_event event;
700
701 PDBG("%s ep %p\n", __FUNCTION__, ep);
702 memset(&event, 0, sizeof(event));
703 event.event = IW_CM_EVENT_CLOSE;
704 event.status = -ECONNRESET;
705 if (ep->com.cm_id) {
706 PDBG("abort delivered ep %p cm_id %p tid %d\n", ep,
707 ep->com.cm_id, ep->hwtid);
708 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
709 ep->com.cm_id->rem_ref(ep->com.cm_id);
710 ep->com.cm_id = NULL;
711 ep->com.qp = NULL;
712 }
713}
714
715static void connect_reply_upcall(struct iwch_ep *ep, int status)
716{
717 struct iw_cm_event event;
718
719 PDBG("%s ep %p status %d\n", __FUNCTION__, ep, status);
720 memset(&event, 0, sizeof(event));
721 event.event = IW_CM_EVENT_CONNECT_REPLY;
722 event.status = status;
723 event.local_addr = ep->com.local_addr;
724 event.remote_addr = ep->com.remote_addr;
725
726 if ((status == 0) || (status == -ECONNREFUSED)) {
727 event.private_data_len = ep->plen;
728 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
729 }
730 if (ep->com.cm_id) {
731 PDBG("%s ep %p tid %d status %d\n", __FUNCTION__, ep,
732 ep->hwtid, status);
733 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
734 }
735 if (status < 0) {
736 ep->com.cm_id->rem_ref(ep->com.cm_id);
737 ep->com.cm_id = NULL;
738 ep->com.qp = NULL;
739 }
740}
741
742static void connect_request_upcall(struct iwch_ep *ep)
743{
744 struct iw_cm_event event;
745
746 PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
747 memset(&event, 0, sizeof(event));
748 event.event = IW_CM_EVENT_CONNECT_REQUEST;
749 event.local_addr = ep->com.local_addr;
750 event.remote_addr = ep->com.remote_addr;
751 event.private_data_len = ep->plen;
752 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
753 event.provider_data = ep;
754 if (state_read(&ep->parent_ep->com) != DEAD)
755 ep->parent_ep->com.cm_id->event_handler(
756 ep->parent_ep->com.cm_id,
757 &event);
758 put_ep(&ep->parent_ep->com);
759 ep->parent_ep = NULL;
760}
761
762static void established_upcall(struct iwch_ep *ep)
763{
764 struct iw_cm_event event;
765
766 PDBG("%s ep %p\n", __FUNCTION__, ep);
767 memset(&event, 0, sizeof(event));
768 event.event = IW_CM_EVENT_ESTABLISHED;
769 if (ep->com.cm_id) {
770 PDBG("%s ep %p tid %d\n", __FUNCTION__, ep, ep->hwtid);
771 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
772 }
773}
774
775static int update_rx_credits(struct iwch_ep *ep, u32 credits)
776{
777 struct cpl_rx_data_ack *req;
778 struct sk_buff *skb;
779
780 PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
781 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
782 if (!skb) {
783 printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
784 return 0;
785 }
786
787 req = (struct cpl_rx_data_ack *) skb_put(skb, sizeof(*req));
788 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
789 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
790 req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
791 skb->priority = CPL_PRIORITY_ACK;
792 ep->com.tdev->send(ep->com.tdev, skb);
793 return credits;
794}
795
796static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
797{
798 struct mpa_message *mpa;
799 u16 plen;
800 struct iwch_qp_attributes attrs;
801 enum iwch_qp_attr_mask mask;
802 int err;
803
804 PDBG("%s ep %p\n", __FUNCTION__, ep);
805
806 /*
807 * Stop mpa timer. If it expired, then the state has
808 * changed and we bail since ep_timeout already aborted
809 * the connection.
810 */
811 stop_ep_timer(ep);
812 if (state_read(&ep->com) != MPA_REQ_SENT)
813 return;
814
815 /*
816 * If we get more than the supported amount of private data
817 * then we must fail this connection.
818 */
819 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
820 err = -EINVAL;
821 goto err;
822 }
823
824 /*
825 * copy the new data into our accumulation buffer.
826 */
827 memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
828 ep->mpa_pkt_len += skb->len;
829
830 /*
831 * if we don't even have the mpa message, then bail.
832 */
833 if (ep->mpa_pkt_len < sizeof(*mpa))
834 return;
835 mpa = (struct mpa_message *) ep->mpa_pkt;
836
837 /* Validate MPA header. */
838 if (mpa->revision != mpa_rev) {
839 err = -EPROTO;
840 goto err;
841 }
842 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
843 err = -EPROTO;
844 goto err;
845 }
846
847 plen = ntohs(mpa->private_data_size);
848
849 /*
850 * Fail if there's too much private data.
851 */
852 if (plen > MPA_MAX_PRIVATE_DATA) {
853 err = -EPROTO;
854 goto err;
855 }
856
857 /*
858	 * Fail if plen does not account for the packet size.
859 */
860 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
861 err = -EPROTO;
862 goto err;
863 }
864
865 ep->plen = (u8) plen;
866
867 /*
868 * If we don't have all the pdata yet, then bail.
869 * We'll continue process when more data arrives.
870 */
871 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
872 return;
873
874 if (mpa->flags & MPA_REJECT) {
875 err = -ECONNREFUSED;
876 goto err;
877 }
878
879 /*
880 * If we get here we have accumulated the entire mpa
881 * start reply message including private data. And
882 * the MPA header is valid.
883 */
884 state_set(&ep->com, FPDU_MODE);
885 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
886 ep->mpa_attr.recv_marker_enabled = markers_enabled;
887 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
888 ep->mpa_attr.version = mpa_rev;
889 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
890 "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
891 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
892 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
893
894 attrs.mpa_attr = ep->mpa_attr;
895 attrs.max_ird = ep->ird;
896 attrs.max_ord = ep->ord;
897 attrs.llp_stream_handle = ep;
898 attrs.next_state = IWCH_QP_STATE_RTS;
899
900 mask = IWCH_QP_ATTR_NEXT_STATE |
901 IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
902 IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
903
904 /* bind QP and TID with INIT_WR */
905 err = iwch_modify_qp(ep->com.qp->rhp,
906 ep->com.qp, mask, &attrs, 1);
907 if (!err)
908 goto out;
909err:
910 abort_connection(ep, skb, GFP_KERNEL);
911out:
912 connect_reply_upcall(ep, err);
913 return;
914}
915
916static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
917{
918 struct mpa_message *mpa;
919 u16 plen;
920
921 PDBG("%s ep %p\n", __FUNCTION__, ep);
922
923 /*
924 * Stop mpa timer. If it expired, then the state has
925 * changed and we bail since ep_timeout already aborted
926 * the connection.
927 */
928 stop_ep_timer(ep);
929 if (state_read(&ep->com) != MPA_REQ_WAIT)
930 return;
931
932 /*
933 * If we get more than the supported amount of private data
934 * then we must fail this connection.
935 */
936 if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
937 abort_connection(ep, skb, GFP_KERNEL);
938 return;
939 }
940
941 PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
942
943 /*
944 * Copy the new data into our accumulation buffer.
945 */
946 memcpy(&(ep->mpa_pkt[ep->mpa_pkt_len]), skb->data, skb->len);
947 ep->mpa_pkt_len += skb->len;
948
949 /*
950 * If we don't even have the mpa message, then bail.
951 * We'll continue process when more data arrives.
952 */
953 if (ep->mpa_pkt_len < sizeof(*mpa))
954 return;
955 PDBG("%s enter (%s line %u)\n", __FUNCTION__, __FILE__, __LINE__);
956 mpa = (struct mpa_message *) ep->mpa_pkt;
957
958 /*
959 * Validate MPA Header.
960 */
961 if (mpa->revision != mpa_rev) {
962 abort_connection(ep, skb, GFP_KERNEL);
963 return;
964 }
965
966 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
967 abort_connection(ep, skb, GFP_KERNEL);
968 return;
969 }
970
971 plen = ntohs(mpa->private_data_size);
972
973 /*
974 * Fail if there's too much private data.
975 */
976 if (plen > MPA_MAX_PRIVATE_DATA) {
977 abort_connection(ep, skb, GFP_KERNEL);
978 return;
979 }
980
981 /*
982	 * Fail if plen does not account for the packet size.
983 */
984 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
985 abort_connection(ep, skb, GFP_KERNEL);
986 return;
987 }
988 ep->plen = (u8) plen;
989
990 /*
991 * If we don't have all the pdata yet, then bail.
992 */
993 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
994 return;
995
996 /*
997 * If we get here we have accumulated the entire mpa
998	 * start request message including private data.
999 */
1000 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1001 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1002 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1003 ep->mpa_attr.version = mpa_rev;
1004 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1005 "xmit_marker_enabled=%d, version=%d\n", __FUNCTION__,
1006 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1007 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1008
1009 state_set(&ep->com, MPA_REQ_RCVD);
1010
1011 /* drive upcall */
1012 connect_request_upcall(ep);
1013 return;
1014}
1015
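process_mpa_reply() and process_mpa_request() share an accumulate-and-revalidate pattern: each CPL_RX_DATA segment is appended to ep->mpa_pkt and the handler simply returns until sizeof(struct mpa_message) plus the advertised private-data length has arrived, re-checking the header bounds on every pass. A userspace sketch of that pattern with a hypothetical 4-byte header (not the driver's structures):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HDR_LEN 4	/* hypothetical: 2-byte magic + 2-byte payload length */

static uint8_t buf[128];
static size_t buf_len;

/* returns 1 when a complete message has accumulated, 0 to wait, -1 on error */
static int feed(const uint8_t *data, size_t len)
{
	size_t plen;

	if (buf_len + len > sizeof(buf))
		return -1;		/* too much private data */
	memcpy(buf + buf_len, data, len);
	buf_len += len;

	if (buf_len < HDR_LEN)
		return 0;		/* header not complete yet */
	plen = (buf[2] << 8) | buf[3];	/* big-endian length field */
	if (buf_len > HDR_LEN + plen)
		return -1;		/* length field inconsistent */
	if (buf_len < HDR_LEN + plen)
		return 0;		/* wait for more segments */
	return 1;			/* whole message is in buf */
}

int main(void)
{
	const uint8_t seg1[] = { 0x4d, 0x50, 0x00, 0x04, 'a', 'b' };
	const uint8_t seg2[] = { 'c', 'd' };

	printf("after seg1: %d\n", feed(seg1, sizeof(seg1)));	/* 0: wait */
	printf("after seg2: %d\n", feed(seg2, sizeof(seg2)));	/* 1: done */
	return 0;
}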
1016static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1017{
1018 struct iwch_ep *ep = ctx;
1019 struct cpl_rx_data *hdr = cplhdr(skb);
1020 unsigned int dlen = ntohs(hdr->len);
1021
1022 PDBG("%s ep %p dlen %u\n", __FUNCTION__, ep, dlen);
1023
1024 skb_pull(skb, sizeof(*hdr));
1025 skb_trim(skb, dlen);
1026
1027 switch (state_read(&ep->com)) {
1028 case MPA_REQ_SENT:
1029 process_mpa_reply(ep, skb);
1030 break;
1031 case MPA_REQ_WAIT:
1032 process_mpa_request(ep, skb);
1033 break;
1034 case MPA_REP_SENT:
1035 break;
1036 default:
1037 printk(KERN_ERR MOD "%s Unexpected streaming data."
1038 " ep %p state %d tid %d\n",
1039 __FUNCTION__, ep, state_read(&ep->com), ep->hwtid);
1040
1041 /*
1042 * The ep will timeout and inform the ULP of the failure.
1043 * See ep_timeout().
1044 */
1045 break;
1046 }
1047
1048 /* update RX credits */
1049 update_rx_credits(ep, dlen);
1050
1051 return CPL_RET_BUF_DONE;
1052}
1053
1054/*
1055 * Upcall from the adapter indicating data has been transmitted.
1056 * For us it's just the single MPA request or reply. We can now free
1057 * the skb holding the mpa message.
1058 */
1059static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1060{
1061 struct iwch_ep *ep = ctx;
1062 struct cpl_wr_ack *hdr = cplhdr(skb);
1063 unsigned int credits = ntohs(hdr->credits);
1064 enum iwch_qp_attr_mask mask;
1065
1066 PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);
1067
1068 if (credits == 0)
1069 return CPL_RET_BUF_DONE;
1070 BUG_ON(credits != 1);
1071 BUG_ON(ep->mpa_skb == NULL);
1072 kfree_skb(ep->mpa_skb);
1073 ep->mpa_skb = NULL;
1074 dst_confirm(ep->dst);
1075 if (state_read(&ep->com) == MPA_REP_SENT) {
1076 struct iwch_qp_attributes attrs;
1077
1078 /* bind QP to EP and move to RTS */
1079 attrs.mpa_attr = ep->mpa_attr;
1080 attrs.max_ird = ep->ord;
1081 attrs.max_ord = ep->ord;
1082 attrs.llp_stream_handle = ep;
1083 attrs.next_state = IWCH_QP_STATE_RTS;
1084
1085 /* bind QP and TID with INIT_WR */
1086 mask = IWCH_QP_ATTR_NEXT_STATE |
1087 IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1088 IWCH_QP_ATTR_MPA_ATTR |
1089 IWCH_QP_ATTR_MAX_IRD |
1090 IWCH_QP_ATTR_MAX_ORD;
1091
1092 ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
1093 ep->com.qp, mask, &attrs, 1);
1094
1095 if (!ep->com.rpl_err) {
1096 state_set(&ep->com, FPDU_MODE);
1097 established_upcall(ep);
1098 }
1099
1100 ep->com.rpl_done = 1;
1101 PDBG("waking up ep %p\n", ep);
1102 wake_up(&ep->com.waitq);
1103 }
1104 return CPL_RET_BUF_DONE;
1105}
1106
1107static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1108{
1109 struct iwch_ep *ep = ctx;
1110
1111 PDBG("%s ep %p\n", __FUNCTION__, ep);
1112
1113 close_complete_upcall(ep);
1114 state_set(&ep->com, DEAD);
1115 release_ep_resources(ep);
1116 return CPL_RET_BUF_DONE;
1117}
1118
1119static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1120{
1121 struct iwch_ep *ep = ctx;
1122 struct cpl_act_open_rpl *rpl = cplhdr(skb);
1123
1124 PDBG("%s ep %p status %u errno %d\n", __FUNCTION__, ep, rpl->status,
1125 status2errno(rpl->status));
1126 connect_reply_upcall(ep, status2errno(rpl->status));
1127 state_set(&ep->com, DEAD);
1128 if (ep->com.tdev->type == T3B)
1129 release_tid(ep->com.tdev, GET_TID(rpl), NULL);
1130 cxgb3_free_atid(ep->com.tdev, ep->atid);
1131 dst_release(ep->dst);
1132 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
1133 put_ep(&ep->com);
1134 return CPL_RET_BUF_DONE;
1135}
1136
1137static int listen_start(struct iwch_listen_ep *ep)
1138{
1139 struct sk_buff *skb;
1140 struct cpl_pass_open_req *req;
1141
1142 PDBG("%s ep %p\n", __FUNCTION__, ep);
1143 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1144 if (!skb) {
1145 printk(KERN_ERR MOD "t3c_listen_start failed to alloc skb!\n");
1146 return -ENOMEM;
1147 }
1148
1149 req = (struct cpl_pass_open_req *) skb_put(skb, sizeof(*req));
1150 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1151 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, ep->stid));
1152 req->local_port = ep->com.local_addr.sin_port;
1153 req->local_ip = ep->com.local_addr.sin_addr.s_addr;
1154 req->peer_port = 0;
1155 req->peer_ip = 0;
1156 req->peer_netmask = 0;
1157 req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
1158 req->opt0l = htonl(V_RCV_BUFSIZ(rcv_win>>10));
1159 req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
1160
1161 skb->priority = 1;
1162 ep->com.tdev->send(ep->com.tdev, skb);
1163 return 0;
1164}
1165
1166static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1167{
1168 struct iwch_listen_ep *ep = ctx;
1169 struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1170
1171 PDBG("%s ep %p status %d error %d\n", __FUNCTION__, ep,
1172 rpl->status, status2errno(rpl->status));
1173 ep->com.rpl_err = status2errno(rpl->status);
1174 ep->com.rpl_done = 1;
1175 wake_up(&ep->com.waitq);
1176
1177 return CPL_RET_BUF_DONE;
1178}
1179
1180static int listen_stop(struct iwch_listen_ep *ep)
1181{
1182 struct sk_buff *skb;
1183 struct cpl_close_listserv_req *req;
1184
1185 PDBG("%s ep %p\n", __FUNCTION__, ep);
1186 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1187 if (!skb) {
1188 printk(KERN_ERR MOD "%s - failed to alloc skb\n", __FUNCTION__);
1189 return -ENOMEM;
1190 }
1191 req = (struct cpl_close_listserv_req *) skb_put(skb, sizeof(*req));
1192 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1193 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
1194 skb->priority = 1;
1195 ep->com.tdev->send(ep->com.tdev, skb);
1196 return 0;
1197}
1198
1199static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
1200 void *ctx)
1201{
1202 struct iwch_listen_ep *ep = ctx;
1203 struct cpl_close_listserv_rpl *rpl = cplhdr(skb);
1204
1205 PDBG("%s ep %p\n", __FUNCTION__, ep);
1206 ep->com.rpl_err = status2errno(rpl->status);
1207 ep->com.rpl_done = 1;
1208 wake_up(&ep->com.waitq);
1209 return CPL_RET_BUF_DONE;
1210}
1211
1212static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb)
1213{
1214 struct cpl_pass_accept_rpl *rpl;
1215 unsigned int mtu_idx;
1216 u32 opt0h, opt0l, opt2;
1217 int wscale;
1218
1219 PDBG("%s ep %p\n", __FUNCTION__, ep);
1220 BUG_ON(skb_cloned(skb));
1221 skb_trim(skb, sizeof(*rpl));
1222 skb_get(skb);
1223 mtu_idx = find_best_mtu(T3C_DATA(ep->com.tdev), dst_mtu(ep->dst));
1224 wscale = compute_wscale(rcv_win);
1225 opt0h = V_NAGLE(0) |
1226 V_NO_CONG(nocong) |
1227 V_KEEP_ALIVE(1) |
1228 F_TCAM_BYPASS |
1229 V_WND_SCALE(wscale) |
1230 V_MSS_IDX(mtu_idx) |
1231 V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx);
1232 opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10);
1233 opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor);
1234
1235 rpl = cplhdr(skb);
1236 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1237 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid));
1238 rpl->peer_ip = peer_ip;
1239 rpl->opt0h = htonl(opt0h);
1240 rpl->opt0l_status = htonl(opt0l | CPL_PASS_OPEN_ACCEPT);
1241 rpl->opt2 = htonl(opt2);
1242 rpl->rsvd = rpl->opt2; /* workaround for HW bug */
1243 skb->priority = CPL_PRIORITY_SETUP;
1244 l2t_send(ep->com.tdev, skb, ep->l2t);
1245
1246 return;
1247}
1248
1249static void reject_cr(struct t3cdev *tdev, u32 hwtid, __be32 peer_ip,
1250 struct sk_buff *skb)
1251{
1252 PDBG("%s t3cdev %p tid %u peer_ip %x\n", __FUNCTION__, tdev, hwtid,
1253 peer_ip);
1254 BUG_ON(skb_cloned(skb));
1255 skb_trim(skb, sizeof(struct cpl_tid_release));
1256 skb_get(skb);
1257
1258 if (tdev->type == T3B)
1259 release_tid(tdev, hwtid, skb);
1260 else {
1261 struct cpl_pass_accept_rpl *rpl;
1262
1263 rpl = cplhdr(skb);
1264 skb->priority = CPL_PRIORITY_SETUP;
1265 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1266 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1267 hwtid));
1268 rpl->peer_ip = peer_ip;
1269 rpl->opt0h = htonl(F_TCAM_BYPASS);
1270 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
1271 rpl->opt2 = 0;
1272 rpl->rsvd = rpl->opt2;
1273 tdev->send(tdev, skb);
1274 }
1275}
1276
1277static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1278{
1279 struct iwch_ep *child_ep, *parent_ep = ctx;
1280 struct cpl_pass_accept_req *req = cplhdr(skb);
1281 unsigned int hwtid = GET_TID(req);
1282 struct dst_entry *dst;
1283 struct l2t_entry *l2t;
1284 struct rtable *rt;
1285 struct iff_mac tim;
1286
1287 PDBG("%s parent ep %p tid %u\n", __FUNCTION__, parent_ep, hwtid);
1288
1289 if (state_read(&parent_ep->com) != LISTEN) {
1290		printk(KERN_ERR MOD "%s - listening ep not in LISTEN\n",
1291 __FUNCTION__);
1292 goto reject;
1293 }
1294
1295 /*
1296 * Find the netdev for this connection request.
1297 */
1298 tim.mac_addr = req->dst_mac;
1299 tim.vlan_tag = ntohs(req->vlan_tag);
1300 if (tdev->ctl(tdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
1301 printk(KERN_ERR
1302 "%s bad dst mac %02x %02x %02x %02x %02x %02x\n",
1303 __FUNCTION__,
1304 req->dst_mac[0],
1305 req->dst_mac[1],
1306 req->dst_mac[2],
1307 req->dst_mac[3],
1308 req->dst_mac[4],
1309 req->dst_mac[5]);
1310 goto reject;
1311 }
1312
1313 /* Find output route */
1314 rt = find_route(tdev,
1315 req->local_ip,
1316 req->peer_ip,
1317 req->local_port,
1318 req->peer_port, G_PASS_OPEN_TOS(ntohl(req->tos_tid)));
1319 if (!rt) {
1320 printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
1321 __FUNCTION__);
1322 goto reject;
1323 }
1324 dst = &rt->u.dst;
1325 l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
1326 if (!l2t) {
1327 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1328 __FUNCTION__);
1329 dst_release(dst);
1330 goto reject;
1331 }
1332 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1333 if (!child_ep) {
1334 printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1335 __FUNCTION__);
1336 l2t_release(L2DATA(tdev), l2t);
1337 dst_release(dst);
1338 goto reject;
1339 }
1340 state_set(&child_ep->com, CONNECTING);
1341 child_ep->com.tdev = tdev;
1342 child_ep->com.cm_id = NULL;
1343 child_ep->com.local_addr.sin_family = PF_INET;
1344 child_ep->com.local_addr.sin_port = req->local_port;
1345 child_ep->com.local_addr.sin_addr.s_addr = req->local_ip;
1346 child_ep->com.remote_addr.sin_family = PF_INET;
1347 child_ep->com.remote_addr.sin_port = req->peer_port;
1348 child_ep->com.remote_addr.sin_addr.s_addr = req->peer_ip;
1349 get_ep(&parent_ep->com);
1350 child_ep->parent_ep = parent_ep;
1351 child_ep->tos = G_PASS_OPEN_TOS(ntohl(req->tos_tid));
1352 child_ep->l2t = l2t;
1353 child_ep->dst = dst;
1354 child_ep->hwtid = hwtid;
1355 init_timer(&child_ep->timer);
1356 cxgb3_insert_tid(tdev, &t3c_client, child_ep, hwtid);
1357 accept_cr(child_ep, req->peer_ip, skb);
1358 goto out;
1359reject:
1360 reject_cr(tdev, hwtid, req->peer_ip, skb);
1361out:
1362 return CPL_RET_BUF_DONE;
1363}
1364
1365static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1366{
1367 struct iwch_ep *ep = ctx;
1368 struct cpl_pass_establish *req = cplhdr(skb);
1369
1370 PDBG("%s ep %p\n", __FUNCTION__, ep);
1371 ep->snd_seq = ntohl(req->snd_isn);
1372
1373 set_emss(ep, ntohs(req->tcp_opt));
1374
1375 dst_confirm(ep->dst);
1376 state_set(&ep->com, MPA_REQ_WAIT);
1377 start_ep_timer(ep);
1378
1379 return CPL_RET_BUF_DONE;
1380}
1381
1382static int peer_close(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1383{
1384 struct iwch_ep *ep = ctx;
1385 struct iwch_qp_attributes attrs;
1386 unsigned long flags;
1387 int disconnect = 1;
1388 int release = 0;
1389
1390 PDBG("%s ep %p\n", __FUNCTION__, ep);
1391 dst_confirm(ep->dst);
1392
1393 spin_lock_irqsave(&ep->com.lock, flags);
1394 switch (ep->com.state) {
1395 case MPA_REQ_WAIT:
1396 __state_set(&ep->com, CLOSING);
1397 break;
1398 case MPA_REQ_SENT:
1399 __state_set(&ep->com, CLOSING);
1400 connect_reply_upcall(ep, -ECONNRESET);
1401 break;
1402 case MPA_REQ_RCVD:
1403
1404 /*
1405 * We're gonna mark this puppy DEAD, but keep
1406 * the reference on it until the ULP accepts or
1407 * rejects the CR.
1408 */
1409 __state_set(&ep->com, CLOSING);
1410 get_ep(&ep->com);
1411 break;
1412 case MPA_REP_SENT:
1413 __state_set(&ep->com, CLOSING);
1414 ep->com.rpl_done = 1;
1415 ep->com.rpl_err = -ECONNRESET;
1416 PDBG("waking up ep %p\n", ep);
1417 wake_up(&ep->com.waitq);
1418 break;
1419 case FPDU_MODE:
1420 __state_set(&ep->com, CLOSING);
1421 attrs.next_state = IWCH_QP_STATE_CLOSING;
1422 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1423 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1424 peer_close_upcall(ep);
1425 break;
1426 case ABORTING:
1427 disconnect = 0;
1428 break;
1429 case CLOSING:
1430 start_ep_timer(ep);
1431 __state_set(&ep->com, MORIBUND);
1432 disconnect = 0;
1433 break;
1434 case MORIBUND:
1435 stop_ep_timer(ep);
1436 if (ep->com.cm_id && ep->com.qp) {
1437 attrs.next_state = IWCH_QP_STATE_IDLE;
1438 iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
1439 IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
1440 }
1441 close_complete_upcall(ep);
1442 __state_set(&ep->com, DEAD);
1443 release = 1;
1444 disconnect = 0;
1445 break;
1446 case DEAD:
1447 disconnect = 0;
1448 break;
1449 default:
1450 BUG_ON(1);
1451 }
1452 spin_unlock_irqrestore(&ep->com.lock, flags);
1453 if (disconnect)
1454 iwch_ep_disconnect(ep, 0, GFP_KERNEL);
1455 if (release)
1456 release_ep_resources(ep);
1457 return CPL_RET_BUF_DONE;
1458}
1459
1460/*
1461 * Returns whether an ABORT_REQ_RSS message is a negative advice.
1462 */
1463static inline int is_neg_adv_abort(unsigned int status)
1464{
1465 return status == CPL_ERR_RTX_NEG_ADVICE ||
1466 status == CPL_ERR_PERSIST_NEG_ADVICE;
1467}
1468
1469static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1470{
1471 struct cpl_abort_req_rss *req = cplhdr(skb);
1472 struct iwch_ep *ep = ctx;
1473 struct cpl_abort_rpl *rpl;
1474 struct sk_buff *rpl_skb;
1475 struct iwch_qp_attributes attrs;
1476 int ret;
1477 int state;
1478
1479 if (is_neg_adv_abort(req->status)) {
1480 PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
1481 ep->hwtid);
1482 t3_l2t_send_event(ep->com.tdev, ep->l2t);
1483 return CPL_RET_BUF_DONE;
1484 }
1485
1486 state = state_read(&ep->com);
1487 PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
1488 switch (state) {
1489 case CONNECTING:
1490 break;
1491 case MPA_REQ_WAIT:
1492 break;
1493 case MPA_REQ_SENT:
1494 connect_reply_upcall(ep, -ECONNRESET);
1495 break;
1496 case MPA_REP_SENT:
1497 ep->com.rpl_done = 1;
1498 ep->com.rpl_err = -ECONNRESET;
1499 PDBG("waking up ep %p\n", ep);
1500 wake_up(&ep->com.waitq);
1501 break;
1502 case MPA_REQ_RCVD:
1503
1504 /*
1505 * We're gonna mark this puppy DEAD, but keep
1506 * the reference on it until the ULP accepts or
1507 * rejects the CR.
1508 */
1509 get_ep(&ep->com);
1510 break;
1511 case MORIBUND:
1512		stop_ep_timer(ep);	/* fall through */
1513 case FPDU_MODE:
1514 case CLOSING:
1515 if (ep->com.cm_id && ep->com.qp) {
1516 attrs.next_state = IWCH_QP_STATE_ERROR;
1517 ret = iwch_modify_qp(ep->com.qp->rhp,
1518 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1519 &attrs, 1);
1520 if (ret)
1521 printk(KERN_ERR MOD
1522 "%s - qp <- error failed!\n",
1523 __FUNCTION__);
1524 }
1525 peer_abort_upcall(ep);
1526 break;
1527 case ABORTING:
1528 break;
1529 case DEAD:
1530 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __FUNCTION__);
1531 return CPL_RET_BUF_DONE;
1532 default:
1533 BUG_ON(1);
1534 break;
1535 }
1536 dst_confirm(ep->dst);
1537
1538 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
1539 if (!rpl_skb) {
1540 printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
1541 __FUNCTION__);
1542 dst_release(ep->dst);
1543 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
1544 put_ep(&ep->com);
1545 return CPL_RET_BUF_DONE;
1546 }
1547 rpl_skb->priority = CPL_PRIORITY_DATA;
1548 rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
1549 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
1550 rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
1551 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
1552 rpl->cmd = CPL_ABORT_NO_RST;
1553 ep->com.tdev->send(ep->com.tdev, rpl_skb);
1554 if (state != ABORTING) {
1555 state_set(&ep->com, DEAD);
1556 release_ep_resources(ep);
1557 }
1558 return CPL_RET_BUF_DONE;
1559}
1560
1561static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1562{
1563 struct iwch_ep *ep = ctx;
1564 struct iwch_qp_attributes attrs;
1565 unsigned long flags;
1566 int release = 0;
1567
1568 PDBG("%s ep %p\n", __FUNCTION__, ep);
1569 BUG_ON(!ep);
1570
1571 /* The cm_id may be null if we failed to connect */
1572 spin_lock_irqsave(&ep->com.lock, flags);
1573 switch (ep->com.state) {
1574 case CLOSING:
1575 start_ep_timer(ep);
1576 __state_set(&ep->com, MORIBUND);
1577 break;
1578 case MORIBUND:
1579 stop_ep_timer(ep);
1580 if ((ep->com.cm_id) && (ep->com.qp)) {
1581 attrs.next_state = IWCH_QP_STATE_IDLE;
1582 iwch_modify_qp(ep->com.qp->rhp,
1583 ep->com.qp,
1584 IWCH_QP_ATTR_NEXT_STATE,
1585 &attrs, 1);
1586 }
1587 close_complete_upcall(ep);
1588 __state_set(&ep->com, DEAD);
1589 release = 1;
1590 break;
1591 case DEAD:
1592 default:
1593 BUG_ON(1);
1594 break;
1595 }
1596 spin_unlock_irqrestore(&ep->com.lock, flags);
1597 if (release)
1598 release_ep_resources(ep);
1599 return CPL_RET_BUF_DONE;
1600}
1601
1602/*
1603 * T3A does 3 things when a TERM is received:
1604 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1605 * 2) generate an async event on the QP with the TERMINATE opcode
1606 * 3) post a TERMINATE opcode cqe into the associated CQ.
1607 *
1608 * For (1), we save the message in the qp for later consumption by the consumer.
1609 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
1610 * For (3), we toss the CQE in cxio_poll_cq().
1611 *
1612 * terminate() handles case (1)...
1613 */
1614static int terminate(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1615{
1616 struct iwch_ep *ep = ctx;
1617
1618 PDBG("%s ep %p\n", __FUNCTION__, ep);
1619 skb_pull(skb, sizeof(struct cpl_rdma_terminate));
1620 PDBG("%s saving %d bytes of term msg\n", __FUNCTION__, skb->len);
1621 memcpy(ep->com.qp->attr.terminate_buffer, skb->data, skb->len);
1622 ep->com.qp->attr.terminate_msg_len = skb->len;
1623 ep->com.qp->attr.is_terminate_local = 0;
1624 return CPL_RET_BUF_DONE;
1625}
1626
1627static int ec_status(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1628{
1629 struct cpl_rdma_ec_status *rep = cplhdr(skb);
1630 struct iwch_ep *ep = ctx;
1631
1632 PDBG("%s ep %p tid %u status %d\n", __FUNCTION__, ep, ep->hwtid,
1633 rep->status);
1634 if (rep->status) {
1635 struct iwch_qp_attributes attrs;
1636
1637 printk(KERN_ERR MOD "%s BAD CLOSE - Aborting tid %u\n",
1638 __FUNCTION__, ep->hwtid);
1639 attrs.next_state = IWCH_QP_STATE_ERROR;
1640 iwch_modify_qp(ep->com.qp->rhp,
1641 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1642 &attrs, 1);
1643 abort_connection(ep, NULL, GFP_KERNEL);
1644 }
1645 return CPL_RET_BUF_DONE;
1646}
1647
1648static void ep_timeout(unsigned long arg)
1649{
1650 struct iwch_ep *ep = (struct iwch_ep *)arg;
1651 struct iwch_qp_attributes attrs;
1652 unsigned long flags;
1653
1654 spin_lock_irqsave(&ep->com.lock, flags);
1655 PDBG("%s ep %p tid %u state %d\n", __FUNCTION__, ep, ep->hwtid,
1656 ep->com.state);
1657 switch (ep->com.state) {
1658 case MPA_REQ_SENT:
1659 connect_reply_upcall(ep, -ETIMEDOUT);
1660 break;
1661 case MPA_REQ_WAIT:
1662 break;
1663 case MORIBUND:
1664 if (ep->com.cm_id && ep->com.qp) {
1665 attrs.next_state = IWCH_QP_STATE_ERROR;
1666 iwch_modify_qp(ep->com.qp->rhp,
1667 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
1668 &attrs, 1);
1669 }
1670 break;
1671 default:
1672 BUG();
1673 }
1674 __state_set(&ep->com, CLOSING);
1675 spin_unlock_irqrestore(&ep->com.lock, flags);
1676 abort_connection(ep, NULL, GFP_ATOMIC);
1677 put_ep(&ep->com);
1678}
1679
1680int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
1681{
1682 int err;
1683 struct iwch_ep *ep = to_ep(cm_id);
1684 PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
1685
1686 if (state_read(&ep->com) == DEAD) {
1687 put_ep(&ep->com);
1688 return -ECONNRESET;
1689 }
1690 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1691 state_set(&ep->com, CLOSING);
1692 if (mpa_rev == 0)
1693 abort_connection(ep, NULL, GFP_KERNEL);
1694 else {
1695 err = send_mpa_reject(ep, pdata, pdata_len);
1696 err = send_halfclose(ep, GFP_KERNEL);
1697 }
1698 return 0;
1699}
1700
1701int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1702{
1703 int err;
1704 struct iwch_qp_attributes attrs;
1705 enum iwch_qp_attr_mask mask;
1706 struct iwch_ep *ep = to_ep(cm_id);
1707 struct iwch_dev *h = to_iwch_dev(cm_id->device);
1708 struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
1709
1710 PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
1711 if (state_read(&ep->com) == DEAD) {
1712 put_ep(&ep->com);
1713 return -ECONNRESET;
1714 }
1715
1716 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
1717 BUG_ON(!qp);
1718
1719 if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
1720 (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
1721 abort_connection(ep, NULL, GFP_KERNEL);
1722 return -EINVAL;
1723 }
1724
1725 cm_id->add_ref(cm_id);
1726 ep->com.cm_id = cm_id;
1727 ep->com.qp = qp;
1728
1729 ep->com.rpl_done = 0;
1730 ep->com.rpl_err = 0;
1731 ep->ird = conn_param->ird;
1732 ep->ord = conn_param->ord;
1733 PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
1734 get_ep(&ep->com);
1735 err = send_mpa_reply(ep, conn_param->private_data,
1736 conn_param->private_data_len);
1737 if (err) {
1738 ep->com.cm_id = NULL;
1739 ep->com.qp = NULL;
1740 cm_id->rem_ref(cm_id);
1741 abort_connection(ep, NULL, GFP_KERNEL);
1742 put_ep(&ep->com);
1743 return err;
1744 }
1745
1746 /* bind QP to EP and move to RTS */
1747 attrs.mpa_attr = ep->mpa_attr;
1748 attrs.max_ird = ep->ord;
1749 attrs.max_ord = ep->ord;
1750 attrs.llp_stream_handle = ep;
1751 attrs.next_state = IWCH_QP_STATE_RTS;
1752
1753 /* bind QP and TID with INIT_WR */
1754 mask = IWCH_QP_ATTR_NEXT_STATE |
1755 IWCH_QP_ATTR_LLP_STREAM_HANDLE |
1756 IWCH_QP_ATTR_MPA_ATTR |
1757 IWCH_QP_ATTR_MAX_IRD |
1758 IWCH_QP_ATTR_MAX_ORD;
1759
1760 err = iwch_modify_qp(ep->com.qp->rhp,
1761 ep->com.qp, mask, &attrs, 1);
1762
1763 if (err) {
1764 ep->com.cm_id = NULL;
1765 ep->com.qp = NULL;
1766 cm_id->rem_ref(cm_id);
1767 abort_connection(ep, NULL, GFP_KERNEL);
1768 } else {
1769 state_set(&ep->com, FPDU_MODE);
1770 established_upcall(ep);
1771 }
1772 put_ep(&ep->com);
1773 return err;
1774}
1775
1776int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1777{
1778 int err = 0;
1779 struct iwch_dev *h = to_iwch_dev(cm_id->device);
1780 struct iwch_ep *ep;
1781 struct rtable *rt;
1782
1783 ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1784 if (!ep) {
1785 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
1786 err = -ENOMEM;
1787 goto out;
1788 }
1789 init_timer(&ep->timer);
1790 ep->plen = conn_param->private_data_len;
1791 if (ep->plen)
1792 memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
1793 conn_param->private_data, ep->plen);
1794 ep->ird = conn_param->ird;
1795 ep->ord = conn_param->ord;
1796 ep->com.tdev = h->rdev.t3cdev_p;
1797
1798 cm_id->add_ref(cm_id);
1799 ep->com.cm_id = cm_id;
1800 ep->com.qp = get_qhp(h, conn_param->qpn);
1801 BUG_ON(!ep->com.qp);
1802 PDBG("%s qpn 0x%x qp %p cm_id %p\n", __FUNCTION__, conn_param->qpn,
1803 ep->com.qp, cm_id);
1804
1805 /*
1806 * Allocate an active TID to initiate a TCP connection.
1807 */
1808 ep->atid = cxgb3_alloc_atid(h->rdev.t3cdev_p, &t3c_client, ep);
1809 if (ep->atid == -1) {
1810 printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __FUNCTION__);
1811 err = -ENOMEM;
1812 goto fail2;
1813 }
1814
1815 /* find a route */
1816 rt = find_route(h->rdev.t3cdev_p,
1817 cm_id->local_addr.sin_addr.s_addr,
1818 cm_id->remote_addr.sin_addr.s_addr,
1819 cm_id->local_addr.sin_port,
1820 cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
1821 if (!rt) {
1822 printk(KERN_ERR MOD "%s - cannot find route.\n", __FUNCTION__);
1823 err = -EHOSTUNREACH;
1824 goto fail3;
1825 }
1826 ep->dst = &rt->u.dst;
1827
1828 /* get a l2t entry */
1829 ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
1830 ep->dst->neighbour->dev);
1831 if (!ep->l2t) {
1832 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __FUNCTION__);
1833 err = -ENOMEM;
1834 goto fail4;
1835 }
1836
1837 state_set(&ep->com, CONNECTING);
1838 ep->tos = IPTOS_LOWDELAY;
1839 ep->com.local_addr = cm_id->local_addr;
1840 ep->com.remote_addr = cm_id->remote_addr;
1841
1842 /* send connect request to rnic */
1843 err = send_connect(ep);
1844 if (!err)
1845 goto out;
1846
1847 l2t_release(L2DATA(h->rdev.t3cdev_p), ep->l2t);
1848fail4:
1849 dst_release(ep->dst);
1850fail3:
1851 cxgb3_free_atid(ep->com.tdev, ep->atid);
1852fail2:
1853 put_ep(&ep->com);
1854out:
1855 return err;
1856}
1857
1858int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
1859{
1860 int err = 0;
1861 struct iwch_dev *h = to_iwch_dev(cm_id->device);
1862 struct iwch_listen_ep *ep;
1863
1864
1865 might_sleep();
1866
1867 ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
1868 if (!ep) {
1869 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __FUNCTION__);
1870 err = -ENOMEM;
1871 goto fail1;
1872 }
1873 PDBG("%s ep %p\n", __FUNCTION__, ep);
1874 ep->com.tdev = h->rdev.t3cdev_p;
1875 cm_id->add_ref(cm_id);
1876 ep->com.cm_id = cm_id;
1877 ep->backlog = backlog;
1878 ep->com.local_addr = cm_id->local_addr;
1879
1880 /*
1881 * Allocate a server TID.
1882 */
1883 ep->stid = cxgb3_alloc_stid(h->rdev.t3cdev_p, &t3c_client, ep);
1884 if (ep->stid == -1) {
1885		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __FUNCTION__);
1886 err = -ENOMEM;
1887 goto fail2;
1888 }
1889
1890 state_set(&ep->com, LISTEN);
1891 err = listen_start(ep);
1892 if (err)
1893 goto fail3;
1894
1895 /* wait for pass_open_rpl */
1896 wait_event(ep->com.waitq, ep->com.rpl_done);
1897 err = ep->com.rpl_err;
1898 if (!err) {
1899 cm_id->provider_data = ep;
1900 goto out;
1901 }
1902fail3:
1903 cxgb3_free_stid(ep->com.tdev, ep->stid);
1904fail2:
1905 put_ep(&ep->com);
1906fail1:
1907out:
1908 return err;
1909}
1910
1911int iwch_destroy_listen(struct iw_cm_id *cm_id)
1912{
1913 int err;
1914 struct iwch_listen_ep *ep = to_listen_ep(cm_id);
1915
1916 PDBG("%s ep %p\n", __FUNCTION__, ep);
1917
1918 might_sleep();
1919 state_set(&ep->com, DEAD);
1920 ep->com.rpl_done = 0;
1921 ep->com.rpl_err = 0;
1922 err = listen_stop(ep);
1923 wait_event(ep->com.waitq, ep->com.rpl_done);
1924 cxgb3_free_stid(ep->com.tdev, ep->stid);
1925 err = ep->com.rpl_err;
1926 cm_id->rem_ref(cm_id);
1927 put_ep(&ep->com);
1928 return err;
1929}
1930
1931int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
1932{
1933	int ret = 0;
1934 unsigned long flags;
1935 int close = 0;
1936
1937 spin_lock_irqsave(&ep->com.lock, flags);
1938
1939 PDBG("%s ep %p state %s, abrupt %d\n", __FUNCTION__, ep,
1940 states[ep->com.state], abrupt);
1941
1942 if (ep->com.state == DEAD) {
1943 PDBG("%s already dead ep %p\n", __FUNCTION__, ep);
1944 goto out;
1945 }
1946
1947 if (abrupt) {
1948 if (ep->com.state != ABORTING) {
1949 ep->com.state = ABORTING;
1950 close = 1;
1951 }
1952 goto out;
1953 }
1954
1955 switch (ep->com.state) {
1956 case MPA_REQ_WAIT:
1957 case MPA_REQ_SENT:
1958 case MPA_REQ_RCVD:
1959 case MPA_REP_SENT:
1960 case FPDU_MODE:
1961 ep->com.state = CLOSING;
1962 close = 1;
1963 break;
1964 case CLOSING:
1965 start_ep_timer(ep);
1966 ep->com.state = MORIBUND;
1967 close = 1;
1968 break;
1969 case MORIBUND:
1970 break;
1971 default:
1972 BUG();
1973 break;
1974 }
1975out:
1976 spin_unlock_irqrestore(&ep->com.lock, flags);
1977 if (close) {
1978 if (abrupt)
1979 ret = send_abort(ep, NULL, gfp);
1980 else
1981 ret = send_halfclose(ep, gfp);
1982 }
1983 return ret;
1984}
1985
1986int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
1987 struct l2t_entry *l2t)
1988{
1989 struct iwch_ep *ep = ctx;
1990
1991 if (ep->dst != old)
1992 return 0;
1993
1994 PDBG("%s ep %p redirect to dst %p l2t %p\n", __FUNCTION__, ep, new,
1995 l2t);
1996 dst_hold(new);
1997 l2t_release(L2DATA(ep->com.tdev), ep->l2t);
1998 ep->l2t = l2t;
1999 dst_release(old);
2000 ep->dst = new;
2001 return 1;
2002}
2003
2004/*
2005 * All the CM events are handled on a work queue to have a safe context.
2006 */
2007static int sched(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
2008{
2009 struct iwch_ep_common *epc = ctx;
2010
2011 get_ep(epc);
2012
2013 /*
2014 * Save ctx and tdev in the skb->cb area.
2015 */
2016 *((void **) skb->cb) = ctx;
2017 *((struct t3cdev **) (skb->cb + sizeof(void *))) = tdev;
2018
2019 /*
2020 * Queue the skb and schedule the worker thread.
2021 */
2022 skb_queue_tail(&rxq, skb);
2023 queue_work(workq, &skb_work);
2024 return 0;
2025}
2026
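sched() hands every CPL message to process_work() by stashing the endpoint context and the t3cdev pointer in the first bytes of skb->cb before queueing the skb; process_work() reads them back in the same order and dispatches on the opcode it pulls out of skb->csum. A small sketch of that pack/unpack convention using a plain control-buffer array (hypothetical types, not the kernel's sk_buff, and memcpy instead of the driver's pointer casts):

#include <assert.h>
#include <stdio.h>
#include <string.h>

struct fake_skb {
	char cb[48];		/* same idea as sk_buff::cb scratch space */
};

struct fake_ep  { int id; };
struct fake_dev { int id; };

static void stash_ctx(struct fake_skb *skb, void *ep, struct fake_dev *dev)
{
	/* same layout sched() uses: context first, then the device pointer */
	memcpy(skb->cb, &ep, sizeof(void *));
	memcpy(skb->cb + sizeof(void *), &dev, sizeof(dev));
}

static void unstash_ctx(struct fake_skb *skb, void **ep, struct fake_dev **dev)
{
	memcpy(ep, skb->cb, sizeof(void *));
	memcpy(dev, skb->cb + sizeof(void *), sizeof(*dev));
}

int main(void)
{
	struct fake_ep ep = { .id = 1 };
	struct fake_dev dev = { .id = 2 };
	struct fake_skb skb;
	void *ep_out;
	struct fake_dev *dev_out;

	stash_ctx(&skb, &ep, &dev);
	unstash_ctx(&skb, &ep_out, &dev_out);
	assert(ep_out == &ep && dev_out == &dev);
	printf("ep %d dev %d\n", ((struct fake_ep *)ep_out)->id, dev_out->id);
	return 0;
}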
2027int __init iwch_cm_init(void)
2028{
2029 skb_queue_head_init(&rxq);
2030
2031 workq = create_singlethread_workqueue("iw_cxgb3");
2032 if (!workq)
2033 return -ENOMEM;
2034
2035 /*
2036 * All upcalls from the T3 Core go to sched() to
2037 * schedule the processing on a work queue.
2038 */
2039 t3c_handlers[CPL_ACT_ESTABLISH] = sched;
2040 t3c_handlers[CPL_ACT_OPEN_RPL] = sched;
2041 t3c_handlers[CPL_RX_DATA] = sched;
2042 t3c_handlers[CPL_TX_DMA_ACK] = sched;
2043 t3c_handlers[CPL_ABORT_RPL_RSS] = sched;
2044 t3c_handlers[CPL_ABORT_RPL] = sched;
2045 t3c_handlers[CPL_PASS_OPEN_RPL] = sched;
2046 t3c_handlers[CPL_CLOSE_LISTSRV_RPL] = sched;
2047 t3c_handlers[CPL_PASS_ACCEPT_REQ] = sched;
2048 t3c_handlers[CPL_PASS_ESTABLISH] = sched;
2049 t3c_handlers[CPL_PEER_CLOSE] = sched;
2050 t3c_handlers[CPL_CLOSE_CON_RPL] = sched;
2051 t3c_handlers[CPL_ABORT_REQ_RSS] = sched;
2052 t3c_handlers[CPL_RDMA_TERMINATE] = sched;
2053 t3c_handlers[CPL_RDMA_EC_STATUS] = sched;
2054
2055 /*
2056 * These are the real handlers that are called from a
2057 * work queue.
2058 */
2059 work_handlers[CPL_ACT_ESTABLISH] = act_establish;
2060 work_handlers[CPL_ACT_OPEN_RPL] = act_open_rpl;
2061 work_handlers[CPL_RX_DATA] = rx_data;
2062 work_handlers[CPL_TX_DMA_ACK] = tx_ack;
2063 work_handlers[CPL_ABORT_RPL_RSS] = abort_rpl;
2064 work_handlers[CPL_ABORT_RPL] = abort_rpl;
2065 work_handlers[CPL_PASS_OPEN_RPL] = pass_open_rpl;
2066 work_handlers[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl;
2067 work_handlers[CPL_PASS_ACCEPT_REQ] = pass_accept_req;
2068 work_handlers[CPL_PASS_ESTABLISH] = pass_establish;
2069 work_handlers[CPL_PEER_CLOSE] = peer_close;
2070 work_handlers[CPL_ABORT_REQ_RSS] = peer_abort;
2071 work_handlers[CPL_CLOSE_CON_RPL] = close_con_rpl;
2072 work_handlers[CPL_RDMA_TERMINATE] = terminate;
2073 work_handlers[CPL_RDMA_EC_STATUS] = ec_status;
2074 return 0;
2075}
2076
2077void __exit iwch_cm_term(void)
2078{
2079 flush_workqueue(workq);
2080 destroy_workqueue(workq);
2081}
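
For illustration only (this sketch is not part of the patch): sched() above smuggles two pointers through the fixed skb->cb scratch area and defers the real processing to a handler table run from a workqueue. The standalone program below mirrors that stash-and-dispatch pattern with invented names (struct buf, stash(), dispatch(), handlers[]); it is plain host code meant only to show the mechanics, not driver code.

/* Sketch (not part of the patch): pack two pointers into a per-buffer
 * scratch area on the producer side, unpack and dispatch via a handler
 * table on the consumer side, mirroring sched()/work_handlers[] above. */
#include <stdio.h>
#include <string.h>

#define NUM_OPCODES 4

struct buf {
	int opcode;
	char cb[48];			/* stand-in for skb->cb[] */
};

typedef int (*handler_t)(void *dev, struct buf *b, void *ctx);

static handler_t handlers[NUM_OPCODES];	/* stand-in for work_handlers[] */

/* Producer: save ctx and dev in the scratch area (as sched() does). */
static void stash(struct buf *b, void *ctx, void *dev)
{
	memcpy(b->cb, &ctx, sizeof(void *));
	memcpy(b->cb + sizeof(void *), &dev, sizeof(void *));
}

/* Consumer: recover the pointers and call the registered handler. */
static int dispatch(struct buf *b)
{
	void *ctx, *dev;

	memcpy(&ctx, b->cb, sizeof(void *));
	memcpy(&dev, b->cb + sizeof(void *), sizeof(void *));
	if (b->opcode < 0 || b->opcode >= NUM_OPCODES || !handlers[b->opcode])
		return -1;
	return handlers[b->opcode](dev, b, ctx);
}

static int hello(void *dev, struct buf *b, void *ctx)
{
	printf("opcode %d dev %p ctx %p\n", b->opcode, dev, ctx);
	return 0;
}

int main(void)
{
	struct buf b = { .opcode = 2 };
	int dummy_dev, dummy_ctx;

	handlers[2] = hello;
	stash(&b, &dummy_ctx, &dummy_dev);
	return dispatch(&b);
}
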
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
new file mode 100644
index 000000000000..7c810d904279
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef _IWCH_CM_H_
34#define _IWCH_CM_H_
35
36#include <linux/inet.h>
37#include <linux/wait.h>
38#include <linux/spinlock.h>
39#include <linux/kref.h>
40
41#include <rdma/ib_verbs.h>
42#include <rdma/iw_cm.h>
43
44#include "cxgb3_offload.h"
45#include "iwch_provider.h"
46
47#define MPA_KEY_REQ "MPA ID Req Frame"
48#define MPA_KEY_REP "MPA ID Rep Frame"
49
50#define MPA_MAX_PRIVATE_DATA 256
51#define MPA_REV 0 /* XXX - amso1100 uses rev 0 ! */
52#define MPA_REJECT 0x20
53#define MPA_CRC 0x40
54#define MPA_MARKERS 0x80
55#define MPA_FLAGS_MASK 0xE0
56
57#define put_ep(ep) do { \
58 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \
59 ep, atomic_read(&((ep)->kref.refcount))); \
60 kref_put(&((ep)->kref), __free_ep); \
61} while (0)
62
63#define get_ep(ep) do { \
64 PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \
65 ep, atomic_read(&((ep)->kref.refcount))); \
66 kref_get(&((ep)->kref)); \
67} while (0)
68
69struct mpa_message {
70 u8 key[16];
71 u8 flags;
72 u8 revision;
73 __be16 private_data_size;
74 u8 private_data[0];
75};
76
77struct terminate_message {
78 u8 layer_etype;
79 u8 ecode;
80 __be16 hdrct_rsvd;
81 u8 len_hdrs[0];
82};
83
84#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
85
86enum iwch_layers_types {
87 LAYER_RDMAP = 0x00,
88 LAYER_DDP = 0x10,
89 LAYER_MPA = 0x20,
90 RDMAP_LOCAL_CATA = 0x00,
91 RDMAP_REMOTE_PROT = 0x01,
92 RDMAP_REMOTE_OP = 0x02,
93 DDP_LOCAL_CATA = 0x00,
94 DDP_TAGGED_ERR = 0x01,
95 DDP_UNTAGGED_ERR = 0x02,
96 DDP_LLP = 0x03
97};
98
99enum iwch_rdma_ecodes {
100 RDMAP_INV_STAG = 0x00,
101 RDMAP_BASE_BOUNDS = 0x01,
102 RDMAP_ACC_VIOL = 0x02,
103 RDMAP_STAG_NOT_ASSOC = 0x03,
104 RDMAP_TO_WRAP = 0x04,
105 RDMAP_INV_VERS = 0x05,
106 RDMAP_INV_OPCODE = 0x06,
107 RDMAP_STREAM_CATA = 0x07,
108 RDMAP_GLOBAL_CATA = 0x08,
109 RDMAP_CANT_INV_STAG = 0x09,
110 RDMAP_UNSPECIFIED = 0xff
111};
112
113enum iwch_ddp_ecodes {
114 DDPT_INV_STAG = 0x00,
115 DDPT_BASE_BOUNDS = 0x01,
116 DDPT_STAG_NOT_ASSOC = 0x02,
117 DDPT_TO_WRAP = 0x03,
118 DDPT_INV_VERS = 0x04,
119 DDPU_INV_QN = 0x01,
120 DDPU_INV_MSN_NOBUF = 0x02,
121 DDPU_INV_MSN_RANGE = 0x03,
122 DDPU_INV_MO = 0x04,
123 DDPU_MSG_TOOBIG = 0x05,
124 DDPU_INV_VERS = 0x06
125};
126
127enum iwch_mpa_ecodes {
128 MPA_CRC_ERR = 0x02,
129 MPA_MARKER_ERR = 0x03
130};
131
132enum iwch_ep_state {
133 IDLE = 0,
134 LISTEN,
135 CONNECTING,
136 MPA_REQ_WAIT,
137 MPA_REQ_SENT,
138 MPA_REQ_RCVD,
139 MPA_REP_SENT,
140 FPDU_MODE,
141 ABORTING,
142 CLOSING,
143 MORIBUND,
144 DEAD,
145};
146
147struct iwch_ep_common {
148 struct iw_cm_id *cm_id;
149 struct iwch_qp *qp;
150 struct t3cdev *tdev;
151 enum iwch_ep_state state;
152 struct kref kref;
153 spinlock_t lock;
154 struct sockaddr_in local_addr;
155 struct sockaddr_in remote_addr;
156 wait_queue_head_t waitq;
157 int rpl_done;
158 int rpl_err;
159};
160
161struct iwch_listen_ep {
162 struct iwch_ep_common com;
163 unsigned int stid;
164 int backlog;
165};
166
167struct iwch_ep {
168 struct iwch_ep_common com;
169 struct iwch_ep *parent_ep;
170 struct timer_list timer;
171 unsigned int atid;
172 u32 hwtid;
173 u32 snd_seq;
174 struct l2t_entry *l2t;
175 struct dst_entry *dst;
176 struct sk_buff *mpa_skb;
177 struct iwch_mpa_attributes mpa_attr;
178 unsigned int mpa_pkt_len;
179 u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
180 u8 tos;
181 u16 emss;
182 u16 plen;
183 u32 ird;
184 u32 ord;
185};
186
187static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
188{
189 return cm_id->provider_data;
190}
191
192static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
193{
194 return cm_id->provider_data;
195}
196
197static inline int compute_wscale(int win)
198{
199 int wscale = 0;
200
201 while (wscale < 14 && (65535<<wscale) < win)
202 wscale++;
203 return wscale;
204}
205
206/* CM prototypes */
207
208int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
209int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
210int iwch_destroy_listen(struct iw_cm_id *cm_id);
211int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
212int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
213int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp);
214int iwch_quiesce_tid(struct iwch_ep *ep);
215int iwch_resume_tid(struct iwch_ep *ep);
216void __free_ep(struct kref *kref);
217void iwch_rearp(struct iwch_ep *ep);
218int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, struct l2t_entry *l2t);
219
220int __init iwch_cm_init(void);
221void __exit iwch_cm_term(void);
222
223#endif /* _IWCH_CM_H_ */
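
For illustration only (not part of the patch): compute_wscale() in the header above picks the smallest TCP window-scale shift, capped at 14, whose scaled 65535-byte window covers the requested receive window. The standalone program below repeats the same loop on the host so the window-to-wscale mapping can be printed and checked directly.

/* Sketch (not part of the patch): compute_wscale() from iwch_cm.h above,
 * wrapped in a host program that prints the chosen window-scale factor. */
#include <stdio.h>

static int compute_wscale(int win)
{
	int wscale = 0;

	/* Smallest shift, capped at 14 (the RFC 1323 maximum), such that
	 * 65535 << wscale covers the requested receive window. */
	while (wscale < 14 && (65535 << wscale) < win)
		wscale++;
	return wscale;
}

int main(void)
{
	const int windows[] = { 65535, 262144, 1048576, 16777216 };
	size_t i;

	for (i = 0; i < sizeof(windows) / sizeof(windows[0]); i++)
		printf("win %8d -> wscale %d\n", windows[i],
		       compute_wscale(windows[i]));
	return 0;
}
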
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
new file mode 100644
index 000000000000..98b3bdb5de9e
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -0,0 +1,225 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "iwch_provider.h"
34#include "iwch.h"
35
36/*
37 * Get one cq entry from cxio and map it to openib.
38 *
39 * Returns:
40 *	0			cq empty
41 * 1 cqe returned
42 * -EAGAIN caller must try again
43 * any other -errno fatal error
44 */
45static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
46 struct ib_wc *wc)
47{
48 struct iwch_qp *qhp = NULL;
49 struct t3_cqe cqe, *rd_cqe;
50 struct t3_wq *wq;
51 u32 credit = 0;
52 u8 cqe_flushed;
53 u64 cookie;
54 int ret = 1;
55
56 rd_cqe = cxio_next_cqe(&chp->cq);
57
58 if (!rd_cqe)
59 return 0;
60
61 qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
62 if (!qhp)
63 wq = NULL;
64 else {
65 spin_lock(&qhp->lock);
66 wq = &(qhp->wq);
67 }
68 ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
69 &credit);
70 if (t3a_device(chp->rhp) && credit) {
71 PDBG("%s updating %d cq credits on id %d\n", __FUNCTION__,
72 credit, chp->cq.cqid);
73 cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit);
74 }
75
76 if (ret) {
77 ret = -EAGAIN;
78 goto out;
79 }
80 ret = 1;
81
82 wc->wr_id = cookie;
83 wc->qp = &qhp->ibqp;
84 wc->vendor_err = CQE_STATUS(cqe);
85
86 PDBG("%s qpid 0x%x type %d opcode %d status 0x%x wrid hi 0x%x "
87 "lo 0x%x cookie 0x%llx\n", __FUNCTION__,
88 CQE_QPID(cqe), CQE_TYPE(cqe),
89 CQE_OPCODE(cqe), CQE_STATUS(cqe), CQE_WRID_HI(cqe),
90 CQE_WRID_LOW(cqe), (unsigned long long) cookie);
91
92 if (CQE_TYPE(cqe) == 0) {
93 if (!CQE_STATUS(cqe))
94 wc->byte_len = CQE_LEN(cqe);
95 else
96 wc->byte_len = 0;
97 wc->opcode = IB_WC_RECV;
98 } else {
99 switch (CQE_OPCODE(cqe)) {
100 case T3_RDMA_WRITE:
101 wc->opcode = IB_WC_RDMA_WRITE;
102 break;
103 case T3_READ_REQ:
104 wc->opcode = IB_WC_RDMA_READ;
105 wc->byte_len = CQE_LEN(cqe);
106 break;
107 case T3_SEND:
108 case T3_SEND_WITH_SE:
109 wc->opcode = IB_WC_SEND;
110 break;
111 case T3_BIND_MW:
112 wc->opcode = IB_WC_BIND_MW;
113 break;
114
115 /* these aren't supported yet */
116 case T3_SEND_WITH_INV:
117 case T3_SEND_WITH_SE_INV:
118 case T3_LOCAL_INV:
119 case T3_FAST_REGISTER:
120 default:
121 printk(KERN_ERR MOD "Unexpected opcode %d "
122 "in the CQE received for QPID=0x%0x\n",
123 CQE_OPCODE(cqe), CQE_QPID(cqe));
124 ret = -EINVAL;
125 goto out;
126 }
127 }
128
129 if (cqe_flushed)
130 wc->status = IB_WC_WR_FLUSH_ERR;
131 else {
132
133 switch (CQE_STATUS(cqe)) {
134 case TPT_ERR_SUCCESS:
135 wc->status = IB_WC_SUCCESS;
136 break;
137 case TPT_ERR_STAG:
138 wc->status = IB_WC_LOC_ACCESS_ERR;
139 break;
140 case TPT_ERR_PDID:
141 wc->status = IB_WC_LOC_PROT_ERR;
142 break;
143 case TPT_ERR_QPID:
144 case TPT_ERR_ACCESS:
145 wc->status = IB_WC_LOC_ACCESS_ERR;
146 break;
147 case TPT_ERR_WRAP:
148 wc->status = IB_WC_GENERAL_ERR;
149 break;
150 case TPT_ERR_BOUND:
151 wc->status = IB_WC_LOC_LEN_ERR;
152 break;
153 case TPT_ERR_INVALIDATE_SHARED_MR:
154 case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
155 wc->status = IB_WC_MW_BIND_ERR;
156 break;
157 case TPT_ERR_CRC:
158 case TPT_ERR_MARKER:
159 case TPT_ERR_PDU_LEN_ERR:
160 case TPT_ERR_OUT_OF_RQE:
161 case TPT_ERR_DDP_VERSION:
162 case TPT_ERR_RDMA_VERSION:
163 case TPT_ERR_DDP_QUEUE_NUM:
164 case TPT_ERR_MSN:
165 case TPT_ERR_TBIT:
166 case TPT_ERR_MO:
167 case TPT_ERR_MSN_RANGE:
168 case TPT_ERR_IRD_OVERFLOW:
169 case TPT_ERR_OPCODE:
170 wc->status = IB_WC_FATAL_ERR;
171 break;
172 case TPT_ERR_SWFLUSH:
173 wc->status = IB_WC_WR_FLUSH_ERR;
174 break;
175 default:
176 printk(KERN_ERR MOD "Unexpected cqe_status 0x%x for "
177 "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe));
178 ret = -EINVAL;
179 }
180 }
181out:
182 if (wq)
183 spin_unlock(&qhp->lock);
184 return ret;
185}
186
187int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
188{
189 struct iwch_dev *rhp;
190 struct iwch_cq *chp;
191 unsigned long flags;
192 int npolled;
193 int err = 0;
194
195 chp = to_iwch_cq(ibcq);
196 rhp = chp->rhp;
197
198 spin_lock_irqsave(&chp->lock, flags);
199 for (npolled = 0; npolled < num_entries; ++npolled) {
200#ifdef DEBUG
201		int i = 0;
202#endif
203
204 /*
205 * Because T3 can post CQEs that are _not_ associated
206 * with a WR, we might have to poll again after removing
207 * one of these.
208 */
209 do {
210 err = iwch_poll_cq_one(rhp, chp, wc + npolled);
211#ifdef DEBUG
212 BUG_ON(++i > 1000);
213#endif
214 } while (err == -EAGAIN);
215 if (err <= 0)
216 break;
217 }
218 spin_unlock_irqrestore(&chp->lock, flags);
219
220 if (err < 0)
221 return err;
222 else {
223 return npolled;
224 }
225}
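
For illustration only (not part of the patch): iwch_poll_cq() above relies on the return-code contract of iwch_poll_cq_one() -- 0 for an empty CQ, 1 for a completion, -EAGAIN when a CQE not tied to a WR was consumed and the poll must be retried. The sketch below models that contract with a canned result sequence (script[], poll_one() and poll_cq() are invented names) to show why the inner do/while retry loop is needed.

/* Sketch (not part of the patch): the empty/got-one/retry polling
 * contract used by iwch_poll_cq(), driven by a scripted producer. */
#include <errno.h>
#include <stdio.h>

static const int script[] = { 1, -EAGAIN, -EAGAIN, 1, 0 };
static unsigned int pos;

static int poll_one(int *wc)
{
	int rc = script[pos++];

	if (rc == 1)
		*wc = (int)pos;		/* pretend this is the completion */
	return rc;
}

static int poll_cq(int num_entries, int *wc)
{
	int npolled, err = 0;

	for (npolled = 0; npolled < num_entries; ++npolled) {
		/* Retry while the producer hands back CQEs that carry no
		 * work completion, exactly as iwch_poll_cq() does. */
		do {
			err = poll_one(wc + npolled);
		} while (err == -EAGAIN);
		if (err <= 0)
			break;
	}
	return (err < 0) ? err : npolled;
}

int main(void)
{
	int wc[8];
	int n = poll_cq(8, wc);

	printf("polled %d completion(s)\n", n);
	return 0;
}
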
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
new file mode 100644
index 000000000000..a6efa8fe15d8
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -0,0 +1,231 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/slab.h>
34#include <linux/mman.h>
35#include <net/sock.h>
36#include "iwch_provider.h"
37#include "iwch.h"
38#include "iwch_cm.h"
39#include "cxio_hal.h"
40#include "cxio_wr.h"
41
42static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
43 struct respQ_msg_t *rsp_msg,
44 enum ib_event_type ib_event,
45 int send_term)
46{
47 struct ib_event event;
48 struct iwch_qp_attributes attrs;
49 struct iwch_qp *qhp;
50
51 printk(KERN_ERR "%s - AE qpid 0x%x opcode %d status 0x%x "
52 "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
53 CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
54 CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
55 CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
56
57 spin_lock(&rnicp->lock);
58 qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
59
60 if (!qhp) {
61 printk(KERN_ERR "%s unaffiliated error 0x%x qpid 0x%x\n",
62 __FUNCTION__, CQE_STATUS(rsp_msg->cqe),
63 CQE_QPID(rsp_msg->cqe));
64 spin_unlock(&rnicp->lock);
65 return;
66 }
67
68 if ((qhp->attr.state == IWCH_QP_STATE_ERROR) ||
69 (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) {
70 PDBG("%s AE received after RTS - "
71 "qp state %d qpid 0x%x status 0x%x\n", __FUNCTION__,
72 qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe));
73 spin_unlock(&rnicp->lock);
74 return;
75 }
76
77 atomic_inc(&qhp->refcnt);
78 spin_unlock(&rnicp->lock);
79
80 event.event = ib_event;
81 event.device = chp->ibcq.device;
82 if (ib_event == IB_EVENT_CQ_ERR)
83 event.element.cq = &chp->ibcq;
84 else
85 event.element.qp = &qhp->ibqp;
86
87 if (qhp->ibqp.event_handler)
88 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
89
90 if (qhp->attr.state == IWCH_QP_STATE_RTS) {
91 attrs.next_state = IWCH_QP_STATE_TERMINATE;
92 iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE,
93 &attrs, 1);
94 if (send_term)
95 iwch_post_terminate(qhp, rsp_msg);
96 }
97
98 if (atomic_dec_and_test(&qhp->refcnt))
99 wake_up(&qhp->wait);
100}
101
102void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
103{
104 struct iwch_dev *rnicp;
105 struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
106 struct iwch_cq *chp;
107 struct iwch_qp *qhp;
108 u32 cqid = RSPQ_CQID(rsp_msg);
109
110 rnicp = (struct iwch_dev *) rdev_p->ulp;
111 spin_lock(&rnicp->lock);
112 chp = get_chp(rnicp, cqid);
113 qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
114 if (!chp || !qhp) {
115 printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
116 "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
117 cqid, CQE_QPID(rsp_msg->cqe),
118 CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
119 CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
120 CQE_WRID_LOW(rsp_msg->cqe));
121 spin_unlock(&rnicp->lock);
122 goto out;
123 }
124 iwch_qp_add_ref(&qhp->ibqp);
125 atomic_inc(&chp->refcnt);
126 spin_unlock(&rnicp->lock);
127
128 /*
129 	 * Either 1) completion of our sending a TERMINATE, or
130 	 * 2) an incoming TERMINATE message.
131 */
132 if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
133 (CQE_STATUS(rsp_msg->cqe) == 0)) {
134 if (SQ_TYPE(rsp_msg->cqe)) {
135 PDBG("%s QPID 0x%x ep %p disconnecting\n",
136 __FUNCTION__, qhp->wq.qpid, qhp->ep);
137 iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
138 } else {
139 PDBG("%s post REQ_ERR AE QPID 0x%x\n", __FUNCTION__,
140 qhp->wq.qpid);
141 post_qp_event(rnicp, chp, rsp_msg,
142 IB_EVENT_QP_REQ_ERR, 0);
143 iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
144 }
145 goto done;
146 }
147
148 /* Bad incoming Read request */
149 if (SQ_TYPE(rsp_msg->cqe) &&
150 (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
151 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
152 goto done;
153 }
154
155 /* Bad incoming write */
156 if (RQ_TYPE(rsp_msg->cqe) &&
157 (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
158 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
159 goto done;
160 }
161
162 switch (CQE_STATUS(rsp_msg->cqe)) {
163
164 /* Completion Events */
165 case TPT_ERR_SUCCESS:
166
167 /*
168 * Confirm the destination entry if this is a RECV completion.
169 	 * Confirm the destination entry if this is a send completion.
170 if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
171 dst_confirm(qhp->ep->dst);
172 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
173 break;
174
175 case TPT_ERR_STAG:
176 case TPT_ERR_PDID:
177 case TPT_ERR_QPID:
178 case TPT_ERR_ACCESS:
179 case TPT_ERR_WRAP:
180 case TPT_ERR_BOUND:
181 case TPT_ERR_INVALIDATE_SHARED_MR:
182 case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
183 printk(KERN_ERR "%s - CQE Err qpid 0x%x opcode %d status 0x%x "
184 "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__,
185 CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe),
186 CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe),
187 CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe));
188 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
189 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
190 break;
191
192 /* Device Fatal Errors */
193 case TPT_ERR_ECC:
194 case TPT_ERR_ECC_PSTAG:
195 case TPT_ERR_INTERNAL_ERR:
196 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
197 break;
198
199 /* QP Fatal Errors */
200 case TPT_ERR_OUT_OF_RQE:
201 case TPT_ERR_PBL_ADDR_BOUND:
202 case TPT_ERR_CRC:
203 case TPT_ERR_MARKER:
204 case TPT_ERR_PDU_LEN_ERR:
205 case TPT_ERR_DDP_VERSION:
206 case TPT_ERR_RDMA_VERSION:
207 case TPT_ERR_OPCODE:
208 case TPT_ERR_DDP_QUEUE_NUM:
209 case TPT_ERR_MSN:
210 case TPT_ERR_TBIT:
211 case TPT_ERR_MO:
212 case TPT_ERR_MSN_GAP:
213 case TPT_ERR_MSN_RANGE:
214 case TPT_ERR_RQE_ADDR_BOUND:
215 case TPT_ERR_IRD_OVERFLOW:
216 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
217 break;
218
219 default:
220 printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
221 CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
222 post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
223 break;
224 }
225done:
226 if (atomic_dec_and_test(&chp->refcnt))
227 wake_up(&chp->wait);
228 iwch_qp_rem_ref(&qhp->ibqp);
229out:
230 dev_kfree_skb_irq(skb);
231}
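
For illustration only (not part of the patch): iwch_ev_dispatch() above takes references on the QP and CQ for the duration of the event, and the destroy paths elsewhere in the driver wait for those references to drain (atomic_dec_and_test()/wake_up() paired with wait_event()). The sketch below re-expresses that teardown handshake with C11 atomics and pthreads; struct obj, obj_get(), obj_put() and event_handler() are invented stand-ins, not driver code.

/* Sketch (not part of the patch): the reference-count-plus-wait teardown
 * pattern, re-expressed with C11 atomics and pthreads for illustration. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

struct obj {
	atomic_int refcnt;		/* starts at 1 for the creator */
	pthread_mutex_t lock;
	pthread_cond_t wait;		/* plays the role of chp->wait */
};

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcnt, 1);
}

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {	/* dropped last ref */
		pthread_mutex_lock(&o->lock);
		pthread_cond_broadcast(&o->wait);	/* like wake_up() */
		pthread_mutex_unlock(&o->lock);
	}
}

static void *event_handler(void *arg)	/* models one dispatched event */
{
	struct obj *o = arg;

	/* The reference protecting this handler was taken by the creator
	 * before the handler could run (see main() below). */
	usleep(10000);			/* ... deliver the event ... */
	obj_put(o);
	return NULL;
}

int main(void)
{
	struct obj o;
	pthread_t t;

	atomic_init(&o.refcnt, 1);
	pthread_mutex_init(&o.lock, NULL);
	pthread_cond_init(&o.wait, NULL);

	obj_get(&o);		/* reference on behalf of the handler */
	pthread_create(&t, NULL, event_handler, &o);

	/* Destroy path: drop the creator's reference, then wait until every
	 * in-flight handler has dropped its own (the wait_event() side). */
	atomic_fetch_sub(&o.refcnt, 1);
	pthread_mutex_lock(&o.lock);
	while (atomic_load(&o.refcnt) != 0)
		pthread_cond_wait(&o.wait, &o.lock);
	pthread_mutex_unlock(&o.lock);
	printf("all references dropped, safe to tear down\n");

	pthread_join(t, NULL);
	return 0;
}
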
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
new file mode 100644
index 000000000000..2b6cd53bb3fc
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -0,0 +1,172 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <asm/byteorder.h>
34
35#include <rdma/iw_cm.h>
36#include <rdma/ib_verbs.h>
37
38#include "cxio_hal.h"
39#include "iwch.h"
40#include "iwch_provider.h"
41
42int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
43 struct iwch_mr *mhp,
44 int shift,
45 __be64 *page_list)
46{
47 u32 stag;
48 u32 mmid;
49
50
51 if (cxio_register_phys_mem(&rhp->rdev,
52 &stag, mhp->attr.pdid,
53 mhp->attr.perms,
54 mhp->attr.zbva,
55 mhp->attr.va_fbo,
56 mhp->attr.len,
57 shift-12,
58 page_list,
59 &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
60 return -ENOMEM;
61 mhp->attr.state = 1;
62 mhp->attr.stag = stag;
63 mmid = stag >> 8;
64 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
65 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
66 PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
67 return 0;
68}
69
70int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
71 struct iwch_mr *mhp,
72 int shift,
73 __be64 *page_list,
74 int npages)
75{
76 u32 stag;
77 u32 mmid;
78
79
80 /* We could support this... */
81 if (npages > mhp->attr.pbl_size)
82 return -ENOMEM;
83
84 stag = mhp->attr.stag;
85 if (cxio_reregister_phys_mem(&rhp->rdev,
86 &stag, mhp->attr.pdid,
87 mhp->attr.perms,
88 mhp->attr.zbva,
89 mhp->attr.va_fbo,
90 mhp->attr.len,
91 shift-12,
92 page_list,
93 &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
94 return -ENOMEM;
95 mhp->attr.state = 1;
96 mhp->attr.stag = stag;
97 mmid = stag >> 8;
98 mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
99 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
100 PDBG("%s mmid 0x%x mhp %p\n", __FUNCTION__, mmid, mhp);
101 return 0;
102}
103
104int build_phys_page_list(struct ib_phys_buf *buffer_list,
105 int num_phys_buf,
106 u64 *iova_start,
107 u64 *total_size,
108 int *npages,
109 int *shift,
110 __be64 **page_list)
111{
112 u64 mask;
113 int i, j, n;
114
115 mask = 0;
116 *total_size = 0;
117 for (i = 0; i < num_phys_buf; ++i) {
118 if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
119 return -EINVAL;
120 if (i != 0 && i != num_phys_buf - 1 &&
121 (buffer_list[i].size & ~PAGE_MASK))
122 return -EINVAL;
123 *total_size += buffer_list[i].size;
124 if (i > 0)
125 mask |= buffer_list[i].addr;
126 }
127
128 if (*total_size > 0xFFFFFFFFULL)
129 return -ENOMEM;
130
131 /* Find largest page shift we can use to cover buffers */
132 for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
133 if (num_phys_buf > 1) {
134 if ((1ULL << *shift) & mask)
135 break;
136 } else
137 if (1ULL << *shift >=
138 buffer_list[0].size +
139 (buffer_list[0].addr & ((1ULL << *shift) - 1)))
140 break;
141
142 buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
143 buffer_list[0].addr &= ~0ull << *shift;
144
145 *npages = 0;
146 for (i = 0; i < num_phys_buf; ++i)
147 *npages += (buffer_list[i].size +
148 (1ULL << *shift) - 1) >> *shift;
149
150 if (!*npages)
151 return -EINVAL;
152
153 *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
154 if (!*page_list)
155 return -ENOMEM;
156
157 n = 0;
158 for (i = 0; i < num_phys_buf; ++i)
159 for (j = 0;
160 j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
161 ++j)
162 (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
163 ((u64) j << *shift));
164
165 PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
166 __FUNCTION__, (unsigned long long) *iova_start,
167 (unsigned long long) mask, *shift, (unsigned long long) *total_size,
168 *npages);
169
170 return 0;
171
172}
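
For illustration only (not part of the patch): the loop in build_phys_page_list() above searches for the largest page shift that keeps every subsequent buffer start aligned (multi-buffer case) or that covers the single buffer. The standalone program below lifts that search into plain host code (struct phys_buf and covering_shift() are invented names) so the arithmetic can be checked against sample buffers.

/* Sketch (not part of the patch): the "largest covering page shift"
 * search from build_phys_page_list(), reduced to plain host code. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct phys_buf {
	uint64_t addr;
	uint64_t size;
};

static int covering_shift(const struct phys_buf *buf, int n)
{
	uint64_t mask = 0;
	int i, shift;

	/* OR together the start addresses of all but the first buffer;
	 * any set bit below a candidate shift breaks the alignment. */
	for (i = 1; i < n; i++)
		mask |= buf[i].addr;

	for (shift = PAGE_SHIFT; shift < 27; ++shift)
		if (n > 1) {
			if ((1ULL << shift) & mask)
				break;
		} else
			if ((1ULL << shift) >=
			    buf[0].size + (buf[0].addr & ((1ULL << shift) - 1)))
				break;

	return shift;
}

int main(void)
{
	/* Two 64 KB buffers whose start addresses are 64 KB aligned:
	 * mask has no bits set below bit 16, so the search stops at 16. */
	struct phys_buf bufs[] = {
		{ 0x10000000, 0x10000 },
		{ 0x20010000, 0x10000 },
	};
	int shift = covering_shift(bufs, 2);

	printf("shift %d (page size %llu)\n", shift, 1ULL << shift);
	return 0;
}
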
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
new file mode 100644
index 000000000000..6861087d776c
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -0,0 +1,1203 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35#include <linux/device.h>
36#include <linux/netdevice.h>
37#include <linux/etherdevice.h>
38#include <linux/delay.h>
39#include <linux/errno.h>
40#include <linux/list.h>
41#include <linux/spinlock.h>
42#include <linux/ethtool.h>
43
44#include <asm/io.h>
45#include <asm/irq.h>
46#include <asm/byteorder.h>
47
48#include <rdma/iw_cm.h>
49#include <rdma/ib_verbs.h>
50#include <rdma/ib_smi.h>
51#include <rdma/ib_user_verbs.h>
52
53#include "cxio_hal.h"
54#include "iwch.h"
55#include "iwch_provider.h"
56#include "iwch_cm.h"
57#include "iwch_user.h"
58
59static int iwch_modify_port(struct ib_device *ibdev,
60 u8 port, int port_modify_mask,
61 struct ib_port_modify *props)
62{
63 return -ENOSYS;
64}
65
66static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
67 struct ib_ah_attr *ah_attr)
68{
69 return ERR_PTR(-ENOSYS);
70}
71
72static int iwch_ah_destroy(struct ib_ah *ah)
73{
74 return -ENOSYS;
75}
76
77static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
78{
79 return -ENOSYS;
80}
81
82static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
83{
84 return -ENOSYS;
85}
86
87static int iwch_process_mad(struct ib_device *ibdev,
88 int mad_flags,
89 u8 port_num,
90 struct ib_wc *in_wc,
91 struct ib_grh *in_grh,
92 struct ib_mad *in_mad, struct ib_mad *out_mad)
93{
94 return -ENOSYS;
95}
96
97static int iwch_dealloc_ucontext(struct ib_ucontext *context)
98{
99 struct iwch_dev *rhp = to_iwch_dev(context->device);
100 struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
101 struct iwch_mm_entry *mm, *tmp;
102
103 PDBG("%s context %p\n", __FUNCTION__, context);
104 list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
105 kfree(mm);
106 cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
107 kfree(ucontext);
108 return 0;
109}
110
111static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
112 struct ib_udata *udata)
113{
114 struct iwch_ucontext *context;
115 struct iwch_dev *rhp = to_iwch_dev(ibdev);
116
117 PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
118 context = kzalloc(sizeof(*context), GFP_KERNEL);
119 if (!context)
120 return ERR_PTR(-ENOMEM);
121 cxio_init_ucontext(&rhp->rdev, &context->uctx);
122 INIT_LIST_HEAD(&context->mmaps);
123 spin_lock_init(&context->mmap_lock);
124 return &context->ibucontext;
125}
126
127static int iwch_destroy_cq(struct ib_cq *ib_cq)
128{
129 struct iwch_cq *chp;
130
131 PDBG("%s ib_cq %p\n", __FUNCTION__, ib_cq);
132 chp = to_iwch_cq(ib_cq);
133
134 remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
135 atomic_dec(&chp->refcnt);
136 wait_event(chp->wait, !atomic_read(&chp->refcnt));
137
138 cxio_destroy_cq(&chp->rhp->rdev, &chp->cq);
139 kfree(chp);
140 return 0;
141}
142
143static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries,
144 struct ib_ucontext *ib_context,
145 struct ib_udata *udata)
146{
147 struct iwch_dev *rhp;
148 struct iwch_cq *chp;
149 struct iwch_create_cq_resp uresp;
150 struct iwch_create_cq_req ureq;
151 struct iwch_ucontext *ucontext = NULL;
152
153 PDBG("%s ib_dev %p entries %d\n", __FUNCTION__, ibdev, entries);
154 rhp = to_iwch_dev(ibdev);
155 chp = kzalloc(sizeof(*chp), GFP_KERNEL);
156 if (!chp)
157 return ERR_PTR(-ENOMEM);
158
159 if (ib_context) {
160 ucontext = to_iwch_ucontext(ib_context);
161 if (!t3a_device(rhp)) {
162 if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) {
163 kfree(chp);
164 return ERR_PTR(-EFAULT);
165 }
166 chp->user_rptr_addr = (u32 __user *)(unsigned long)ureq.user_rptr_addr;
167 }
168 }
169
170 if (t3a_device(rhp)) {
171
172 /*
173 * T3A: Add some fluff to handle extra CQEs inserted
174 * for various errors.
175 * Additional CQE possibilities:
176 * TERMINATE,
177 * incoming RDMA WRITE Failures
178 * incoming RDMA READ REQUEST FAILUREs
179 * NOTE: We cannot ensure the CQ won't overflow.
180 */
181 entries += 16;
182 }
183 entries = roundup_pow_of_two(entries);
184 chp->cq.size_log2 = ilog2(entries);
185
186 if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
187 kfree(chp);
188 return ERR_PTR(-ENOMEM);
189 }
190 chp->rhp = rhp;
191 chp->ibcq.cqe = (1 << chp->cq.size_log2) - 1;
192 spin_lock_init(&chp->lock);
193 atomic_set(&chp->refcnt, 1);
194 init_waitqueue_head(&chp->wait);
195 insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
196
197 if (ucontext) {
198 struct iwch_mm_entry *mm;
199
200 mm = kmalloc(sizeof *mm, GFP_KERNEL);
201 if (!mm) {
202 iwch_destroy_cq(&chp->ibcq);
203 return ERR_PTR(-ENOMEM);
204 }
205 uresp.cqid = chp->cq.cqid;
206 uresp.size_log2 = chp->cq.size_log2;
207 spin_lock(&ucontext->mmap_lock);
208 uresp.key = ucontext->key;
209 ucontext->key += PAGE_SIZE;
210 spin_unlock(&ucontext->mmap_lock);
211 if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
212 kfree(mm);
213 iwch_destroy_cq(&chp->ibcq);
214 return ERR_PTR(-EFAULT);
215 }
216 mm->key = uresp.key;
217 mm->addr = virt_to_phys(chp->cq.queue);
218 mm->len = PAGE_ALIGN((1UL << uresp.size_log2) *
219 sizeof (struct t3_cqe));
220 insert_mmap(ucontext, mm);
221 }
222 PDBG("created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx\n",
223 chp->cq.cqid, chp, (1 << chp->cq.size_log2),
224 (unsigned long long) chp->cq.dma_addr);
225 return &chp->ibcq;
226}
227
228static int iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
229{
230#ifdef notyet
231 struct iwch_cq *chp = to_iwch_cq(cq);
232 struct t3_cq oldcq, newcq;
233 int ret;
234
235 PDBG("%s ib_cq %p cqe %d\n", __FUNCTION__, cq, cqe);
236
237 /* We don't downsize... */
238 if (cqe <= cq->cqe)
239 return 0;
240
241 /* create new t3_cq with new size */
242 cqe = roundup_pow_of_two(cqe+1);
243 newcq.size_log2 = ilog2(cqe);
244
245 /* Dont allow resize to less than the current wce count */
246 if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) {
247 return -ENOMEM;
248 }
249
250 /* Quiesce all QPs using this CQ */
251 ret = iwch_quiesce_qps(chp);
252 if (ret) {
253 return ret;
254 }
255
256 ret = cxio_create_cq(&chp->rhp->rdev, &newcq);
257 if (ret) {
258 return ret;
259 }
260
261 /* copy CQEs */
262 memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) *
263 sizeof(struct t3_cqe));
264
265 /* old iwch_qp gets new t3_cq but keeps old cqid */
266 oldcq = chp->cq;
267 chp->cq = newcq;
268 chp->cq.cqid = oldcq.cqid;
269
270 /* resize new t3_cq to update the HW context */
271 ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq);
272 if (ret) {
273 chp->cq = oldcq;
274 return ret;
275 }
276 chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1;
277
278 /* destroy old t3_cq */
279 oldcq.cqid = newcq.cqid;
280 ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq);
281 if (ret) {
282 printk(KERN_ERR MOD "%s - cxio_destroy_cq failed %d\n",
283 __FUNCTION__, ret);
284 }
285
286 /* add user hooks here */
287
288 /* resume qps */
289 ret = iwch_resume_qps(chp);
290 return ret;
291#else
292 return -ENOSYS;
293#endif
294}
295
296static int iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
297{
298 struct iwch_dev *rhp;
299 struct iwch_cq *chp;
300 enum t3_cq_opcode cq_op;
301 int err;
302 unsigned long flag;
303 u32 rptr;
304
305 chp = to_iwch_cq(ibcq);
306 rhp = chp->rhp;
307 if (notify == IB_CQ_SOLICITED)
308 cq_op = CQ_ARM_SE;
309 else
310 cq_op = CQ_ARM_AN;
311 if (chp->user_rptr_addr) {
312 if (get_user(rptr, chp->user_rptr_addr))
313 return -EFAULT;
314 spin_lock_irqsave(&chp->lock, flag);
315 chp->cq.rptr = rptr;
316 } else
317 spin_lock_irqsave(&chp->lock, flag);
318 PDBG("%s rptr 0x%x\n", __FUNCTION__, chp->cq.rptr);
319 err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0);
320 spin_unlock_irqrestore(&chp->lock, flag);
321 if (err)
322 printk(KERN_ERR MOD "Error %d rearming CQID 0x%x\n", err,
323 chp->cq.cqid);
324 return err;
325}
326
327static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
328{
329 int len = vma->vm_end - vma->vm_start;
330 u32 key = vma->vm_pgoff << PAGE_SHIFT;
331 struct cxio_rdev *rdev_p;
332 int ret = 0;
333 struct iwch_mm_entry *mm;
334 struct iwch_ucontext *ucontext;
335
336 PDBG("%s pgoff 0x%lx key 0x%x len %d\n", __FUNCTION__, vma->vm_pgoff,
337 key, len);
338
339 if (vma->vm_start & (PAGE_SIZE-1)) {
340 return -EINVAL;
341 }
342
343 rdev_p = &(to_iwch_dev(context->device)->rdev);
344 ucontext = to_iwch_ucontext(context);
345
346 mm = remove_mmap(ucontext, key, len);
347 if (!mm)
348 return -EINVAL;
349
350
351 if ((mm->addr >= rdev_p->rnic_info.udbell_physbase) &&
352 (mm->addr < (rdev_p->rnic_info.udbell_physbase +
353 rdev_p->rnic_info.udbell_len))) {
354
355 /*
356 * Map T3 DB register.
357 */
358 if (vma->vm_flags & VM_READ) {
359 			kfree(mm);
360 			return -EPERM;
361 		}
362 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
363 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
364 vma->vm_flags &= ~VM_MAYREAD;
365 ret = io_remap_pfn_range(vma, vma->vm_start,
366 mm->addr >> PAGE_SHIFT,
367 len, vma->vm_page_prot);
368 } else {
369
370 /*
371 * Map WQ or CQ contig dma memory...
372 */
373 ret = remap_pfn_range(vma, vma->vm_start,
374 mm->addr >> PAGE_SHIFT,
375 len, vma->vm_page_prot);
376 }
377 	kfree(mm);
378 return ret;
379}
380
381static int iwch_deallocate_pd(struct ib_pd *pd)
382{
383 struct iwch_dev *rhp;
384 struct iwch_pd *php;
385
386 php = to_iwch_pd(pd);
387 rhp = php->rhp;
388 PDBG("%s ibpd %p pdid 0x%x\n", __FUNCTION__, pd, php->pdid);
389 cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
390 kfree(php);
391 return 0;
392}
393
394static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
395 struct ib_ucontext *context,
396 struct ib_udata *udata)
397{
398 struct iwch_pd *php;
399 u32 pdid;
400 struct iwch_dev *rhp;
401
402 PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
403 rhp = (struct iwch_dev *) ibdev;
404 pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
405 if (!pdid)
406 return ERR_PTR(-EINVAL);
407 php = kzalloc(sizeof(*php), GFP_KERNEL);
408 if (!php) {
409 cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
410 return ERR_PTR(-ENOMEM);
411 }
412 php->pdid = pdid;
413 php->rhp = rhp;
414 if (context) {
415 if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) {
416 iwch_deallocate_pd(&php->ibpd);
417 return ERR_PTR(-EFAULT);
418 }
419 }
420 PDBG("%s pdid 0x%0x ptr 0x%p\n", __FUNCTION__, pdid, php);
421 return &php->ibpd;
422}
423
424static int iwch_dereg_mr(struct ib_mr *ib_mr)
425{
426 struct iwch_dev *rhp;
427 struct iwch_mr *mhp;
428 u32 mmid;
429
430 PDBG("%s ib_mr %p\n", __FUNCTION__, ib_mr);
431 /* There can be no memory windows */
432 if (atomic_read(&ib_mr->usecnt))
433 return -EINVAL;
434
435 mhp = to_iwch_mr(ib_mr);
436 rhp = mhp->rhp;
437 mmid = mhp->attr.stag >> 8;
438 cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
439 mhp->attr.pbl_addr);
440 remove_handle(rhp, &rhp->mmidr, mmid);
441 if (mhp->kva)
442 kfree((void *) (unsigned long) mhp->kva);
443 PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
444 kfree(mhp);
445 return 0;
446}
447
448static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
449 struct ib_phys_buf *buffer_list,
450 int num_phys_buf,
451 int acc,
452 u64 *iova_start)
453{
454 __be64 *page_list;
455 int shift;
456 u64 total_size;
457 int npages;
458 struct iwch_dev *rhp;
459 struct iwch_pd *php;
460 struct iwch_mr *mhp;
461 int ret;
462
463 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
464 php = to_iwch_pd(pd);
465 rhp = php->rhp;
466
467 acc = iwch_convert_access(acc);
468
469
470 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
471 if (!mhp)
472 return ERR_PTR(-ENOMEM);
473
474 /* First check that we have enough alignment */
475 if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
476 ret = -EINVAL;
477 goto err;
478 }
479
480 if (num_phys_buf > 1 &&
481 ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
482 ret = -EINVAL;
483 goto err;
484 }
485
486 ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
487 &total_size, &npages, &shift, &page_list);
488 if (ret)
489 goto err;
490
491 mhp->rhp = rhp;
492 mhp->attr.pdid = php->pdid;
493 mhp->attr.zbva = 0;
494
495 /* NOTE: TPT perms are backwards from BIND WR perms! */
496 mhp->attr.perms = (acc & 0x1) << 3;
497 mhp->attr.perms |= (acc & 0x2) << 1;
498 mhp->attr.perms |= (acc & 0x4) >> 1;
499 mhp->attr.perms |= (acc & 0x8) >> 3;
500
501 mhp->attr.va_fbo = *iova_start;
502 mhp->attr.page_size = shift - 12;
503
504 mhp->attr.len = (u32) total_size;
505 mhp->attr.pbl_size = npages;
506 ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
507 kfree(page_list);
508 if (ret) {
509 goto err;
510 }
511 return &mhp->ibmr;
512err:
513 kfree(mhp);
514 return ERR_PTR(ret);
515
516}
517
518static int iwch_reregister_phys_mem(struct ib_mr *mr,
519 int mr_rereg_mask,
520 struct ib_pd *pd,
521 struct ib_phys_buf *buffer_list,
522 int num_phys_buf,
523 int acc, u64 * iova_start)
524{
525
526 struct iwch_mr mh, *mhp;
527 struct iwch_pd *php;
528 struct iwch_dev *rhp;
529 int new_acc;
530 __be64 *page_list = NULL;
531 int shift = 0;
532 u64 total_size;
533 int npages;
534 int ret;
535
536 PDBG("%s ib_mr %p ib_pd %p\n", __FUNCTION__, mr, pd);
537
538 /* There can be no memory windows */
539 if (atomic_read(&mr->usecnt))
540 return -EINVAL;
541
542 mhp = to_iwch_mr(mr);
543 rhp = mhp->rhp;
544 php = to_iwch_pd(mr->pd);
545
546 /* make sure we are on the same adapter */
547 if (rhp != php->rhp)
548 return -EINVAL;
549
550 new_acc = mhp->attr.perms;
551
552 memcpy(&mh, mhp, sizeof *mhp);
553
554 if (mr_rereg_mask & IB_MR_REREG_PD)
555 php = to_iwch_pd(pd);
556 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
557 mh.attr.perms = iwch_convert_access(acc);
558 if (mr_rereg_mask & IB_MR_REREG_TRANS)
559 ret = build_phys_page_list(buffer_list, num_phys_buf,
560 iova_start,
561 				&total_size, &npages, &shift, &page_list);
562 	if ((mr_rereg_mask & IB_MR_REREG_TRANS) && ret)
563 		return ret;
564 ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
565 kfree(page_list);
566 if (ret) {
567 return ret;
568 }
569 if (mr_rereg_mask & IB_MR_REREG_PD)
570 mhp->attr.pdid = php->pdid;
571 if (mr_rereg_mask & IB_MR_REREG_ACCESS)
572 mhp->attr.perms = acc;
573 if (mr_rereg_mask & IB_MR_REREG_TRANS) {
574 mhp->attr.zbva = 0;
575 mhp->attr.va_fbo = *iova_start;
576 mhp->attr.page_size = shift - 12;
577 mhp->attr.len = (u32) total_size;
578 mhp->attr.pbl_size = npages;
579 }
580
581 return 0;
582}
583
584
585static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
586 int acc, struct ib_udata *udata)
587{
588 __be64 *pages;
589 int shift, n, len;
590 int i, j, k;
591 int err = 0;
592 struct ib_umem_chunk *chunk;
593 struct iwch_dev *rhp;
594 struct iwch_pd *php;
595 struct iwch_mr *mhp;
596 struct iwch_reg_user_mr_resp uresp;
597
598 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
599 shift = ffs(region->page_size) - 1;
600
601 php = to_iwch_pd(pd);
602 rhp = php->rhp;
603 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
604 if (!mhp)
605 return ERR_PTR(-ENOMEM);
606
607 n = 0;
608 list_for_each_entry(chunk, &region->chunk_list, list)
609 n += chunk->nents;
610
611 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
612 if (!pages) {
613 err = -ENOMEM;
614 goto err;
615 }
616
617 acc = iwch_convert_access(acc);
618
619 i = n = 0;
620
621 list_for_each_entry(chunk, &region->chunk_list, list)
622 for (j = 0; j < chunk->nmap; ++j) {
623 len = sg_dma_len(&chunk->page_list[j]) >> shift;
624 for (k = 0; k < len; ++k) {
625 pages[i++] = cpu_to_be64(sg_dma_address(
626 &chunk->page_list[j]) +
627 region->page_size * k);
628 }
629 }
630
631 mhp->rhp = rhp;
632 mhp->attr.pdid = php->pdid;
633 mhp->attr.zbva = 0;
634 mhp->attr.perms = (acc & 0x1) << 3;
635 mhp->attr.perms |= (acc & 0x2) << 1;
636 mhp->attr.perms |= (acc & 0x4) >> 1;
637 mhp->attr.perms |= (acc & 0x8) >> 3;
638 mhp->attr.va_fbo = region->virt_base;
639 mhp->attr.page_size = shift - 12;
640 mhp->attr.len = (u32) region->length;
641 mhp->attr.pbl_size = i;
642 err = iwch_register_mem(rhp, php, mhp, shift, pages);
643 kfree(pages);
644 if (err)
645 goto err;
646
647 if (udata && t3b_device(rhp)) {
648 uresp.pbl_addr = (mhp->attr.pbl_addr -
649 rhp->rdev.rnic_info.pbl_base) >> 3;
650 PDBG("%s user resp pbl_addr 0x%x\n", __FUNCTION__,
651 uresp.pbl_addr);
652
653 if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
654 iwch_dereg_mr(&mhp->ibmr);
655 err = -EFAULT;
656 goto err;
657 }
658 }
659
660 return &mhp->ibmr;
661
662err:
663 kfree(mhp);
664 return ERR_PTR(err);
665}
666
667static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
668{
669 struct ib_phys_buf bl;
670 u64 kva;
671 struct ib_mr *ibmr;
672
673 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
674
675 /*
676 * T3 only supports 32 bits of size.
677 */
678 bl.size = 0xffffffff;
679 bl.addr = 0;
680 kva = 0;
681 ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
682 return ibmr;
683}
684
685static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd)
686{
687 struct iwch_dev *rhp;
688 struct iwch_pd *php;
689 struct iwch_mw *mhp;
690 u32 mmid;
691 u32 stag = 0;
692 int ret;
693
694 php = to_iwch_pd(pd);
695 rhp = php->rhp;
696 mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
697 if (!mhp)
698 return ERR_PTR(-ENOMEM);
699 ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid);
700 if (ret) {
701 kfree(mhp);
702 return ERR_PTR(ret);
703 }
704 mhp->rhp = rhp;
705 mhp->attr.pdid = php->pdid;
706 mhp->attr.type = TPT_MW;
707 mhp->attr.stag = stag;
708 mmid = (stag) >> 8;
709 insert_handle(rhp, &rhp->mmidr, mhp, mmid);
710 PDBG("%s mmid 0x%x mhp %p stag 0x%x\n", __FUNCTION__, mmid, mhp, stag);
711 return &(mhp->ibmw);
712}
713
714static int iwch_dealloc_mw(struct ib_mw *mw)
715{
716 struct iwch_dev *rhp;
717 struct iwch_mw *mhp;
718 u32 mmid;
719
720 mhp = to_iwch_mw(mw);
721 rhp = mhp->rhp;
722 mmid = (mw->rkey) >> 8;
723 cxio_deallocate_window(&rhp->rdev, mhp->attr.stag);
724 remove_handle(rhp, &rhp->mmidr, mmid);
725 kfree(mhp);
726 PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __FUNCTION__, mw, mmid, mhp);
727 return 0;
728}
729
730static int iwch_destroy_qp(struct ib_qp *ib_qp)
731{
732 struct iwch_dev *rhp;
733 struct iwch_qp *qhp;
734 struct iwch_qp_attributes attrs;
735 struct iwch_ucontext *ucontext;
736
737 qhp = to_iwch_qp(ib_qp);
738 rhp = qhp->rhp;
739
740 if (qhp->attr.state == IWCH_QP_STATE_RTS) {
741 attrs.next_state = IWCH_QP_STATE_ERROR;
742 iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0);
743 }
744 wait_event(qhp->wait, !qhp->ep);
745
746 remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid);
747
748 atomic_dec(&qhp->refcnt);
749 wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
750
751 ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context)
752 : NULL;
753 cxio_destroy_qp(&rhp->rdev, &qhp->wq,
754 ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
755
756 PDBG("%s ib_qp %p qpid 0x%0x qhp %p\n", __FUNCTION__,
757 ib_qp, qhp->wq.qpid, qhp);
758 kfree(qhp);
759 return 0;
760}
761
762static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
763 struct ib_qp_init_attr *attrs,
764 struct ib_udata *udata)
765{
766 struct iwch_dev *rhp;
767 struct iwch_qp *qhp;
768 struct iwch_pd *php;
769 struct iwch_cq *schp;
770 struct iwch_cq *rchp;
771 struct iwch_create_qp_resp uresp;
772 int wqsize, sqsize, rqsize;
773 struct iwch_ucontext *ucontext;
774
775 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
776 if (attrs->qp_type != IB_QPT_RC)
777 return ERR_PTR(-EINVAL);
778 php = to_iwch_pd(pd);
779 rhp = php->rhp;
780 schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid);
781 rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid);
782 if (!schp || !rchp)
783 return ERR_PTR(-EINVAL);
784
785 /* The RQT size must be # of entries + 1 rounded up to a power of two */
786 rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr);
787 if (rqsize == attrs->cap.max_recv_wr)
788 rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1);
789
790 /* T3 doesn't support RQT depth < 16 */
791 if (rqsize < 16)
792 rqsize = 16;
793
794 if (rqsize > T3_MAX_RQ_SIZE)
795 return ERR_PTR(-EINVAL);
796
797 /*
798 * NOTE: The SQ and total WQ sizes don't need to be
799 * a power of two. However, all the code assumes
800 * they are. EG: Q_FREECNT() and friends.
801 */
802 sqsize = roundup_pow_of_two(attrs->cap.max_send_wr);
803 wqsize = roundup_pow_of_two(rqsize + sqsize);
804 PDBG("%s wqsize %d sqsize %d rqsize %d\n", __FUNCTION__,
805 wqsize, sqsize, rqsize);
806 qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
807 if (!qhp)
808 return ERR_PTR(-ENOMEM);
809 qhp->wq.size_log2 = ilog2(wqsize);
810 qhp->wq.rq_size_log2 = ilog2(rqsize);
811 qhp->wq.sq_size_log2 = ilog2(sqsize);
812 ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
813 if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq,
814 ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) {
815 kfree(qhp);
816 return ERR_PTR(-ENOMEM);
817 }
818 attrs->cap.max_recv_wr = rqsize - 1;
819 attrs->cap.max_send_wr = sqsize;
820 qhp->rhp = rhp;
821 qhp->attr.pd = php->pdid;
822 qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid;
823 qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid;
824 qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
825 qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
826 qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
827 qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
828 qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
829 qhp->attr.state = IWCH_QP_STATE_IDLE;
830 qhp->attr.next_state = IWCH_QP_STATE_IDLE;
831
832 /*
833 * XXX - These don't get passed in from the openib user
834 * at create time. The CM sets them via a QP modify.
835 * Need to fix... I think the CM should
836 */
837 qhp->attr.enable_rdma_read = 1;
838 qhp->attr.enable_rdma_write = 1;
839 qhp->attr.enable_bind = 1;
840 qhp->attr.max_ord = 1;
841 qhp->attr.max_ird = 1;
842
843 spin_lock_init(&qhp->lock);
844 init_waitqueue_head(&qhp->wait);
845 atomic_set(&qhp->refcnt, 1);
846 insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid);
847
848 if (udata) {
849
850 struct iwch_mm_entry *mm1, *mm2;
851
852 mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
853 if (!mm1) {
854 iwch_destroy_qp(&qhp->ibqp);
855 return ERR_PTR(-ENOMEM);
856 }
857
858 mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
859 if (!mm2) {
860 kfree(mm1);
861 iwch_destroy_qp(&qhp->ibqp);
862 return ERR_PTR(-ENOMEM);
863 }
864
865 uresp.qpid = qhp->wq.qpid;
866 uresp.size_log2 = qhp->wq.size_log2;
867 uresp.sq_size_log2 = qhp->wq.sq_size_log2;
868 uresp.rq_size_log2 = qhp->wq.rq_size_log2;
869 spin_lock(&ucontext->mmap_lock);
870 uresp.key = ucontext->key;
871 ucontext->key += PAGE_SIZE;
872 uresp.db_key = ucontext->key;
873 ucontext->key += PAGE_SIZE;
874 spin_unlock(&ucontext->mmap_lock);
875 if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) {
876 kfree(mm1);
877 kfree(mm2);
878 iwch_destroy_qp(&qhp->ibqp);
879 return ERR_PTR(-EFAULT);
880 }
881 mm1->key = uresp.key;
882 mm1->addr = virt_to_phys(qhp->wq.queue);
883 mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr));
884 insert_mmap(ucontext, mm1);
885 mm2->key = uresp.db_key;
886 mm2->addr = qhp->wq.udb & PAGE_MASK;
887 mm2->len = PAGE_SIZE;
888 insert_mmap(ucontext, mm2);
889 }
890 qhp->ibqp.qp_num = qhp->wq.qpid;
891 init_timer(&(qhp->timer));
892 PDBG("%s sq_num_entries %d, rq_num_entries %d "
893 "qpid 0x%0x qhp %p dma_addr 0x%llx size %d\n",
894 __FUNCTION__, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
895 qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr,
896 1 << qhp->wq.size_log2);
897 return &qhp->ibqp;
898}
899
900static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
901 int attr_mask, struct ib_udata *udata)
902{
903 struct iwch_dev *rhp;
904 struct iwch_qp *qhp;
905 enum iwch_qp_attr_mask mask = 0;
906 struct iwch_qp_attributes attrs;
907
908 PDBG("%s ib_qp %p\n", __FUNCTION__, ibqp);
909
910 /* iwarp does not support the RTR state */
911 if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
912 attr_mask &= ~IB_QP_STATE;
913
914 /* Make sure we still have something left to do */
915 if (!attr_mask)
916 return 0;
917
918 memset(&attrs, 0, sizeof attrs);
919 qhp = to_iwch_qp(ibqp);
920 rhp = qhp->rhp;
921
922 attrs.next_state = iwch_convert_state(attr->qp_state);
923 attrs.enable_rdma_read = (attr->qp_access_flags &
924 IB_ACCESS_REMOTE_READ) ? 1 : 0;
925 attrs.enable_rdma_write = (attr->qp_access_flags &
926 IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
927 attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
928
929
930 mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0;
931 mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ?
932 (IWCH_QP_ATTR_ENABLE_RDMA_READ |
933 IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
934 IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0;
935
936 return iwch_modify_qp(rhp, qhp, mask, &attrs, 0);
937}
938
939void iwch_qp_add_ref(struct ib_qp *qp)
940{
941 PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
942 atomic_inc(&(to_iwch_qp(qp)->refcnt));
943}
944
945void iwch_qp_rem_ref(struct ib_qp *qp)
946{
947 PDBG("%s ib_qp %p\n", __FUNCTION__, qp);
948 if (atomic_dec_and_test(&(to_iwch_qp(qp)->refcnt)))
949 wake_up(&(to_iwch_qp(qp)->wait));
950}
951
952struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn)
953{
954 PDBG("%s ib_dev %p qpn 0x%x\n", __FUNCTION__, dev, qpn);
955 return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn);
956}
957
958
959static int iwch_query_pkey(struct ib_device *ibdev,
960 u8 port, u16 index, u16 * pkey)
961{
962 PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
963 *pkey = 0;
964 return 0;
965}
966
967static int iwch_query_gid(struct ib_device *ibdev, u8 port,
968 int index, union ib_gid *gid)
969{
970 struct iwch_dev *dev;
971
972 PDBG("%s ibdev %p, port %d, index %d, gid %p\n",
973 __FUNCTION__, ibdev, port, index, gid);
974 dev = to_iwch_dev(ibdev);
975 BUG_ON(port == 0 || port > 2);
976 memset(&(gid->raw[0]), 0, sizeof(gid->raw));
977 memcpy(&(gid->raw[0]), dev->rdev.port_info.lldevs[port-1]->dev_addr, 6);
978 return 0;
979}
980
981static int iwch_query_device(struct ib_device *ibdev,
982 struct ib_device_attr *props)
983{
984
985 struct iwch_dev *dev;
986 PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
987
988 dev = to_iwch_dev(ibdev);
989 memset(props, 0, sizeof *props);
990 memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
991 props->device_cap_flags = dev->device_cap_flags;
992 props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
993 props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
994 props->max_mr_size = ~0ull;
995 props->max_qp = dev->attr.max_qps;
996 props->max_qp_wr = dev->attr.max_wrs;
997 props->max_sge = dev->attr.max_sge_per_wr;
998 props->max_sge_rd = 1;
999 props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
1000 props->max_cq = dev->attr.max_cqs;
1001 props->max_cqe = dev->attr.max_cqes_per_cq;
1002 props->max_mr = dev->attr.max_mem_regs;
1003 props->max_pd = dev->attr.max_pds;
1004 props->local_ca_ack_delay = 0;
1005
1006 return 0;
1007}
1008
1009static int iwch_query_port(struct ib_device *ibdev,
1010 u8 port, struct ib_port_attr *props)
1011{
1012 PDBG("%s ibdev %p\n", __FUNCTION__, ibdev);
1013 props->max_mtu = IB_MTU_4096;
1014 props->lid = 0;
1015 props->lmc = 0;
1016 props->sm_lid = 0;
1017 props->sm_sl = 0;
1018 props->state = IB_PORT_ACTIVE;
1019 props->phys_state = 0;
1020 props->port_cap_flags =
1021 IB_PORT_CM_SUP |
1022 IB_PORT_SNMP_TUNNEL_SUP |
1023 IB_PORT_REINIT_SUP |
1024 IB_PORT_DEVICE_MGMT_SUP |
1025 IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
1026 props->gid_tbl_len = 1;
1027 props->pkey_tbl_len = 1;
1028 props->qkey_viol_cntr = 0;
1029 props->active_width = 2;
1030 props->active_speed = 2;
1031 props->max_msg_sz = -1;
1032
1033 return 0;
1034}
1035
1036static ssize_t show_rev(struct class_device *cdev, char *buf)
1037{
1038 struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
1039 ibdev.class_dev);
1040 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
1041 return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type);
1042}
1043
1044static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
1045{
1046 struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
1047 ibdev.class_dev);
1048 struct ethtool_drvinfo info;
1049 struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
1050
1051 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
1052 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1053 return sprintf(buf, "%s\n", info.fw_version);
1054}
1055
1056static ssize_t show_hca(struct class_device *cdev, char *buf)
1057{
1058 struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
1059 ibdev.class_dev);
1060 struct ethtool_drvinfo info;
1061 struct net_device *lldev = dev->rdev.t3cdev_p->lldev;
1062
1063 PDBG("%s class dev 0x%p\n", __FUNCTION__, cdev);
1064 lldev->ethtool_ops->get_drvinfo(lldev, &info);
1065 return sprintf(buf, "%s\n", info.driver);
1066}
1067
1068static ssize_t show_board(struct class_device *cdev, char *buf)
1069{
1070 struct iwch_dev *dev = container_of(cdev, struct iwch_dev,
1071 ibdev.class_dev);
1072 PDBG("%s class dev 0x%p\n", __FUNCTION__, dev);
1073 return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor,
1074 dev->rdev.rnic_info.pdev->device);
1075}
1076
1077static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
1078static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
1079static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
1080static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
1081
1082static struct class_device_attribute *iwch_class_attributes[] = {
1083 &class_device_attr_hw_rev,
1084 &class_device_attr_fw_ver,
1085 &class_device_attr_hca_type,
1086 &class_device_attr_board_id
1087};
1088
1089int iwch_register_device(struct iwch_dev *dev)
1090{
1091 int ret;
1092 int i;
1093
1094 PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
1095 strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX);
1096 memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid));
1097 memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6);
1098 dev->ibdev.owner = THIS_MODULE;
1099 dev->device_cap_flags =
1100 (IB_DEVICE_ZERO_STAG |
1101 IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW);
1102
1103 dev->ibdev.uverbs_cmd_mask =
1104 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1105 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
1106 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
1107 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
1108 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
1109 (1ull << IB_USER_VERBS_CMD_REG_MR) |
1110 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
1111 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
1112 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
1113 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
1114 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
1115 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
1116 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
1117 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
1118 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
1119 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
1120 (1ull << IB_USER_VERBS_CMD_POST_RECV);
1121 dev->ibdev.node_type = RDMA_NODE_RNIC;
1122 memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
1123 dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
1124 dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
1125 dev->ibdev.class_dev.dev = &(dev->rdev.rnic_info.pdev->dev);
1126 dev->ibdev.query_device = iwch_query_device;
1127 dev->ibdev.query_port = iwch_query_port;
1128 dev->ibdev.modify_port = iwch_modify_port;
1129 dev->ibdev.query_pkey = iwch_query_pkey;
1130 dev->ibdev.query_gid = iwch_query_gid;
1131 dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
1132 dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
1133 dev->ibdev.mmap = iwch_mmap;
1134 dev->ibdev.alloc_pd = iwch_allocate_pd;
1135 dev->ibdev.dealloc_pd = iwch_deallocate_pd;
1136 dev->ibdev.create_ah = iwch_ah_create;
1137 dev->ibdev.destroy_ah = iwch_ah_destroy;
1138 dev->ibdev.create_qp = iwch_create_qp;
1139 dev->ibdev.modify_qp = iwch_ib_modify_qp;
1140 dev->ibdev.destroy_qp = iwch_destroy_qp;
1141 dev->ibdev.create_cq = iwch_create_cq;
1142 dev->ibdev.destroy_cq = iwch_destroy_cq;
1143 dev->ibdev.resize_cq = iwch_resize_cq;
1144 dev->ibdev.poll_cq = iwch_poll_cq;
1145 dev->ibdev.get_dma_mr = iwch_get_dma_mr;
1146 dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
1147 dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
1148 dev->ibdev.reg_user_mr = iwch_reg_user_mr;
1149 dev->ibdev.dereg_mr = iwch_dereg_mr;
1150 dev->ibdev.alloc_mw = iwch_alloc_mw;
1151 dev->ibdev.bind_mw = iwch_bind_mw;
1152 dev->ibdev.dealloc_mw = iwch_dealloc_mw;
1153
1154 dev->ibdev.attach_mcast = iwch_multicast_attach;
1155 dev->ibdev.detach_mcast = iwch_multicast_detach;
1156 dev->ibdev.process_mad = iwch_process_mad;
1157
1158 dev->ibdev.req_notify_cq = iwch_arm_cq;
1159 dev->ibdev.post_send = iwch_post_send;
1160 dev->ibdev.post_recv = iwch_post_receive;
1161
1162
1163 dev->ibdev.iwcm =
1164 (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
1165 GFP_KERNEL);
1166 dev->ibdev.iwcm->connect = iwch_connect;
1167 dev->ibdev.iwcm->accept = iwch_accept_cr;
1168 dev->ibdev.iwcm->reject = iwch_reject_cr;
1169 dev->ibdev.iwcm->create_listen = iwch_create_listen;
1170 dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
1171 dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
1172 dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
1173 dev->ibdev.iwcm->get_qp = iwch_get_qp;
1174
1175 ret = ib_register_device(&dev->ibdev);
1176 if (ret)
1177 goto bail1;
1178
1179 for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) {
1180 ret = class_device_create_file(&dev->ibdev.class_dev,
1181 iwch_class_attributes[i]);
1182 if (ret) {
1183 goto bail2;
1184 }
1185 }
1186 return 0;
1187bail2:
1188 ib_unregister_device(&dev->ibdev);
1189bail1:
1190 return ret;
1191}
1192
1193void iwch_unregister_device(struct iwch_dev *dev)
1194{
1195 int i;
1196
1197 PDBG("%s iwch_dev %p\n", __FUNCTION__, dev);
1198 for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i)
1199 class_device_remove_file(&dev->ibdev.class_dev,
1200 iwch_class_attributes[i]);
1201 ib_unregister_device(&dev->ibdev);
1202 return;
1203}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
new file mode 100644
index 000000000000..61e3278fd7a8
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -0,0 +1,367 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __IWCH_PROVIDER_H__
34#define __IWCH_PROVIDER_H__
35
36#include <linux/list.h>
37#include <linux/spinlock.h>
38#include <rdma/ib_verbs.h>
39#include <asm/types.h>
40#include "t3cdev.h"
41#include "iwch.h"
42#include "cxio_wr.h"
43#include "cxio_hal.h"
44
45struct iwch_pd {
46 struct ib_pd ibpd;
47 u32 pdid;
48 struct iwch_dev *rhp;
49};
50
51static inline struct iwch_pd *to_iwch_pd(struct ib_pd *ibpd)
52{
53 return container_of(ibpd, struct iwch_pd, ibpd);
54}
55
56struct tpt_attributes {
57 u32 stag;
58 u32 state:1;
59 u32 type:2;
60 u32 rsvd:1;
61 enum tpt_mem_perm perms;
62 u32 remote_invaliate_disable:1;
63 u32 zbva:1;
64 u32 mw_bind_enable:1;
65 u32 page_size:5;
66
67 u32 pdid;
68 u32 qpid;
69 u32 pbl_addr;
70 u32 len;
71 u64 va_fbo;
72 u32 pbl_size;
73};
74
75struct iwch_mr {
76 struct ib_mr ibmr;
77 struct iwch_dev *rhp;
78 u64 kva;
79 struct tpt_attributes attr;
80};
81
82typedef struct iwch_mw iwch_mw_handle;
83
84static inline struct iwch_mr *to_iwch_mr(struct ib_mr *ibmr)
85{
86 return container_of(ibmr, struct iwch_mr, ibmr);
87}
88
89struct iwch_mw {
90 struct ib_mw ibmw;
91 struct iwch_dev *rhp;
92 u64 kva;
93 struct tpt_attributes attr;
94};
95
96static inline struct iwch_mw *to_iwch_mw(struct ib_mw *ibmw)
97{
98 return container_of(ibmw, struct iwch_mw, ibmw);
99}
100
101struct iwch_cq {
102 struct ib_cq ibcq;
103 struct iwch_dev *rhp;
104 struct t3_cq cq;
105 spinlock_t lock;
106 atomic_t refcnt;
107 wait_queue_head_t wait;
108 u32 __user *user_rptr_addr;
109};
110
111static inline struct iwch_cq *to_iwch_cq(struct ib_cq *ibcq)
112{
113 return container_of(ibcq, struct iwch_cq, ibcq);
114}
115
116enum IWCH_QP_FLAGS {
117 QP_QUIESCED = 0x01
118};
119
120struct iwch_mpa_attributes {
121 u8 recv_marker_enabled;
122	u8 xmit_marker_enabled;
123 u8 crc_enabled;
124 u8 version; /* 0 or 1 */
125};
126
127struct iwch_qp_attributes {
128 u32 scq;
129 u32 rcq;
130 u32 sq_num_entries;
131 u32 rq_num_entries;
132 u32 sq_max_sges;
133 u32 sq_max_sges_rdma_write;
134 u32 rq_max_sges;
135 u32 state;
136 u8 enable_rdma_read;
137 u8 enable_rdma_write; /* enable inbound Read Resp. */
138 u8 enable_bind;
139 u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */
140 /*
141	 * Next QP state. If the current state is specified, only the
142	 * QP attributes will be modified.
143 */
144 u32 max_ord;
145 u32 max_ird;
146 u32 pd; /* IN */
147 u32 next_state;
148 char terminate_buffer[52];
149 u32 terminate_msg_len;
150 u8 is_terminate_local;
151 struct iwch_mpa_attributes mpa_attr; /* IN-OUT */
152 struct iwch_ep *llp_stream_handle;
153 char *stream_msg_buf; /* Last stream msg. before Idle -> RTS */
154 u32 stream_msg_buf_len; /* Only on Idle -> RTS */
155};
156
157struct iwch_qp {
158 struct ib_qp ibqp;
159 struct iwch_dev *rhp;
160 struct iwch_ep *ep;
161 struct iwch_qp_attributes attr;
162 struct t3_wq wq;
163 spinlock_t lock;
164 atomic_t refcnt;
165 wait_queue_head_t wait;
166 enum IWCH_QP_FLAGS flags;
167 struct timer_list timer;
168};
169
170static inline int qp_quiesced(struct iwch_qp *qhp)
171{
172 return qhp->flags & QP_QUIESCED;
173}
174
175static inline struct iwch_qp *to_iwch_qp(struct ib_qp *ibqp)
176{
177 return container_of(ibqp, struct iwch_qp, ibqp);
178}
179
180void iwch_qp_add_ref(struct ib_qp *qp);
181void iwch_qp_rem_ref(struct ib_qp *qp);
182struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn);
183
184struct iwch_ucontext {
185 struct ib_ucontext ibucontext;
186 struct cxio_ucontext uctx;
187 u32 key;
188 spinlock_t mmap_lock;
189 struct list_head mmaps;
190};
191
192static inline struct iwch_ucontext *to_iwch_ucontext(struct ib_ucontext *c)
193{
194 return container_of(c, struct iwch_ucontext, ibucontext);
195}
196
197struct iwch_mm_entry {
198 struct list_head entry;
199 u64 addr;
200 u32 key;
201 unsigned len;
202};
203
204static inline struct iwch_mm_entry *remove_mmap(struct iwch_ucontext *ucontext,
205 u32 key, unsigned len)
206{
207 struct list_head *pos, *nxt;
208 struct iwch_mm_entry *mm;
209
210 spin_lock(&ucontext->mmap_lock);
211 list_for_each_safe(pos, nxt, &ucontext->mmaps) {
212
213 mm = list_entry(pos, struct iwch_mm_entry, entry);
214 if (mm->key == key && mm->len == len) {
215 list_del_init(&mm->entry);
216 spin_unlock(&ucontext->mmap_lock);
217 PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
218 key, (unsigned long long) mm->addr, mm->len);
219 return mm;
220 }
221 }
222 spin_unlock(&ucontext->mmap_lock);
223 return NULL;
224}
225
226static inline void insert_mmap(struct iwch_ucontext *ucontext,
227 struct iwch_mm_entry *mm)
228{
229 spin_lock(&ucontext->mmap_lock);
230 PDBG("%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__,
231 mm->key, (unsigned long long) mm->addr, mm->len);
232 list_add_tail(&mm->entry, &ucontext->mmaps);
233 spin_unlock(&ucontext->mmap_lock);
234}
235
236enum iwch_qp_attr_mask {
237 IWCH_QP_ATTR_NEXT_STATE = 1 << 0,
238 IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
239 IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
240 IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
241 IWCH_QP_ATTR_MAX_ORD = 1 << 11,
242 IWCH_QP_ATTR_MAX_IRD = 1 << 12,
243 IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
244 IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
245 IWCH_QP_ATTR_MPA_ATTR = 1 << 24,
246 IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
247 IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ |
248 IWCH_QP_ATTR_ENABLE_RDMA_WRITE |
249 IWCH_QP_ATTR_MAX_ORD |
250 IWCH_QP_ATTR_MAX_IRD |
251 IWCH_QP_ATTR_LLP_STREAM_HANDLE |
252 IWCH_QP_ATTR_STREAM_MSG_BUFFER |
253 IWCH_QP_ATTR_MPA_ATTR |
254 IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE)
255};
256
257int iwch_modify_qp(struct iwch_dev *rhp,
258 struct iwch_qp *qhp,
259 enum iwch_qp_attr_mask mask,
260 struct iwch_qp_attributes *attrs,
261 int internal);
262
263enum iwch_qp_state {
264 IWCH_QP_STATE_IDLE,
265 IWCH_QP_STATE_RTS,
266 IWCH_QP_STATE_ERROR,
267 IWCH_QP_STATE_TERMINATE,
268 IWCH_QP_STATE_CLOSING,
269 IWCH_QP_STATE_TOT
270};
271
272static inline int iwch_convert_state(enum ib_qp_state ib_state)
273{
274 switch (ib_state) {
275 case IB_QPS_RESET:
276 case IB_QPS_INIT:
277 return IWCH_QP_STATE_IDLE;
278 case IB_QPS_RTS:
279 return IWCH_QP_STATE_RTS;
280 case IB_QPS_SQD:
281 return IWCH_QP_STATE_CLOSING;
282 case IB_QPS_SQE:
283 return IWCH_QP_STATE_TERMINATE;
284 case IB_QPS_ERR:
285 return IWCH_QP_STATE_ERROR;
286 default:
287 return -1;
288 }
289}
290
291enum iwch_mem_perms {
292 IWCH_MEM_ACCESS_LOCAL_READ = 1 << 0,
293 IWCH_MEM_ACCESS_LOCAL_WRITE = 1 << 1,
294 IWCH_MEM_ACCESS_REMOTE_READ = 1 << 2,
295 IWCH_MEM_ACCESS_REMOTE_WRITE = 1 << 3,
296 IWCH_MEM_ACCESS_ATOMICS = 1 << 4,
297 IWCH_MEM_ACCESS_BINDING = 1 << 5,
298 IWCH_MEM_ACCESS_LOCAL =
299 (IWCH_MEM_ACCESS_LOCAL_READ | IWCH_MEM_ACCESS_LOCAL_WRITE),
300 IWCH_MEM_ACCESS_REMOTE =
301 (IWCH_MEM_ACCESS_REMOTE_WRITE | IWCH_MEM_ACCESS_REMOTE_READ)
302 /* cannot go beyond 1 << 31 */
303} __attribute__ ((packed));
304
305static inline u32 iwch_convert_access(int acc)
306{
307 return (acc & IB_ACCESS_REMOTE_WRITE ? IWCH_MEM_ACCESS_REMOTE_WRITE : 0)
308 | (acc & IB_ACCESS_REMOTE_READ ? IWCH_MEM_ACCESS_REMOTE_READ : 0) |
309 (acc & IB_ACCESS_LOCAL_WRITE ? IWCH_MEM_ACCESS_LOCAL_WRITE : 0) |
310 (acc & IB_ACCESS_MW_BIND ? IWCH_MEM_ACCESS_BINDING : 0) |
311 IWCH_MEM_ACCESS_LOCAL_READ;
312}
313
314enum iwch_mmid_state {
315 IWCH_STAG_STATE_VALID,
316 IWCH_STAG_STATE_INVALID
317};
318
319enum iwch_qp_query_flags {
320 IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */
321 IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */
322 IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */
323
324 /*
325 * Quiesce QP context; Consumer
326 * will NOT replay outstanding WR
327 */
328 IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4,
329 IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8,
330 IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */
331};
332
333int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
334 struct ib_send_wr **bad_wr);
335int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
336 struct ib_recv_wr **bad_wr);
337int iwch_bind_mw(struct ib_qp *qp,
338 struct ib_mw *mw,
339 struct ib_mw_bind *mw_bind);
340int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
341int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
342int iwch_register_device(struct iwch_dev *dev);
343void iwch_unregister_device(struct iwch_dev *dev);
344int iwch_quiesce_qps(struct iwch_cq *chp);
345int iwch_resume_qps(struct iwch_cq *chp);
346void stop_read_rep_timer(struct iwch_qp *qhp);
347int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
348 struct iwch_mr *mhp,
349 int shift,
350 __be64 *page_list);
351int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
352 struct iwch_mr *mhp,
353 int shift,
354 __be64 *page_list,
355 int npages);
356int build_phys_page_list(struct ib_phys_buf *buffer_list,
357 int num_phys_buf,
358 u64 *iova_start,
359 u64 *total_size,
360 int *npages,
361 int *shift,
362 __be64 **page_list);
363
364
365#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
366
367#endif
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
new file mode 100644
index 000000000000..e066727504b6
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -0,0 +1,1007 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#include "iwch_provider.h"
34#include "iwch.h"
35#include "iwch_cm.h"
36#include "cxio_hal.h"
37
38#define NO_SUPPORT -1
39
40static inline int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
41 u8 * flit_cnt)
42{
43 int i;
44 u32 plen;
45
46 switch (wr->opcode) {
47 case IB_WR_SEND:
48 case IB_WR_SEND_WITH_IMM:
49 if (wr->send_flags & IB_SEND_SOLICITED)
50 wqe->send.rdmaop = T3_SEND_WITH_SE;
51 else
52 wqe->send.rdmaop = T3_SEND;
53 wqe->send.rem_stag = 0;
54 break;
55#if 0 /* Not currently supported */
56 case TYPE_SEND_INVALIDATE:
57 case TYPE_SEND_INVALIDATE_IMMEDIATE:
58 wqe->send.rdmaop = T3_SEND_WITH_INV;
59 wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
60 break;
61 case TYPE_SEND_SE_INVALIDATE:
62 wqe->send.rdmaop = T3_SEND_WITH_SE_INV;
63 wqe->send.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
64 break;
65#endif
66 default:
67 break;
68 }
69 if (wr->num_sge > T3_MAX_SGE)
70 return -EINVAL;
71 wqe->send.reserved[0] = 0;
72 wqe->send.reserved[1] = 0;
73 wqe->send.reserved[2] = 0;
74 if (wr->opcode == IB_WR_SEND_WITH_IMM) {
75 plen = 4;
76 wqe->send.sgl[0].stag = wr->imm_data;
77 wqe->send.sgl[0].len = __constant_cpu_to_be32(0);
78 wqe->send.num_sgle = __constant_cpu_to_be32(0);
79 *flit_cnt = 5;
80 } else {
81 plen = 0;
82 for (i = 0; i < wr->num_sge; i++) {
83 if ((plen + wr->sg_list[i].length) < plen) {
84 return -EMSGSIZE;
85 }
86 plen += wr->sg_list[i].length;
87 wqe->send.sgl[i].stag =
88 cpu_to_be32(wr->sg_list[i].lkey);
89 wqe->send.sgl[i].len =
90 cpu_to_be32(wr->sg_list[i].length);
91 wqe->send.sgl[i].to = cpu_to_be64(wr->sg_list[i].addr);
92 }
93 wqe->send.num_sgle = cpu_to_be32(wr->num_sge);
94 *flit_cnt = 4 + ((wr->num_sge) << 1);
95 }
96 wqe->send.plen = cpu_to_be32(plen);
97 return 0;
98}
99
100static inline int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
101 u8 *flit_cnt)
102{
103 int i;
104 u32 plen;
105 if (wr->num_sge > T3_MAX_SGE)
106 return -EINVAL;
107 wqe->write.rdmaop = T3_RDMA_WRITE;
108 wqe->write.reserved[0] = 0;
109 wqe->write.reserved[1] = 0;
110 wqe->write.reserved[2] = 0;
111 wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
112 wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
113
114 if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
115 plen = 4;
116 wqe->write.sgl[0].stag = wr->imm_data;
117 wqe->write.sgl[0].len = __constant_cpu_to_be32(0);
118 wqe->write.num_sgle = __constant_cpu_to_be32(0);
119 *flit_cnt = 6;
120 } else {
121 plen = 0;
122 for (i = 0; i < wr->num_sge; i++) {
123 if ((plen + wr->sg_list[i].length) < plen) {
124 return -EMSGSIZE;
125 }
126 plen += wr->sg_list[i].length;
127 wqe->write.sgl[i].stag =
128 cpu_to_be32(wr->sg_list[i].lkey);
129 wqe->write.sgl[i].len =
130 cpu_to_be32(wr->sg_list[i].length);
131 wqe->write.sgl[i].to =
132 cpu_to_be64(wr->sg_list[i].addr);
133 }
134 wqe->write.num_sgle = cpu_to_be32(wr->num_sge);
135 *flit_cnt = 5 + ((wr->num_sge) << 1);
136 }
137 wqe->write.plen = cpu_to_be32(plen);
138 return 0;
139}
140
141static inline int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
142 u8 *flit_cnt)
143{
144 if (wr->num_sge > 1)
145 return -EINVAL;
146 wqe->read.rdmaop = T3_READ_REQ;
147 wqe->read.reserved[0] = 0;
148 wqe->read.reserved[1] = 0;
149 wqe->read.reserved[2] = 0;
150 wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
151 wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
152 wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
153 wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
154 wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
155 *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
156 return 0;
157}
158
159/*
160 * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now.
161 */
162static inline int iwch_sgl2pbl_map(struct iwch_dev *rhp,
163 struct ib_sge *sg_list, u32 num_sgle,
164 u32 * pbl_addr, u8 * page_size)
165{
166 int i;
167 struct iwch_mr *mhp;
168 u32 offset;
169 for (i = 0; i < num_sgle; i++) {
170
171 mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8);
172 if (!mhp) {
173 PDBG("%s %d\n", __FUNCTION__, __LINE__);
174 return -EIO;
175 }
176 if (!mhp->attr.state) {
177 PDBG("%s %d\n", __FUNCTION__, __LINE__);
178 return -EIO;
179 }
180 if (mhp->attr.zbva) {
181 PDBG("%s %d\n", __FUNCTION__, __LINE__);
182 return -EIO;
183 }
184
185 if (sg_list[i].addr < mhp->attr.va_fbo) {
186 PDBG("%s %d\n", __FUNCTION__, __LINE__);
187 return -EINVAL;
188 }
189 if (sg_list[i].addr + ((u64) sg_list[i].length) <
190 sg_list[i].addr) {
191 PDBG("%s %d\n", __FUNCTION__, __LINE__);
192 return -EINVAL;
193 }
194 if (sg_list[i].addr + ((u64) sg_list[i].length) >
195 mhp->attr.va_fbo + ((u64) mhp->attr.len)) {
196 PDBG("%s %d\n", __FUNCTION__, __LINE__);
197 return -EINVAL;
198 }
199 offset = sg_list[i].addr - mhp->attr.va_fbo;
200 offset += ((u32) mhp->attr.va_fbo) %
201 (1UL << (12 + mhp->attr.page_size));
202 pbl_addr[i] = ((mhp->attr.pbl_addr -
203 rhp->rdev.rnic_info.pbl_base) >> 3) +
204 (offset >> (12 + mhp->attr.page_size));
205 page_size[i] = mhp->attr.page_size;
206 }
207 return 0;
208}
209
210static inline int iwch_build_rdma_recv(struct iwch_dev *rhp,
211 union t3_wr *wqe,
212 struct ib_recv_wr *wr)
213{
214 int i, err = 0;
215 u32 pbl_addr[4];
216 u8 page_size[4];
217 if (wr->num_sge > T3_MAX_SGE)
218 return -EINVAL;
219 err = iwch_sgl2pbl_map(rhp, wr->sg_list, wr->num_sge, pbl_addr,
220 page_size);
221 if (err)
222 return err;
223 wqe->recv.pagesz[0] = page_size[0];
224 wqe->recv.pagesz[1] = page_size[1];
225 wqe->recv.pagesz[2] = page_size[2];
226 wqe->recv.pagesz[3] = page_size[3];
227 wqe->recv.num_sgle = cpu_to_be32(wr->num_sge);
228 for (i = 0; i < wr->num_sge; i++) {
229 wqe->recv.sgl[i].stag = cpu_to_be32(wr->sg_list[i].lkey);
230 wqe->recv.sgl[i].len = cpu_to_be32(wr->sg_list[i].length);
231
232		/* 'to' in the WQE == the offset into the page */
233 wqe->recv.sgl[i].to = cpu_to_be64(((u32) wr->sg_list[i].addr) %
234 (1UL << (12 + page_size[i])));
235
236		/* pbl_addr is the adapter's address in the PBL */
237 wqe->recv.pbl_addr[i] = cpu_to_be32(pbl_addr[i]);
238 }
239 for (; i < T3_MAX_SGE; i++) {
240 wqe->recv.sgl[i].stag = 0;
241 wqe->recv.sgl[i].len = 0;
242 wqe->recv.sgl[i].to = 0;
243 wqe->recv.pbl_addr[i] = 0;
244 }
245 return 0;
246}
247
248int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
249 struct ib_send_wr **bad_wr)
250{
251 int err = 0;
252 u8 t3_wr_flit_cnt;
253 enum t3_wr_opcode t3_wr_opcode = 0;
254 enum t3_wr_flags t3_wr_flags;
255 struct iwch_qp *qhp;
256 u32 idx;
257 union t3_wr *wqe;
258 u32 num_wrs;
259 unsigned long flag;
260 struct t3_swsq *sqp;
261
262 qhp = to_iwch_qp(ibqp);
263 spin_lock_irqsave(&qhp->lock, flag);
264 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
265 spin_unlock_irqrestore(&qhp->lock, flag);
266 return -EINVAL;
267 }
268 num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
269 qhp->wq.sq_size_log2);
270 if (num_wrs <= 0) {
271 spin_unlock_irqrestore(&qhp->lock, flag);
272 return -ENOMEM;
273 }
274 while (wr) {
275 if (num_wrs == 0) {
276 err = -ENOMEM;
277 *bad_wr = wr;
278 break;
279 }
280 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
281 wqe = (union t3_wr *) (qhp->wq.queue + idx);
282 t3_wr_flags = 0;
283 if (wr->send_flags & IB_SEND_SOLICITED)
284 t3_wr_flags |= T3_SOLICITED_EVENT_FLAG;
285 if (wr->send_flags & IB_SEND_FENCE)
286 t3_wr_flags |= T3_READ_FENCE_FLAG;
287 if (wr->send_flags & IB_SEND_SIGNALED)
288 t3_wr_flags |= T3_COMPLETION_FLAG;
289 sqp = qhp->wq.sq +
290 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
291 switch (wr->opcode) {
292 case IB_WR_SEND:
293 case IB_WR_SEND_WITH_IMM:
294 t3_wr_opcode = T3_WR_SEND;
295 err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt);
296 break;
297 case IB_WR_RDMA_WRITE:
298 case IB_WR_RDMA_WRITE_WITH_IMM:
299 t3_wr_opcode = T3_WR_WRITE;
300 err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt);
301 break;
302 case IB_WR_RDMA_READ:
303 t3_wr_opcode = T3_WR_READ;
304 t3_wr_flags = 0; /* T3 reads are always signaled */
305 err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt);
306 if (err)
307 break;
308 sqp->read_len = wqe->read.local_len;
309 if (!qhp->wq.oldest_read)
310 qhp->wq.oldest_read = sqp;
311 break;
312 default:
313 PDBG("%s post of type=%d TBD!\n", __FUNCTION__,
314 wr->opcode);
315 err = -EINVAL;
316 }
317 if (err) {
318 *bad_wr = wr;
319 break;
320 }
321 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
322 sqp->wr_id = wr->wr_id;
323 sqp->opcode = wr2opcode(t3_wr_opcode);
324 sqp->sq_wptr = qhp->wq.sq_wptr;
325 sqp->complete = 0;
326 sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED);
327
328 build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags,
329 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
330 0, t3_wr_flit_cnt);
331 PDBG("%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d\n",
332 __FUNCTION__, (unsigned long long) wr->wr_id, idx,
333 Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2),
334 sqp->opcode);
335 wr = wr->next;
336 num_wrs--;
337 ++(qhp->wq.wptr);
338 ++(qhp->wq.sq_wptr);
339 }
340 spin_unlock_irqrestore(&qhp->lock, flag);
341 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
342 return err;
343}
344
345int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
346 struct ib_recv_wr **bad_wr)
347{
348 int err = 0;
349 struct iwch_qp *qhp;
350 u32 idx;
351 union t3_wr *wqe;
352 u32 num_wrs;
353 unsigned long flag;
354
355 qhp = to_iwch_qp(ibqp);
356 spin_lock_irqsave(&qhp->lock, flag);
357 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
358 spin_unlock_irqrestore(&qhp->lock, flag);
359 return -EINVAL;
360 }
361 num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
362 qhp->wq.rq_size_log2) - 1;
363 if (!wr) {
364 spin_unlock_irqrestore(&qhp->lock, flag);
365 return -EINVAL;
366 }
367 while (wr) {
368 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
369 wqe = (union t3_wr *) (qhp->wq.queue + idx);
370 if (num_wrs)
371 err = iwch_build_rdma_recv(qhp->rhp, wqe, wr);
372 else
373 err = -ENOMEM;
374 if (err) {
375 *bad_wr = wr;
376 break;
377 }
378 qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] =
379 wr->wr_id;
380 build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
381 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
382 0, sizeof(struct t3_receive_wr) >> 3);
383		PDBG("%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rq_rptr 0x%x "
384		     "wqe %p\n", __FUNCTION__, (unsigned long long) wr->wr_id,
385 idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe);
386 ++(qhp->wq.rq_wptr);
387 ++(qhp->wq.wptr);
388 wr = wr->next;
389 num_wrs--;
390 }
391 spin_unlock_irqrestore(&qhp->lock, flag);
392 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
393 return err;
394}
395
396int iwch_bind_mw(struct ib_qp *qp,
397 struct ib_mw *mw,
398 struct ib_mw_bind *mw_bind)
399{
400 struct iwch_dev *rhp;
401 struct iwch_mw *mhp;
402 struct iwch_qp *qhp;
403 union t3_wr *wqe;
404 u32 pbl_addr;
405 u8 page_size;
406 u32 num_wrs;
407 unsigned long flag;
408 struct ib_sge sgl;
409 int err=0;
410 enum t3_wr_flags t3_wr_flags;
411 u32 idx;
412 struct t3_swsq *sqp;
413
414 qhp = to_iwch_qp(qp);
415 mhp = to_iwch_mw(mw);
416 rhp = qhp->rhp;
417
418 spin_lock_irqsave(&qhp->lock, flag);
419 if (qhp->attr.state > IWCH_QP_STATE_RTS) {
420 spin_unlock_irqrestore(&qhp->lock, flag);
421 return -EINVAL;
422 }
423 num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
424 qhp->wq.sq_size_log2);
425 if ((num_wrs) <= 0) {
426 spin_unlock_irqrestore(&qhp->lock, flag);
427 return -ENOMEM;
428 }
429 idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
430 PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __FUNCTION__, idx,
431 mw, mw_bind);
432 wqe = (union t3_wr *) (qhp->wq.queue + idx);
433
434 t3_wr_flags = 0;
435 if (mw_bind->send_flags & IB_SEND_SIGNALED)
436 t3_wr_flags = T3_COMPLETION_FLAG;
437
438 sgl.addr = mw_bind->addr;
439 sgl.lkey = mw_bind->mr->lkey;
440 sgl.length = mw_bind->length;
441 wqe->bind.reserved = 0;
442 wqe->bind.type = T3_VA_BASED_TO;
443
444 /* TBD: check perms */
445 wqe->bind.perms = iwch_convert_access(mw_bind->mw_access_flags);
446 wqe->bind.mr_stag = cpu_to_be32(mw_bind->mr->lkey);
447 wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
448 wqe->bind.mw_len = cpu_to_be32(mw_bind->length);
449 wqe->bind.mw_va = cpu_to_be64(mw_bind->addr);
450 err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
451 if (err) {
452 spin_unlock_irqrestore(&qhp->lock, flag);
453 return err;
454 }
455 wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
456 sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
457 sqp->wr_id = mw_bind->wr_id;
458 sqp->opcode = T3_BIND_MW;
459 sqp->sq_wptr = qhp->wq.sq_wptr;
460 sqp->complete = 0;
461 sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
462 wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
463 wqe->bind.mr_pagesz = page_size;
464 wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id;
465 build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
466 Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
467 sizeof(struct t3_bind_mw_wr) >> 3);
468 ++(qhp->wq.wptr);
469 ++(qhp->wq.sq_wptr);
470 spin_unlock_irqrestore(&qhp->lock, flag);
471
472 ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
473
474 return err;
475}
476
477static inline void build_term_codes(int t3err, u8 *layer_type, u8 *ecode,
478 int tagged)
479{
480 switch (t3err) {
481 case TPT_ERR_STAG:
482 if (tagged == 1) {
483 *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
484 *ecode = DDPT_INV_STAG;
485 } else if (tagged == 2) {
486 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
487 *ecode = RDMAP_INV_STAG;
488 }
489 break;
490 case TPT_ERR_PDID:
491 case TPT_ERR_QPID:
492 case TPT_ERR_ACCESS:
493 if (tagged == 1) {
494 *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
495 *ecode = DDPT_STAG_NOT_ASSOC;
496 } else if (tagged == 2) {
497 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
498 *ecode = RDMAP_STAG_NOT_ASSOC;
499 }
500 break;
501 case TPT_ERR_WRAP:
502 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
503 *ecode = RDMAP_TO_WRAP;
504 break;
505 case TPT_ERR_BOUND:
506 if (tagged == 1) {
507 *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
508 *ecode = DDPT_BASE_BOUNDS;
509 } else if (tagged == 2) {
510 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT;
511 *ecode = RDMAP_BASE_BOUNDS;
512 } else {
513 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
514 *ecode = DDPU_MSG_TOOBIG;
515 }
516 break;
517 case TPT_ERR_INVALIDATE_SHARED_MR:
518 case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
519 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
520 *ecode = RDMAP_CANT_INV_STAG;
521 break;
522 case TPT_ERR_ECC:
523 case TPT_ERR_ECC_PSTAG:
524 case TPT_ERR_INTERNAL_ERR:
525 *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA;
526 *ecode = 0;
527 break;
528 case TPT_ERR_OUT_OF_RQE:
529 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
530 *ecode = DDPU_INV_MSN_NOBUF;
531 break;
532 case TPT_ERR_PBL_ADDR_BOUND:
533 *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
534 *ecode = DDPT_BASE_BOUNDS;
535 break;
536 case TPT_ERR_CRC:
537 *layer_type = LAYER_MPA|DDP_LLP;
538 *ecode = MPA_CRC_ERR;
539 break;
540 case TPT_ERR_MARKER:
541 *layer_type = LAYER_MPA|DDP_LLP;
542 *ecode = MPA_MARKER_ERR;
543 break;
544 case TPT_ERR_PDU_LEN_ERR:
545 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
546 *ecode = DDPU_MSG_TOOBIG;
547 break;
548 case TPT_ERR_DDP_VERSION:
549 if (tagged) {
550 *layer_type = LAYER_DDP|DDP_TAGGED_ERR;
551 *ecode = DDPT_INV_VERS;
552 } else {
553 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
554 *ecode = DDPU_INV_VERS;
555 }
556 break;
557 case TPT_ERR_RDMA_VERSION:
558 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
559 *ecode = RDMAP_INV_VERS;
560 break;
561 case TPT_ERR_OPCODE:
562 *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP;
563 *ecode = RDMAP_INV_OPCODE;
564 break;
565 case TPT_ERR_DDP_QUEUE_NUM:
566 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
567 *ecode = DDPU_INV_QN;
568 break;
569 case TPT_ERR_MSN:
570 case TPT_ERR_MSN_GAP:
571 case TPT_ERR_MSN_RANGE:
572 case TPT_ERR_IRD_OVERFLOW:
573 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
574 *ecode = DDPU_INV_MSN_RANGE;
575 break;
576 case TPT_ERR_TBIT:
577 *layer_type = LAYER_DDP|DDP_LOCAL_CATA;
578 *ecode = 0;
579 break;
580 case TPT_ERR_MO:
581 *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR;
582 *ecode = DDPU_INV_MO;
583 break;
584 default:
585 *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
586 *ecode = 0;
587 break;
588 }
589}
590
591/*
592 * This posts a TERMINATE with layer=RDMA, type=catastrophic.
593 */
594int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
595{
596 union t3_wr *wqe;
597 struct terminate_message *term;
598 int status;
599 int tagged = 0;
600 struct sk_buff *skb;
601
602 PDBG("%s %d\n", __FUNCTION__, __LINE__);
603 skb = alloc_skb(40, GFP_ATOMIC);
604 if (!skb) {
605 printk(KERN_ERR "%s cannot send TERMINATE!\n", __FUNCTION__);
606 return -ENOMEM;
607 }
608 wqe = (union t3_wr *)skb_put(skb, 40);
609 memset(wqe, 0, 40);
610 wqe->send.rdmaop = T3_TERMINATE;
611
612 /* immediate data length */
613 wqe->send.plen = htonl(4);
614
615 /* immediate data starts here. */
616 term = (struct terminate_message *)wqe->send.sgl;
617 if (rsp_msg) {
618 status = CQE_STATUS(rsp_msg->cqe);
619 if (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)
620 tagged = 1;
621 if ((CQE_OPCODE(rsp_msg->cqe) == T3_READ_REQ) ||
622 (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP))
623 tagged = 2;
624 } else {
625 status = TPT_ERR_INTERNAL_ERR;
626 }
627 build_term_codes(status, &term->layer_etype, &term->ecode, tagged);
628 build_fw_riwrh((void *)wqe, T3_WR_SEND,
629 T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
630 qhp->ep->hwtid, 5);
631 skb->priority = CPL_PRIORITY_DATA;
632 return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
633}
634
635/*
636 * Assumes qhp lock is held.
637 */
638static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
639{
640 struct iwch_cq *rchp, *schp;
641 int count;
642
643 rchp = get_chp(qhp->rhp, qhp->attr.rcq);
644 schp = get_chp(qhp->rhp, qhp->attr.scq);
645
646 PDBG("%s qhp %p rchp %p schp %p\n", __FUNCTION__, qhp, rchp, schp);
647 /* take a ref on the qhp since we must release the lock */
648 atomic_inc(&qhp->refcnt);
649 spin_unlock_irqrestore(&qhp->lock, *flag);
650
651	/* locking hierarchy: cq lock first, then qp lock. */
652 spin_lock_irqsave(&rchp->lock, *flag);
653 spin_lock(&qhp->lock);
654 cxio_flush_hw_cq(&rchp->cq);
655 cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
656 cxio_flush_rq(&qhp->wq, &rchp->cq, count);
657 spin_unlock(&qhp->lock);
658 spin_unlock_irqrestore(&rchp->lock, *flag);
659
660	/* locking hierarchy: cq lock first, then qp lock. */
661 spin_lock_irqsave(&schp->lock, *flag);
662 spin_lock(&qhp->lock);
663 cxio_flush_hw_cq(&schp->cq);
664 cxio_count_scqes(&schp->cq, &qhp->wq, &count);
665 cxio_flush_sq(&qhp->wq, &schp->cq, count);
666 spin_unlock(&qhp->lock);
667 spin_unlock_irqrestore(&schp->lock, *flag);
668
669 /* deref */
670 if (atomic_dec_and_test(&qhp->refcnt))
671 wake_up(&qhp->wait);
672
673 spin_lock_irqsave(&qhp->lock, *flag);
674}
675
676static inline void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
677{
678 if (t3b_device(qhp->rhp))
679 cxio_set_wq_in_error(&qhp->wq);
680 else
681 __flush_qp(qhp, flag);
682}
683
684
685/*
686 * Return non-zero if at least one RECV was pre-posted.
687 */
688static inline int rqes_posted(struct iwch_qp *qhp)
689{
690 return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
691}
692
693static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
694 enum iwch_qp_attr_mask mask,
695 struct iwch_qp_attributes *attrs)
696{
697 struct t3_rdma_init_attr init_attr;
698 int ret;
699
700 init_attr.tid = qhp->ep->hwtid;
701 init_attr.qpid = qhp->wq.qpid;
702 init_attr.pdid = qhp->attr.pd;
703 init_attr.scqid = qhp->attr.scq;
704 init_attr.rcqid = qhp->attr.rcq;
705 init_attr.rq_addr = qhp->wq.rq_addr;
706 init_attr.rq_size = 1 << qhp->wq.rq_size_log2;
707 init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE |
708 qhp->attr.mpa_attr.recv_marker_enabled |
709 (qhp->attr.mpa_attr.xmit_marker_enabled << 1) |
710 (qhp->attr.mpa_attr.crc_enabled << 2);
711
712 /*
713 * XXX - The IWCM doesn't quite handle getting these
714 * attrs set before going into RTS. For now, just turn
715 * them on always...
716 */
717#if 0
718 init_attr.qpcaps = qhp->attr.enableRdmaRead |
719 (qhp->attr.enableRdmaWrite << 1) |
720 (qhp->attr.enableBind << 2) |
721 (qhp->attr.enable_stag0_fastreg << 3) |
722 (qhp->attr.enable_stag0_fastreg << 4);
723#else
724 init_attr.qpcaps = 0x1f;
725#endif
726 init_attr.tcp_emss = qhp->ep->emss;
727 init_attr.ord = qhp->attr.max_ord;
728 init_attr.ird = qhp->attr.max_ird;
729 init_attr.qp_dma_addr = qhp->wq.dma_addr;
730 init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
731 init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
732 PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
733 "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
734 init_attr.rq_addr, init_attr.rq_size,
735 init_attr.flags, init_attr.qpcaps);
736 ret = cxio_rdma_init(&rhp->rdev, &init_attr);
737 PDBG("%s ret %d\n", __FUNCTION__, ret);
738 return ret;
739}
740
741int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
742 enum iwch_qp_attr_mask mask,
743 struct iwch_qp_attributes *attrs,
744 int internal)
745{
746 int ret = 0;
747 struct iwch_qp_attributes newattr = qhp->attr;
748 unsigned long flag;
749 int disconnect = 0;
750 int terminate = 0;
751 int abort = 0;
752 int free = 0;
753 struct iwch_ep *ep = NULL;
754
755 PDBG("%s qhp %p qpid 0x%x ep %p state %d -> %d\n", __FUNCTION__,
756 qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state,
757 (mask & IWCH_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
758
759 spin_lock_irqsave(&qhp->lock, flag);
760
761 /* Process attr changes if in IDLE */
762 if (mask & IWCH_QP_ATTR_VALID_MODIFY) {
763 if (qhp->attr.state != IWCH_QP_STATE_IDLE) {
764 ret = -EIO;
765 goto out;
766 }
767 if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ)
768 newattr.enable_rdma_read = attrs->enable_rdma_read;
769 if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE)
770 newattr.enable_rdma_write = attrs->enable_rdma_write;
771 if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND)
772 newattr.enable_bind = attrs->enable_bind;
773 if (mask & IWCH_QP_ATTR_MAX_ORD) {
774 if (attrs->max_ord >
775 rhp->attr.max_rdma_read_qp_depth) {
776 ret = -EINVAL;
777 goto out;
778 }
779 newattr.max_ord = attrs->max_ord;
780 }
781 if (mask & IWCH_QP_ATTR_MAX_IRD) {
782 if (attrs->max_ird >
783 rhp->attr.max_rdma_reads_per_qp) {
784 ret = -EINVAL;
785 goto out;
786 }
787 newattr.max_ird = attrs->max_ird;
788 }
789 qhp->attr = newattr;
790 }
791
792 if (!(mask & IWCH_QP_ATTR_NEXT_STATE))
793 goto out;
794 if (qhp->attr.state == attrs->next_state)
795 goto out;
796
797 switch (qhp->attr.state) {
798 case IWCH_QP_STATE_IDLE:
799 switch (attrs->next_state) {
800 case IWCH_QP_STATE_RTS:
801 if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) {
802 ret = -EINVAL;
803 goto out;
804 }
805 if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) {
806 ret = -EINVAL;
807 goto out;
808 }
809 qhp->attr.mpa_attr = attrs->mpa_attr;
810 qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
811 qhp->ep = qhp->attr.llp_stream_handle;
812 qhp->attr.state = IWCH_QP_STATE_RTS;
813
814 /*
815 * Ref the endpoint here and deref when we
816 * disassociate the endpoint from the QP. This
817 * happens in CLOSING->IDLE transition or *->ERROR
818 * transition.
819 */
820 get_ep(&qhp->ep->com);
821 spin_unlock_irqrestore(&qhp->lock, flag);
822 ret = rdma_init(rhp, qhp, mask, attrs);
823 spin_lock_irqsave(&qhp->lock, flag);
824 if (ret)
825 goto err;
826 break;
827 case IWCH_QP_STATE_ERROR:
828 qhp->attr.state = IWCH_QP_STATE_ERROR;
829 flush_qp(qhp, &flag);
830 break;
831 default:
832 ret = -EINVAL;
833 goto out;
834 }
835 break;
836 case IWCH_QP_STATE_RTS:
837 switch (attrs->next_state) {
838 case IWCH_QP_STATE_CLOSING:
839 BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
840 qhp->attr.state = IWCH_QP_STATE_CLOSING;
841 if (!internal) {
842 abort=0;
843 disconnect = 1;
844 ep = qhp->ep;
845 }
846 break;
847 case IWCH_QP_STATE_TERMINATE:
848 qhp->attr.state = IWCH_QP_STATE_TERMINATE;
849 if (!internal)
850 terminate = 1;
851 break;
852 case IWCH_QP_STATE_ERROR:
853 qhp->attr.state = IWCH_QP_STATE_ERROR;
854 if (!internal) {
855 abort=1;
856 disconnect = 1;
857 ep = qhp->ep;
858 }
859 goto err;
860 break;
861 default:
862 ret = -EINVAL;
863 goto out;
864 }
865 break;
866 case IWCH_QP_STATE_CLOSING:
867 if (!internal) {
868 ret = -EINVAL;
869 goto out;
870 }
871 switch (attrs->next_state) {
872 case IWCH_QP_STATE_IDLE:
873 qhp->attr.state = IWCH_QP_STATE_IDLE;
874 qhp->attr.llp_stream_handle = NULL;
875 put_ep(&qhp->ep->com);
876 qhp->ep = NULL;
877 wake_up(&qhp->wait);
878 break;
879 case IWCH_QP_STATE_ERROR:
880 goto err;
881 default:
882 ret = -EINVAL;
883 goto err;
884 }
885 break;
886 case IWCH_QP_STATE_ERROR:
887 if (attrs->next_state != IWCH_QP_STATE_IDLE) {
888 ret = -EINVAL;
889 goto out;
890 }
891
892 if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) ||
893 !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) {
894 ret = -EINVAL;
895 goto out;
896 }
897 qhp->attr.state = IWCH_QP_STATE_IDLE;
898 memset(&qhp->attr, 0, sizeof(qhp->attr));
899 break;
900 case IWCH_QP_STATE_TERMINATE:
901 if (!internal) {
902 ret = -EINVAL;
903 goto out;
904 }
905 goto err;
906 break;
907 default:
908 printk(KERN_ERR "%s in a bad state %d\n",
909 __FUNCTION__, qhp->attr.state);
910 ret = -EINVAL;
911 goto err;
912 break;
913 }
914 goto out;
915err:
916 PDBG("%s disassociating ep %p qpid 0x%x\n", __FUNCTION__, qhp->ep,
917 qhp->wq.qpid);
918
919 /* disassociate the LLP connection */
920 qhp->attr.llp_stream_handle = NULL;
921 ep = qhp->ep;
922 qhp->ep = NULL;
923 qhp->attr.state = IWCH_QP_STATE_ERROR;
924 free=1;
925 wake_up(&qhp->wait);
926 BUG_ON(!ep);
927 flush_qp(qhp, &flag);
928out:
929 spin_unlock_irqrestore(&qhp->lock, flag);
930
931 if (terminate)
932 iwch_post_terminate(qhp, NULL);
933
934 /*
935 * If disconnect is 1, then we need to initiate a disconnect
936 * on the EP. This can be a normal close (RTS->CLOSING) or
937 * an abnormal close (RTS/CLOSING->ERROR).
938 */
939 if (disconnect)
940 iwch_ep_disconnect(ep, abort, GFP_KERNEL);
941
942 /*
943 * If free is 1, then we've disassociated the EP from the QP
944 * and we need to dereference the EP.
945 */
946 if (free)
947 put_ep(&ep->com);
948
949 PDBG("%s exit state %d\n", __FUNCTION__, qhp->attr.state);
950 return ret;
951}
952
953static int quiesce_qp(struct iwch_qp *qhp)
954{
955 spin_lock_irq(&qhp->lock);
956 iwch_quiesce_tid(qhp->ep);
957 qhp->flags |= QP_QUIESCED;
958 spin_unlock_irq(&qhp->lock);
959 return 0;
960}
961
962static int resume_qp(struct iwch_qp *qhp)
963{
964 spin_lock_irq(&qhp->lock);
965 iwch_resume_tid(qhp->ep);
966 qhp->flags &= ~QP_QUIESCED;
967 spin_unlock_irq(&qhp->lock);
968 return 0;
969}
970
971int iwch_quiesce_qps(struct iwch_cq *chp)
972{
973 int i;
974 struct iwch_qp *qhp;
975
976 for (i=0; i < T3_MAX_NUM_QP; i++) {
977 qhp = get_qhp(chp->rhp, i);
978 if (!qhp)
979 continue;
980 if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) {
981 quiesce_qp(qhp);
982 continue;
983 }
984 if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp))
985 quiesce_qp(qhp);
986 }
987 return 0;
988}
989
990int iwch_resume_qps(struct iwch_cq *chp)
991{
992 int i;
993 struct iwch_qp *qhp;
994
995 for (i=0; i < T3_MAX_NUM_QP; i++) {
996 qhp = get_qhp(chp->rhp, i);
997 if (!qhp)
998 continue;
999 if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) {
1000 resume_qp(qhp);
1001 continue;
1002 }
1003 if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp))
1004 resume_qp(qhp);
1005 }
1006 return 0;
1007}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/cxgb3/iwch_user.h
new file mode 100644
index 000000000000..c4e7fbea8bbd
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/iwch_user.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (c) 2006 Chelsio, Inc. All rights reserved.
3 * Copyright (c) 2006 Open Grid Computing, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33#ifndef __IWCH_USER_H__
34#define __IWCH_USER_H__
35
36#define IWCH_UVERBS_ABI_VERSION 1
37
38/*
39 * Make sure that all structs defined in this file remain laid out so
40 * that they pack the same way on 32-bit and 64-bit architectures (to
41 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
42 * In particular do not use pointer types -- pass pointers in __u64
43 * instead.
44 */
45struct iwch_create_cq_req {
46 __u64 user_rptr_addr;
47};
48
49struct iwch_create_cq_resp {
50 __u64 key;
51 __u32 cqid;
52 __u32 size_log2;
53};
54
55struct iwch_create_qp_resp {
56 __u64 key;
57 __u64 db_key;
58 __u32 qpid;
59 __u32 size_log2;
60 __u32 sq_size_log2;
61 __u32 rq_size_log2;
62};
63
64struct iwch_reg_user_mr_resp {
65 __u32 pbl_addr;
66};
67#endif
diff --git a/drivers/infiniband/hw/cxgb3/tcb.h b/drivers/infiniband/hw/cxgb3/tcb.h
new file mode 100644
index 000000000000..c702dc199e18
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb3/tcb.h
@@ -0,0 +1,632 @@
1/*
2 * Copyright (c) 2007 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#ifndef _TCB_DEFS_H
33#define _TCB_DEFS_H
34
35#define W_TCB_T_STATE 0
36#define S_TCB_T_STATE 0
37#define M_TCB_T_STATE 0xfULL
38#define V_TCB_T_STATE(x) ((x) << S_TCB_T_STATE)
39
40#define W_TCB_TIMER 0
41#define S_TCB_TIMER 4
42#define M_TCB_TIMER 0x1ULL
43#define V_TCB_TIMER(x) ((x) << S_TCB_TIMER)
44
45#define W_TCB_DACK_TIMER 0
46#define S_TCB_DACK_TIMER 5
47#define M_TCB_DACK_TIMER 0x1ULL
48#define V_TCB_DACK_TIMER(x) ((x) << S_TCB_DACK_TIMER)
49
50#define W_TCB_DEL_FLAG 0
51#define S_TCB_DEL_FLAG 6
52#define M_TCB_DEL_FLAG 0x1ULL
53#define V_TCB_DEL_FLAG(x) ((x) << S_TCB_DEL_FLAG)
54
55#define W_TCB_L2T_IX 0
56#define S_TCB_L2T_IX 7
57#define M_TCB_L2T_IX 0x7ffULL
58#define V_TCB_L2T_IX(x) ((x) << S_TCB_L2T_IX)
59
60#define W_TCB_SMAC_SEL 0
61#define S_TCB_SMAC_SEL 18
62#define M_TCB_SMAC_SEL 0x3ULL
63#define V_TCB_SMAC_SEL(x) ((x) << S_TCB_SMAC_SEL)
64
65#define W_TCB_TOS 0
66#define S_TCB_TOS 20
67#define M_TCB_TOS 0x3fULL
68#define V_TCB_TOS(x) ((x) << S_TCB_TOS)
69
70#define W_TCB_MAX_RT 0
71#define S_TCB_MAX_RT 26
72#define M_TCB_MAX_RT 0xfULL
73#define V_TCB_MAX_RT(x) ((x) << S_TCB_MAX_RT)
74
75#define W_TCB_T_RXTSHIFT 0
76#define S_TCB_T_RXTSHIFT 30
77#define M_TCB_T_RXTSHIFT 0xfULL
78#define V_TCB_T_RXTSHIFT(x) ((x) << S_TCB_T_RXTSHIFT)
79
80#define W_TCB_T_DUPACKS 1
81#define S_TCB_T_DUPACKS 2
82#define M_TCB_T_DUPACKS 0xfULL
83#define V_TCB_T_DUPACKS(x) ((x) << S_TCB_T_DUPACKS)
84
85#define W_TCB_T_MAXSEG 1
86#define S_TCB_T_MAXSEG 6
87#define M_TCB_T_MAXSEG 0xfULL
88#define V_TCB_T_MAXSEG(x) ((x) << S_TCB_T_MAXSEG)
89
90#define W_TCB_T_FLAGS1 1
91#define S_TCB_T_FLAGS1 10
92#define M_TCB_T_FLAGS1 0xffffffffULL
93#define V_TCB_T_FLAGS1(x) ((x) << S_TCB_T_FLAGS1)
94
95#define W_TCB_T_MIGRATION 1
96#define S_TCB_T_MIGRATION 20
97#define M_TCB_T_MIGRATION 0x1ULL
98#define V_TCB_T_MIGRATION(x) ((x) << S_TCB_T_MIGRATION)
99
100#define W_TCB_T_FLAGS2 2
101#define S_TCB_T_FLAGS2 10
102#define M_TCB_T_FLAGS2 0x7fULL
103#define V_TCB_T_FLAGS2(x) ((x) << S_TCB_T_FLAGS2)
104
105#define W_TCB_SND_SCALE 2
106#define S_TCB_SND_SCALE 17
107#define M_TCB_SND_SCALE 0xfULL
108#define V_TCB_SND_SCALE(x) ((x) << S_TCB_SND_SCALE)
109
110#define W_TCB_RCV_SCALE 2
111#define S_TCB_RCV_SCALE 21
112#define M_TCB_RCV_SCALE 0xfULL
113#define V_TCB_RCV_SCALE(x) ((x) << S_TCB_RCV_SCALE)
114
115#define W_TCB_SND_UNA_RAW 2
116#define S_TCB_SND_UNA_RAW 25
117#define M_TCB_SND_UNA_RAW 0x7ffffffULL
118#define V_TCB_SND_UNA_RAW(x) ((x) << S_TCB_SND_UNA_RAW)
119
120#define W_TCB_SND_NXT_RAW 3
121#define S_TCB_SND_NXT_RAW 20
122#define M_TCB_SND_NXT_RAW 0x7ffffffULL
123#define V_TCB_SND_NXT_RAW(x) ((x) << S_TCB_SND_NXT_RAW)
124
125#define W_TCB_RCV_NXT 4
126#define S_TCB_RCV_NXT 15
127#define M_TCB_RCV_NXT 0xffffffffULL
128#define V_TCB_RCV_NXT(x) ((x) << S_TCB_RCV_NXT)
129
130#define W_TCB_RCV_ADV 5
131#define S_TCB_RCV_ADV 15
132#define M_TCB_RCV_ADV 0xffffULL
133#define V_TCB_RCV_ADV(x) ((x) << S_TCB_RCV_ADV)
134
135#define W_TCB_SND_MAX_RAW 5
136#define S_TCB_SND_MAX_RAW 31
137#define M_TCB_SND_MAX_RAW 0x7ffffffULL
138#define V_TCB_SND_MAX_RAW(x) ((x) << S_TCB_SND_MAX_RAW)
139
140#define W_TCB_SND_CWND 6
141#define S_TCB_SND_CWND 26
142#define M_TCB_SND_CWND 0x7ffffffULL
143#define V_TCB_SND_CWND(x) ((x) << S_TCB_SND_CWND)
144
145#define W_TCB_SND_SSTHRESH 7
146#define S_TCB_SND_SSTHRESH 21
147#define M_TCB_SND_SSTHRESH 0x7ffffffULL
148#define V_TCB_SND_SSTHRESH(x) ((x) << S_TCB_SND_SSTHRESH)
149
150#define W_TCB_T_RTT_TS_RECENT_AGE 8
151#define S_TCB_T_RTT_TS_RECENT_AGE 16
152#define M_TCB_T_RTT_TS_RECENT_AGE 0xffffffffULL
153#define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
154
155#define W_TCB_T_RTSEQ_RECENT 9
156#define S_TCB_T_RTSEQ_RECENT 16
157#define M_TCB_T_RTSEQ_RECENT 0xffffffffULL
158#define V_TCB_T_RTSEQ_RECENT(x) ((x) << S_TCB_T_RTSEQ_RECENT)
159
160#define W_TCB_T_SRTT 10
161#define S_TCB_T_SRTT 16
162#define M_TCB_T_SRTT 0xffffULL
163#define V_TCB_T_SRTT(x) ((x) << S_TCB_T_SRTT)
164
165#define W_TCB_T_RTTVAR 11
166#define S_TCB_T_RTTVAR 0
167#define M_TCB_T_RTTVAR 0xffffULL
168#define V_TCB_T_RTTVAR(x) ((x) << S_TCB_T_RTTVAR)
169
170#define W_TCB_TS_LAST_ACK_SENT_RAW 11
171#define S_TCB_TS_LAST_ACK_SENT_RAW 16
172#define M_TCB_TS_LAST_ACK_SENT_RAW 0x7ffffffULL
173#define V_TCB_TS_LAST_ACK_SENT_RAW(x) ((x) << S_TCB_TS_LAST_ACK_SENT_RAW)
174
175#define W_TCB_DIP 12
176#define S_TCB_DIP 11
177#define M_TCB_DIP 0xffffffffULL
178#define V_TCB_DIP(x) ((x) << S_TCB_DIP)
179
180#define W_TCB_SIP 13
181#define S_TCB_SIP 11
182#define M_TCB_SIP 0xffffffffULL
183#define V_TCB_SIP(x) ((x) << S_TCB_SIP)
184
185#define W_TCB_DP 14
186#define S_TCB_DP 11
187#define M_TCB_DP 0xffffULL
188#define V_TCB_DP(x) ((x) << S_TCB_DP)
189
190#define W_TCB_SP 14
191#define S_TCB_SP 27
192#define M_TCB_SP 0xffffULL
193#define V_TCB_SP(x) ((x) << S_TCB_SP)
194
195#define W_TCB_TIMESTAMP 15
196#define S_TCB_TIMESTAMP 11
197#define M_TCB_TIMESTAMP 0xffffffffULL
198#define V_TCB_TIMESTAMP(x) ((x) << S_TCB_TIMESTAMP)
199
200#define W_TCB_TIMESTAMP_OFFSET 16
201#define S_TCB_TIMESTAMP_OFFSET 11
202#define M_TCB_TIMESTAMP_OFFSET 0xfULL
203#define V_TCB_TIMESTAMP_OFFSET(x) ((x) << S_TCB_TIMESTAMP_OFFSET)
204
205#define W_TCB_TX_MAX 16
206#define S_TCB_TX_MAX 15
207#define M_TCB_TX_MAX 0xffffffffULL
208#define V_TCB_TX_MAX(x) ((x) << S_TCB_TX_MAX)
209
210#define W_TCB_TX_HDR_PTR_RAW 17
211#define S_TCB_TX_HDR_PTR_RAW 15
212#define M_TCB_TX_HDR_PTR_RAW 0x1ffffULL
213#define V_TCB_TX_HDR_PTR_RAW(x) ((x) << S_TCB_TX_HDR_PTR_RAW)
214
215#define W_TCB_TX_LAST_PTR_RAW 18
216#define S_TCB_TX_LAST_PTR_RAW 0
217#define M_TCB_TX_LAST_PTR_RAW 0x1ffffULL
218#define V_TCB_TX_LAST_PTR_RAW(x) ((x) << S_TCB_TX_LAST_PTR_RAW)
219
220#define W_TCB_TX_COMPACT 18
221#define S_TCB_TX_COMPACT 17
222#define M_TCB_TX_COMPACT 0x1ULL
223#define V_TCB_TX_COMPACT(x) ((x) << S_TCB_TX_COMPACT)
224
225#define W_TCB_RX_COMPACT 18
226#define S_TCB_RX_COMPACT 18
227#define M_TCB_RX_COMPACT 0x1ULL
228#define V_TCB_RX_COMPACT(x) ((x) << S_TCB_RX_COMPACT)
229
230#define W_TCB_RCV_WND 18
231#define S_TCB_RCV_WND 19
232#define M_TCB_RCV_WND 0x7ffffffULL
233#define V_TCB_RCV_WND(x) ((x) << S_TCB_RCV_WND)
234
235#define W_TCB_RX_HDR_OFFSET 19
236#define S_TCB_RX_HDR_OFFSET 14
237#define M_TCB_RX_HDR_OFFSET 0x7ffffffULL
238#define V_TCB_RX_HDR_OFFSET(x) ((x) << S_TCB_RX_HDR_OFFSET)
239
240#define W_TCB_RX_FRAG0_START_IDX_RAW 20
241#define S_TCB_RX_FRAG0_START_IDX_RAW 9
242#define M_TCB_RX_FRAG0_START_IDX_RAW 0x7ffffffULL
243#define V_TCB_RX_FRAG0_START_IDX_RAW(x) ((x) << S_TCB_RX_FRAG0_START_IDX_RAW)
244
245#define W_TCB_RX_FRAG1_START_IDX_OFFSET 21
246#define S_TCB_RX_FRAG1_START_IDX_OFFSET 4
247#define M_TCB_RX_FRAG1_START_IDX_OFFSET 0x7ffffffULL
248#define V_TCB_RX_FRAG1_START_IDX_OFFSET(x) ((x) << S_TCB_RX_FRAG1_START_IDX_OFFSET)
249
250#define W_TCB_RX_FRAG0_LEN 21
251#define S_TCB_RX_FRAG0_LEN 31
252#define M_TCB_RX_FRAG0_LEN 0x7ffffffULL
253#define V_TCB_RX_FRAG0_LEN(x) ((x) << S_TCB_RX_FRAG0_LEN)
254
255#define W_TCB_RX_FRAG1_LEN 22
256#define S_TCB_RX_FRAG1_LEN 26
257#define M_TCB_RX_FRAG1_LEN 0x7ffffffULL
258#define V_TCB_RX_FRAG1_LEN(x) ((x) << S_TCB_RX_FRAG1_LEN)
259
260#define W_TCB_NEWRENO_RECOVER 23
261#define S_TCB_NEWRENO_RECOVER 21
262#define M_TCB_NEWRENO_RECOVER 0x7ffffffULL
263#define V_TCB_NEWRENO_RECOVER(x) ((x) << S_TCB_NEWRENO_RECOVER)
264
265#define W_TCB_PDU_HAVE_LEN 24
266#define S_TCB_PDU_HAVE_LEN 16
267#define M_TCB_PDU_HAVE_LEN 0x1ULL
268#define V_TCB_PDU_HAVE_LEN(x) ((x) << S_TCB_PDU_HAVE_LEN)
269
270#define W_TCB_PDU_LEN 24
271#define S_TCB_PDU_LEN 17
272#define M_TCB_PDU_LEN 0xffffULL
273#define V_TCB_PDU_LEN(x) ((x) << S_TCB_PDU_LEN)
274
275#define W_TCB_RX_QUIESCE 25
276#define S_TCB_RX_QUIESCE 1
277#define M_TCB_RX_QUIESCE 0x1ULL
278#define V_TCB_RX_QUIESCE(x) ((x) << S_TCB_RX_QUIESCE)
279
280#define W_TCB_RX_PTR_RAW 25
281#define S_TCB_RX_PTR_RAW 2
282#define M_TCB_RX_PTR_RAW 0x1ffffULL
283#define V_TCB_RX_PTR_RAW(x) ((x) << S_TCB_RX_PTR_RAW)
284
285#define W_TCB_CPU_NO 25
286#define S_TCB_CPU_NO 19
287#define M_TCB_CPU_NO 0x7fULL
288#define V_TCB_CPU_NO(x) ((x) << S_TCB_CPU_NO)
289
290#define W_TCB_ULP_TYPE 25
291#define S_TCB_ULP_TYPE 26
292#define M_TCB_ULP_TYPE 0xfULL
293#define V_TCB_ULP_TYPE(x) ((x) << S_TCB_ULP_TYPE)
294
295#define W_TCB_RX_FRAG1_PTR_RAW 25
296#define S_TCB_RX_FRAG1_PTR_RAW 30
297#define M_TCB_RX_FRAG1_PTR_RAW 0x1ffffULL
298#define V_TCB_RX_FRAG1_PTR_RAW(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW)
299
300#define W_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 26
301#define S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 15
302#define M_TCB_RX_FRAG2_START_IDX_OFFSET_RAW 0x7ffffffULL
303#define V_TCB_RX_FRAG2_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG2_START_IDX_OFFSET_RAW)
304
305#define W_TCB_RX_FRAG2_PTR_RAW 27
306#define S_TCB_RX_FRAG2_PTR_RAW 10
307#define M_TCB_RX_FRAG2_PTR_RAW 0x1ffffULL
308#define V_TCB_RX_FRAG2_PTR_RAW(x) ((x) << S_TCB_RX_FRAG2_PTR_RAW)
309
310#define W_TCB_RX_FRAG2_LEN_RAW 27
311#define S_TCB_RX_FRAG2_LEN_RAW 27
312#define M_TCB_RX_FRAG2_LEN_RAW 0x7ffffffULL
313#define V_TCB_RX_FRAG2_LEN_RAW(x) ((x) << S_TCB_RX_FRAG2_LEN_RAW)
314
315#define W_TCB_RX_FRAG3_PTR_RAW 28
316#define S_TCB_RX_FRAG3_PTR_RAW 22
317#define M_TCB_RX_FRAG3_PTR_RAW 0x1ffffULL
318#define V_TCB_RX_FRAG3_PTR_RAW(x) ((x) << S_TCB_RX_FRAG3_PTR_RAW)
319
320#define W_TCB_RX_FRAG3_LEN_RAW 29
321#define S_TCB_RX_FRAG3_LEN_RAW 7
322#define M_TCB_RX_FRAG3_LEN_RAW 0x7ffffffULL
323#define V_TCB_RX_FRAG3_LEN_RAW(x) ((x) << S_TCB_RX_FRAG3_LEN_RAW)
324
325#define W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 30
326#define S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 2
327#define M_TCB_RX_FRAG3_START_IDX_OFFSET_RAW 0x7ffffffULL
328#define V_TCB_RX_FRAG3_START_IDX_OFFSET_RAW(x) ((x) << S_TCB_RX_FRAG3_START_IDX_OFFSET_RAW)
329
330#define W_TCB_PDU_HDR_LEN 30
331#define S_TCB_PDU_HDR_LEN 29
332#define M_TCB_PDU_HDR_LEN 0xffULL
333#define V_TCB_PDU_HDR_LEN(x) ((x) << S_TCB_PDU_HDR_LEN)
334
335#define W_TCB_SLUSH1 31
336#define S_TCB_SLUSH1 5
337#define M_TCB_SLUSH1 0x7ffffULL
338#define V_TCB_SLUSH1(x) ((x) << S_TCB_SLUSH1)
339
340#define W_TCB_ULP_RAW 31
341#define S_TCB_ULP_RAW 24
342#define M_TCB_ULP_RAW 0xffULL
343#define V_TCB_ULP_RAW(x) ((x) << S_TCB_ULP_RAW)
344
345#define W_TCB_DDP_RDMAP_VERSION 25
346#define S_TCB_DDP_RDMAP_VERSION 30
347#define M_TCB_DDP_RDMAP_VERSION 0x1ULL
348#define V_TCB_DDP_RDMAP_VERSION(x) ((x) << S_TCB_DDP_RDMAP_VERSION)
349
350#define W_TCB_MARKER_ENABLE_RX 25
351#define S_TCB_MARKER_ENABLE_RX 31
352#define M_TCB_MARKER_ENABLE_RX 0x1ULL
353#define V_TCB_MARKER_ENABLE_RX(x) ((x) << S_TCB_MARKER_ENABLE_RX)
354
355#define W_TCB_MARKER_ENABLE_TX 26
356#define S_TCB_MARKER_ENABLE_TX 0
357#define M_TCB_MARKER_ENABLE_TX 0x1ULL
358#define V_TCB_MARKER_ENABLE_TX(x) ((x) << S_TCB_MARKER_ENABLE_TX)
359
360#define W_TCB_CRC_ENABLE 26
361#define S_TCB_CRC_ENABLE 1
362#define M_TCB_CRC_ENABLE 0x1ULL
363#define V_TCB_CRC_ENABLE(x) ((x) << S_TCB_CRC_ENABLE)
364
365#define W_TCB_IRS_ULP 26
366#define S_TCB_IRS_ULP 2
367#define M_TCB_IRS_ULP 0x1ffULL
368#define V_TCB_IRS_ULP(x) ((x) << S_TCB_IRS_ULP)
369
370#define W_TCB_ISS_ULP 26
371#define S_TCB_ISS_ULP 11
372#define M_TCB_ISS_ULP 0x1ffULL
373#define V_TCB_ISS_ULP(x) ((x) << S_TCB_ISS_ULP)
374
375#define W_TCB_TX_PDU_LEN 26
376#define S_TCB_TX_PDU_LEN 20
377#define M_TCB_TX_PDU_LEN 0x3fffULL
378#define V_TCB_TX_PDU_LEN(x) ((x) << S_TCB_TX_PDU_LEN)
379
380#define W_TCB_TX_PDU_OUT 27
381#define S_TCB_TX_PDU_OUT 2
382#define M_TCB_TX_PDU_OUT 0x1ULL
383#define V_TCB_TX_PDU_OUT(x) ((x) << S_TCB_TX_PDU_OUT)
384
385#define W_TCB_CQ_IDX_SQ 27
386#define S_TCB_CQ_IDX_SQ 3
387#define M_TCB_CQ_IDX_SQ 0xffffULL
388#define V_TCB_CQ_IDX_SQ(x) ((x) << S_TCB_CQ_IDX_SQ)
389
390#define W_TCB_CQ_IDX_RQ 27
391#define S_TCB_CQ_IDX_RQ 19
392#define M_TCB_CQ_IDX_RQ 0xffffULL
393#define V_TCB_CQ_IDX_RQ(x) ((x) << S_TCB_CQ_IDX_RQ)
394
395#define W_TCB_QP_ID 28
396#define S_TCB_QP_ID 3
397#define M_TCB_QP_ID 0xffffULL
398#define V_TCB_QP_ID(x) ((x) << S_TCB_QP_ID)
399
400#define W_TCB_PD_ID 28
401#define S_TCB_PD_ID 19
402#define M_TCB_PD_ID 0xffffULL
403#define V_TCB_PD_ID(x) ((x) << S_TCB_PD_ID)
404
405#define W_TCB_STAG 29
406#define S_TCB_STAG 3
407#define M_TCB_STAG 0xffffffffULL
408#define V_TCB_STAG(x) ((x) << S_TCB_STAG)
409
410#define W_TCB_RQ_START 30
411#define S_TCB_RQ_START 3
412#define M_TCB_RQ_START 0x3ffffffULL
413#define V_TCB_RQ_START(x) ((x) << S_TCB_RQ_START)
414
415#define W_TCB_RQ_MSN 30
416#define S_TCB_RQ_MSN 29
417#define M_TCB_RQ_MSN 0x3ffULL
418#define V_TCB_RQ_MSN(x) ((x) << S_TCB_RQ_MSN)
419
420#define W_TCB_RQ_MAX_OFFSET 31
421#define S_TCB_RQ_MAX_OFFSET 7
422#define M_TCB_RQ_MAX_OFFSET 0xfULL
423#define V_TCB_RQ_MAX_OFFSET(x) ((x) << S_TCB_RQ_MAX_OFFSET)
424
425#define W_TCB_RQ_WRITE_PTR 31
426#define S_TCB_RQ_WRITE_PTR 11
427#define M_TCB_RQ_WRITE_PTR 0x3ffULL
428#define V_TCB_RQ_WRITE_PTR(x) ((x) << S_TCB_RQ_WRITE_PTR)
429
430#define W_TCB_INB_WRITE_PERM 31
431#define S_TCB_INB_WRITE_PERM 21
432#define M_TCB_INB_WRITE_PERM 0x1ULL
433#define V_TCB_INB_WRITE_PERM(x) ((x) << S_TCB_INB_WRITE_PERM)
434
435#define W_TCB_INB_READ_PERM 31
436#define S_TCB_INB_READ_PERM 22
437#define M_TCB_INB_READ_PERM 0x1ULL
438#define V_TCB_INB_READ_PERM(x) ((x) << S_TCB_INB_READ_PERM)
439
440#define W_TCB_ORD_L_BIT_VLD 31
441#define S_TCB_ORD_L_BIT_VLD 23
442#define M_TCB_ORD_L_BIT_VLD 0x1ULL
443#define V_TCB_ORD_L_BIT_VLD(x) ((x) << S_TCB_ORD_L_BIT_VLD)
444
445#define W_TCB_RDMAP_OPCODE 31
446#define S_TCB_RDMAP_OPCODE 24
447#define M_TCB_RDMAP_OPCODE 0xfULL
448#define V_TCB_RDMAP_OPCODE(x) ((x) << S_TCB_RDMAP_OPCODE)
449
450#define W_TCB_TX_FLUSH 31
451#define S_TCB_TX_FLUSH 28
452#define M_TCB_TX_FLUSH 0x1ULL
453#define V_TCB_TX_FLUSH(x) ((x) << S_TCB_TX_FLUSH)
454
455#define W_TCB_TX_OOS_RXMT 31
456#define S_TCB_TX_OOS_RXMT 29
457#define M_TCB_TX_OOS_RXMT 0x1ULL
458#define V_TCB_TX_OOS_RXMT(x) ((x) << S_TCB_TX_OOS_RXMT)
459
460#define W_TCB_TX_OOS_TXMT 31
461#define S_TCB_TX_OOS_TXMT 30
462#define M_TCB_TX_OOS_TXMT 0x1ULL
463#define V_TCB_TX_OOS_TXMT(x) ((x) << S_TCB_TX_OOS_TXMT)
464
465#define W_TCB_SLUSH_AUX2 31
466#define S_TCB_SLUSH_AUX2 31
467#define M_TCB_SLUSH_AUX2 0x1ULL
468#define V_TCB_SLUSH_AUX2(x) ((x) << S_TCB_SLUSH_AUX2)
469
470#define W_TCB_RX_FRAG1_PTR_RAW2 25
471#define S_TCB_RX_FRAG1_PTR_RAW2 30
472#define M_TCB_RX_FRAG1_PTR_RAW2 0x1ffffULL
473#define V_TCB_RX_FRAG1_PTR_RAW2(x) ((x) << S_TCB_RX_FRAG1_PTR_RAW2)
474
475#define W_TCB_RX_DDP_FLAGS 26
476#define S_TCB_RX_DDP_FLAGS 15
477#define M_TCB_RX_DDP_FLAGS 0x3ffULL
478#define V_TCB_RX_DDP_FLAGS(x) ((x) << S_TCB_RX_DDP_FLAGS)
479
480#define W_TCB_SLUSH_AUX3 26
481#define S_TCB_SLUSH_AUX3 31
482#define M_TCB_SLUSH_AUX3 0x1ffULL
483#define V_TCB_SLUSH_AUX3(x) ((x) << S_TCB_SLUSH_AUX3)
484
485#define W_TCB_RX_DDP_BUF0_OFFSET 27
486#define S_TCB_RX_DDP_BUF0_OFFSET 8
487#define M_TCB_RX_DDP_BUF0_OFFSET 0x3fffffULL
488#define V_TCB_RX_DDP_BUF0_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF0_OFFSET)
489
490#define W_TCB_RX_DDP_BUF0_LEN 27
491#define S_TCB_RX_DDP_BUF0_LEN 30
492#define M_TCB_RX_DDP_BUF0_LEN 0x3fffffULL
493#define V_TCB_RX_DDP_BUF0_LEN(x) ((x) << S_TCB_RX_DDP_BUF0_LEN)
494
495#define W_TCB_RX_DDP_BUF1_OFFSET 28
496#define S_TCB_RX_DDP_BUF1_OFFSET 20
497#define M_TCB_RX_DDP_BUF1_OFFSET 0x3fffffULL
498#define V_TCB_RX_DDP_BUF1_OFFSET(x) ((x) << S_TCB_RX_DDP_BUF1_OFFSET)
499
500#define W_TCB_RX_DDP_BUF1_LEN 29
501#define S_TCB_RX_DDP_BUF1_LEN 10
502#define M_TCB_RX_DDP_BUF1_LEN 0x3fffffULL
503#define V_TCB_RX_DDP_BUF1_LEN(x) ((x) << S_TCB_RX_DDP_BUF1_LEN)
504
505#define W_TCB_RX_DDP_BUF0_TAG 30
506#define S_TCB_RX_DDP_BUF0_TAG 0
507#define M_TCB_RX_DDP_BUF0_TAG 0xffffffffULL
508#define V_TCB_RX_DDP_BUF0_TAG(x) ((x) << S_TCB_RX_DDP_BUF0_TAG)
509
510#define W_TCB_RX_DDP_BUF1_TAG 31
511#define S_TCB_RX_DDP_BUF1_TAG 0
512#define M_TCB_RX_DDP_BUF1_TAG 0xffffffffULL
513#define V_TCB_RX_DDP_BUF1_TAG(x) ((x) << S_TCB_RX_DDP_BUF1_TAG)
514
515#define S_TF_DACK 10
516#define V_TF_DACK(x) ((x) << S_TF_DACK)
517
518#define S_TF_NAGLE 11
519#define V_TF_NAGLE(x) ((x) << S_TF_NAGLE)
520
521#define S_TF_RECV_SCALE 12
522#define V_TF_RECV_SCALE(x) ((x) << S_TF_RECV_SCALE)
523
524#define S_TF_RECV_TSTMP 13
525#define V_TF_RECV_TSTMP(x) ((x) << S_TF_RECV_TSTMP)
526
527#define S_TF_RECV_SACK 14
528#define V_TF_RECV_SACK(x) ((x) << S_TF_RECV_SACK)
529
530#define S_TF_TURBO 15
531#define V_TF_TURBO(x) ((x) << S_TF_TURBO)
532
533#define S_TF_KEEPALIVE 16
534#define V_TF_KEEPALIVE(x) ((x) << S_TF_KEEPALIVE)
535
536#define S_TF_TCAM_BYPASS 17
537#define V_TF_TCAM_BYPASS(x) ((x) << S_TF_TCAM_BYPASS)
538
539#define S_TF_CORE_FIN 18
540#define V_TF_CORE_FIN(x) ((x) << S_TF_CORE_FIN)
541
542#define S_TF_CORE_MORE 19
543#define V_TF_CORE_MORE(x) ((x) << S_TF_CORE_MORE)
544
545#define S_TF_MIGRATING 20
546#define V_TF_MIGRATING(x) ((x) << S_TF_MIGRATING)
547
548#define S_TF_ACTIVE_OPEN 21
549#define V_TF_ACTIVE_OPEN(x) ((x) << S_TF_ACTIVE_OPEN)
550
551#define S_TF_ASK_MODE 22
552#define V_TF_ASK_MODE(x) ((x) << S_TF_ASK_MODE)
553
554#define S_TF_NON_OFFLOAD 23
555#define V_TF_NON_OFFLOAD(x) ((x) << S_TF_NON_OFFLOAD)
556
557#define S_TF_MOD_SCHD 24
558#define V_TF_MOD_SCHD(x) ((x) << S_TF_MOD_SCHD)
559
560#define S_TF_MOD_SCHD_REASON0 25
561#define V_TF_MOD_SCHD_REASON0(x) ((x) << S_TF_MOD_SCHD_REASON0)
562
563#define S_TF_MOD_SCHD_REASON1 26
564#define V_TF_MOD_SCHD_REASON1(x) ((x) << S_TF_MOD_SCHD_REASON1)
565
566#define S_TF_MOD_SCHD_RX 27
567#define V_TF_MOD_SCHD_RX(x) ((x) << S_TF_MOD_SCHD_RX)
568
569#define S_TF_CORE_PUSH 28
570#define V_TF_CORE_PUSH(x) ((x) << S_TF_CORE_PUSH)
571
572#define S_TF_RCV_COALESCE_ENABLE 29
573#define V_TF_RCV_COALESCE_ENABLE(x) ((x) << S_TF_RCV_COALESCE_ENABLE)
574
575#define S_TF_RCV_COALESCE_PUSH 30
576#define V_TF_RCV_COALESCE_PUSH(x) ((x) << S_TF_RCV_COALESCE_PUSH)
577
578#define S_TF_RCV_COALESCE_LAST_PSH 31
579#define V_TF_RCV_COALESCE_LAST_PSH(x) ((x) << S_TF_RCV_COALESCE_LAST_PSH)
580
581#define S_TF_RCV_COALESCE_HEARTBEAT 32
582#define V_TF_RCV_COALESCE_HEARTBEAT(x) ((x) << S_TF_RCV_COALESCE_HEARTBEAT)
583
584#define S_TF_HALF_CLOSE 33
585#define V_TF_HALF_CLOSE(x) ((x) << S_TF_HALF_CLOSE)
586
587#define S_TF_DACK_MSS 34
588#define V_TF_DACK_MSS(x) ((x) << S_TF_DACK_MSS)
589
590#define S_TF_CCTRL_SEL0 35
591#define V_TF_CCTRL_SEL0(x) ((x) << S_TF_CCTRL_SEL0)
592
593#define S_TF_CCTRL_SEL1 36
594#define V_TF_CCTRL_SEL1(x) ((x) << S_TF_CCTRL_SEL1)
595
596#define S_TF_TCP_NEWRENO_FAST_RECOVERY 37
597#define V_TF_TCP_NEWRENO_FAST_RECOVERY(x) ((x) << S_TF_TCP_NEWRENO_FAST_RECOVERY)
598
599#define S_TF_TX_PACE_AUTO 38
600#define V_TF_TX_PACE_AUTO(x) ((x) << S_TF_TX_PACE_AUTO)
601
602#define S_TF_PEER_FIN_HELD 39
603#define V_TF_PEER_FIN_HELD(x) ((x) << S_TF_PEER_FIN_HELD)
604
605#define S_TF_CORE_URG 40
606#define V_TF_CORE_URG(x) ((x) << S_TF_CORE_URG)
607
608#define S_TF_RDMA_ERROR 41
609#define V_TF_RDMA_ERROR(x) ((x) << S_TF_RDMA_ERROR)
610
611#define S_TF_SSWS_DISABLED 42
612#define V_TF_SSWS_DISABLED(x) ((x) << S_TF_SSWS_DISABLED)
613
614#define S_TF_DUPACK_COUNT_ODD 43
615#define V_TF_DUPACK_COUNT_ODD(x) ((x) << S_TF_DUPACK_COUNT_ODD)
616
617#define S_TF_TX_CHANNEL 44
618#define V_TF_TX_CHANNEL(x) ((x) << S_TF_TX_CHANNEL)
619
620#define S_TF_RX_CHANNEL 45
621#define V_TF_RX_CHANNEL(x) ((x) << S_TF_RX_CHANNEL)
622
623#define S_TF_TX_PACE_FIXED 46
624#define V_TF_TX_PACE_FIXED(x) ((x) << S_TF_TX_PACE_FIXED)
625
626#define S_TF_RDMA_FLM_ERROR 47
627#define V_TF_RDMA_FLM_ERROR(x) ((x) << S_TF_RDMA_FLM_ERROR)
628
629#define S_TF_RX_FLOW_CONTROL_DISABLE 48
630#define V_TF_RX_FLOW_CONTROL_DISABLE(x) ((x) << S_TF_RX_FLOW_CONTROL_DISABLE)
631
632#endif /* _TCB_DEFS_H */
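
The W_/S_/M_/V_ quartets above follow the usual Chelsio field convention: W_ names the 32-bit TCB word a field starts in, S_ its bit offset, M_ the unshifted field mask, and V_(x) shifts a value into position; fields wider than the bits remaining in a word simply continue into the next word, which is why some shifts and the S_TF_ flag offsets exceed 31. A minimal sketch, not part of the patch, of packing such a field into a cached 64-bit view of the TCB state:

        #include <linux/types.h>

        /* Illustrative helper only -- not part of the patch. */
        static inline u64 tcb_set_field(u64 val64, u64 mask, int shift, u64 field)
        {
                val64 &= ~(mask << shift);          /* clear the old field */
                val64 |= (field & mask) << shift;   /* insert the new one  */
                return val64;
        }

        /* Example: mark RX quiesced (word 25, bit 1) in a cached TCB word. */
        static u64 example_rx_quiesce(u64 word25)
        {
                return tcb_set_field(word25, M_TCB_RX_QUIESCE,
                                     S_TCB_RX_QUIESCE, 1);
                /* equivalently: (word25 & ~V_TCB_RX_QUIESCE(M_TCB_RX_QUIESCE))
                 *               | V_TCB_RX_QUIESCE(1)                         */
        }
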
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index c069be8cbcb2..6c4f9f91b15d 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -756,6 +756,8 @@ void ehca_destroy_comp_pool(void)
756 if (cpu_online(i)) 756 if (cpu_online(i))
757 destroy_comp_task(pool, i); 757 destroy_comp_task(pool, i);
758 } 758 }
759 free_percpu(pool->cpu_comp_tasks);
760 kfree(pool);
759#endif 761#endif
760 762
761 return; 763 return;
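
The two added lines in ehca_destroy_comp_pool() are the whole memleak fix: the per-CPU completion-task array is returned with free_percpu() and the pool structure with kfree(), mirroring how they were allocated. A minimal sketch of that alloc/free pairing, using a hypothetical pool type rather than the ehca one:

        #include <linux/percpu.h>
        #include <linux/slab.h>

        struct demo_task { int active; };           /* hypothetical per-CPU state */

        struct demo_pool {
                struct demo_task *cpu_tasks;        /* from alloc_percpu()        */
        };

        static struct demo_pool *demo_create_pool(void)
        {
                struct demo_pool *pool = kzalloc(sizeof(*pool), GFP_KERNEL);

                if (!pool)
                        return NULL;
                pool->cpu_tasks = alloc_percpu(struct demo_task);
                if (!pool->cpu_tasks) {
                        kfree(pool);
                        return NULL;
                }
                return pool;
        }

        static void demo_destroy_pool(struct demo_pool *pool)
        {
                free_percpu(pool->cpu_tasks);       /* matches alloc_percpu() */
                kfree(pool);                        /* matches kzalloc()      */
        }
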
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 968d1519761c..71314460b11e 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1051,7 +1051,11 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
1051 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET); 1051 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
1052 dev_lim->max_eqs = 1 << (field & 0x7); 1052 dev_lim->max_eqs = 1 << (field & 0x7);
1053 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET); 1053 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
1054 dev_lim->reserved_mtts = 1 << (field >> 4); 1054 if (mthca_is_memfree(dev))
1055 dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * sizeof(u64),
1056 MTHCA_MTT_SEG_SIZE) / MTHCA_MTT_SEG_SIZE;
1057 else
1058 dev_lim->reserved_mtts = 1 << (field >> 4);
1055 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET); 1059 MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
1056 dev_lim->max_mrw_sz = 1 << field; 1060 dev_lim->max_mrw_sz = 1 << field;
1057 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET); 1061 MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index fe5cecf70fed..b7e42efaf43d 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -464,6 +464,8 @@ void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);
464int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); 464int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd);
465void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); 465void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
466 466
467int mthca_write_mtt_size(struct mthca_dev *dev);
468
467struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); 469struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size);
468void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); 470void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt);
469int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, 471int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 44bc6cc734ab..0d9b7d06bbc2 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -379,7 +379,7 @@ static int mthca_load_fw(struct mthca_dev *mdev)
379 379
380 mdev->fw.arbel.fw_icm = 380 mdev->fw.arbel.fw_icm =
381 mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages, 381 mthca_alloc_icm(mdev, mdev->fw.arbel.fw_pages,
382 GFP_HIGHUSER | __GFP_NOWARN); 382 GFP_HIGHUSER | __GFP_NOWARN, 0);
383 if (!mdev->fw.arbel.fw_icm) { 383 if (!mdev->fw.arbel.fw_icm) {
384 mthca_err(mdev, "Couldn't allocate FW area, aborting.\n"); 384 mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
385 return -ENOMEM; 385 return -ENOMEM;
@@ -412,7 +412,7 @@ err_unmap_fa:
412 mthca_UNMAP_FA(mdev, &status); 412 mthca_UNMAP_FA(mdev, &status);
413 413
414err_free: 414err_free:
415 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); 415 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
416 return err; 416 return err;
417} 417}
418 418
@@ -441,7 +441,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
441 (unsigned long long) aux_pages << 2); 441 (unsigned long long) aux_pages << 2);
442 442
443 mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages, 443 mdev->fw.arbel.aux_icm = mthca_alloc_icm(mdev, aux_pages,
444 GFP_HIGHUSER | __GFP_NOWARN); 444 GFP_HIGHUSER | __GFP_NOWARN, 0);
445 if (!mdev->fw.arbel.aux_icm) { 445 if (!mdev->fw.arbel.aux_icm) {
446 mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n"); 446 mthca_err(mdev, "Couldn't allocate aux memory, aborting.\n");
447 return -ENOMEM; 447 return -ENOMEM;
@@ -464,10 +464,15 @@ static int mthca_init_icm(struct mthca_dev *mdev,
464 goto err_unmap_aux; 464 goto err_unmap_aux;
465 } 465 }
466 466
467 /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
468 mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * MTHCA_MTT_SEG_SIZE,
469 dma_get_cache_alignment()) / MTHCA_MTT_SEG_SIZE;
470
467 mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base, 471 mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
468 MTHCA_MTT_SEG_SIZE, 472 MTHCA_MTT_SEG_SIZE,
469 mdev->limits.num_mtt_segs, 473 mdev->limits.num_mtt_segs,
470 mdev->limits.reserved_mtts, 1); 474 mdev->limits.reserved_mtts,
475 1, 0);
471 if (!mdev->mr_table.mtt_table) { 476 if (!mdev->mr_table.mtt_table) {
472 mthca_err(mdev, "Failed to map MTT context memory, aborting.\n"); 477 mthca_err(mdev, "Failed to map MTT context memory, aborting.\n");
473 err = -ENOMEM; 478 err = -ENOMEM;
@@ -477,7 +482,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
477 mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base, 482 mdev->mr_table.mpt_table = mthca_alloc_icm_table(mdev, init_hca->mpt_base,
478 dev_lim->mpt_entry_sz, 483 dev_lim->mpt_entry_sz,
479 mdev->limits.num_mpts, 484 mdev->limits.num_mpts,
480 mdev->limits.reserved_mrws, 1); 485 mdev->limits.reserved_mrws,
486 1, 1);
481 if (!mdev->mr_table.mpt_table) { 487 if (!mdev->mr_table.mpt_table) {
482 mthca_err(mdev, "Failed to map MPT context memory, aborting.\n"); 488 mthca_err(mdev, "Failed to map MPT context memory, aborting.\n");
483 err = -ENOMEM; 489 err = -ENOMEM;
@@ -487,7 +493,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
487 mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base, 493 mdev->qp_table.qp_table = mthca_alloc_icm_table(mdev, init_hca->qpc_base,
488 dev_lim->qpc_entry_sz, 494 dev_lim->qpc_entry_sz,
489 mdev->limits.num_qps, 495 mdev->limits.num_qps,
490 mdev->limits.reserved_qps, 0); 496 mdev->limits.reserved_qps,
497 0, 0);
491 if (!mdev->qp_table.qp_table) { 498 if (!mdev->qp_table.qp_table) {
492 mthca_err(mdev, "Failed to map QP context memory, aborting.\n"); 499 mthca_err(mdev, "Failed to map QP context memory, aborting.\n");
493 err = -ENOMEM; 500 err = -ENOMEM;
@@ -497,7 +504,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
497 mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base, 504 mdev->qp_table.eqp_table = mthca_alloc_icm_table(mdev, init_hca->eqpc_base,
498 dev_lim->eqpc_entry_sz, 505 dev_lim->eqpc_entry_sz,
499 mdev->limits.num_qps, 506 mdev->limits.num_qps,
500 mdev->limits.reserved_qps, 0); 507 mdev->limits.reserved_qps,
508 0, 0);
501 if (!mdev->qp_table.eqp_table) { 509 if (!mdev->qp_table.eqp_table) {
502 mthca_err(mdev, "Failed to map EQP context memory, aborting.\n"); 510 mthca_err(mdev, "Failed to map EQP context memory, aborting.\n");
503 err = -ENOMEM; 511 err = -ENOMEM;
@@ -507,7 +515,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
507 mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base, 515 mdev->qp_table.rdb_table = mthca_alloc_icm_table(mdev, init_hca->rdb_base,
508 MTHCA_RDB_ENTRY_SIZE, 516 MTHCA_RDB_ENTRY_SIZE,
509 mdev->limits.num_qps << 517 mdev->limits.num_qps <<
510 mdev->qp_table.rdb_shift, 518 mdev->qp_table.rdb_shift, 0,
511 0, 0); 519 0, 0);
512 if (!mdev->qp_table.rdb_table) { 520 if (!mdev->qp_table.rdb_table) {
513 mthca_err(mdev, "Failed to map RDB context memory, aborting\n"); 521 mthca_err(mdev, "Failed to map RDB context memory, aborting\n");
@@ -518,7 +526,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
518 mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, 526 mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base,
519 dev_lim->cqc_entry_sz, 527 dev_lim->cqc_entry_sz,
520 mdev->limits.num_cqs, 528 mdev->limits.num_cqs,
521 mdev->limits.reserved_cqs, 0); 529 mdev->limits.reserved_cqs,
530 0, 0);
522 if (!mdev->cq_table.table) { 531 if (!mdev->cq_table.table) {
523 mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); 532 mthca_err(mdev, "Failed to map CQ context memory, aborting.\n");
524 err = -ENOMEM; 533 err = -ENOMEM;
@@ -530,7 +539,8 @@ static int mthca_init_icm(struct mthca_dev *mdev,
530 mthca_alloc_icm_table(mdev, init_hca->srqc_base, 539 mthca_alloc_icm_table(mdev, init_hca->srqc_base,
531 dev_lim->srq_entry_sz, 540 dev_lim->srq_entry_sz,
532 mdev->limits.num_srqs, 541 mdev->limits.num_srqs,
533 mdev->limits.reserved_srqs, 0); 542 mdev->limits.reserved_srqs,
543 0, 0);
534 if (!mdev->srq_table.table) { 544 if (!mdev->srq_table.table) {
535 mthca_err(mdev, "Failed to map SRQ context memory, " 545 mthca_err(mdev, "Failed to map SRQ context memory, "
536 "aborting.\n"); 546 "aborting.\n");
@@ -550,7 +560,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
550 mdev->limits.num_amgms, 560 mdev->limits.num_amgms,
551 mdev->limits.num_mgms + 561 mdev->limits.num_mgms +
552 mdev->limits.num_amgms, 562 mdev->limits.num_amgms,
553 0); 563 0, 0);
554 if (!mdev->mcg_table.table) { 564 if (!mdev->mcg_table.table) {
555 mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); 565 mthca_err(mdev, "Failed to map MCG context memory, aborting.\n");
556 err = -ENOMEM; 566 err = -ENOMEM;
@@ -588,7 +598,7 @@ err_unmap_aux:
588 mthca_UNMAP_ICM_AUX(mdev, &status); 598 mthca_UNMAP_ICM_AUX(mdev, &status);
589 599
590err_free_aux: 600err_free_aux:
591 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); 601 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
592 602
593 return err; 603 return err;
594} 604}
@@ -609,7 +619,7 @@ static void mthca_free_icms(struct mthca_dev *mdev)
609 mthca_unmap_eq_icm(mdev); 619 mthca_unmap_eq_icm(mdev);
610 620
611 mthca_UNMAP_ICM_AUX(mdev, &status); 621 mthca_UNMAP_ICM_AUX(mdev, &status);
612 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); 622 mthca_free_icm(mdev, mdev->fw.arbel.aux_icm, 0);
613} 623}
614 624
615static int mthca_init_arbel(struct mthca_dev *mdev) 625static int mthca_init_arbel(struct mthca_dev *mdev)
@@ -693,7 +703,7 @@ err_free_icm:
693 703
694err_stop_fw: 704err_stop_fw:
695 mthca_UNMAP_FA(mdev, &status); 705 mthca_UNMAP_FA(mdev, &status);
696 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); 706 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
697 707
698err_disable: 708err_disable:
699 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) 709 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
@@ -712,7 +722,7 @@ static void mthca_close_hca(struct mthca_dev *mdev)
712 mthca_free_icms(mdev); 722 mthca_free_icms(mdev);
713 723
714 mthca_UNMAP_FA(mdev, &status); 724 mthca_UNMAP_FA(mdev, &status);
715 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); 725 mthca_free_icm(mdev, mdev->fw.arbel.fw_icm, 0);
716 726
717 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) 727 if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
718 mthca_DISABLE_LAM(mdev, &status); 728 mthca_DISABLE_LAM(mdev, &status);
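
The new comment in mthca_init_icm() states the rule behind the extra ALIGN: on mem-free HCAs the CPU writes non-reserved MTT entries directly while the HCA may DMA into the reserved ones, so the reserved region is rounded up to whole CPU cache lines to keep the two out of the same line on non-cache-coherent systems. A small sketch of that rounding, with a 64-byte MTT segment size assumed purely for illustration:

        #include <linux/kernel.h>           /* ALIGN()                   */
        #include <linux/dma-mapping.h>      /* dma_get_cache_alignment() */

        #define DEMO_MTT_SEG_SIZE 64        /* assumed segment size      */

        static int demo_round_reserved_mtts(int reserved_segs)
        {
                /*
                 * e.g. 3 reserved segments * 64 B = 192 B; with 128-byte
                 * cache lines this rounds up to 256 B, i.e. 4 segments.
                 */
                return ALIGN(reserved_segs * DEMO_MTT_SEG_SIZE,
                             dma_get_cache_alignment()) / DEMO_MTT_SEG_SIZE;
        }
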
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index 6b19645d946c..0b9d053a599d 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -35,6 +35,9 @@
35 */ 35 */
36 36
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/scatterlist.h>
39
40#include <asm/page.h>
38 41
39#include "mthca_memfree.h" 42#include "mthca_memfree.h"
40#include "mthca_dev.h" 43#include "mthca_dev.h"
@@ -58,22 +61,42 @@ struct mthca_user_db_table {
58 } page[0]; 61 } page[0];
59}; 62};
60 63
61void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) 64static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
65{
66 int i;
67
68 if (chunk->nsg > 0)
69 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
70 PCI_DMA_BIDIRECTIONAL);
71
72 for (i = 0; i < chunk->npages; ++i)
73 __free_pages(chunk->mem[i].page,
74 get_order(chunk->mem[i].length));
75}
76
77static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
62{ 78{
63 struct mthca_icm_chunk *chunk, *tmp;
64 int i; 79 int i;
65 80
81 for (i = 0; i < chunk->npages; ++i) {
82 dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
83 lowmem_page_address(chunk->mem[i].page),
84 sg_dma_address(&chunk->mem[i]));
85 }
86}
87
88void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
89{
90 struct mthca_icm_chunk *chunk, *tmp;
91
66 if (!icm) 92 if (!icm)
67 return; 93 return;
68 94
69 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) { 95 list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
70 if (chunk->nsg > 0) 96 if (coherent)
71 pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, 97 mthca_free_icm_coherent(dev, chunk);
72 PCI_DMA_BIDIRECTIONAL); 98 else
73 99 mthca_free_icm_pages(dev, chunk);
74 for (i = 0; i < chunk->npages; ++i)
75 __free_pages(chunk->mem[i].page,
76 get_order(chunk->mem[i].length));
77 100
78 kfree(chunk); 101 kfree(chunk);
79 } 102 }
@@ -81,12 +104,41 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
81 kfree(icm); 104 kfree(icm);
82} 105}
83 106
107static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
108{
109 mem->page = alloc_pages(gfp_mask, order);
110 if (!mem->page)
111 return -ENOMEM;
112
113 mem->length = PAGE_SIZE << order;
114 mem->offset = 0;
115 return 0;
116}
117
118static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
119 int order, gfp_t gfp_mask)
120{
121 void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
122 gfp_mask);
123 if (!buf)
124 return -ENOMEM;
125
126 sg_set_buf(mem, buf, PAGE_SIZE << order);
127 BUG_ON(mem->offset);
128 sg_dma_len(mem) = PAGE_SIZE << order;
129 return 0;
130}
131
84struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, 132struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
85 gfp_t gfp_mask) 133 gfp_t gfp_mask, int coherent)
86{ 134{
87 struct mthca_icm *icm; 135 struct mthca_icm *icm;
88 struct mthca_icm_chunk *chunk = NULL; 136 struct mthca_icm_chunk *chunk = NULL;
89 int cur_order; 137 int cur_order;
138 int ret;
139
140 /* We use sg_set_buf for coherent allocs, which assumes low memory */
141 BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));
90 142
91 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); 143 icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
92 if (!icm) 144 if (!icm)
@@ -112,21 +164,28 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
112 while (1 << cur_order > npages) 164 while (1 << cur_order > npages)
113 --cur_order; 165 --cur_order;
114 166
115 chunk->mem[chunk->npages].page = alloc_pages(gfp_mask, cur_order); 167 if (coherent)
116 if (chunk->mem[chunk->npages].page) { 168 ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
117 chunk->mem[chunk->npages].length = PAGE_SIZE << cur_order; 169 &chunk->mem[chunk->npages],
118 chunk->mem[chunk->npages].offset = 0; 170 cur_order, gfp_mask);
171 else
172 ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
173 cur_order, gfp_mask);
119 174
120 if (++chunk->npages == MTHCA_ICM_CHUNK_LEN) { 175 if (!ret) {
176 ++chunk->npages;
177
178 if (!coherent && chunk->npages == MTHCA_ICM_CHUNK_LEN) {
121 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, 179 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
122 chunk->npages, 180 chunk->npages,
123 PCI_DMA_BIDIRECTIONAL); 181 PCI_DMA_BIDIRECTIONAL);
124 182
125 if (chunk->nsg <= 0) 183 if (chunk->nsg <= 0)
126 goto fail; 184 goto fail;
185 }
127 186
187 if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
128 chunk = NULL; 188 chunk = NULL;
129 }
130 189
131 npages -= 1 << cur_order; 190 npages -= 1 << cur_order;
132 } else { 191 } else {
@@ -136,7 +195,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
136 } 195 }
137 } 196 }
138 197
139 if (chunk) { 198 if (!coherent && chunk) {
140 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, 199 chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
141 chunk->npages, 200 chunk->npages,
142 PCI_DMA_BIDIRECTIONAL); 201 PCI_DMA_BIDIRECTIONAL);
@@ -148,7 +207,7 @@ struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
148 return icm; 207 return icm;
149 208
150fail: 209fail:
151 mthca_free_icm(dev, icm); 210 mthca_free_icm(dev, icm, coherent);
152 return NULL; 211 return NULL;
153} 212}
154 213
@@ -167,7 +226,7 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
167 226
168 table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, 227 table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
169 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) | 228 (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
170 __GFP_NOWARN); 229 __GFP_NOWARN, table->coherent);
171 if (!table->icm[i]) { 230 if (!table->icm[i]) {
172 ret = -ENOMEM; 231 ret = -ENOMEM;
173 goto out; 232 goto out;
@@ -175,7 +234,7 @@ int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int ob
175 234
176 if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE, 235 if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
177 &status) || status) { 236 &status) || status) {
178 mthca_free_icm(dev, table->icm[i]); 237 mthca_free_icm(dev, table->icm[i], table->coherent);
179 table->icm[i] = NULL; 238 table->icm[i] = NULL;
180 ret = -ENOMEM; 239 ret = -ENOMEM;
181 goto out; 240 goto out;
@@ -204,16 +263,16 @@ void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int o
204 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, 263 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
205 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, 264 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
206 &status); 265 &status);
207 mthca_free_icm(dev, table->icm[i]); 266 mthca_free_icm(dev, table->icm[i], table->coherent);
208 table->icm[i] = NULL; 267 table->icm[i] = NULL;
209 } 268 }
210 269
211 mutex_unlock(&table->mutex); 270 mutex_unlock(&table->mutex);
212} 271}
213 272
214void *mthca_table_find(struct mthca_icm_table *table, int obj) 273void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
215{ 274{
216 int idx, offset, i; 275 int idx, offset, dma_offset, i;
217 struct mthca_icm_chunk *chunk; 276 struct mthca_icm_chunk *chunk;
218 struct mthca_icm *icm; 277 struct mthca_icm *icm;
219 struct page *page = NULL; 278 struct page *page = NULL;
@@ -225,13 +284,22 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
225 284
226 idx = (obj & (table->num_obj - 1)) * table->obj_size; 285 idx = (obj & (table->num_obj - 1)) * table->obj_size;
227 icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE]; 286 icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
228 offset = idx % MTHCA_TABLE_CHUNK_SIZE; 287 dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;
229 288
230 if (!icm) 289 if (!icm)
231 goto out; 290 goto out;
232 291
233 list_for_each_entry(chunk, &icm->chunk_list, list) { 292 list_for_each_entry(chunk, &icm->chunk_list, list) {
234 for (i = 0; i < chunk->npages; ++i) { 293 for (i = 0; i < chunk->npages; ++i) {
294 if (dma_handle && dma_offset >= 0) {
295 if (sg_dma_len(&chunk->mem[i]) > dma_offset)
296 *dma_handle = sg_dma_address(&chunk->mem[i]) +
297 dma_offset;
298 dma_offset -= sg_dma_len(&chunk->mem[i]);
299 }
300 /* DMA mapping can merge pages but not split them,
301 * so if we found the page, dma_handle has already
302 * been assigned to. */
235 if (chunk->mem[i].length > offset) { 303 if (chunk->mem[i].length > offset) {
236 page = chunk->mem[i].page; 304 page = chunk->mem[i].page;
237 goto out; 305 goto out;
@@ -283,7 +351,7 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
283struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, 351struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
284 u64 virt, int obj_size, 352 u64 virt, int obj_size,
285 int nobj, int reserved, 353 int nobj, int reserved,
286 int use_lowmem) 354 int use_lowmem, int use_coherent)
287{ 355{
288 struct mthca_icm_table *table; 356 struct mthca_icm_table *table;
289 int num_icm; 357 int num_icm;
@@ -302,6 +370,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
302 table->num_obj = nobj; 370 table->num_obj = nobj;
303 table->obj_size = obj_size; 371 table->obj_size = obj_size;
304 table->lowmem = use_lowmem; 372 table->lowmem = use_lowmem;
373 table->coherent = use_coherent;
305 mutex_init(&table->mutex); 374 mutex_init(&table->mutex);
306 375
307 for (i = 0; i < num_icm; ++i) 376 for (i = 0; i < num_icm; ++i)
@@ -314,12 +383,12 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
314 383
315 table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, 384 table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
316 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) | 385 (use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
317 __GFP_NOWARN); 386 __GFP_NOWARN, use_coherent);
318 if (!table->icm[i]) 387 if (!table->icm[i])
319 goto err; 388 goto err;
320 if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE, 389 if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
321 &status) || status) { 390 &status) || status) {
322 mthca_free_icm(dev, table->icm[i]); 391 mthca_free_icm(dev, table->icm[i], table->coherent);
323 table->icm[i] = NULL; 392 table->icm[i] = NULL;
324 goto err; 393 goto err;
325 } 394 }
@@ -339,7 +408,7 @@ err:
339 mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE, 408 mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
340 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, 409 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
341 &status); 410 &status);
342 mthca_free_icm(dev, table->icm[i]); 411 mthca_free_icm(dev, table->icm[i], table->coherent);
343 } 412 }
344 413
345 kfree(table); 414 kfree(table);
@@ -357,7 +426,7 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
357 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE, 426 mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
358 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE, 427 MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
359 &status); 428 &status);
360 mthca_free_icm(dev, table->icm[i]); 429 mthca_free_icm(dev, table->icm[i], table->coherent);
361 } 430 }
362 431
363 kfree(table); 432 kfree(table);
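
The dma_handle out-parameter added to mthca_table_find() relies on the property spelled out in the inline comment: pci_map_sg() may merge physically adjacent pages into one DMA segment but never splits a page, so the DMA address covering a byte offset is found by walking sg_dma_len() exactly as the backing page is found by walking the original entry lengths. A generic sketch of that walk (the helper name is illustrative):

        #include <linux/pci.h>
        #include <linux/scatterlist.h>

        /*
         * Return the bus address corresponding to byte @offset inside a
         * scatterlist of @nsg already-mapped entries, or 0 if out of range.
         */
        static dma_addr_t demo_offset_to_dma(struct scatterlist *sg, int nsg,
                                             int offset)
        {
                int i;

                for (i = 0; i < nsg; ++i) {
                        if (offset < sg_dma_len(&sg[i]))
                                return sg_dma_address(&sg[i]) + offset;
                        offset -= sg_dma_len(&sg[i]);
                }
                return 0;
        }
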
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index 6d42947e1dc4..594144145f45 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -69,6 +69,7 @@ struct mthca_icm_table {
69 int num_obj; 69 int num_obj;
70 int obj_size; 70 int obj_size;
71 int lowmem; 71 int lowmem;
72 int coherent;
72 struct mutex mutex; 73 struct mutex mutex;
73 struct mthca_icm *icm[0]; 74 struct mthca_icm *icm[0];
74}; 75};
@@ -82,17 +83,17 @@ struct mthca_icm_iter {
82struct mthca_dev; 83struct mthca_dev;
83 84
84struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages, 85struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
85 gfp_t gfp_mask); 86 gfp_t gfp_mask, int coherent);
86void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm); 87void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent);
87 88
88struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, 89struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
89 u64 virt, int obj_size, 90 u64 virt, int obj_size,
90 int nobj, int reserved, 91 int nobj, int reserved,
91 int use_lowmem); 92 int use_lowmem, int use_coherent);
92void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table); 93void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table);
93int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); 94int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
94void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj); 95void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj);
95void *mthca_table_find(struct mthca_icm_table *table, int obj); 96void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle);
96int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table, 97int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
97 int start, int end); 98 int start, int end);
98void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, 99void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index f71ffa88db3a..6037dd3f87df 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -243,8 +243,8 @@ void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
243 kfree(mtt); 243 kfree(mtt);
244} 244}
245 245
246int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, 246static int __mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
247 int start_index, u64 *buffer_list, int list_len) 247 int start_index, u64 *buffer_list, int list_len)
248{ 248{
249 struct mthca_mailbox *mailbox; 249 struct mthca_mailbox *mailbox;
250 __be64 *mtt_entry; 250 __be64 *mtt_entry;
@@ -295,6 +295,84 @@ out:
295 return err; 295 return err;
296} 296}
297 297
298int mthca_write_mtt_size(struct mthca_dev *dev)
299{
300 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
301 /*
302 * Be friendly to WRITE_MTT command
303 * and leave two empty slots for the
304 * index and reserved fields of the
305 * mailbox.
306 */
307 return PAGE_SIZE / sizeof (u64) - 2;
308
309 /* For Arbel, all MTTs must fit in the same page. */
310 return mthca_is_memfree(dev) ? (PAGE_SIZE / sizeof (u64)) : 0x7ffffff;
311}
312
313void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
314 int start_index, u64 *buffer_list, int list_len)
315{
316 u64 __iomem *mtts;
317 int i;
318
319 mtts = dev->mr_table.tavor_fmr.mtt_base + mtt->first_seg * MTHCA_MTT_SEG_SIZE +
320 start_index * sizeof (u64);
321 for (i = 0; i < list_len; ++i)
322 mthca_write64_raw(cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT),
323 mtts + i);
324}
325
326void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, struct mthca_mtt *mtt,
327 int start_index, u64 *buffer_list, int list_len)
328{
329 __be64 *mtts;
330 dma_addr_t dma_handle;
331 int i;
332 int s = start_index * sizeof (u64);
333
334 /* For Arbel, all MTTs must fit in the same page. */
335 BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
336 /* Require full segments */
337 BUG_ON(s % MTHCA_MTT_SEG_SIZE);
338
339 mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
340 s / MTHCA_MTT_SEG_SIZE, &dma_handle);
341
342 BUG_ON(!mtts);
343
344 for (i = 0; i < list_len; ++i)
345 mtts[i] = cpu_to_be64(buffer_list[i] | MTHCA_MTT_FLAG_PRESENT);
346
347 dma_sync_single(&dev->pdev->dev, dma_handle, list_len * sizeof (u64), DMA_TO_DEVICE);
348}
349
350int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
351 int start_index, u64 *buffer_list, int list_len)
352{
353 int size = mthca_write_mtt_size(dev);
354 int chunk;
355
356 if (dev->mr_table.fmr_mtt_buddy != &dev->mr_table.mtt_buddy)
357 return __mthca_write_mtt(dev, mtt, start_index, buffer_list, list_len);
358
359 while (list_len > 0) {
360 chunk = min(size, list_len);
361 if (mthca_is_memfree(dev))
362 mthca_arbel_write_mtt_seg(dev, mtt, start_index,
363 buffer_list, chunk);
364 else
365 mthca_tavor_write_mtt_seg(dev, mtt, start_index,
366 buffer_list, chunk);
367
368 list_len -= chunk;
369 start_index += chunk;
370 buffer_list += chunk;
371 }
372
373 return 0;
374}
375
298static inline u32 tavor_hw_index_to_key(u32 ind) 376static inline u32 tavor_hw_index_to_key(u32 ind)
299{ 377{
300 return ind; 378 return ind;
@@ -524,7 +602,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
524 if (err) 602 if (err)
525 goto err_out_mpt_free; 603 goto err_out_mpt_free;
526 604
527 mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key); 605 mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key, NULL);
528 BUG_ON(!mr->mem.arbel.mpt); 606 BUG_ON(!mr->mem.arbel.mpt);
529 } else 607 } else
530 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + 608 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
@@ -538,7 +616,8 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
538 616
539 if (mthca_is_memfree(dev)) { 617 if (mthca_is_memfree(dev)) {
540 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, 618 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
541 mr->mtt->first_seg); 619 mr->mtt->first_seg,
620 &mr->mem.arbel.dma_handle);
542 BUG_ON(!mr->mem.arbel.mtts); 621 BUG_ON(!mr->mem.arbel.mtts);
543 } else 622 } else
544 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; 623 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
@@ -712,6 +791,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
712 fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] | 791 fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
713 MTHCA_MTT_FLAG_PRESENT); 792 MTHCA_MTT_FLAG_PRESENT);
714 793
794 dma_sync_single(&dev->pdev->dev, fmr->mem.arbel.dma_handle,
795 list_len * sizeof(u64), DMA_TO_DEVICE);
796
715 fmr->mem.arbel.mpt->key = cpu_to_be32(key); 797 fmr->mem.arbel.mpt->key = cpu_to_be32(key);
716 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key); 798 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
717 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift)); 799 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_shift));
@@ -761,7 +843,7 @@ void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
761int mthca_init_mr_table(struct mthca_dev *dev) 843int mthca_init_mr_table(struct mthca_dev *dev)
762{ 844{
763 unsigned long addr; 845 unsigned long addr;
764 int err, i; 846 int mpts, mtts, err, i;
765 847
766 err = mthca_alloc_init(&dev->mr_table.mpt_alloc, 848 err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
767 dev->limits.num_mpts, 849 dev->limits.num_mpts,
@@ -795,13 +877,21 @@ int mthca_init_mr_table(struct mthca_dev *dev)
795 err = -EINVAL; 877 err = -EINVAL;
796 goto err_fmr_mpt; 878 goto err_fmr_mpt;
797 } 879 }
880 mpts = mtts = 1 << i;
881 } else {
882 mpts = dev->limits.num_mtt_segs;
883 mtts = dev->limits.num_mpts;
884 }
885
886 if (!mthca_is_memfree(dev) &&
887 (dev->mthca_flags & MTHCA_FLAG_FMR)) {
798 888
799 addr = pci_resource_start(dev->pdev, 4) + 889 addr = pci_resource_start(dev->pdev, 4) +
800 ((pci_resource_len(dev->pdev, 4) - 1) & 890 ((pci_resource_len(dev->pdev, 4) - 1) &
801 dev->mr_table.mpt_base); 891 dev->mr_table.mpt_base);
802 892
803 dev->mr_table.tavor_fmr.mpt_base = 893 dev->mr_table.tavor_fmr.mpt_base =
804 ioremap(addr, (1 << i) * sizeof(struct mthca_mpt_entry)); 894 ioremap(addr, mpts * sizeof(struct mthca_mpt_entry));
805 895
806 if (!dev->mr_table.tavor_fmr.mpt_base) { 896 if (!dev->mr_table.tavor_fmr.mpt_base) {
807 mthca_warn(dev, "MPT ioremap for FMR failed.\n"); 897 mthca_warn(dev, "MPT ioremap for FMR failed.\n");
@@ -814,19 +904,21 @@ int mthca_init_mr_table(struct mthca_dev *dev)
814 dev->mr_table.mtt_base); 904 dev->mr_table.mtt_base);
815 905
816 dev->mr_table.tavor_fmr.mtt_base = 906 dev->mr_table.tavor_fmr.mtt_base =
817 ioremap(addr, (1 << i) * MTHCA_MTT_SEG_SIZE); 907 ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
818 if (!dev->mr_table.tavor_fmr.mtt_base) { 908 if (!dev->mr_table.tavor_fmr.mtt_base) {
819 mthca_warn(dev, "MTT ioremap for FMR failed.\n"); 909 mthca_warn(dev, "MTT ioremap for FMR failed.\n");
820 err = -ENOMEM; 910 err = -ENOMEM;
821 goto err_fmr_mtt; 911 goto err_fmr_mtt;
822 } 912 }
913 }
823 914
824 err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i); 915 if (dev->limits.fmr_reserved_mtts) {
916 err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, fls(mtts - 1));
825 if (err) 917 if (err)
826 goto err_fmr_mtt_buddy; 918 goto err_fmr_mtt_buddy;
827 919
828 /* Prevent regular MRs from using FMR keys */ 920 /* Prevent regular MRs from using FMR keys */
829 err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i); 921 err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, fls(mtts - 1));
830 if (err) 922 if (err)
831 goto err_reserve_fmr; 923 goto err_reserve_fmr;
832 924
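
mthca_write_mtt_size() above encodes the per-write limits that the chunking loop in mthca_write_mtt() obeys: when the WRITE_MTT mailbox command is used, a mailbox page holds PAGE_SIZE/sizeof(u64) 64-bit slots of which two are taken by the index and reserved fields, so with 4 KB pages at most 510 MTT entries fit per command; direct CPU writes on mem-free HCAs must stay within one ICM page (512 entries at 4 KB); and the Tavor direct-write path is effectively unbounded (0x7ffffff). A toy model of the resulting chunking, with a hypothetical write_seg callback standing in for the per-chip segment writers:

        #include <linux/kernel.h>           /* min() */

        /*
         * Toy model of the chunking loop: split list_len MTT entries into
         * writes of at most max_per_write entries each.
         */
        static int demo_write_mtt(int start_index, int list_len, int max_per_write,
                                  int (*write_seg)(int start, int len))
        {
                while (list_len > 0) {
                        int chunk = min(max_per_write, list_len);
                        int err = write_seg(start_index, chunk);

                        if (err)
                                return err;
                        list_len    -= chunk;
                        start_index += chunk;
                }
                return 0;
        }
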
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 58d44aa3c302..26bf86d1cfcd 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -277,7 +277,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
277 * out of the MR pool. They don't use additional memory, but 277 * out of the MR pool. They don't use additional memory, but
278 * we assign them as part of the HCA profile anyway. 278 * we assign them as part of the HCA profile anyway.
279 */ 279 */
280 if (mthca_is_memfree(dev)) 280 if (mthca_is_memfree(dev) || BITS_PER_LONG == 64)
281 dev->limits.fmr_reserved_mtts = 0; 281 dev->limits.fmr_reserved_mtts = 0;
282 else 282 else
283 dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts; 283 dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 7b96751695ea..0725ad7ad9bf 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1015,6 +1015,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1015 int shift, n, len; 1015 int shift, n, len;
1016 int i, j, k; 1016 int i, j, k;
1017 int err = 0; 1017 int err = 0;
1018 int write_mtt_size;
1018 1019
1019 shift = ffs(region->page_size) - 1; 1020 shift = ffs(region->page_size) - 1;
1020 1021
@@ -1040,6 +1041,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1040 1041
1041 i = n = 0; 1042 i = n = 0;
1042 1043
1044 write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
1045
1043 list_for_each_entry(chunk, &region->chunk_list, list) 1046 list_for_each_entry(chunk, &region->chunk_list, list)
1044 for (j = 0; j < chunk->nmap; ++j) { 1047 for (j = 0; j < chunk->nmap; ++j) {
1045 len = sg_dma_len(&chunk->page_list[j]) >> shift; 1048 len = sg_dma_len(&chunk->page_list[j]) >> shift;
@@ -1047,14 +1050,11 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1047 pages[i++] = sg_dma_address(&chunk->page_list[j]) + 1050 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
1048 region->page_size * k; 1051 region->page_size * k;
1049 /* 1052 /*
1050 * Be friendly to WRITE_MTT command 1053 * Be friendly to write_mtt and pass it chunks
1051 * and leave two empty slots for the 1054 * of appropriate size.
1052 * index and reserved fields of the
1053 * mailbox.
1054 */ 1055 */
1055 if (i == PAGE_SIZE / sizeof (u64) - 2) { 1056 if (i == write_mtt_size) {
1056 err = mthca_write_mtt(dev, mr->mtt, 1057 err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
1057 n, pages, i);
1058 if (err) 1058 if (err)
1059 goto mtt_done; 1059 goto mtt_done;
1060 n += i; 1060 n += i;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 9a5bece3fa5c..1d266ac2e094 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -89,6 +89,7 @@ struct mthca_fmr {
89 struct { 89 struct {
90 struct mthca_mpt_entry *mpt; 90 struct mthca_mpt_entry *mpt;
91 __be64 *mtts; 91 __be64 *mtts;
92 dma_addr_t dma_handle;
92 } arbel; 93 } arbel;
93 } mem; 94 } mem;
94}; 95};
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 5f5214c0337d..224c93dd29eb 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -399,7 +399,7 @@ static int to_ib_qp_access_flags(int mthca_flags)
399static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr, 399static void to_ib_ah_attr(struct mthca_dev *dev, struct ib_ah_attr *ib_ah_attr,
400 struct mthca_qp_path *path) 400 struct mthca_qp_path *path)
401{ 401{
402 memset(ib_ah_attr, 0, sizeof *path); 402 memset(ib_ah_attr, 0, sizeof *ib_ah_attr);
403 ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3; 403 ib_ah_attr->port_num = (be32_to_cpu(path->port_pkey) >> 24) & 0x3;
404 404
405 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports) 405 if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->limits.num_ports)
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 10684da33d58..61974b0296ca 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -116,11 +116,16 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
116 struct mthca_srq *srq, 116 struct mthca_srq *srq,
117 struct mthca_arbel_srq_context *context) 117 struct mthca_arbel_srq_context *context)
118{ 118{
119 int logsize; 119 int logsize, max;
120 120
121 memset(context, 0, sizeof *context); 121 memset(context, 0, sizeof *context);
122 122
123 logsize = ilog2(srq->max); 123 /*
124 * Put max in a temporary variable to work around gcc bug
125 * triggered by ilog2() on sparc64.
126 */
127 max = srq->max;
128 logsize = ilog2(max);
124 context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); 129 context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn);
125 context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); 130 context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
126 context->db_index = cpu_to_be32(srq->db_index); 131 context->db_index = cpu_to_be32(srq->db_index);
diff --git a/drivers/infiniband/ulp/ipoib/Kconfig b/drivers/infiniband/ulp/ipoib/Kconfig
index c75322d820d4..af78ccc4ce71 100644
--- a/drivers/infiniband/ulp/ipoib/Kconfig
+++ b/drivers/infiniband/ulp/ipoib/Kconfig
@@ -1,6 +1,6 @@
1config INFINIBAND_IPOIB 1config INFINIBAND_IPOIB
2 tristate "IP-over-InfiniBand" 2 tristate "IP-over-InfiniBand"
3 depends on INFINIBAND && NETDEVICES && INET 3 depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
4 ---help--- 4 ---help---
5 Support for the IP-over-InfiniBand protocol (IPoIB). This 5 Support for the IP-over-InfiniBand protocol (IPoIB). This
6 transports IP packets over InfiniBand so you can use your IB 6 transports IP packets over InfiniBand so you can use your IB
@@ -8,6 +8,20 @@ config INFINIBAND_IPOIB
8 8
9 See Documentation/infiniband/ipoib.txt for more information 9 See Documentation/infiniband/ipoib.txt for more information
10 10
11config INFINIBAND_IPOIB_CM
12 bool "IP-over-InfiniBand Connected Mode support"
13 depends on INFINIBAND_IPOIB && EXPERIMENTAL
14 default n
15 ---help---
16 This option enables experimental support for IPoIB connected mode.
17 After enabling this option, you need to switch to connected mode through
18 /sys/class/net/ibXXX/mode to actually create connections, and then increase
19 the interface MTU with e.g. ifconfig ib0 mtu 65520.
20
21 WARNING: Enabling connected mode will trigger some
22 packet drops for multicast and UD mode traffic from this interface,
23 unless you limit mtu for these destinations to 2044.
24
11config INFINIBAND_IPOIB_DEBUG 25config INFINIBAND_IPOIB_DEBUG
12 bool "IP-over-InfiniBand debugging" if EMBEDDED 26 bool "IP-over-InfiniBand debugging" if EMBEDDED
13 depends on INFINIBAND_IPOIB 27 depends on INFINIBAND_IPOIB
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index 8935e74ae3f8..98ee38e8c2c4 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,5 +5,6 @@ ib_ipoib-y := ipoib_main.o \
5 ipoib_multicast.o \ 5 ipoib_multicast.o \
6 ipoib_verbs.o \ 6 ipoib_verbs.o \
7 ipoib_vlan.o 7 ipoib_vlan.o
8ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
8ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o 9ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
9 10
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07deee8f81ce..2594db2030b3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -62,6 +62,10 @@ enum {
62 62
63 IPOIB_ENCAP_LEN = 4, 63 IPOIB_ENCAP_LEN = 4,
64 64
65 IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header to 16 */
66 IPOIB_CM_BUF_SIZE = IPOIB_CM_MTU + IPOIB_ENCAP_LEN,
67 IPOIB_CM_HEAD_SIZE = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
68 IPOIB_CM_RX_SG = ALIGN(IPOIB_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
65 IPOIB_RX_RING_SIZE = 128, 69 IPOIB_RX_RING_SIZE = 128,
66 IPOIB_TX_RING_SIZE = 64, 70 IPOIB_TX_RING_SIZE = 64,
67 IPOIB_MAX_QUEUE_SIZE = 8192, 71 IPOIB_MAX_QUEUE_SIZE = 8192,
@@ -81,6 +85,8 @@ enum {
81 IPOIB_MCAST_RUN = 6, 85 IPOIB_MCAST_RUN = 6,
82 IPOIB_STOP_REAPER = 7, 86 IPOIB_STOP_REAPER = 7,
83 IPOIB_MCAST_STARTED = 8, 87 IPOIB_MCAST_STARTED = 8,
88 IPOIB_FLAG_NETIF_STOPPED = 9,
89 IPOIB_FLAG_ADMIN_CM = 10,
84 90
85 IPOIB_MAX_BACKOFF_SECONDS = 16, 91 IPOIB_MAX_BACKOFF_SECONDS = 16,
86 92
@@ -90,6 +96,13 @@ enum {
90 IPOIB_MCAST_FLAG_ATTACHED = 3, 96 IPOIB_MCAST_FLAG_ATTACHED = 3,
91}; 97};
92 98
99#define IPOIB_OP_RECV (1ul << 31)
100#ifdef CONFIG_INFINIBAND_IPOIB_CM
101#define IPOIB_CM_OP_SRQ (1ul << 30)
102#else
103#define IPOIB_CM_OP_SRQ (0)
104#endif
105
93/* structs */ 106/* structs */
94 107
95struct ipoib_header { 108struct ipoib_header {
@@ -113,6 +126,59 @@ struct ipoib_tx_buf {
113 u64 mapping; 126 u64 mapping;
114}; 127};
115 128
129struct ib_cm_id;
130
131struct ipoib_cm_data {
132 __be32 qpn; /* High byte MUST be ignored on receive */
133 __be32 mtu;
134};
135
136struct ipoib_cm_rx {
137 struct ib_cm_id *id;
138 struct ib_qp *qp;
139 struct list_head list;
140 struct net_device *dev;
141 unsigned long jiffies;
142};
143
144struct ipoib_cm_tx {
145 struct ib_cm_id *id;
146 struct ib_cq *cq;
147 struct ib_qp *qp;
148 struct list_head list;
149 struct net_device *dev;
150 struct ipoib_neigh *neigh;
151 struct ipoib_path *path;
152 struct ipoib_tx_buf *tx_ring;
153 unsigned tx_head;
154 unsigned tx_tail;
155 unsigned long flags;
156 u32 mtu;
157 struct ib_wc ibwc[IPOIB_NUM_WC];
158};
159
160struct ipoib_cm_rx_buf {
161 struct sk_buff *skb;
162 u64 mapping[IPOIB_CM_RX_SG];
163};
164
165struct ipoib_cm_dev_priv {
166 struct ib_srq *srq;
167 struct ipoib_cm_rx_buf *srq_ring;
168 struct ib_cm_id *id;
169 struct list_head passive_ids;
170 struct work_struct start_task;
171 struct work_struct reap_task;
172 struct work_struct skb_task;
173 struct delayed_work stale_task;
174 struct sk_buff_head skb_queue;
175 struct list_head start_list;
176 struct list_head reap_list;
177 struct ib_wc ibwc[IPOIB_NUM_WC];
178 struct ib_sge rx_sge[IPOIB_CM_RX_SG];
179 struct ib_recv_wr rx_wr;
180};
181
116/* 182/*
117 * Device private locking: tx_lock protects members used in TX fast 183 * Device private locking: tx_lock protects members used in TX fast
118 * path (and we use LLTX so upper layers don't do extra locking). 184 * path (and we use LLTX so upper layers don't do extra locking).
@@ -179,6 +245,10 @@ struct ipoib_dev_priv {
179 struct list_head child_intfs; 245 struct list_head child_intfs;
180 struct list_head list; 246 struct list_head list;
181 247
248#ifdef CONFIG_INFINIBAND_IPOIB_CM
249 struct ipoib_cm_dev_priv cm;
250#endif
251
182#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 252#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
183 struct list_head fs_list; 253 struct list_head fs_list;
184 struct dentry *mcg_dentry; 254 struct dentry *mcg_dentry;
@@ -212,6 +282,9 @@ struct ipoib_path {
212 282
213struct ipoib_neigh { 283struct ipoib_neigh {
214 struct ipoib_ah *ah; 284 struct ipoib_ah *ah;
285#ifdef CONFIG_INFINIBAND_IPOIB_CM
286 struct ipoib_cm_tx *cm;
287#endif
215 union ib_gid dgid; 288 union ib_gid dgid;
216 struct sk_buff_head queue; 289 struct sk_buff_head queue;
217 290
@@ -315,6 +388,146 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
315void ipoib_pkey_poll(struct work_struct *work); 388void ipoib_pkey_poll(struct work_struct *work);
316int ipoib_pkey_dev_delay_open(struct net_device *dev); 389int ipoib_pkey_dev_delay_open(struct net_device *dev);
317 390
391#ifdef CONFIG_INFINIBAND_IPOIB_CM
392
393#define IPOIB_FLAGS_RC 0x80
394#define IPOIB_FLAGS_UC 0x40
395
396/* We don't support UC connections at the moment */
397#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
398
399static inline int ipoib_cm_admin_enabled(struct net_device *dev)
400{
401 struct ipoib_dev_priv *priv = netdev_priv(dev);
402 return IPOIB_CM_SUPPORTED(dev->dev_addr) &&
403 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
404}
405
406static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
407{
408 struct ipoib_dev_priv *priv = netdev_priv(dev);
409 return IPOIB_CM_SUPPORTED(n->ha) &&
410 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
411}
412
413static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
414
415{
416 return test_bit(IPOIB_FLAG_OPER_UP, &neigh->cm->flags);
417}
418
419static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
420{
421 return neigh->cm;
422}
423
424static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
425{
426 neigh->cm = tx;
427}
428
429void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx);
430int ipoib_cm_dev_open(struct net_device *dev);
431void ipoib_cm_dev_stop(struct net_device *dev);
432int ipoib_cm_dev_init(struct net_device *dev);
433int ipoib_cm_add_mode_attr(struct net_device *dev);
434void ipoib_cm_dev_cleanup(struct net_device *dev);
435struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
436 struct ipoib_neigh *neigh);
437void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx);
438void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
439 unsigned int mtu);
440void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
441#else
442
443struct ipoib_cm_tx;
444
445static inline int ipoib_cm_admin_enabled(struct net_device *dev)
446{
447 return 0;
448}
449static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
450
451{
452 return 0;
453}
454
455static inline int ipoib_cm_up(struct ipoib_neigh *neigh)
456
457{
458 return 0;
459}
460
461static inline struct ipoib_cm_tx *ipoib_cm_get(struct ipoib_neigh *neigh)
462{
463 return NULL;
464}
465
466static inline void ipoib_cm_set(struct ipoib_neigh *neigh, struct ipoib_cm_tx *tx)
467{
468}
469
470static inline
471void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
472{
473 return;
474}
475
476static inline
477int ipoib_cm_dev_open(struct net_device *dev)
478{
479 return 0;
480}
481
482static inline
483void ipoib_cm_dev_stop(struct net_device *dev)
484{
485 return;
486}
487
488static inline
489int ipoib_cm_dev_init(struct net_device *dev)
490{
491 return -ENOSYS;
492}
493
494static inline
495void ipoib_cm_dev_cleanup(struct net_device *dev)
496{
497 return;
498}
499
500static inline
501struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
502 struct ipoib_neigh *neigh)
503{
504 return NULL;
505}
506
507static inline
508void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
509{
510 return;
511}
512
513static inline
514int ipoib_cm_add_mode_attr(struct net_device *dev)
515{
516 return 0;
517}
518
519static inline void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
520 unsigned int mtu)
521{
522 dev_kfree_skb_any(skb);
523}
524
525static inline void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
526{
527}
528
529#endif
530
318#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG 531#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
319void ipoib_create_debug_files(struct net_device *dev); 532void ipoib_create_debug_files(struct net_device *dev);
320void ipoib_delete_debug_files(struct net_device *dev); 533void ipoib_delete_debug_files(struct net_device *dev);
@@ -392,4 +605,6 @@ extern int ipoib_debug_level;
392 605
393#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw) 606#define IPOIB_GID_ARG(gid) IPOIB_GID_RAW_ARG((gid).raw)
394 607
608#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
609
395#endif /* _IPOIB_H */ 610#endif /* _IPOIB_H */
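
The connected-mode helpers added to ipoib.h above key off the 20-byte IPoIB hardware address: byte 0 carries the capability flags (IPOIB_FLAGS_RC), the low 24 bits of the first big-endian word carry the remote QPN (the IPOIB_QPN macro moved here from ipoib_main.c), and the remaining 16 bytes are the port GID. A standalone userspace sketch of that layout, with a made-up sample address (illustrative only, not kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>   /* ntohl */

#define IPOIB_FLAGS_RC 0x80u   /* RC (connected mode) supported, as in ipoib.h */

/* QPN is the low 24 bits of the first big-endian 32-bit word of the address,
 * mirroring IPOIB_QPN(); the high byte holds the flags and is masked off. */
static uint32_t ipoib_qpn(const uint8_t *ha)
{
	uint32_t be;

	memcpy(&be, ha, sizeof(be));
	return ntohl(be) & 0xffffff;
}

int main(void)
{
	/* hypothetical 20-byte IPoIB address: flags+QPN word, then 16-byte GID */
	uint8_t ha[20] = { 0x80, 0x00, 0x04, 0x8a /* QPN 0x00048a */ };

	printf("connected mode supported: %s\n",
	       (ha[0] & IPOIB_FLAGS_RC) ? "yes" : "no");
	printf("remote QPN: 0x%06x\n", (unsigned) ipoib_qpn(ha));
	return 0;
}
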
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
new file mode 100644
index 000000000000..2d483874a589
--- /dev/null
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -0,0 +1,1237 @@
1/*
2 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 * $Id$
33 */
34
35#include <rdma/ib_cm.h>
36#include <rdma/ib_cache.h>
37#include <net/dst.h>
38#include <net/icmp.h>
39#include <linux/icmpv6.h>
40
41#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG_DATA
42static int data_debug_level;
43
44module_param_named(cm_data_debug_level, data_debug_level, int, 0644);
45MODULE_PARM_DESC(cm_data_debug_level,
46 "Enable data path debug tracing for connected mode if > 0");
47#endif
48
49#include "ipoib.h"
50
51#define IPOIB_CM_IETF_ID 0x1000000000000000ULL
52
53#define IPOIB_CM_RX_UPDATE_TIME (256 * HZ)
54#define IPOIB_CM_RX_TIMEOUT (2 * 256 * HZ)
55#define IPOIB_CM_RX_DELAY (3 * 256 * HZ)
56#define IPOIB_CM_RX_UPDATE_MASK (0x3)
57
58struct ipoib_cm_id {
59 struct ib_cm_id *id;
60 int flags;
61 u32 remote_qpn;
62 u32 remote_mtu;
63};
64
65static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
66 struct ib_cm_event *event);
67
68static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv,
69 u64 mapping[IPOIB_CM_RX_SG])
70{
71 int i;
72
73 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
74
75 for (i = 0; i < IPOIB_CM_RX_SG - 1; ++i)
76 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
77}
78
79static int ipoib_cm_post_receive(struct net_device *dev, int id)
80{
81 struct ipoib_dev_priv *priv = netdev_priv(dev);
82 struct ib_recv_wr *bad_wr;
83 int i, ret;
84
85 priv->cm.rx_wr.wr_id = id | IPOIB_CM_OP_SRQ;
86
87 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
88 priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
89
90 ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
91 if (unlikely(ret)) {
92 ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
93 ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[id].mapping);
94 dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
95 priv->cm.srq_ring[id].skb = NULL;
96 }
97
98 return ret;
99}
100
101static int ipoib_cm_alloc_rx_skb(struct net_device *dev, int id,
102 u64 mapping[IPOIB_CM_RX_SG])
103{
104 struct ipoib_dev_priv *priv = netdev_priv(dev);
105 struct sk_buff *skb;
106 int i;
107
108 skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
109 if (unlikely(!skb))
110 return -ENOMEM;
111
112 /*
113 * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
114 * IP header to a multiple of 16.
115 */
116 skb_reserve(skb, 12);
117
118 mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
119 DMA_FROM_DEVICE);
120 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
121 dev_kfree_skb_any(skb);
122 return -EIO;
123 }
124
125 for (i = 0; i < IPOIB_CM_RX_SG - 1; i++) {
126 struct page *page = alloc_page(GFP_ATOMIC);
127
128 if (!page)
129 goto partial_error;
130 skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
131
132 mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
133 0, PAGE_SIZE, DMA_FROM_DEVICE);
134 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
135 goto partial_error;
136 }
137
138 priv->cm.srq_ring[id].skb = skb;
139 return 0;
140
141partial_error:
142
143 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
144
145 for (; i >= 0; --i)
146 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
147
148 kfree_skb(skb);
149 return -ENOMEM;
150}
151
152static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
153 struct ipoib_cm_rx *p)
154{
155 struct ipoib_dev_priv *priv = netdev_priv(dev);
156 struct ib_qp_init_attr attr = {
157 .send_cq = priv->cq, /* does not matter, we never send anything */
158 .recv_cq = priv->cq,
159 .srq = priv->cm.srq,
160 .cap.max_send_wr = 1, /* FIXME: 0 Seems not to work */
161 .cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
162 .sq_sig_type = IB_SIGNAL_ALL_WR,
163 .qp_type = IB_QPT_RC,
164 .qp_context = p,
165 };
166 return ib_create_qp(priv->pd, &attr);
167}
168
169static int ipoib_cm_modify_rx_qp(struct net_device *dev,
170 struct ib_cm_id *cm_id, struct ib_qp *qp,
171 unsigned psn)
172{
173 struct ipoib_dev_priv *priv = netdev_priv(dev);
174 struct ib_qp_attr qp_attr;
175 int qp_attr_mask, ret;
176
177 qp_attr.qp_state = IB_QPS_INIT;
178 ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
179 if (ret) {
180 ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
181 return ret;
182 }
183 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
184 if (ret) {
185 ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
186 return ret;
187 }
188 qp_attr.qp_state = IB_QPS_RTR;
189 ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
190 if (ret) {
191 ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
192 return ret;
193 }
194 qp_attr.rq_psn = psn;
195 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
196 if (ret) {
197 ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
198 return ret;
199 }
200 return 0;
201}
202
203static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
204 struct ib_qp *qp, struct ib_cm_req_event_param *req,
205 unsigned psn)
206{
207 struct ipoib_dev_priv *priv = netdev_priv(dev);
208 struct ipoib_cm_data data = {};
209 struct ib_cm_rep_param rep = {};
210
211 data.qpn = cpu_to_be32(priv->qp->qp_num);
212 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
213
214 rep.private_data = &data;
215 rep.private_data_len = sizeof data;
216 rep.flow_control = 0;
217 rep.rnr_retry_count = req->rnr_retry_count;
218 rep.target_ack_delay = 20; /* FIXME */
219 rep.srq = 1;
220 rep.qp_num = qp->qp_num;
221 rep.starting_psn = psn;
222 return ib_send_cm_rep(cm_id, &rep);
223}
224
225static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
226{
227 struct net_device *dev = cm_id->context;
228 struct ipoib_dev_priv *priv = netdev_priv(dev);
229 struct ipoib_cm_rx *p;
230 unsigned long flags;
231 unsigned psn;
232 int ret;
233
234 ipoib_dbg(priv, "REQ arrived\n");
235 p = kzalloc(sizeof *p, GFP_KERNEL);
236 if (!p)
237 return -ENOMEM;
238 p->dev = dev;
239 p->id = cm_id;
240 p->qp = ipoib_cm_create_rx_qp(dev, p);
241 if (IS_ERR(p->qp)) {
242 ret = PTR_ERR(p->qp);
243 goto err_qp;
244 }
245
246 psn = random32() & 0xffffff;
247 ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
248 if (ret)
249 goto err_modify;
250
251 ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
252 if (ret) {
253 ipoib_warn(priv, "failed to send REP: %d\n", ret);
254 goto err_rep;
255 }
256
257 cm_id->context = p;
258 p->jiffies = jiffies;
259 spin_lock_irqsave(&priv->lock, flags);
260 list_add(&p->list, &priv->cm.passive_ids);
261 spin_unlock_irqrestore(&priv->lock, flags);
262 queue_delayed_work(ipoib_workqueue,
263 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
264 return 0;
265
266err_rep:
267err_modify:
268 ib_destroy_qp(p->qp);
269err_qp:
270 kfree(p);
271 return ret;
272}
273
274static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
275 struct ib_cm_event *event)
276{
277 struct ipoib_cm_rx *p;
278 struct ipoib_dev_priv *priv;
279 unsigned long flags;
280 int ret;
281
282 switch (event->event) {
283 case IB_CM_REQ_RECEIVED:
284 return ipoib_cm_req_handler(cm_id, event);
285 case IB_CM_DREQ_RECEIVED:
286 p = cm_id->context;
287 ib_send_cm_drep(cm_id, NULL, 0);
288 /* Fall through */
289 case IB_CM_REJ_RECEIVED:
290 p = cm_id->context;
291 priv = netdev_priv(p->dev);
292 spin_lock_irqsave(&priv->lock, flags);
293 if (list_empty(&p->list))
294 ret = 0; /* Connection is going away already. */
295 else {
296 list_del_init(&p->list);
297 ret = -ECONNRESET;
298 }
299 spin_unlock_irqrestore(&priv->lock, flags);
300 if (ret) {
301 ib_destroy_qp(p->qp);
302 kfree(p);
303 return ret;
304 }
305 return 0;
306 default:
307 return 0;
308 }
309}
310/* Adjust length of skb with fragments to match received data */
311static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
312 unsigned int length)
313{
314 int i, num_frags;
315 unsigned int size;
316
317 /* put header into skb */
318 size = min(length, hdr_space);
319 skb->tail += size;
320 skb->len += size;
321 length -= size;
322
323 num_frags = skb_shinfo(skb)->nr_frags;
324 for (i = 0; i < num_frags; i++) {
325 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
326
327 if (length == 0) {
328 /* don't need this page */
329 __free_page(frag->page);
330 --skb_shinfo(skb)->nr_frags;
331 } else {
332 size = min(length, (unsigned) PAGE_SIZE);
333
334 frag->size = size;
335 skb->data_len += size;
336 skb->truesize += size;
337 skb->len += size;
338 length -= size;
339 }
340 }
341}
342
343void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
344{
345 struct ipoib_dev_priv *priv = netdev_priv(dev);
346 unsigned int wr_id = wc->wr_id & ~IPOIB_CM_OP_SRQ;
347 struct sk_buff *skb;
348 struct ipoib_cm_rx *p;
349 unsigned long flags;
350 u64 mapping[IPOIB_CM_RX_SG];
351
352 ipoib_dbg_data(priv, "cm recv completion: id %d, op %d, status: %d\n",
353 wr_id, wc->opcode, wc->status);
354
355 if (unlikely(wr_id >= ipoib_recvq_size)) {
356 ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
357 wr_id, ipoib_recvq_size);
358 return;
359 }
360
361 skb = priv->cm.srq_ring[wr_id].skb;
362
363 if (unlikely(wc->status != IB_WC_SUCCESS)) {
364 ipoib_dbg(priv, "cm recv error "
365 "(status=%d, wrid=%d vend_err %x)\n",
366 wc->status, wr_id, wc->vendor_err);
367 ++priv->stats.rx_dropped;
368 goto repost;
369 }
370
371 if (!likely(wr_id & IPOIB_CM_RX_UPDATE_MASK)) {
372 p = wc->qp->qp_context;
373 if (time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
374 spin_lock_irqsave(&priv->lock, flags);
375 p->jiffies = jiffies;
376 /* Move this entry to list head, but do
377 * not re-add it if it has been removed. */
378 if (!list_empty(&p->list))
379 list_move(&p->list, &priv->cm.passive_ids);
380 spin_unlock_irqrestore(&priv->lock, flags);
381 queue_delayed_work(ipoib_workqueue,
382 &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
383 }
384 }
385
386 if (unlikely(ipoib_cm_alloc_rx_skb(dev, wr_id, mapping))) {
387 /*
388 * If we can't allocate a new RX buffer, dump
389 * this packet and reuse the old buffer.
390 */
391 ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
392 ++priv->stats.rx_dropped;
393 goto repost;
394 }
395
396 ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[wr_id].mapping);
397 memcpy(priv->cm.srq_ring[wr_id].mapping, mapping, sizeof mapping);
398
399 ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
400 wc->byte_len, wc->slid);
401
402 skb_put_frags(skb, IPOIB_CM_HEAD_SIZE, wc->byte_len);
403
404 skb->protocol = ((struct ipoib_header *) skb->data)->proto;
405 skb->mac.raw = skb->data;
406 skb_pull(skb, IPOIB_ENCAP_LEN);
407
408 dev->last_rx = jiffies;
409 ++priv->stats.rx_packets;
410 priv->stats.rx_bytes += skb->len;
411
412 skb->dev = dev;
413 /* XXX get correct PACKET_ type here */
414 skb->pkt_type = PACKET_HOST;
415 netif_rx_ni(skb);
416
417repost:
418 if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
419 ipoib_warn(priv, "ipoib_cm_post_receive failed "
420 "for buf %d\n", wr_id);
421}
422
423static inline int post_send(struct ipoib_dev_priv *priv,
424 struct ipoib_cm_tx *tx,
425 unsigned int wr_id,
426 u64 addr, int len)
427{
428 struct ib_send_wr *bad_wr;
429
430 priv->tx_sge.addr = addr;
431 priv->tx_sge.length = len;
432
433 priv->tx_wr.wr_id = wr_id;
434
435 return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
436}
437
438void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
439{
440 struct ipoib_dev_priv *priv = netdev_priv(dev);
441 struct ipoib_tx_buf *tx_req;
442 u64 addr;
443
444 if (unlikely(skb->len > tx->mtu)) {
445 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
446 skb->len, tx->mtu);
447 ++priv->stats.tx_dropped;
448 ++priv->stats.tx_errors;
449 ipoib_cm_skb_too_long(dev, skb, tx->mtu - INFINIBAND_ALEN);
450 return;
451 }
452
453 ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
454 tx->tx_head, skb->len, tx->qp->qp_num);
455
456 /*
457 * We put the skb into the tx_ring _before_ we call post_send()
458 * because it's entirely possible that the completion handler will
459 * run before we execute anything after the post_send(). That
460 * means we have to make sure everything is properly recorded and
461 * our state is consistent before we call post_send().
462 */
463 tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
464 tx_req->skb = skb;
465 addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
466 if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
467 ++priv->stats.tx_errors;
468 dev_kfree_skb_any(skb);
469 return;
470 }
471
472 tx_req->mapping = addr;
473
474 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
475 addr, skb->len))) {
476 ipoib_warn(priv, "post_send failed\n");
477 ++priv->stats.tx_errors;
478 ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
479 dev_kfree_skb_any(skb);
480 } else {
481 dev->trans_start = jiffies;
482 ++tx->tx_head;
483
484 if (tx->tx_head - tx->tx_tail == ipoib_sendq_size) {
485 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
486 tx->qp->qp_num);
487 netif_stop_queue(dev);
488 set_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
489 }
490 }
491}
492
493static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx,
494 struct ib_wc *wc)
495{
496 struct ipoib_dev_priv *priv = netdev_priv(dev);
497 unsigned int wr_id = wc->wr_id;
498 struct ipoib_tx_buf *tx_req;
499 unsigned long flags;
500
501 ipoib_dbg_data(priv, "cm send completion: id %d, op %d, status: %d\n",
502 wr_id, wc->opcode, wc->status);
503
504 if (unlikely(wr_id >= ipoib_sendq_size)) {
505 ipoib_warn(priv, "cm send completion event with wrid %d (> %d)\n",
506 wr_id, ipoib_sendq_size);
507 return;
508 }
509
510 tx_req = &tx->tx_ring[wr_id];
511
512 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
513
514 /* FIXME: is this right? Shouldn't we only increment on success? */
515 ++priv->stats.tx_packets;
516 priv->stats.tx_bytes += tx_req->skb->len;
517
518 dev_kfree_skb_any(tx_req->skb);
519
520 spin_lock_irqsave(&priv->tx_lock, flags);
521 ++tx->tx_tail;
522 if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags)) &&
523 tx->tx_head - tx->tx_tail <= ipoib_sendq_size >> 1) {
524 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags);
525 netif_wake_queue(dev);
526 }
527
528 if (wc->status != IB_WC_SUCCESS &&
529 wc->status != IB_WC_WR_FLUSH_ERR) {
530 struct ipoib_neigh *neigh;
531
532 ipoib_dbg(priv, "failed cm send event "
533 "(status=%d, wrid=%d vend_err %x)\n",
534 wc->status, wr_id, wc->vendor_err);
535
536 spin_lock(&priv->lock);
537 neigh = tx->neigh;
538
539 if (neigh) {
540 neigh->cm = NULL;
541 list_del(&neigh->list);
542 if (neigh->ah)
543 ipoib_put_ah(neigh->ah);
544 ipoib_neigh_free(dev, neigh);
545
546 tx->neigh = NULL;
547 }
548
549 /* queue would be re-started anyway when TX is destroyed,
550 * but it makes sense to do it ASAP here. */
551 if (test_and_clear_bit(IPOIB_FLAG_NETIF_STOPPED, &tx->flags))
552 netif_wake_queue(dev);
553
554 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
555 list_move(&tx->list, &priv->cm.reap_list);
556 queue_work(ipoib_workqueue, &priv->cm.reap_task);
557 }
558
559 clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
560
561 spin_unlock(&priv->lock);
562 }
563
564 spin_unlock_irqrestore(&priv->tx_lock, flags);
565}
566
567static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
568{
569 struct ipoib_cm_tx *tx = tx_ptr;
570 int n, i;
571
572 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
573 do {
574 n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
575 for (i = 0; i < n; ++i)
576 ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
577 } while (n == IPOIB_NUM_WC);
578}
579
580int ipoib_cm_dev_open(struct net_device *dev)
581{
582 struct ipoib_dev_priv *priv = netdev_priv(dev);
583 int ret;
584
585 if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
586 return 0;
587
588 priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
589 if (IS_ERR(priv->cm.id)) {
590 printk(KERN_WARNING "%s: failed to create CM ID\n", priv->ca->name);
591 return PTR_ERR(priv->cm.id);
592 }
593
594 ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num),
595 0, NULL);
596 if (ret) {
597 printk(KERN_WARNING "%s: failed to listen on ID 0x%llx\n", priv->ca->name,
598 IPOIB_CM_IETF_ID | priv->qp->qp_num);
599 ib_destroy_cm_id(priv->cm.id);
600 return ret;
601 }
602 return 0;
603}
604
605void ipoib_cm_dev_stop(struct net_device *dev)
606{
607 struct ipoib_dev_priv *priv = netdev_priv(dev);
608 struct ipoib_cm_rx *p;
609 unsigned long flags;
610
611 if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
612 return;
613
614 ib_destroy_cm_id(priv->cm.id);
615 spin_lock_irqsave(&priv->lock, flags);
616 while (!list_empty(&priv->cm.passive_ids)) {
617 p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
618 list_del_init(&p->list);
619 spin_unlock_irqrestore(&priv->lock, flags);
620 ib_destroy_cm_id(p->id);
621 ib_destroy_qp(p->qp);
622 kfree(p);
623 spin_lock_irqsave(&priv->lock, flags);
624 }
625 spin_unlock_irqrestore(&priv->lock, flags);
626
627 cancel_delayed_work(&priv->cm.stale_task);
628}
629
630static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
631{
632 struct ipoib_cm_tx *p = cm_id->context;
633 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
634 struct ipoib_cm_data *data = event->private_data;
635 struct sk_buff_head skqueue;
636 struct ib_qp_attr qp_attr;
637 int qp_attr_mask, ret;
638 struct sk_buff *skb;
639 unsigned long flags;
640
641 p->mtu = be32_to_cpu(data->mtu);
642
643 if (p->mtu < priv->dev->mtu + IPOIB_ENCAP_LEN) {
644 ipoib_warn(priv, "Rejecting connection: mtu %d < device mtu %d + 4\n",
645 p->mtu, priv->dev->mtu);
646 return -EINVAL;
647 }
648
649 qp_attr.qp_state = IB_QPS_RTR;
650 ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
651 if (ret) {
652 ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
653 return ret;
654 }
655
656 qp_attr.rq_psn = 0 /* FIXME */;
657 ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
658 if (ret) {
659 ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
660 return ret;
661 }
662
663 qp_attr.qp_state = IB_QPS_RTS;
664 ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
665 if (ret) {
666 ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
667 return ret;
668 }
669 ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
670 if (ret) {
671 ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
672 return ret;
673 }
674
675 skb_queue_head_init(&skqueue);
676
677 spin_lock_irqsave(&priv->lock, flags);
678 set_bit(IPOIB_FLAG_OPER_UP, &p->flags);
679 if (p->neigh)
680 while ((skb = __skb_dequeue(&p->neigh->queue)))
681 __skb_queue_tail(&skqueue, skb);
682 spin_unlock_irqrestore(&priv->lock, flags);
683
684 while ((skb = __skb_dequeue(&skqueue))) {
685 skb->dev = p->dev;
686 if (dev_queue_xmit(skb))
687 ipoib_warn(priv, "dev_queue_xmit failed "
688 "to requeue packet\n");
689 }
690
691 ret = ib_send_cm_rtu(cm_id, NULL, 0);
692 if (ret) {
693 ipoib_warn(priv, "failed to send RTU: %d\n", ret);
694 return ret;
695 }
696 return 0;
697}
698
699static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ib_cq *cq)
700{
701 struct ipoib_dev_priv *priv = netdev_priv(dev);
702 struct ib_qp_init_attr attr = {};
703 attr.recv_cq = priv->cq;
704 attr.srq = priv->cm.srq;
705 attr.cap.max_send_wr = ipoib_sendq_size;
706 attr.cap.max_send_sge = 1;
707 attr.sq_sig_type = IB_SIGNAL_ALL_WR;
708 attr.qp_type = IB_QPT_RC;
709 attr.send_cq = cq;
710 return ib_create_qp(priv->pd, &attr);
711}
712
713static int ipoib_cm_send_req(struct net_device *dev,
714 struct ib_cm_id *id, struct ib_qp *qp,
715 u32 qpn,
716 struct ib_sa_path_rec *pathrec)
717{
718 struct ipoib_dev_priv *priv = netdev_priv(dev);
719 struct ipoib_cm_data data = {};
720 struct ib_cm_req_param req = {};
721
722 data.qpn = cpu_to_be32(priv->qp->qp_num);
723 data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
724
725 req.primary_path = pathrec;
726 req.alternate_path = NULL;
727 req.service_id = cpu_to_be64(IPOIB_CM_IETF_ID | qpn);
728 req.qp_num = qp->qp_num;
729 req.qp_type = qp->qp_type;
730 req.private_data = &data;
731 req.private_data_len = sizeof data;
732 req.flow_control = 0;
733
734 req.starting_psn = 0; /* FIXME */
735
736 /*
737 * Pick some arbitrary defaults here; we could make these
738 * module parameters if anyone cared about setting them.
739 */
740 req.responder_resources = 4;
741 req.remote_cm_response_timeout = 20;
742 req.local_cm_response_timeout = 20;
743 req.retry_count = 0; /* RFC draft warns against retries */
744 req.rnr_retry_count = 0; /* RFC draft warns against retries */
745 req.max_cm_retries = 15;
746 req.srq = 1;
747 return ib_send_cm_req(id, &req);
748}
749
750static int ipoib_cm_modify_tx_init(struct net_device *dev,
751 struct ib_cm_id *cm_id, struct ib_qp *qp)
752{
753 struct ipoib_dev_priv *priv = netdev_priv(dev);
754 struct ib_qp_attr qp_attr;
755 int qp_attr_mask, ret;
756 ret = ib_find_cached_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
757 if (ret) {
758 ipoib_warn(priv, "pkey 0x%x not in cache: %d\n", priv->pkey, ret);
759 return ret;
760 }
761
762 qp_attr.qp_state = IB_QPS_INIT;
763 qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
764 qp_attr.port_num = priv->port;
765 qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
766
767 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
768 if (ret) {
769 ipoib_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
770 return ret;
771 }
772 return 0;
773}
774
775static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
776 struct ib_sa_path_rec *pathrec)
777{
778 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
779 int ret;
780
781 p->tx_ring = kzalloc(ipoib_sendq_size * sizeof *p->tx_ring,
782 GFP_KERNEL);
783 if (!p->tx_ring) {
784 ipoib_warn(priv, "failed to allocate tx ring\n");
785 ret = -ENOMEM;
786 goto err_tx;
787 }
788
789 p->cq = ib_create_cq(priv->ca, ipoib_cm_tx_completion, NULL, p,
790 ipoib_sendq_size + 1);
791 if (IS_ERR(p->cq)) {
792 ret = PTR_ERR(p->cq);
793 ipoib_warn(priv, "failed to allocate tx cq: %d\n", ret);
794 goto err_cq;
795 }
796
797 ret = ib_req_notify_cq(p->cq, IB_CQ_NEXT_COMP);
798 if (ret) {
799 ipoib_warn(priv, "failed to request completion notification: %d\n", ret);
800 goto err_req_notify;
801 }
802
803 p->qp = ipoib_cm_create_tx_qp(p->dev, p->cq);
804 if (IS_ERR(p->qp)) {
805 ret = PTR_ERR(p->qp);
806 ipoib_warn(priv, "failed to allocate tx qp: %d\n", ret);
807 goto err_qp;
808 }
809
810 p->id = ib_create_cm_id(priv->ca, ipoib_cm_tx_handler, p);
811 if (IS_ERR(p->id)) {
812 ret = PTR_ERR(p->id);
813 ipoib_warn(priv, "failed to create tx cm id: %d\n", ret);
814 goto err_id;
815 }
816
817 ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
818 if (ret) {
819 ipoib_warn(priv, "failed to modify tx qp to init: %d\n", ret);
820 goto err_modify;
821 }
822
823 ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
824 if (ret) {
825 ipoib_warn(priv, "failed to send cm req: %d\n", ret);
826 goto err_send_cm;
827 }
828
829 ipoib_dbg(priv, "Request connection 0x%x for gid " IPOIB_GID_FMT " qpn 0x%x\n",
830 p->qp->qp_num, IPOIB_GID_ARG(pathrec->dgid), qpn);
831
832 return 0;
833
834err_send_cm:
835err_modify:
836 ib_destroy_cm_id(p->id);
837err_id:
838 p->id = NULL;
839 ib_destroy_qp(p->qp);
840err_req_notify:
841err_qp:
842 p->qp = NULL;
843 ib_destroy_cq(p->cq);
844err_cq:
845 p->cq = NULL;
846err_tx:
847 return ret;
848}
849
850static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
851{
852 struct ipoib_dev_priv *priv = netdev_priv(p->dev);
853 struct ipoib_tx_buf *tx_req;
854
855 ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
856 p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
857
858 if (p->id)
859 ib_destroy_cm_id(p->id);
860
861 if (p->qp)
862 ib_destroy_qp(p->qp);
863
864 if (p->cq)
865 ib_destroy_cq(p->cq);
866
867 if (test_bit(IPOIB_FLAG_NETIF_STOPPED, &p->flags))
868 netif_wake_queue(p->dev);
869
870 if (p->tx_ring) {
871 while ((int) p->tx_tail - (int) p->tx_head < 0) {
872 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
873 ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
874 DMA_TO_DEVICE);
875 dev_kfree_skb_any(tx_req->skb);
876 ++p->tx_tail;
877 }
878
879 kfree(p->tx_ring);
880 }
881
882 kfree(p);
883}
884
885static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
886 struct ib_cm_event *event)
887{
888 struct ipoib_cm_tx *tx = cm_id->context;
889 struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
890 struct net_device *dev = priv->dev;
891 struct ipoib_neigh *neigh;
892 unsigned long flags;
893 int ret;
894
895 switch (event->event) {
896 case IB_CM_DREQ_RECEIVED:
897 ipoib_dbg(priv, "DREQ received.\n");
898 ib_send_cm_drep(cm_id, NULL, 0);
899 break;
900 case IB_CM_REP_RECEIVED:
901 ipoib_dbg(priv, "REP received.\n");
902 ret = ipoib_cm_rep_handler(cm_id, event);
903 if (ret)
904 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
905 NULL, 0, NULL, 0);
906 break;
907 case IB_CM_REQ_ERROR:
908 case IB_CM_REJ_RECEIVED:
909 case IB_CM_TIMEWAIT_EXIT:
910 ipoib_dbg(priv, "CM error %d.\n", event->event);
911 spin_lock_irqsave(&priv->tx_lock, flags);
912 spin_lock(&priv->lock);
913 neigh = tx->neigh;
914
915 if (neigh) {
916 neigh->cm = NULL;
917 list_del(&neigh->list);
918 if (neigh->ah)
919 ipoib_put_ah(neigh->ah);
920 ipoib_neigh_free(dev, neigh);
921
922 tx->neigh = NULL;
923 }
924
925 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
926 list_move(&tx->list, &priv->cm.reap_list);
927 queue_work(ipoib_workqueue, &priv->cm.reap_task);
928 }
929
930 spin_unlock(&priv->lock);
931 spin_unlock_irqrestore(&priv->tx_lock, flags);
932 break;
933 default:
934 break;
935 }
936
937 return 0;
938}
939
940struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path *path,
941 struct ipoib_neigh *neigh)
942{
943 struct ipoib_dev_priv *priv = netdev_priv(dev);
944 struct ipoib_cm_tx *tx;
945
946 tx = kzalloc(sizeof *tx, GFP_ATOMIC);
947 if (!tx)
948 return NULL;
949
950 neigh->cm = tx;
951 tx->neigh = neigh;
952 tx->path = path;
953 tx->dev = dev;
954 list_add(&tx->list, &priv->cm.start_list);
955 set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
956 queue_work(ipoib_workqueue, &priv->cm.start_task);
957 return tx;
958}
959
960void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
961{
962 struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
963 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
964 list_move(&tx->list, &priv->cm.reap_list);
965 queue_work(ipoib_workqueue, &priv->cm.reap_task);
966 ipoib_dbg(priv, "Reap connection for gid " IPOIB_GID_FMT "\n",
967 IPOIB_GID_ARG(tx->neigh->dgid));
968 tx->neigh = NULL;
969 }
970}
971
972static void ipoib_cm_tx_start(struct work_struct *work)
973{
974 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
975 cm.start_task);
976 struct net_device *dev = priv->dev;
977 struct ipoib_neigh *neigh;
978 struct ipoib_cm_tx *p;
979 unsigned long flags;
980 int ret;
981
982 struct ib_sa_path_rec pathrec;
983 u32 qpn;
984
985 spin_lock_irqsave(&priv->tx_lock, flags);
986 spin_lock(&priv->lock);
987 while (!list_empty(&priv->cm.start_list)) {
988 p = list_entry(priv->cm.start_list.next, typeof(*p), list);
989 list_del_init(&p->list);
990 neigh = p->neigh;
991 qpn = IPOIB_QPN(neigh->neighbour->ha);
992 memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
993 spin_unlock(&priv->lock);
994 spin_unlock_irqrestore(&priv->tx_lock, flags);
995 ret = ipoib_cm_tx_init(p, qpn, &pathrec);
996 spin_lock_irqsave(&priv->tx_lock, flags);
997 spin_lock(&priv->lock);
998 if (ret) {
999 neigh = p->neigh;
1000 if (neigh) {
1001 neigh->cm = NULL;
1002 list_del(&neigh->list);
1003 if (neigh->ah)
1004 ipoib_put_ah(neigh->ah);
1005 ipoib_neigh_free(dev, neigh);
1006 }
1007 list_del(&p->list);
1008 kfree(p);
1009 }
1010 }
1011 spin_unlock(&priv->lock);
1012 spin_unlock_irqrestore(&priv->tx_lock, flags);
1013}
1014
1015static void ipoib_cm_tx_reap(struct work_struct *work)
1016{
1017 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1018 cm.reap_task);
1019 struct ipoib_cm_tx *p;
1020 unsigned long flags;
1021
1022 spin_lock_irqsave(&priv->tx_lock, flags);
1023 spin_lock(&priv->lock);
1024 while (!list_empty(&priv->cm.reap_list)) {
1025 p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
1026 list_del(&p->list);
1027 spin_unlock(&priv->lock);
1028 spin_unlock_irqrestore(&priv->tx_lock, flags);
1029 ipoib_cm_tx_destroy(p);
1030 spin_lock_irqsave(&priv->tx_lock, flags);
1031 spin_lock(&priv->lock);
1032 }
1033 spin_unlock(&priv->lock);
1034 spin_unlock_irqrestore(&priv->tx_lock, flags);
1035}
1036
1037static void ipoib_cm_skb_reap(struct work_struct *work)
1038{
1039 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1040 cm.skb_task);
1041 struct net_device *dev = priv->dev;
1042 struct sk_buff *skb;
1043 unsigned long flags;
1044
1045 unsigned mtu = priv->mcast_mtu;
1046
1047 spin_lock_irqsave(&priv->tx_lock, flags);
1048 spin_lock(&priv->lock);
1049 while ((skb = skb_dequeue(&priv->cm.skb_queue))) {
1050 spin_unlock(&priv->lock);
1051 spin_unlock_irqrestore(&priv->tx_lock, flags);
1052 if (skb->protocol == htons(ETH_P_IP))
1053 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1054#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1055 else if (skb->protocol == htons(ETH_P_IPV6))
1056 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
1057#endif
1058 dev_kfree_skb_any(skb);
1059 spin_lock_irqsave(&priv->tx_lock, flags);
1060 spin_lock(&priv->lock);
1061 }
1062 spin_unlock(&priv->lock);
1063 spin_unlock_irqrestore(&priv->tx_lock, flags);
1064}
1065
1066void ipoib_cm_skb_too_long(struct net_device* dev, struct sk_buff *skb,
1067 unsigned int mtu)
1068{
1069 struct ipoib_dev_priv *priv = netdev_priv(dev);
1070 int e = skb_queue_empty(&priv->cm.skb_queue);
1071
1072 if (skb->dst)
1073 skb->dst->ops->update_pmtu(skb->dst, mtu);
1074
1075 skb_queue_tail(&priv->cm.skb_queue, skb);
1076 if (e)
1077 queue_work(ipoib_workqueue, &priv->cm.skb_task);
1078}
1079
1080static void ipoib_cm_stale_task(struct work_struct *work)
1081{
1082 struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
1083 cm.stale_task.work);
1084 struct ipoib_cm_rx *p;
1085 unsigned long flags;
1086
1087 spin_lock_irqsave(&priv->lock, flags);
1088 while (!list_empty(&priv->cm.passive_ids)) {
1089 /* List is sorted by LRU, start from tail,
1090 * stop when we see a recently used entry */
1091 p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
1092 if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
1093 break;
1094 list_del_init(&p->list);
1095 spin_unlock_irqrestore(&priv->lock, flags);
1096 ib_destroy_cm_id(p->id);
1097 ib_destroy_qp(p->qp);
1098 kfree(p);
1099 spin_lock_irqsave(&priv->lock, flags);
1100 }
1101 spin_unlock_irqrestore(&priv->lock, flags);
1102}
1103
1104
1105static ssize_t show_mode(struct device *d, struct device_attribute *attr,
1106 char *buf)
1107{
1108 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(d));
1109
1110 if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
1111 return sprintf(buf, "connected\n");
1112 else
1113 return sprintf(buf, "datagram\n");
1114}
1115
1116static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1117 const char *buf, size_t count)
1118{
1119 struct net_device *dev = to_net_dev(d);
1120 struct ipoib_dev_priv *priv = netdev_priv(dev);
1121
1122 /* flush paths if we switch modes so that connections are restarted */
1123 if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
1124 set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1125 ipoib_warn(priv, "enabling connected mode "
1126 "will cause multicast packet drops\n");
1127 ipoib_flush_paths(dev);
1128 return count;
1129 }
1130
1131 if (!strcmp(buf, "datagram\n")) {
1132 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1133 dev->mtu = min(priv->mcast_mtu, dev->mtu);
1134 ipoib_flush_paths(dev);
1135 return count;
1136 }
1137
1138 return -EINVAL;
1139}
1140
1141static DEVICE_ATTR(mode, S_IWUGO | S_IRUGO, show_mode, set_mode);
1142
1143int ipoib_cm_add_mode_attr(struct net_device *dev)
1144{
1145 return device_create_file(&dev->dev, &dev_attr_mode);
1146}
1147
1148int ipoib_cm_dev_init(struct net_device *dev)
1149{
1150 struct ipoib_dev_priv *priv = netdev_priv(dev);
1151 struct ib_srq_init_attr srq_init_attr = {
1152 .attr = {
1153 .max_wr = ipoib_recvq_size,
1154 .max_sge = IPOIB_CM_RX_SG
1155 }
1156 };
1157 int ret, i;
1158
1159 INIT_LIST_HEAD(&priv->cm.passive_ids);
1160 INIT_LIST_HEAD(&priv->cm.reap_list);
1161 INIT_LIST_HEAD(&priv->cm.start_list);
1162 INIT_WORK(&priv->cm.start_task, ipoib_cm_tx_start);
1163 INIT_WORK(&priv->cm.reap_task, ipoib_cm_tx_reap);
1164 INIT_WORK(&priv->cm.skb_task, ipoib_cm_skb_reap);
1165 INIT_DELAYED_WORK(&priv->cm.stale_task, ipoib_cm_stale_task);
1166
1167 skb_queue_head_init(&priv->cm.skb_queue);
1168
1169 priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
1170 if (IS_ERR(priv->cm.srq)) {
1171 ret = PTR_ERR(priv->cm.srq);
1172 priv->cm.srq = NULL;
1173 return ret;
1174 }
1175
1176 priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
1177 GFP_KERNEL);
1178 if (!priv->cm.srq_ring) {
1179 printk(KERN_WARNING "%s: failed to allocate CM ring (%d entries)\n",
1180 priv->ca->name, ipoib_recvq_size);
1181 ipoib_cm_dev_cleanup(dev);
1182 return -ENOMEM;
1183 }
1184
1185 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
1186 priv->cm.rx_sge[i].lkey = priv->mr->lkey;
1187
1188 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
1189 for (i = 1; i < IPOIB_CM_RX_SG; ++i)
1190 priv->cm.rx_sge[i].length = PAGE_SIZE;
1191 priv->cm.rx_wr.next = NULL;
1192 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1193 priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
1194
1195 for (i = 0; i < ipoib_recvq_size; ++i) {
1196 if (ipoib_cm_alloc_rx_skb(dev, i, priv->cm.srq_ring[i].mapping)) {
1197 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
1198 ipoib_cm_dev_cleanup(dev);
1199 return -ENOMEM;
1200 }
1201 if (ipoib_cm_post_receive(dev, i)) {
1202 ipoib_warn(priv, "ipoib_cm_post_receive failed for buf %d\n", i);
1203 ipoib_cm_dev_cleanup(dev);
1204 return -EIO;
1205 }
1206 }
1207
1208 priv->dev->dev_addr[0] = IPOIB_FLAGS_RC;
1209 return 0;
1210}
1211
1212void ipoib_cm_dev_cleanup(struct net_device *dev)
1213{
1214 struct ipoib_dev_priv *priv = netdev_priv(dev);
1215 int i, ret;
1216
1217 if (!priv->cm.srq)
1218 return;
1219
1220 ipoib_dbg(priv, "Cleanup ipoib connected mode.\n");
1221
1222 ret = ib_destroy_srq(priv->cm.srq);
1223 if (ret)
1224 ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret);
1225
1226 priv->cm.srq = NULL;
1227 if (!priv->cm.srq_ring)
1228 return;
1229 for (i = 0; i < ipoib_recvq_size; ++i)
1230 if (priv->cm.srq_ring[i].skb) {
1231 ipoib_cm_dma_unmap_rx(priv, priv->cm.srq_ring[i].mapping);
1232 dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
1233 priv->cm.srq_ring[i].skb = NULL;
1234 }
1235 kfree(priv->cm.srq_ring);
1236 priv->cm.srq_ring = NULL;
1237}
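
ipoib_cm_send() and ipoib_cm_handle_tx_wc() above manage the per-connection TX ring with free-running head and tail counters and a power-of-two ring size: the slot is tx_head & (ipoib_sendq_size - 1), the net queue is stopped when head - tail reaches ipoib_sendq_size, and it is only woken again once the backlog drops to half the ring. A small userspace sketch of that index arithmetic (SENDQ_SIZE and the helper names are invented for illustration):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define SENDQ_SIZE 64u   /* stand-in for ipoib_sendq_size; must be a power of two */

struct toy_ring {
	unsigned head;   /* advanced by the send path */
	unsigned tail;   /* advanced by the completion handler */
};

static unsigned slot(unsigned idx)              { return idx & (SENDQ_SIZE - 1); }
static bool ring_full(const struct toy_ring *r) { return r->head - r->tail == SENDQ_SIZE; }
static bool can_wake(const struct toy_ring *r)  { return r->head - r->tail <= SENDQ_SIZE / 2; }

int main(void)
{
	struct toy_ring r = { 0, 0 };

	/* post until full, as ipoib_cm_send() does before stopping the queue */
	while (!ring_full(&r))
		r.head++;
	assert(slot(r.head) == slot(r.tail));   /* counters wrap onto the same slot */
	printf("ring full after %u posts\n", r.head - r.tail);

	/* drain completions; the queue would be woken at the half-full mark */
	while (!can_wake(&r))
		r.tail++;
	printf("wake threshold reached with %u still outstanding\n", r.head - r.tail);
	return 0;
}
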
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 59d9594ed6d9..f2aa923ddbea 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -50,8 +50,6 @@ MODULE_PARM_DESC(data_debug_level,
50 "Enable data path debug tracing if > 0"); 50 "Enable data path debug tracing if > 0");
51#endif 51#endif
52 52
53#define IPOIB_OP_RECV (1ul << 31)
54
55static DEFINE_MUTEX(pkey_mutex); 53static DEFINE_MUTEX(pkey_mutex);
56 54
57struct ipoib_ah *ipoib_create_ah(struct net_device *dev, 55struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
@@ -268,10 +266,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
268 266
269 spin_lock_irqsave(&priv->tx_lock, flags); 267 spin_lock_irqsave(&priv->tx_lock, flags);
270 ++priv->tx_tail; 268 ++priv->tx_tail;
271 if (netif_queue_stopped(dev) && 269 if (unlikely(test_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags)) &&
272 test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) && 270 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) {
273 priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1) 271 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
274 netif_wake_queue(dev); 272 netif_wake_queue(dev);
273 }
275 spin_unlock_irqrestore(&priv->tx_lock, flags); 274 spin_unlock_irqrestore(&priv->tx_lock, flags);
276 275
277 if (wc->status != IB_WC_SUCCESS && 276 if (wc->status != IB_WC_SUCCESS &&
@@ -283,7 +282,9 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
283 282
284static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc) 283static void ipoib_ib_handle_wc(struct net_device *dev, struct ib_wc *wc)
285{ 284{
286 if (wc->wr_id & IPOIB_OP_RECV) 285 if (wc->wr_id & IPOIB_CM_OP_SRQ)
286 ipoib_cm_handle_rx_wc(dev, wc);
287 else if (wc->wr_id & IPOIB_OP_RECV)
287 ipoib_ib_handle_rx_wc(dev, wc); 288 ipoib_ib_handle_rx_wc(dev, wc);
288 else 289 else
289 ipoib_ib_handle_tx_wc(dev, wc); 290 ipoib_ib_handle_tx_wc(dev, wc);
@@ -327,12 +328,12 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
327 struct ipoib_tx_buf *tx_req; 328 struct ipoib_tx_buf *tx_req;
328 u64 addr; 329 u64 addr;
329 330
330 if (unlikely(skb->len > dev->mtu + INFINIBAND_ALEN)) { 331 if (unlikely(skb->len > priv->mcast_mtu + INFINIBAND_ALEN)) {
331 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", 332 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
332 skb->len, dev->mtu + INFINIBAND_ALEN); 333 skb->len, priv->mcast_mtu + INFINIBAND_ALEN);
333 ++priv->stats.tx_dropped; 334 ++priv->stats.tx_dropped;
334 ++priv->stats.tx_errors; 335 ++priv->stats.tx_errors;
335 dev_kfree_skb_any(skb); 336 ipoib_cm_skb_too_long(dev, skb, priv->mcast_mtu);
336 return; 337 return;
337 } 338 }
338 339
@@ -372,6 +373,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
372 if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) { 373 if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
373 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); 374 ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
374 netif_stop_queue(dev); 375 netif_stop_queue(dev);
376 set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
375 } 377 }
376 } 378 }
377} 379}
@@ -424,6 +426,13 @@ int ipoib_ib_dev_open(struct net_device *dev)
424 return -1; 426 return -1;
425 } 427 }
426 428
429 ret = ipoib_cm_dev_open(dev);
430 if (ret) {
431 ipoib_warn(priv, "ipoib_cm_dev_open returned %d\n", ret);
432 ipoib_ib_dev_stop(dev);
433 return -1;
434 }
435
427 clear_bit(IPOIB_STOP_REAPER, &priv->flags); 436 clear_bit(IPOIB_STOP_REAPER, &priv->flags);
428 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ); 437 queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task, HZ);
429 438
@@ -509,6 +518,8 @@ int ipoib_ib_dev_stop(struct net_device *dev)
509 518
510 clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); 519 clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
511 520
521 ipoib_cm_dev_stop(dev);
522
512 /* 523 /*
513 * Move our QP to the error state and then reinitialize in 524 * Move our QP to the error state and then reinitialize in
514 * when all work requests have completed or have been flushed. 525 * when all work requests have completed or have been flushed.
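
With the hunk above, ipoib_ib_handle_wc() demultiplexes completions purely by flag bits folded into the 64-bit wr_id: IPOIB_CM_OP_SRQ (bit 30) marks connected-mode SRQ receives, IPOIB_OP_RECV (bit 31) marks datagram receives, and anything else is a send; the low bits index the corresponding ring. A standalone sketch of that dispatch (constant names shortened, values copied from the driver):

#include <stdint.h>
#include <stdio.h>

#define OP_RECV   (1ul << 31)   /* datagram (UD) receive, as IPOIB_OP_RECV */
#define CM_OP_SRQ (1ul << 30)   /* connected-mode SRQ receive, as IPOIB_CM_OP_SRQ */

/* Same check order as ipoib_ib_handle_wc(): SRQ bit first, then UD receive. */
static const char *classify(uint64_t wr_id)
{
	if (wr_id & CM_OP_SRQ)
		return "connected-mode SRQ receive";
	if (wr_id & OP_RECV)
		return "datagram (UD) receive";
	return "send completion";
}

int main(void)
{
	uint64_t ids[] = { 5 | CM_OP_SRQ, 7 | OP_RECV, 3 };

	for (unsigned i = 0; i < 3; i++)
		printf("wr_id 0x%llx -> %s, ring index %llu\n",
		       (unsigned long long) ids[i], classify(ids[i]),
		       (unsigned long long) (ids[i] & ~(OP_RECV | CM_OP_SRQ)));
	return 0;
}
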
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index af5ee2ec4499..18d27fd352ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -49,8 +49,6 @@
49 49
50#include <net/dst.h> 50#include <net/dst.h>
51 51
52#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
53
54MODULE_AUTHOR("Roland Dreier"); 52MODULE_AUTHOR("Roland Dreier");
55MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 53MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
56MODULE_LICENSE("Dual BSD/GPL"); 54MODULE_LICENSE("Dual BSD/GPL");
@@ -145,6 +143,8 @@ static int ipoib_stop(struct net_device *dev)
145 143
146 netif_stop_queue(dev); 144 netif_stop_queue(dev);
147 145
146 clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
147
148 /* 148 /*
149 * Now flush workqueue to make sure a scheduled task doesn't 149 * Now flush workqueue to make sure a scheduled task doesn't
150 * bring our internal state back up. 150 * bring our internal state back up.
@@ -178,8 +178,18 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
178{ 178{
179 struct ipoib_dev_priv *priv = netdev_priv(dev); 179 struct ipoib_dev_priv *priv = netdev_priv(dev);
180 180
181 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) 181 /* dev->mtu > 2K ==> connected mode */
182 if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
183 if (new_mtu > priv->mcast_mtu)
184 ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
185 priv->mcast_mtu);
186 dev->mtu = new_mtu;
187 return 0;
188 }
189
190 if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
182 return -EINVAL; 191 return -EINVAL;
192 }
183 193
184 priv->admin_mtu = new_mtu; 194 priv->admin_mtu = new_mtu;
185 195
@@ -414,6 +424,20 @@ static void path_rec_completion(int status,
414 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, 424 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
415 sizeof(union ib_gid)); 425 sizeof(union ib_gid));
416 426
427 if (ipoib_cm_enabled(dev, neigh->neighbour)) {
428 if (!ipoib_cm_get(neigh))
429 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
430 path,
431 neigh));
432 if (!ipoib_cm_get(neigh)) {
433 list_del(&neigh->list);
434 if (neigh->ah)
435 ipoib_put_ah(neigh->ah);
436 ipoib_neigh_free(dev, neigh);
437 continue;
438 }
439 }
440
417 while ((skb = __skb_dequeue(&neigh->queue))) 441 while ((skb = __skb_dequeue(&neigh->queue)))
418 __skb_queue_tail(&skqueue, skb); 442 __skb_queue_tail(&skqueue, skb);
419 } 443 }
@@ -520,7 +544,25 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
520 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw, 544 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
521 sizeof(union ib_gid)); 545 sizeof(union ib_gid));
522 546
523 ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha)); 547 if (ipoib_cm_enabled(dev, neigh->neighbour)) {
548 if (!ipoib_cm_get(neigh))
549 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
550 if (!ipoib_cm_get(neigh)) {
551 list_del(&neigh->list);
552 if (neigh->ah)
553 ipoib_put_ah(neigh->ah);
554 ipoib_neigh_free(dev, neigh);
555 goto err_drop;
556 }
557 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
558 __skb_queue_tail(&neigh->queue, skb);
559 else {
560 ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
561 skb_queue_len(&neigh->queue));
562 goto err_drop;
563 }
564 } else
565 ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
524 } else { 566 } else {
525 neigh->ah = NULL; 567 neigh->ah = NULL;
526 568
@@ -538,6 +580,7 @@ err_list:
538 580
539err_path: 581err_path:
540 ipoib_neigh_free(dev, neigh); 582 ipoib_neigh_free(dev, neigh);
583err_drop:
541 ++priv->stats.tx_dropped; 584 ++priv->stats.tx_dropped;
542 dev_kfree_skb_any(skb); 585 dev_kfree_skb_any(skb);
543 586
@@ -640,7 +683,12 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
640 683
641 neigh = *to_ipoib_neigh(skb->dst->neighbour); 684 neigh = *to_ipoib_neigh(skb->dst->neighbour);
642 685
643 if (likely(neigh->ah)) { 686 if (ipoib_cm_get(neigh)) {
687 if (ipoib_cm_up(neigh)) {
688 ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
689 goto out;
690 }
691 } else if (neigh->ah) {
644 if (unlikely(memcmp(&neigh->dgid.raw, 692 if (unlikely(memcmp(&neigh->dgid.raw,
645 skb->dst->neighbour->ha + 4, 693 skb->dst->neighbour->ha + 4,
646 sizeof(union ib_gid)))) { 694 sizeof(union ib_gid)))) {
@@ -805,6 +853,7 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour)
805 neigh->neighbour = neighbour; 853 neigh->neighbour = neighbour;
806 *to_ipoib_neigh(neighbour) = neigh; 854 *to_ipoib_neigh(neighbour) = neigh;
807 skb_queue_head_init(&neigh->queue); 855 skb_queue_head_init(&neigh->queue);
856 ipoib_cm_set(neigh, NULL);
808 857
809 return neigh; 858 return neigh;
810} 859}
@@ -818,6 +867,8 @@ void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
818 ++priv->stats.tx_dropped; 867 ++priv->stats.tx_dropped;
819 dev_kfree_skb_any(skb); 868 dev_kfree_skb_any(skb);
820 } 869 }
870 if (ipoib_cm_get(neigh))
871 ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
821 kfree(neigh); 872 kfree(neigh);
822} 873}
823 874
@@ -1080,6 +1131,8 @@ static struct net_device *ipoib_add_port(const char *format,
1080 1131
1081 ipoib_create_debug_files(priv->dev); 1132 ipoib_create_debug_files(priv->dev);
1082 1133
1134 if (ipoib_cm_add_mode_attr(priv->dev))
1135 goto sysfs_failed;
1083 if (ipoib_add_pkey_attr(priv->dev)) 1136 if (ipoib_add_pkey_attr(priv->dev))
1084 goto sysfs_failed; 1137 goto sysfs_failed;
1085 if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) 1138 if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
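
The ipoib_change_mtu() hunk above lets an interface in connected mode accept MTUs up to IPOIB_CM_MTU (with a warning once the MTU exceeds mcast_mtu), while datagram mode keeps the old IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN cap. A sketch of that decision with placeholder limits (the real constants live in ipoib.h and are assumed here, not reproduced):

#include <stdbool.h>
#include <stdio.h>

/* Placeholder limits for illustration only. */
#define CM_MTU_LIMIT 65520   /* stands in for IPOIB_CM_MTU */
#define UD_MTU_LIMIT 2044    /* stands in for IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN */

/* Mirrors the shape of the ipoib_change_mtu() check: connected mode admits
 * large MTUs, datagram mode keeps the 2K-packet cap. */
static int check_mtu(bool connected_mode, int new_mtu)
{
	if (connected_mode && new_mtu <= CM_MTU_LIMIT)
		return 0;
	if (new_mtu > UD_MTU_LIMIT)
		return -1;   /* -EINVAL in the driver */
	return 0;
}

int main(void)
{
	printf("datagram,  mtu 9000: %s\n", check_mtu(false, 9000) ? "rejected" : "accepted");
	printf("connected, mtu 9000: %s\n", check_mtu(true, 9000) ? "rejected" : "accepted");
	printf("datagram,  mtu 1500: %s\n", check_mtu(false, 1500) ? "rejected" : "accepted");
	return 0;
}
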
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index b04b72ca32ed..fea737f520fd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -597,7 +597,9 @@ void ipoib_mcast_join_task(struct work_struct *work)
597 597
598 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - 598 priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
599 IPOIB_ENCAP_LEN; 599 IPOIB_ENCAP_LEN;
600 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); 600
601 if (!ipoib_cm_admin_enabled(dev))
602 dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
601 603
602 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 604 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
603 605
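
The multicast join path above computes mcast_mtu as the broadcast group's IB link MTU minus the 4-byte IPoIB encapsulation header, and now only folds it into dev->mtu when connected mode is not administratively enabled. A small sketch of that arithmetic, using the IBTA MTU enum-to-byte mapping (treat the table as illustrative):

#include <stdio.h>

#define ENCAP_LEN 4   /* IPoIB encapsulation header */

/* Rough equivalent of ib_mtu_enum_to_int() for the standard IB MTU enums. */
static int mtu_enum_to_int(int e)
{
	switch (e) {
	case 1: return 256;
	case 2: return 512;
	case 3: return 1024;
	case 4: return 2048;
	case 5: return 4096;
	default: return -1;
	}
}

int main(void)
{
	for (int e = 1; e <= 5; e++)
		printf("IB MTU enum %d -> link MTU %d -> mcast_mtu %d\n",
		       e, mtu_enum_to_int(e), mtu_enum_to_int(e) - ENCAP_LEN);
	return 0;
}
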
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 7b717c648f72..3cb551b88756 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -168,35 +168,41 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
168 .qp_type = IB_QPT_UD 168 .qp_type = IB_QPT_UD
169 }; 169 };
170 170
171 int ret, size;
172
171 priv->pd = ib_alloc_pd(priv->ca); 173 priv->pd = ib_alloc_pd(priv->ca);
172 if (IS_ERR(priv->pd)) { 174 if (IS_ERR(priv->pd)) {
173 printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); 175 printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
174 return -ENODEV; 176 return -ENODEV;
175 } 177 }
176 178
177 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, 179 priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
178 ipoib_sendq_size + ipoib_recvq_size + 1); 180 if (IS_ERR(priv->mr)) {
181 printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
182 goto out_free_pd;
183 }
184
185 size = ipoib_sendq_size + ipoib_recvq_size + 1;
186 ret = ipoib_cm_dev_init(dev);
187 if (!ret)
188 size += ipoib_recvq_size;
189
190 priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size);
179 if (IS_ERR(priv->cq)) { 191 if (IS_ERR(priv->cq)) {
180 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name); 192 printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
181 goto out_free_pd; 193 goto out_free_mr;
182 } 194 }
183 195
184 if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP)) 196 if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
185 goto out_free_cq; 197 goto out_free_cq;
186 198
187 priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
188 if (IS_ERR(priv->mr)) {
189 printk(KERN_WARNING "%s: ib_get_dma_mr failed\n", ca->name);
190 goto out_free_cq;
191 }
192
193 init_attr.send_cq = priv->cq; 199 init_attr.send_cq = priv->cq;
194 init_attr.recv_cq = priv->cq, 200 init_attr.recv_cq = priv->cq,
195 201
196 priv->qp = ib_create_qp(priv->pd, &init_attr); 202 priv->qp = ib_create_qp(priv->pd, &init_attr);
197 if (IS_ERR(priv->qp)) { 203 if (IS_ERR(priv->qp)) {
198 printk(KERN_WARNING "%s: failed to create QP\n", ca->name); 204 printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
199 goto out_free_mr; 205 goto out_free_cq;
200 } 206 }
201 207
202 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff; 208 priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -212,12 +218,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
212 218
213 return 0; 219 return 0;
214 220
215out_free_mr:
216 ib_dereg_mr(priv->mr);
217
218out_free_cq: 221out_free_cq:
219 ib_destroy_cq(priv->cq); 222 ib_destroy_cq(priv->cq);
220 223
224out_free_mr:
225 ib_dereg_mr(priv->mr);
226
221out_free_pd: 227out_free_pd:
222 ib_dealloc_pd(priv->pd); 228 ib_dealloc_pd(priv->pd);
223 return -ENODEV; 229 return -ENODEV;
@@ -235,12 +241,14 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
235 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 241 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
236 } 242 }
237 243
238 if (ib_dereg_mr(priv->mr))
239 ipoib_warn(priv, "ib_dereg_mr failed\n");
240
241 if (ib_destroy_cq(priv->cq)) 244 if (ib_destroy_cq(priv->cq))
242 ipoib_warn(priv, "ib_cq_destroy failed\n"); 245 ipoib_warn(priv, "ib_cq_destroy failed\n");
243 246
247 ipoib_cm_dev_cleanup(dev);
248
249 if (ib_dereg_mr(priv->mr))
250 ipoib_warn(priv, "ib_dereg_mr failed\n");
251
244 if (ib_dealloc_pd(priv->pd)) 252 if (ib_dealloc_pd(priv->pd))
245 ipoib_warn(priv, "ib_dealloc_pd failed\n"); 253 ipoib_warn(priv, "ib_dealloc_pd failed\n");
246} 254}
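
ipoib_transport_dev_init() above now sizes the single completion queue for both UD work queues plus one spare entry, and adds another ipoib_recvq_size entries when ipoib_cm_dev_init() succeeds, since connected-mode SRQ receives complete on the same CQ. A toy version of that sizing (the queue depths are made-up examples; the driver takes them from module parameters):

#include <stdio.h>

#define SENDQ 128   /* stand-in for ipoib_sendq_size */
#define RECVQ 256   /* stand-in for ipoib_recvq_size */

/* Mirrors the CQ sizing: UD send + UD receive + 1 spare, plus the SRQ
 * receive ring when connected mode initialized successfully. */
static int cq_entries(int cm_dev_init_ret)
{
	int size = SENDQ + RECVQ + 1;

	if (!cm_dev_init_ret)
		size += RECVQ;
	return size;
}

int main(void)
{
	printf("CQ entries without connected mode: %d\n", cq_entries(-1));
	printf("CQ entries with connected mode:    %d\n", cq_entries(0));
	return 0;
}
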
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 085eafe6667c..6762988439d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -115,6 +115,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
115 115
116 ipoib_create_debug_files(priv->dev); 116 ipoib_create_debug_files(priv->dev);
117 117
118 if (ipoib_cm_add_mode_attr(priv->dev))
119 goto sysfs_failed;
118 if (ipoib_add_pkey_attr(priv->dev)) 120 if (ipoib_add_pkey_attr(priv->dev))
119 goto sysfs_failed; 121 goto sysfs_failed;
120 122