aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-09 22:40:09 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-09 22:40:09 -0400
commitde5603748af8bf7deac403e6ba92887f8d18e812 (patch)
treefd93862c70ec0eeba5fe340d7519ab33a9cb1dff /drivers/infiniband
parentb5f0adbcc4f16e378882d8f68fe3111df04911be (diff)
parent225c7b1feef1b41170f7037a5b10a65cd8a42c54 (diff)
Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband
* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband: IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters IB: Put rlimit accounting struct in struct ib_umem IB/uverbs: Export ib_umem_get()/ib_umem_release() to modules
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig7
-rw-r--r--drivers/infiniband/Makefile1
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/device.c2
-rw-r--r--drivers/infiniband/core/umem.c (renamed from drivers/infiniband/core/uverbs_mem.c)153
-rw-r--r--drivers/infiniband/core/uverbs.h6
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c60
-rw-r--r--drivers/infiniband/core/uverbs_main.c11
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c42
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c28
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_iverbs.h3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c69
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mr.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h5
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig9
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile3
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c100
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c525
-rw-r--r--drivers/infiniband/hw/mlx4/doorbell.c216
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c339
-rw-r--r--drivers/infiniband/hw/mlx4/main.c651
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h285
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c184
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c1294
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c334
-rw-r--r--drivers/infiniband/hw/mlx4/user.h92
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c38
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h1
31 files changed, 4309 insertions, 194 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de9fa6f..37deaae49190 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
29 libibverbs, libibcm and a hardware driver library from 29 libibverbs, libibcm and a hardware driver library from
30 <http://www.openib.org>. 30 <http://www.openib.org>.
31 31
32config INFINIBAND_USER_MEM
33 bool
34 depends on INFINIBAND_USER_ACCESS != n
35 default y
36
32config INFINIBAND_ADDR_TRANS 37config INFINIBAND_ADDR_TRANS
33 bool 38 bool
34 depends on INFINIBAND && INET 39 depends on INFINIBAND && INET
@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig"
40source "drivers/infiniband/hw/amso1100/Kconfig" 45source "drivers/infiniband/hw/amso1100/Kconfig"
41source "drivers/infiniband/hw/cxgb3/Kconfig" 46source "drivers/infiniband/hw/cxgb3/Kconfig"
42 47
48source "drivers/infiniband/hw/mlx4/Kconfig"
49
43source "drivers/infiniband/ulp/ipoib/Kconfig" 50source "drivers/infiniband/ulp/ipoib/Kconfig"
44 51
45source "drivers/infiniband/ulp/srp/Kconfig" 52source "drivers/infiniband/ulp/srp/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index da2066c4f22c..75f325e40b54 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
4obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ 4obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
5obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ 5obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
6obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ 6obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
7obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
7obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 8obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
8obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 9obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
9obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ 10obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4b9b17..cb1ab3ea4998 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 9
10ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ 10ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
11 device.o fmr_pool.o cache.o 11 device.o fmr_pool.o cache.o
12ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
12 13
13ib_mad-y := mad.o smi.o agent.o mad_rmpp.o 14ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
14 15
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
28 29
29ib_ucm-y := ucm.o 30ib_ucm-y := ucm.o
30 31
31ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ 32ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o
32 uverbs_marshall.o
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb425b033..592c90aa3183 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
613{ 613{
614 ib_cache_cleanup(); 614 ib_cache_cleanup();
615 ib_sysfs_cleanup(); 615 ib_sysfs_cleanup();
616 /* Make sure that any pending umem accounting work is done. */
617 flush_scheduled_work();
616} 618}
617 619
618module_init(ib_core_init); 620module_init(ib_core_init);
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c
index c95fe952abd5..f32ca5fbb26b 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,13 +39,6 @@
39 39
40#include "uverbs.h" 40#include "uverbs.h"
41 41
42struct ib_umem_account_work {
43 struct work_struct work;
44 struct mm_struct *mm;
45 unsigned long diff;
46};
47
48
49static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) 42static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
50{ 43{
51 struct ib_umem_chunk *chunk, *tmp; 44 struct ib_umem_chunk *chunk, *tmp;
@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
64 } 57 }
65} 58}
66 59
67int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, 60/**
68 void *addr, size_t size, int write) 61 * ib_umem_get - Pin and DMA map userspace memory.
62 * @context: userspace context to pin memory for
63 * @addr: userspace virtual address to start at
64 * @size: length of region to pin
65 * @access: IB_ACCESS_xxx flags for memory being pinned
66 */
67struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
68 size_t size, int access)
69{ 69{
70 struct ib_umem *umem;
70 struct page **page_list; 71 struct page **page_list;
71 struct ib_umem_chunk *chunk; 72 struct ib_umem_chunk *chunk;
72 unsigned long locked; 73 unsigned long locked;
73 unsigned long lock_limit; 74 unsigned long lock_limit;
74 unsigned long cur_base; 75 unsigned long cur_base;
75 unsigned long npages; 76 unsigned long npages;
76 int ret = 0; 77 int ret;
77 int off; 78 int off;
78 int i; 79 int i;
79 80
80 if (!can_do_mlock()) 81 if (!can_do_mlock())
81 return -EPERM; 82 return ERR_PTR(-EPERM);
82 83
83 page_list = (struct page **) __get_free_page(GFP_KERNEL); 84 umem = kmalloc(sizeof *umem, GFP_KERNEL);
84 if (!page_list) 85 if (!umem)
85 return -ENOMEM; 86 return ERR_PTR(-ENOMEM);
87
88 umem->context = context;
89 umem->length = size;
90 umem->offset = addr & ~PAGE_MASK;
91 umem->page_size = PAGE_SIZE;
92 /*
93 * We ask for writable memory if any access flags other than
94 * "remote read" are set. "Local write" and "remote write"
95 * obviously require write access. "Remote atomic" can do
96 * things like fetch and add, which will modify memory, and
97 * "MW bind" can change permissions by binding a window.
98 */
99 umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ);
86 100
87 mem->user_base = (unsigned long) addr; 101 INIT_LIST_HEAD(&umem->chunk_list);
88 mem->length = size;
89 mem->offset = (unsigned long) addr & ~PAGE_MASK;
90 mem->page_size = PAGE_SIZE;
91 mem->writable = write;
92 102
93 INIT_LIST_HEAD(&mem->chunk_list); 103 page_list = (struct page **) __get_free_page(GFP_KERNEL);
104 if (!page_list) {
105 kfree(umem);
106 return ERR_PTR(-ENOMEM);
107 }
94 108
95 npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; 109 npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
96 110
97 down_write(&current->mm->mmap_sem); 111 down_write(&current->mm->mmap_sem);
98 112
@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
104 goto out; 118 goto out;
105 } 119 }
106 120
107 cur_base = (unsigned long) addr & PAGE_MASK; 121 cur_base = addr & PAGE_MASK;
108 122
109 while (npages) { 123 while (npages) {
110 ret = get_user_pages(current, current->mm, cur_base, 124 ret = get_user_pages(current, current->mm, cur_base,
111 min_t(int, npages, 125 min_t(int, npages,
112 PAGE_SIZE / sizeof (struct page *)), 126 PAGE_SIZE / sizeof (struct page *)),
113 1, !write, page_list, NULL); 127 1, !umem->writable, page_list, NULL);
114 128
115 if (ret < 0) 129 if (ret < 0)
116 goto out; 130 goto out;
@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
136 chunk->page_list[i].length = PAGE_SIZE; 150 chunk->page_list[i].length = PAGE_SIZE;
137 } 151 }
138 152
139 chunk->nmap = ib_dma_map_sg(dev, 153 chunk->nmap = ib_dma_map_sg(context->device,
140 &chunk->page_list[0], 154 &chunk->page_list[0],
141 chunk->nents, 155 chunk->nents,
142 DMA_BIDIRECTIONAL); 156 DMA_BIDIRECTIONAL);
@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
151 165
152 ret -= chunk->nents; 166 ret -= chunk->nents;
153 off += chunk->nents; 167 off += chunk->nents;
154 list_add_tail(&chunk->list, &mem->chunk_list); 168 list_add_tail(&chunk->list, &umem->chunk_list);
155 } 169 }
156 170
157 ret = 0; 171 ret = 0;
158 } 172 }
159 173
160out: 174out:
161 if (ret < 0) 175 if (ret < 0) {
162 __ib_umem_release(dev, mem, 0); 176 __ib_umem_release(context->device, umem, 0);
163 else 177 kfree(umem);
178 } else
164 current->mm->locked_vm = locked; 179 current->mm->locked_vm = locked;
165 180
166 up_write(&current->mm->mmap_sem); 181 up_write(&current->mm->mmap_sem);
167 free_page((unsigned long) page_list); 182 free_page((unsigned long) page_list);
168 183
169 return ret; 184 return ret < 0 ? ERR_PTR(ret) : umem;
170} 185}
186EXPORT_SYMBOL(ib_umem_get);
171 187
172void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) 188static void ib_umem_account(struct work_struct *work)
173{ 189{
174 __ib_umem_release(dev, umem, 1); 190 struct ib_umem *umem = container_of(work, struct ib_umem, work);
175
176 down_write(&current->mm->mmap_sem);
177 current->mm->locked_vm -=
178 PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
179 up_write(&current->mm->mmap_sem);
180}
181 191
182static void ib_umem_account(struct work_struct *_work) 192 down_write(&umem->mm->mmap_sem);
183{ 193 umem->mm->locked_vm -= umem->diff;
184 struct ib_umem_account_work *work = 194 up_write(&umem->mm->mmap_sem);
185 container_of(_work, struct ib_umem_account_work, work); 195 mmput(umem->mm);
186 196 kfree(umem);
187 down_write(&work->mm->mmap_sem);
188 work->mm->locked_vm -= work->diff;
189 up_write(&work->mm->mmap_sem);
190 mmput(work->mm);
191 kfree(work);
192} 197}
193 198
194void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) 199/**
200 * ib_umem_release - release memory pinned with ib_umem_get
201 * @umem: umem struct to release
202 */
203void ib_umem_release(struct ib_umem *umem)
195{ 204{
196 struct ib_umem_account_work *work; 205 struct ib_ucontext *context = umem->context;
197 struct mm_struct *mm; 206 struct mm_struct *mm;
207 unsigned long diff;
198 208
199 __ib_umem_release(dev, umem, 1); 209 __ib_umem_release(umem->context->device, umem, 1);
200 210
201 mm = get_task_mm(current); 211 mm = get_task_mm(current);
202 if (!mm) 212 if (!mm)
203 return; 213 return;
204 214
215 diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
216
205 /* 217 /*
206 * We may be called with the mm's mmap_sem already held. This 218 * We may be called with the mm's mmap_sem already held. This
207 * can happen when a userspace munmap() is the call that drops 219 * can happen when a userspace munmap() is the call that drops
208 * the last reference to our file and calls our release 220 * the last reference to our file and calls our release
209 * method. If there are memory regions to destroy, we'll end 221 * method. If there are memory regions to destroy, we'll end
210 * up here and not be able to take the mmap_sem. Therefore we 222 * up here and not be able to take the mmap_sem. In that case
211 * defer the vm_locked accounting to the system workqueue. 223 * we defer the vm_locked accounting to the system workqueue.
212 */ 224 */
225 if (context->closing && !down_write_trylock(&mm->mmap_sem)) {
226 INIT_WORK(&umem->work, ib_umem_account);
227 umem->mm = mm;
228 umem->diff = diff;
213 229
214 work = kmalloc(sizeof *work, GFP_KERNEL); 230 schedule_work(&umem->work);
215 if (!work) {
216 mmput(mm);
217 return; 231 return;
218 } 232 } else
233 down_write(&mm->mmap_sem);
234
235 current->mm->locked_vm -= diff;
236 up_write(&mm->mmap_sem);
237 mmput(mm);
238 kfree(umem);
239}
240EXPORT_SYMBOL(ib_umem_release);
241
242int ib_umem_page_count(struct ib_umem *umem)
243{
244 struct ib_umem_chunk *chunk;
245 int shift;
246 int i;
247 int n;
248
249 shift = ilog2(umem->page_size);
219 250
220 INIT_WORK(&work->work, ib_umem_account); 251 n = 0;
221 work->mm = mm; 252 list_for_each_entry(chunk, &umem->chunk_list, list)
222 work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; 253 for (i = 0; i < chunk->nmap; ++i)
254 n += sg_dma_len(&chunk->page_list[i]) >> shift;
223 255
224 schedule_work(&work->work); 256 return n;
225} 257}
258EXPORT_SYMBOL(ib_umem_page_count);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c033ff..c33546f9e961 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
45#include <linux/completion.h> 45#include <linux/completion.h>
46 46
47#include <rdma/ib_verbs.h> 47#include <rdma/ib_verbs.h>
48#include <rdma/ib_umem.h>
48#include <rdma/ib_user_verbs.h> 49#include <rdma/ib_user_verbs.h>
49 50
50/* 51/*
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
163void ib_uverbs_event_handler(struct ib_event_handler *handler, 164void ib_uverbs_event_handler(struct ib_event_handler *handler,
164 struct ib_event *event); 165 struct ib_event *event);
165 166
166int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
167 void *addr, size_t size, int write);
168void ib_umem_release(struct ib_device *dev, struct ib_umem *umem);
169void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem);
170
171#define IB_UVERBS_DECLARE_CMD(name) \ 167#define IB_UVERBS_DECLARE_CMD(name) \
172 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ 168 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
173 const char __user *buf, int in_len, \ 169 const char __user *buf, int in_len, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab66769be14..01d70084aebe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2005 Topspin Communications. All rights reserved. 2 * Copyright (c) 2005 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 3 * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved.
4 * Copyright (c) 2005 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005 PathScale, Inc. All rights reserved.
5 * Copyright (c) 2006 Mellanox Technologies. All rights reserved. 5 * Copyright (c) 2006 Mellanox Technologies. All rights reserved.
6 * 6 *
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
295 INIT_LIST_HEAD(&ucontext->qp_list); 295 INIT_LIST_HEAD(&ucontext->qp_list);
296 INIT_LIST_HEAD(&ucontext->srq_list); 296 INIT_LIST_HEAD(&ucontext->srq_list);
297 INIT_LIST_HEAD(&ucontext->ah_list); 297 INIT_LIST_HEAD(&ucontext->ah_list);
298 ucontext->closing = 0;
298 299
299 resp.num_comp_vectors = file->device->num_comp_vectors; 300 resp.num_comp_vectors = file->device->num_comp_vectors;
300 301
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
573 struct ib_uverbs_reg_mr cmd; 574 struct ib_uverbs_reg_mr cmd;
574 struct ib_uverbs_reg_mr_resp resp; 575 struct ib_uverbs_reg_mr_resp resp;
575 struct ib_udata udata; 576 struct ib_udata udata;
576 struct ib_umem_object *obj; 577 struct ib_uobject *uobj;
577 struct ib_pd *pd; 578 struct ib_pd *pd;
578 struct ib_mr *mr; 579 struct ib_mr *mr;
579 int ret; 580 int ret;
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
599 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) 600 !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE))
600 return -EINVAL; 601 return -EINVAL;
601 602
602 obj = kmalloc(sizeof *obj, GFP_KERNEL); 603 uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
603 if (!obj) 604 if (!uobj)
604 return -ENOMEM; 605 return -ENOMEM;
605 606
606 init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); 607 init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
607 down_write(&obj->uobject.mutex); 608 down_write(&uobj->mutex);
608
609 /*
610 * We ask for writable memory if any access flags other than
611 * "remote read" are set. "Local write" and "remote write"
612 * obviously require write access. "Remote atomic" can do
613 * things like fetch and add, which will modify memory, and
614 * "MW bind" can change permissions by binding a window.
615 */
616 ret = ib_umem_get(file->device->ib_dev, &obj->umem,
617 (void *) (unsigned long) cmd.start, cmd.length,
618 !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ));
619 if (ret)
620 goto err_free;
621
622 obj->umem.virt_base = cmd.hca_va;
623 609
624 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 610 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
625 if (!pd) { 611 if (!pd) {
626 ret = -EINVAL; 612 ret = -EINVAL;
627 goto err_release; 613 goto err_free;
628 } 614 }
629 615
630 mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); 616 mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
617 cmd.access_flags, &udata);
631 if (IS_ERR(mr)) { 618 if (IS_ERR(mr)) {
632 ret = PTR_ERR(mr); 619 ret = PTR_ERR(mr);
633 goto err_put; 620 goto err_put;
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
635 622
636 mr->device = pd->device; 623 mr->device = pd->device;
637 mr->pd = pd; 624 mr->pd = pd;
638 mr->uobject = &obj->uobject; 625 mr->uobject = uobj;
639 atomic_inc(&pd->usecnt); 626 atomic_inc(&pd->usecnt);
640 atomic_set(&mr->usecnt, 0); 627 atomic_set(&mr->usecnt, 0);
641 628
642 obj->uobject.object = mr; 629 uobj->object = mr;
643 ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); 630 ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
644 if (ret) 631 if (ret)
645 goto err_unreg; 632 goto err_unreg;
646 633
647 memset(&resp, 0, sizeof resp); 634 memset(&resp, 0, sizeof resp);
648 resp.lkey = mr->lkey; 635 resp.lkey = mr->lkey;
649 resp.rkey = mr->rkey; 636 resp.rkey = mr->rkey;
650 resp.mr_handle = obj->uobject.id; 637 resp.mr_handle = uobj->id;
651 638
652 if (copy_to_user((void __user *) (unsigned long) cmd.response, 639 if (copy_to_user((void __user *) (unsigned long) cmd.response,
653 &resp, sizeof resp)) { 640 &resp, sizeof resp)) {
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
658 put_pd_read(pd); 645 put_pd_read(pd);
659 646
660 mutex_lock(&file->mutex); 647 mutex_lock(&file->mutex);
661 list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); 648 list_add_tail(&uobj->list, &file->ucontext->mr_list);
662 mutex_unlock(&file->mutex); 649 mutex_unlock(&file->mutex);
663 650
664 obj->uobject.live = 1; 651 uobj->live = 1;
665 652
666 up_write(&obj->uobject.mutex); 653 up_write(&uobj->mutex);
667 654
668 return in_len; 655 return in_len;
669 656
670err_copy: 657err_copy:
671 idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); 658 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
672 659
673err_unreg: 660err_unreg:
674 ib_dereg_mr(mr); 661 ib_dereg_mr(mr);
@@ -676,11 +663,8 @@ err_unreg:
676err_put: 663err_put:
677 put_pd_read(pd); 664 put_pd_read(pd);
678 665
679err_release:
680 ib_umem_release(file->device->ib_dev, &obj->umem);
681
682err_free: 666err_free:
683 put_uobj_write(&obj->uobject); 667 put_uobj_write(uobj);
684 return ret; 668 return ret;
685} 669}
686 670
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
691 struct ib_uverbs_dereg_mr cmd; 675 struct ib_uverbs_dereg_mr cmd;
692 struct ib_mr *mr; 676 struct ib_mr *mr;
693 struct ib_uobject *uobj; 677 struct ib_uobject *uobj;
694 struct ib_umem_object *memobj;
695 int ret = -EINVAL; 678 int ret = -EINVAL;
696 679
697 if (copy_from_user(&cmd, buf, sizeof cmd)) 680 if (copy_from_user(&cmd, buf, sizeof cmd))
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
701 if (!uobj) 684 if (!uobj)
702 return -EINVAL; 685 return -EINVAL;
703 686
704 memobj = container_of(uobj, struct ib_umem_object, uobject); 687 mr = uobj->object;
705 mr = uobj->object;
706 688
707 ret = ib_dereg_mr(mr); 689 ret = ib_dereg_mr(mr);
708 if (!ret) 690 if (!ret)
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
719 list_del(&uobj->list); 701 list_del(&uobj->list);
720 mutex_unlock(&file->mutex); 702 mutex_unlock(&file->mutex);
721 703
722 ib_umem_release(file->device->ib_dev, &memobj->umem);
723
724 put_uobj(uobj); 704 put_uobj(uobj);
725 705
726 return in_len; 706 return in_len;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e54799651..14d7ccd89195 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
183 if (!context) 183 if (!context)
184 return 0; 184 return 0;
185 185
186 context->closing = 1;
187
186 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { 188 list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
187 struct ib_ah *ah = uobj->object; 189 struct ib_ah *ah = uobj->object;
188 190
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
230 232
231 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { 233 list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
232 struct ib_mr *mr = uobj->object; 234 struct ib_mr *mr = uobj->object;
233 struct ib_device *mrdev = mr->device;
234 struct ib_umem_object *memobj;
235 235
236 idr_remove_uobj(&ib_uverbs_mr_idr, uobj); 236 idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
237 ib_dereg_mr(mr); 237 ib_dereg_mr(mr);
238 238 kfree(uobj);
239 memobj = container_of(uobj, struct ib_umem_object, uobject);
240 ib_umem_release_on_close(mrdev, &memobj->umem);
241
242 kfree(memobj);
243 } 239 }
244 240
245 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { 241 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
906 unregister_filesystem(&uverbs_event_fs); 902 unregister_filesystem(&uverbs_event_fs);
907 class_destroy(uverbs_class); 903 class_destroy(uverbs_class);
908 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); 904 unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
909 flush_scheduled_work();
910 idr_destroy(&ib_uverbs_pd_idr); 905 idr_destroy(&ib_uverbs_pd_idr);
911 idr_destroy(&ib_uverbs_mr_idr); 906 idr_destroy(&ib_uverbs_mr_idr);
912 idr_destroy(&ib_uverbs_mw_idr); 907 idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 109166223c09..997cf1530762 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
56#include <asm/byteorder.h> 56#include <asm/byteorder.h>
57 57
58#include <rdma/ib_smi.h> 58#include <rdma/ib_smi.h>
59#include <rdma/ib_umem.h>
59#include <rdma/ib_user_verbs.h> 60#include <rdma/ib_user_verbs.h>
60#include "c2.h" 61#include "c2.h"
61#include "c2_provider.h" 62#include "c2_provider.h"
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
396 } 397 }
397 398
398 mr->pd = to_c2pd(ib_pd); 399 mr->pd = to_c2pd(ib_pd);
400 mr->umem = NULL;
399 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " 401 pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
400 "*iova_start %llx, first pa %llx, last pa %llx\n", 402 "*iova_start %llx, first pa %llx, last pa %llx\n",
401 __FUNCTION__, page_shift, pbl_depth, total_len, 403 __FUNCTION__, page_shift, pbl_depth, total_len,
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
428 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); 430 return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
429} 431}
430 432
431static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 433static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
432 int acc, struct ib_udata *udata) 434 u64 virt, int acc, struct ib_udata *udata)
433{ 435{
434 u64 *pages; 436 u64 *pages;
435 u64 kva = 0; 437 u64 kva = 0;
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
441 struct c2_mr *c2mr; 443 struct c2_mr *c2mr;
442 444
443 pr_debug("%s:%u\n", __FUNCTION__, __LINE__); 445 pr_debug("%s:%u\n", __FUNCTION__, __LINE__);
444 shift = ffs(region->page_size) - 1;
445 446
446 c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); 447 c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL);
447 if (!c2mr) 448 if (!c2mr)
448 return ERR_PTR(-ENOMEM); 449 return ERR_PTR(-ENOMEM);
449 c2mr->pd = c2pd; 450 c2mr->pd = c2pd;
450 451
452 c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
453 if (IS_ERR(c2mr->umem)) {
454 err = PTR_ERR(c2mr->umem);
455 kfree(c2mr);
456 return ERR_PTR(err);
457 }
458
459 shift = ffs(c2mr->umem->page_size) - 1;
460
451 n = 0; 461 n = 0;
452 list_for_each_entry(chunk, &region->chunk_list, list) 462 list_for_each_entry(chunk, &c2mr->umem->chunk_list, list)
453 n += chunk->nents; 463 n += chunk->nents;
454 464
455 pages = kmalloc(n * sizeof(u64), GFP_KERNEL); 465 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
459 } 469 }
460 470
461 i = 0; 471 i = 0;
462 list_for_each_entry(chunk, &region->chunk_list, list) { 472 list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) {
463 for (j = 0; j < chunk->nmap; ++j) { 473 for (j = 0; j < chunk->nmap; ++j) {
464 len = sg_dma_len(&chunk->page_list[j]) >> shift; 474 len = sg_dma_len(&chunk->page_list[j]) >> shift;
465 for (k = 0; k < len; ++k) { 475 for (k = 0; k < len; ++k) {
466 pages[i++] = 476 pages[i++] =
467 sg_dma_address(&chunk->page_list[j]) + 477 sg_dma_address(&chunk->page_list[j]) +
468 (region->page_size * k); 478 (c2mr->umem->page_size * k);
469 } 479 }
470 } 480 }
471 } 481 }
472 482
473 kva = (u64)region->virt_base; 483 kva = virt;
474 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), 484 err = c2_nsmr_register_phys_kern(to_c2dev(pd->device),
475 pages, 485 pages,
476 region->page_size, 486 c2mr->umem->page_size,
477 i, 487 i,
478 region->length, 488 length,
479 region->offset, 489 c2mr->umem->offset,
480 &kva, 490 &kva,
481 c2_convert_access(acc), 491 c2_convert_access(acc),
482 c2mr); 492 c2mr);
483 kfree(pages); 493 kfree(pages);
484 if (err) { 494 if (err)
485 kfree(c2mr); 495 goto err;
486 return ERR_PTR(err);
487 }
488 return &c2mr->ibmr; 496 return &c2mr->ibmr;
489 497
490err: 498err:
499 ib_umem_release(c2mr->umem);
491 kfree(c2mr); 500 kfree(c2mr);
492 return ERR_PTR(err); 501 return ERR_PTR(err);
493} 502}
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
502 err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); 511 err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey);
503 if (err) 512 if (err)
504 pr_debug("c2_stag_dealloc failed: %d\n", err); 513 pr_debug("c2_stag_dealloc failed: %d\n", err);
505 else 514 else {
515 if (mr->umem)
516 ib_umem_release(mr->umem);
506 kfree(mr); 517 kfree(mr);
518 }
507 519
508 return err; 520 return err;
509} 521}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc906223220f..1076df2ee96a 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
73struct c2_mr { 73struct c2_mr {
74 struct ib_mr ibmr; 74 struct ib_mr ibmr;
75 struct c2_pd *pd; 75 struct c2_pd *pd;
76 struct ib_umem *umem;
76}; 77};
77 78
78struct c2_av; 79struct c2_av;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a891493fd340..e7c2c3948037 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
47#include <rdma/iw_cm.h> 47#include <rdma/iw_cm.h>
48#include <rdma/ib_verbs.h> 48#include <rdma/ib_verbs.h>
49#include <rdma/ib_smi.h> 49#include <rdma/ib_smi.h>
50#include <rdma/ib_umem.h>
50#include <rdma/ib_user_verbs.h> 51#include <rdma/ib_user_verbs.h>
51 52
52#include "cxio_hal.h" 53#include "cxio_hal.h"
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
443 remove_handle(rhp, &rhp->mmidr, mmid); 444 remove_handle(rhp, &rhp->mmidr, mmid);
444 if (mhp->kva) 445 if (mhp->kva)
445 kfree((void *) (unsigned long) mhp->kva); 446 kfree((void *) (unsigned long) mhp->kva);
447 if (mhp->umem)
448 ib_umem_release(mhp->umem);
446 PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); 449 PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp);
447 kfree(mhp); 450 kfree(mhp);
448 return 0; 451 return 0;
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
577} 580}
578 581
579 582
580static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 583static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
581 int acc, struct ib_udata *udata) 584 u64 virt, int acc, struct ib_udata *udata)
582{ 585{
583 __be64 *pages; 586 __be64 *pages;
584 int shift, n, len; 587 int shift, n, len;
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
591 struct iwch_reg_user_mr_resp uresp; 594 struct iwch_reg_user_mr_resp uresp;
592 595
593 PDBG("%s ib_pd %p\n", __FUNCTION__, pd); 596 PDBG("%s ib_pd %p\n", __FUNCTION__, pd);
594 shift = ffs(region->page_size) - 1;
595 597
596 php = to_iwch_pd(pd); 598 php = to_iwch_pd(pd);
597 rhp = php->rhp; 599 rhp = php->rhp;
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
599 if (!mhp) 601 if (!mhp)
600 return ERR_PTR(-ENOMEM); 602 return ERR_PTR(-ENOMEM);
601 603
604 mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc);
605 if (IS_ERR(mhp->umem)) {
606 err = PTR_ERR(mhp->umem);
607 kfree(mhp);
608 return ERR_PTR(err);
609 }
610
611 shift = ffs(mhp->umem->page_size) - 1;
612
602 n = 0; 613 n = 0;
603 list_for_each_entry(chunk, &region->chunk_list, list) 614 list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
604 n += chunk->nents; 615 n += chunk->nents;
605 616
606 pages = kmalloc(n * sizeof(u64), GFP_KERNEL); 617 pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
611 622
612 i = n = 0; 623 i = n = 0;
613 624
614 list_for_each_entry(chunk, &region->chunk_list, list) 625 list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
615 for (j = 0; j < chunk->nmap; ++j) { 626 for (j = 0; j < chunk->nmap; ++j) {
616 len = sg_dma_len(&chunk->page_list[j]) >> shift; 627 len = sg_dma_len(&chunk->page_list[j]) >> shift;
617 for (k = 0; k < len; ++k) { 628 for (k = 0; k < len; ++k) {
618 pages[i++] = cpu_to_be64(sg_dma_address( 629 pages[i++] = cpu_to_be64(sg_dma_address(
619 &chunk->page_list[j]) + 630 &chunk->page_list[j]) +
620 region->page_size * k); 631 mhp->umem->page_size * k);
621 } 632 }
622 } 633 }
623 634
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
625 mhp->attr.pdid = php->pdid; 636 mhp->attr.pdid = php->pdid;
626 mhp->attr.zbva = 0; 637 mhp->attr.zbva = 0;
627 mhp->attr.perms = iwch_ib_to_tpt_access(acc); 638 mhp->attr.perms = iwch_ib_to_tpt_access(acc);
628 mhp->attr.va_fbo = region->virt_base; 639 mhp->attr.va_fbo = virt;
629 mhp->attr.page_size = shift - 12; 640 mhp->attr.page_size = shift - 12;
630 mhp->attr.len = (u32) region->length; 641 mhp->attr.len = (u32) length;
631 mhp->attr.pbl_size = i; 642 mhp->attr.pbl_size = i;
632 err = iwch_register_mem(rhp, php, mhp, shift, pages); 643 err = iwch_register_mem(rhp, php, mhp, shift, pages);
633 kfree(pages); 644 kfree(pages);
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
650 return &mhp->ibmr; 661 return &mhp->ibmr;
651 662
652err: 663err:
664 ib_umem_release(mhp->umem);
653 kfree(mhp); 665 kfree(mhp);
654 return ERR_PTR(err); 666 return ERR_PTR(err);
655} 667}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56756bd..48833f3f3bd0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
73 73
74struct iwch_mr { 74struct iwch_mr {
75 struct ib_mr ibmr; 75 struct ib_mr ibmr;
76 struct ib_umem *umem;
76 struct iwch_dev *rhp; 77 struct iwch_dev *rhp;
77 u64 kva; 78 u64 kva;
78 struct tpt_attributes attr; 79 struct tpt_attributes attr;
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fbafa0c..f64d42b08674 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
176 struct ib_mr ib_mr; /* must always be first in ehca_mr */ 176 struct ib_mr ib_mr; /* must always be first in ehca_mr */
177 struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ 177 struct ib_fmr ib_fmr; /* must always be first in ehca_mr */
178 } ib; 178 } ib;
179 struct ib_umem *umem;
179 spinlock_t mrlock; 180 spinlock_t mrlock;
180 181
181 enum ehca_mr_flag flags; 182 enum ehca_mr_flag flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index e14b029332c8..37e7fe0908cf 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
78 int num_phys_buf, 78 int num_phys_buf,
79 int mr_access_flags, u64 *iova_start); 79 int mr_access_flags, u64 *iova_start);
80 80
81struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, 81struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
82 struct ib_umem *region,
83 int mr_access_flags, struct ib_udata *udata); 82 int mr_access_flags, struct ib_udata *udata);
84 83
85int ehca_rereg_phys_mr(struct ib_mr *mr, 84int ehca_rereg_phys_mr(struct ib_mr *mr,
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab563633f..84c5bb498563 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
39 * POSSIBILITY OF SUCH DAMAGE. 39 * POSSIBILITY OF SUCH DAMAGE.
40 */ 40 */
41 41
42#include <rdma/ib_umem.h>
43
42#include <asm/current.h> 44#include <asm/current.h>
43 45
44#include "ehca_iverbs.h" 46#include "ehca_iverbs.h"
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
238 240
239/*----------------------------------------------------------------------*/ 241/*----------------------------------------------------------------------*/
240 242
241struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, 243struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt,
242 struct ib_umem *region, 244 int mr_access_flags, struct ib_udata *udata)
243 int mr_access_flags,
244 struct ib_udata *udata)
245{ 245{
246 struct ib_mr *ib_mr; 246 struct ib_mr *ib_mr;
247 struct ehca_mr *e_mr; 247 struct ehca_mr *e_mr;
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
257 ehca_gen_err("bad pd=%p", pd); 257 ehca_gen_err("bad pd=%p", pd);
258 return ERR_PTR(-EFAULT); 258 return ERR_PTR(-EFAULT);
259 } 259 }
260 if (!region) { 260
261 ehca_err(pd->device, "bad input values: region=%p", region);
262 ib_mr = ERR_PTR(-EINVAL);
263 goto reg_user_mr_exit0;
264 }
265 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && 261 if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
266 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || 262 !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
267 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && 263 ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
275 ib_mr = ERR_PTR(-EINVAL); 271 ib_mr = ERR_PTR(-EINVAL);
276 goto reg_user_mr_exit0; 272 goto reg_user_mr_exit0;
277 } 273 }
278 if (region->page_size != PAGE_SIZE) {
279 ehca_err(pd->device, "page size not supported, "
280 "region->page_size=%x", region->page_size);
281 ib_mr = ERR_PTR(-EINVAL);
282 goto reg_user_mr_exit0;
283 }
284 274
285 if ((region->length == 0) || 275 if (length == 0 || virt + length < virt) {
286 ((region->virt_base + region->length) < region->virt_base)) {
287 ehca_err(pd->device, "bad input values: length=%lx " 276 ehca_err(pd->device, "bad input values: length=%lx "
288 "virt_base=%lx", region->length, region->virt_base); 277 "virt_base=%lx", length, virt);
289 ib_mr = ERR_PTR(-EINVAL); 278 ib_mr = ERR_PTR(-EINVAL);
290 goto reg_user_mr_exit0; 279 goto reg_user_mr_exit0;
291 } 280 }
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
297 goto reg_user_mr_exit0; 286 goto reg_user_mr_exit0;
298 } 287 }
299 288
289 e_mr->umem = ib_umem_get(pd->uobject->context, start, length,
290 mr_access_flags);
291 if (IS_ERR(e_mr->umem)) {
292 ib_mr = (void *) e_mr->umem;
293 goto reg_user_mr_exit1;
294 }
295
296 if (e_mr->umem->page_size != PAGE_SIZE) {
297 ehca_err(pd->device, "page size not supported, "
298 "e_mr->umem->page_size=%x", e_mr->umem->page_size);
299 ib_mr = ERR_PTR(-EINVAL);
300 goto reg_user_mr_exit2;
301 }
302
300 /* determine number of MR pages */ 303 /* determine number of MR pages */
301 num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + 304 num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) /
302 PAGE_SIZE - 1) / PAGE_SIZE); 305 PAGE_SIZE);
303 num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + 306 num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) /
304 EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); 307 EHCA_PAGESIZE);
305 308
306 /* register MR on HCA */ 309 /* register MR on HCA */
307 pginfo.type = EHCA_MR_PGI_USER; 310 pginfo.type = EHCA_MR_PGI_USER;
308 pginfo.num_pages = num_pages_mr; 311 pginfo.num_pages = num_pages_mr;
309 pginfo.num_4k = num_pages_4k; 312 pginfo.num_4k = num_pages_4k;
310 pginfo.region = region; 313 pginfo.region = e_mr->umem;
311 pginfo.next_4k = region->offset / EHCA_PAGESIZE; 314 pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE;
312 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, 315 pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk,
313 (&region->chunk_list), 316 (&e_mr->umem->chunk_list),
314 list); 317 list);
315 318
316 ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, 319 ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd,
317 region->length, mr_access_flags, e_pd, &pginfo, 320 &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
318 &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey);
319 if (ret) { 321 if (ret) {
320 ib_mr = ERR_PTR(ret); 322 ib_mr = ERR_PTR(ret);
321 goto reg_user_mr_exit1; 323 goto reg_user_mr_exit2;
322 } 324 }
323 325
324 /* successful registration of all pages */ 326 /* successful registration of all pages */
325 return &e_mr->ib.ib_mr; 327 return &e_mr->ib.ib_mr;
326 328
329reg_user_mr_exit2:
330 ib_umem_release(e_mr->umem);
327reg_user_mr_exit1: 331reg_user_mr_exit1:
328 ehca_mr_delete(e_mr); 332 ehca_mr_delete(e_mr);
329reg_user_mr_exit0: 333reg_user_mr_exit0:
330 if (IS_ERR(ib_mr)) 334 if (IS_ERR(ib_mr))
331 ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" 335 ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x"
332 " udata=%p", 336 " udata=%p",
333 PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); 337 PTR_ERR(ib_mr), pd, mr_access_flags, udata);
334 return ib_mr; 338 return ib_mr;
335} /* end ehca_reg_user_mr() */ 339} /* end ehca_reg_user_mr() */
336 340
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
596 goto dereg_mr_exit0; 600 goto dereg_mr_exit0;
597 } 601 }
598 602
603 if (e_mr->umem)
604 ib_umem_release(e_mr->umem);
605
599 /* successful deregistration */ 606 /* successful deregistration */
600 ehca_mr_delete(e_mr); 607 ehca_mr_delete(e_mr);
601 608
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e70732e369..bdeef8d4f279 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <rdma/ib_umem.h>
34#include <rdma/ib_pack.h> 35#include <rdma/ib_pack.h>
35#include <rdma/ib_smi.h> 36#include <rdma/ib_smi.h>
36 37
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
147 mr->mr.offset = 0; 148 mr->mr.offset = 0;
148 mr->mr.access_flags = acc; 149 mr->mr.access_flags = acc;
149 mr->mr.max_segs = num_phys_buf; 150 mr->mr.max_segs = num_phys_buf;
151 mr->umem = NULL;
150 152
151 m = 0; 153 m = 0;
152 n = 0; 154 n = 0;
@@ -170,46 +172,56 @@ bail:
170/** 172/**
171 * ipath_reg_user_mr - register a userspace memory region 173 * ipath_reg_user_mr - register a userspace memory region
172 * @pd: protection domain for this memory region 174 * @pd: protection domain for this memory region
173 * @region: the user memory region 175 * @start: starting userspace address
176 * @length: length of region to register
177 * @virt_addr: virtual address to use (from HCA's point of view)
174 * @mr_access_flags: access flags for this memory region 178 * @mr_access_flags: access flags for this memory region
175 * @udata: unused by the InfiniPath driver 179 * @udata: unused by the InfiniPath driver
176 * 180 *
177 * Returns the memory region on success, otherwise returns an errno. 181 * Returns the memory region on success, otherwise returns an errno.
178 */ 182 */
179struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 183struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
180 int mr_access_flags, struct ib_udata *udata) 184 u64 virt_addr, int mr_access_flags,
185 struct ib_udata *udata)
181{ 186{
182 struct ipath_mr *mr; 187 struct ipath_mr *mr;
188 struct ib_umem *umem;
183 struct ib_umem_chunk *chunk; 189 struct ib_umem_chunk *chunk;
184 int n, m, i; 190 int n, m, i;
185 struct ib_mr *ret; 191 struct ib_mr *ret;
186 192
187 if (region->length == 0) { 193 if (length == 0) {
188 ret = ERR_PTR(-EINVAL); 194 ret = ERR_PTR(-EINVAL);
189 goto bail; 195 goto bail;
190 } 196 }
191 197
198 umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags);
199 if (IS_ERR(umem))
200 return (void *) umem;
201
192 n = 0; 202 n = 0;
193 list_for_each_entry(chunk, &region->chunk_list, list) 203 list_for_each_entry(chunk, &umem->chunk_list, list)
194 n += chunk->nents; 204 n += chunk->nents;
195 205
196 mr = alloc_mr(n, &to_idev(pd->device)->lk_table); 206 mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
197 if (!mr) { 207 if (!mr) {
198 ret = ERR_PTR(-ENOMEM); 208 ret = ERR_PTR(-ENOMEM);
209 ib_umem_release(umem);
199 goto bail; 210 goto bail;
200 } 211 }
201 212
202 mr->mr.pd = pd; 213 mr->mr.pd = pd;
203 mr->mr.user_base = region->user_base; 214 mr->mr.user_base = start;
204 mr->mr.iova = region->virt_base; 215 mr->mr.iova = virt_addr;
205 mr->mr.length = region->length; 216 mr->mr.length = length;
206 mr->mr.offset = region->offset; 217 mr->mr.offset = umem->offset;
207 mr->mr.access_flags = mr_access_flags; 218 mr->mr.access_flags = mr_access_flags;
208 mr->mr.max_segs = n; 219 mr->mr.max_segs = n;
220 mr->umem = umem;
209 221
210 m = 0; 222 m = 0;
211 n = 0; 223 n = 0;
212 list_for_each_entry(chunk, &region->chunk_list, list) { 224 list_for_each_entry(chunk, &umem->chunk_list, list) {
213 for (i = 0; i < chunk->nents; i++) { 225 for (i = 0; i < chunk->nents; i++) {
214 void *vaddr; 226 void *vaddr;
215 227
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
219 goto bail; 231 goto bail;
220 } 232 }
221 mr->mr.map[m]->segs[n].vaddr = vaddr; 233 mr->mr.map[m]->segs[n].vaddr = vaddr;
222 mr->mr.map[m]->segs[n].length = region->page_size; 234 mr->mr.map[m]->segs[n].length = umem->page_size;
223 n++; 235 n++;
224 if (n == IPATH_SEGSZ) { 236 if (n == IPATH_SEGSZ) {
225 m++; 237 m++;
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
253 i--; 265 i--;
254 kfree(mr->mr.map[i]); 266 kfree(mr->mr.map[i]);
255 } 267 }
268
269 if (mr->umem)
270 ib_umem_release(mr->umem);
271
256 kfree(mr); 272 kfree(mr);
257 return 0; 273 return 0;
258} 274}
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7064fc222727..088b837ebea8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -251,6 +251,7 @@ struct ipath_sge {
251/* Memory region */ 251/* Memory region */
252struct ipath_mr { 252struct ipath_mr {
253 struct ib_mr ibmr; 253 struct ib_mr ibmr;
254 struct ib_umem *umem;
254 struct ipath_mregion mr; /* must be last */ 255 struct ipath_mregion mr; /* must be last */
255}; 256};
256 257
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
751 struct ib_phys_buf *buffer_list, 752 struct ib_phys_buf *buffer_list,
752 int num_phys_buf, int acc, u64 *iova_start); 753 int num_phys_buf, int acc, u64 *iova_start);
753 754
754struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 755struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
755 int mr_access_flags, 756 u64 virt_addr, int mr_access_flags,
756 struct ib_udata *udata); 757 struct ib_udata *udata);
757 758
758int ipath_dereg_mr(struct ib_mr *ibmr); 759int ipath_dereg_mr(struct ib_mr *ibmr);
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
new file mode 100644
index 000000000000..b8912cdb9663
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -0,0 +1,9 @@
1config MLX4_INFINIBAND
2 tristate "Mellanox ConnectX HCA support"
3 depends on INFINIBAND
4 select MLX4_CORE
5 ---help---
6 This driver provides low-level InfiniBand support for
7 Mellanox ConnectX PCI Express host channel adapters (HCAs).
8 This is required to use InfiniBand protocols such as
9 IP-over-IB or SRP with these devices.
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
new file mode 100644
index 000000000000..70f09c7826da
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -0,0 +1,3 @@
1obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
2
3mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
new file mode 100644
index 000000000000..c75ac9463e20
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -0,0 +1,100 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx4_ib.h"
34
35struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
36{
37 struct mlx4_dev *dev = to_mdev(pd->device)->dev;
38 struct mlx4_ib_ah *ah;
39
40 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
41 if (!ah)
42 return ERR_PTR(-ENOMEM);
43
44 memset(&ah->av, 0, sizeof ah->av);
45
46 ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
47 ah->av.g_slid = ah_attr->src_path_bits;
48 ah->av.dlid = cpu_to_be16(ah_attr->dlid);
49 if (ah_attr->static_rate) {
50 ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
51 while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
52 !(1 << ah->av.stat_rate & dev->caps.stat_rate_support))
53 --ah->av.stat_rate;
54 }
55 ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
56 if (ah_attr->ah_flags & IB_AH_GRH) {
57 ah->av.g_slid |= 0x80;
58 ah->av.gid_index = ah_attr->grh.sgid_index;
59 ah->av.hop_limit = ah_attr->grh.hop_limit;
60 ah->av.sl_tclass_flowlabel |=
61 cpu_to_be32((ah_attr->grh.traffic_class << 20) |
62 ah_attr->grh.flow_label);
63 memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
64 }
65
66 return &ah->ibah;
67}
68
69int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
70{
71 struct mlx4_ib_ah *ah = to_mah(ibah);
72
73 memset(ah_attr, 0, sizeof *ah_attr);
74 ah_attr->dlid = be16_to_cpu(ah->av.dlid);
75 ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
76 ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24;
77 if (ah->av.stat_rate)
78 ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET;
79 ah_attr->src_path_bits = ah->av.g_slid & 0x7F;
80
81 if (mlx4_ib_ah_grh_present(ah)) {
82 ah_attr->ah_flags = IB_AH_GRH;
83
84 ah_attr->grh.traffic_class =
85 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20;
86 ah_attr->grh.flow_label =
87 be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff;
88 ah_attr->grh.hop_limit = ah->av.hop_limit;
89 ah_attr->grh.sgid_index = ah->av.gid_index;
90 memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16);
91 }
92
93 return 0;
94}
95
96int mlx4_ib_destroy_ah(struct ib_ah *ah)
97{
98 kfree(to_mah(ah));
99 return 0;
100}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
new file mode 100644
index 000000000000..b2a290c6703a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -0,0 +1,525 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/cq.h>
34#include <linux/mlx4/qp.h>
35
36#include "mlx4_ib.h"
37#include "user.h"
38
39static void mlx4_ib_cq_comp(struct mlx4_cq *cq)
40{
41 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
42 ibcq->comp_handler(ibcq, ibcq->cq_context);
43}
44
45static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
46{
47 struct ib_event event;
48 struct ib_cq *ibcq;
49
50 if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
51 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
52 "on CQ %06x\n", type, cq->cqn);
53 return;
54 }
55
56 ibcq = &to_mibcq(cq)->ibcq;
57 if (ibcq->event_handler) {
58 event.device = ibcq->device;
59 event.event = IB_EVENT_CQ_ERR;
60 event.element.cq = ibcq;
61 ibcq->event_handler(&event, ibcq->cq_context);
62 }
63}
64
65static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
66{
67 int offset = n * sizeof (struct mlx4_cqe);
68
69 if (buf->buf.nbufs == 1)
70 return buf->buf.u.direct.buf + offset;
71 else
72 return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf +
73 (offset & (PAGE_SIZE - 1));
74}
75
76static void *get_cqe(struct mlx4_ib_cq *cq, int n)
77{
78 return get_cqe_from_buf(&cq->buf, n);
79}
80
81static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
82{
83 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
84
85 return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
86 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
87}
88
89static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq)
90{
91 return get_sw_cqe(cq, cq->mcq.cons_index);
92}
93
94struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
95 struct ib_ucontext *context,
96 struct ib_udata *udata)
97{
98 struct mlx4_ib_dev *dev = to_mdev(ibdev);
99 struct mlx4_ib_cq *cq;
100 struct mlx4_uar *uar;
101 int buf_size;
102 int err;
103
104 if (entries < 1 || entries > dev->dev->caps.max_cqes)
105 return ERR_PTR(-EINVAL);
106
107 cq = kmalloc(sizeof *cq, GFP_KERNEL);
108 if (!cq)
109 return ERR_PTR(-ENOMEM);
110
111 entries = roundup_pow_of_two(entries + 1);
112 cq->ibcq.cqe = entries - 1;
113 buf_size = entries * sizeof (struct mlx4_cqe);
114 spin_lock_init(&cq->lock);
115
116 if (context) {
117 struct mlx4_ib_create_cq ucmd;
118
119 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
120 err = -EFAULT;
121 goto err_cq;
122 }
123
124 cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size,
125 IB_ACCESS_LOCAL_WRITE);
126 if (IS_ERR(cq->umem)) {
127 err = PTR_ERR(cq->umem);
128 goto err_cq;
129 }
130
131 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem),
132 ilog2(cq->umem->page_size), &cq->buf.mtt);
133 if (err)
134 goto err_buf;
135
136 err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem);
137 if (err)
138 goto err_mtt;
139
140 err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
141 &cq->db);
142 if (err)
143 goto err_mtt;
144
145 uar = &to_mucontext(context)->uar;
146 } else {
147 err = mlx4_ib_db_alloc(dev, &cq->db, 1);
148 if (err)
149 goto err_cq;
150
151 cq->mcq.set_ci_db = cq->db.db;
152 cq->mcq.arm_db = cq->db.db + 1;
153 *cq->mcq.set_ci_db = 0;
154 *cq->mcq.arm_db = 0;
155
156 if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) {
157 err = -ENOMEM;
158 goto err_db;
159 }
160
161 err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift,
162 &cq->buf.mtt);
163 if (err)
164 goto err_buf;
165
166 err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf);
167 if (err)
168 goto err_mtt;
169
170 uar = &dev->priv_uar;
171 }
172
173 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
174 cq->db.dma, &cq->mcq);
175 if (err)
176 goto err_dbmap;
177
178 cq->mcq.comp = mlx4_ib_cq_comp;
179 cq->mcq.event = mlx4_ib_cq_event;
180
181 if (context)
182 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
183 err = -EFAULT;
184 goto err_dbmap;
185 }
186
187 return &cq->ibcq;
188
189err_dbmap:
190 if (context)
191 mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
192
193err_mtt:
194 mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt);
195
196err_buf:
197 if (context)
198 ib_umem_release(cq->umem);
199 else
200 mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe),
201 &cq->buf.buf);
202
203err_db:
204 if (!context)
205 mlx4_ib_db_free(dev, &cq->db);
206
207err_cq:
208 kfree(cq);
209
210 return ERR_PTR(err);
211}
212
213int mlx4_ib_destroy_cq(struct ib_cq *cq)
214{
215 struct mlx4_ib_dev *dev = to_mdev(cq->device);
216 struct mlx4_ib_cq *mcq = to_mcq(cq);
217
218 mlx4_cq_free(dev->dev, &mcq->mcq);
219 mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt);
220
221 if (cq->uobject) {
222 mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
223 ib_umem_release(mcq->umem);
224 } else {
225 mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe),
226 &mcq->buf.buf);
227 mlx4_ib_db_free(dev, &mcq->db);
228 }
229
230 kfree(mcq);
231
232 return 0;
233}
234
235static void dump_cqe(void *cqe)
236{
237 __be32 *buf = cqe;
238
239 printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
240 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
241 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
242 be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
243}
244
245static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
246 struct ib_wc *wc)
247{
248 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
249 printk(KERN_DEBUG "local QP operation err "
250 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
251 "opcode = %02x)\n",
252 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
253 cqe->vendor_err_syndrome,
254 cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK);
255 dump_cqe(cqe);
256 }
257
258 switch (cqe->syndrome) {
259 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR:
260 wc->status = IB_WC_LOC_LEN_ERR;
261 break;
262 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR:
263 wc->status = IB_WC_LOC_QP_OP_ERR;
264 break;
265 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR:
266 wc->status = IB_WC_LOC_PROT_ERR;
267 break;
268 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR:
269 wc->status = IB_WC_WR_FLUSH_ERR;
270 break;
271 case MLX4_CQE_SYNDROME_MW_BIND_ERR:
272 wc->status = IB_WC_MW_BIND_ERR;
273 break;
274 case MLX4_CQE_SYNDROME_BAD_RESP_ERR:
275 wc->status = IB_WC_BAD_RESP_ERR;
276 break;
277 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR:
278 wc->status = IB_WC_LOC_ACCESS_ERR;
279 break;
280 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
281 wc->status = IB_WC_REM_INV_REQ_ERR;
282 break;
283 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR:
284 wc->status = IB_WC_REM_ACCESS_ERR;
285 break;
286 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR:
287 wc->status = IB_WC_REM_OP_ERR;
288 break;
289 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
290 wc->status = IB_WC_RETRY_EXC_ERR;
291 break;
292 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
293 wc->status = IB_WC_RNR_RETRY_EXC_ERR;
294 break;
295 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR:
296 wc->status = IB_WC_REM_ABORT_ERR;
297 break;
298 default:
299 wc->status = IB_WC_GENERAL_ERR;
300 break;
301 }
302
303 wc->vendor_err = cqe->vendor_err_syndrome;
304}
305
306static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
307 struct mlx4_ib_qp **cur_qp,
308 struct ib_wc *wc)
309{
310 struct mlx4_cqe *cqe;
311 struct mlx4_qp *mqp;
312 struct mlx4_ib_wq *wq;
313 struct mlx4_ib_srq *srq;
314 int is_send;
315 int is_error;
316 u16 wqe_ctr;
317
318 cqe = next_cqe_sw(cq);
319 if (!cqe)
320 return -EAGAIN;
321
322 ++cq->mcq.cons_index;
323
324 /*
325 * Make sure we read CQ entry contents after we've checked the
326 * ownership bit.
327 */
328 rmb();
329
330 is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK;
331 is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
332 MLX4_CQE_OPCODE_ERROR;
333
334 if (!*cur_qp ||
335 (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) {
336 /*
337 * We do not have to take the QP table lock here,
338 * because CQs will be locked while QPs are removed
339 * from the table.
340 */
341 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
342 be32_to_cpu(cqe->my_qpn));
343 if (unlikely(!mqp)) {
344 printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
345 cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff);
346 return -EINVAL;
347 }
348
349 *cur_qp = to_mibqp(mqp);
350 }
351
352 wc->qp = &(*cur_qp)->ibqp;
353
354 if (is_send) {
355 wq = &(*cur_qp)->sq;
356 wqe_ctr = be16_to_cpu(cqe->wqe_index);
357 wq->tail += wqe_ctr - (u16) wq->tail;
358 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
359 ++wq->tail;
360 } else if ((*cur_qp)->ibqp.srq) {
361 srq = to_msrq((*cur_qp)->ibqp.srq);
362 wqe_ctr = be16_to_cpu(cqe->wqe_index);
363 wc->wr_id = srq->wrid[wqe_ctr];
364 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
365 } else {
366 wq = &(*cur_qp)->rq;
367 wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)];
368 ++wq->tail;
369 }
370
371 if (unlikely(is_error)) {
372 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc);
373 return 0;
374 }
375
376 wc->status = IB_WC_SUCCESS;
377
378 if (is_send) {
379 wc->wc_flags = 0;
380 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
381 case MLX4_OPCODE_RDMA_WRITE_IMM:
382 wc->wc_flags |= IB_WC_WITH_IMM;
383 case MLX4_OPCODE_RDMA_WRITE:
384 wc->opcode = IB_WC_RDMA_WRITE;
385 break;
386 case MLX4_OPCODE_SEND_IMM:
387 wc->wc_flags |= IB_WC_WITH_IMM;
388 case MLX4_OPCODE_SEND:
389 wc->opcode = IB_WC_SEND;
390 break;
391 case MLX4_OPCODE_RDMA_READ:
392 wc->opcode = IB_WC_SEND;
393 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
394 break;
395 case MLX4_OPCODE_ATOMIC_CS:
396 wc->opcode = IB_WC_COMP_SWAP;
397 wc->byte_len = 8;
398 break;
399 case MLX4_OPCODE_ATOMIC_FA:
400 wc->opcode = IB_WC_FETCH_ADD;
401 wc->byte_len = 8;
402 break;
403 case MLX4_OPCODE_BIND_MW:
404 wc->opcode = IB_WC_BIND_MW;
405 break;
406 }
407 } else {
408 wc->byte_len = be32_to_cpu(cqe->byte_cnt);
409
410 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) {
411 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM:
412 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
413 wc->wc_flags = IB_WC_WITH_IMM;
414 wc->imm_data = cqe->immed_rss_invalid;
415 break;
416 case MLX4_RECV_OPCODE_SEND:
417 wc->opcode = IB_WC_RECV;
418 wc->wc_flags = 0;
419 break;
420 case MLX4_RECV_OPCODE_SEND_IMM:
421 wc->opcode = IB_WC_RECV;
422 wc->wc_flags = IB_WC_WITH_IMM;
423 wc->imm_data = cqe->immed_rss_invalid;
424 break;
425 }
426
427 wc->slid = be16_to_cpu(cqe->rlid);
428 wc->sl = cqe->sl >> 4;
429 wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff;
430 wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f;
431 wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ?
432 IB_WC_GRH : 0;
433 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16;
434 }
435
436 return 0;
437}
438
439int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
440{
441 struct mlx4_ib_cq *cq = to_mcq(ibcq);
442 struct mlx4_ib_qp *cur_qp = NULL;
443 unsigned long flags;
444 int npolled;
445 int err = 0;
446
447 spin_lock_irqsave(&cq->lock, flags);
448
449 for (npolled = 0; npolled < num_entries; ++npolled) {
450 err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
451 if (err)
452 break;
453 }
454
455 if (npolled)
456 mlx4_cq_set_ci(&cq->mcq);
457
458 spin_unlock_irqrestore(&cq->lock, flags);
459
460 if (err == 0 || err == -EAGAIN)
461 return npolled;
462 else
463 return err;
464}
465
466int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
467{
468 mlx4_cq_arm(&to_mcq(ibcq)->mcq,
469 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
470 MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT,
471 to_mdev(ibcq->device)->uar_map,
472 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock));
473
474 return 0;
475}
476
477void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
478{
479 u32 prod_index;
480 int nfreed = 0;
481 struct mlx4_cqe *cqe;
482
483 /*
484 * First we need to find the current producer index, so we
485 * know where to start cleaning from. It doesn't matter if HW
486 * adds new entries after this loop -- the QP we're worried
487 * about is already in RESET, so the new entries won't come
488 * from our QP and therefore don't need to be checked.
489 */
490 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
491 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
492 break;
493
494 /*
495 * Now sweep backwards through the CQ, removing CQ entries
496 * that match our QP by copying older entries on top of them.
497 */
498 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
499 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
500 if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) {
501 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
502 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
503 ++nfreed;
504 } else if (nfreed)
505 memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe),
506 cqe, sizeof *cqe);
507 }
508
509 if (nfreed) {
510 cq->mcq.cons_index += nfreed;
511 /*
512 * Make sure update of buffer contents is done before
513 * updating consumer index.
514 */
515 wmb();
516 mlx4_cq_set_ci(&cq->mcq);
517 }
518}
519
520void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
521{
522 spin_lock_irq(&cq->lock);
523 __mlx4_ib_cq_clean(cq, qpn, srq);
524 spin_unlock_irq(&cq->lock);
525}
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
new file mode 100644
index 000000000000..1c36087aef14
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -0,0 +1,216 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/slab.h>
34
35#include "mlx4_ib.h"
36
37struct mlx4_ib_db_pgdir {
38 struct list_head list;
39 DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE);
40 DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2);
41 unsigned long *bits[2];
42 __be32 *db_page;
43 dma_addr_t db_dma;
44};
45
46static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev)
47{
48 struct mlx4_ib_db_pgdir *pgdir;
49
50 pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
51 if (!pgdir)
52 return NULL;
53
54 bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2);
55 pgdir->bits[0] = pgdir->order0;
56 pgdir->bits[1] = pgdir->order1;
57 pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device,
58 PAGE_SIZE, &pgdir->db_dma,
59 GFP_KERNEL);
60 if (!pgdir->db_page) {
61 kfree(pgdir);
62 return NULL;
63 }
64
65 return pgdir;
66}
67
68static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir,
69 struct mlx4_ib_db *db, int order)
70{
71 int o;
72 int i;
73
74 for (o = order; o <= 1; ++o) {
75 i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o);
76 if (i < MLX4_IB_DB_PER_PAGE >> o)
77 goto found;
78 }
79
80 return -ENOMEM;
81
82found:
83 clear_bit(i, pgdir->bits[o]);
84
85 i <<= o;
86
87 if (o > order)
88 set_bit(i ^ 1, pgdir->bits[order]);
89
90 db->u.pgdir = pgdir;
91 db->index = i;
92 db->db = pgdir->db_page + db->index;
93 db->dma = pgdir->db_dma + db->index * 4;
94 db->order = order;
95
96 return 0;
97}
98
99int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order)
100{
101 struct mlx4_ib_db_pgdir *pgdir;
102 int ret = 0;
103
104 mutex_lock(&dev->pgdir_mutex);
105
106 list_for_each_entry(pgdir, &dev->pgdir_list, list)
107 if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order))
108 goto out;
109
110 pgdir = mlx4_ib_alloc_db_pgdir(dev);
111 if (!pgdir) {
112 ret = -ENOMEM;
113 goto out;
114 }
115
116 list_add(&pgdir->list, &dev->pgdir_list);
117
118 /* This should never fail -- we just allocated an empty page: */
119 WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order));
120
121out:
122 mutex_unlock(&dev->pgdir_mutex);
123
124 return ret;
125}
126
127void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db)
128{
129 int o;
130 int i;
131
132 mutex_lock(&dev->pgdir_mutex);
133
134 o = db->order;
135 i = db->index;
136
137 if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) {
138 clear_bit(i ^ 1, db->u.pgdir->order0);
139 ++o;
140 }
141
142 i >>= o;
143 set_bit(i, db->u.pgdir->bits[o]);
144
145 if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) {
146 dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE,
147 db->u.pgdir->db_page, db->u.pgdir->db_dma);
148 list_del(&db->u.pgdir->list);
149 kfree(db->u.pgdir);
150 }
151
152 mutex_unlock(&dev->pgdir_mutex);
153}
154
155struct mlx4_ib_user_db_page {
156 struct list_head list;
157 struct ib_umem *umem;
158 unsigned long user_virt;
159 int refcnt;
160};
161
162int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
163 struct mlx4_ib_db *db)
164{
165 struct mlx4_ib_user_db_page *page;
166 struct ib_umem_chunk *chunk;
167 int err = 0;
168
169 mutex_lock(&context->db_page_mutex);
170
171 list_for_each_entry(page, &context->db_page_list, list)
172 if (page->user_virt == (virt & PAGE_MASK))
173 goto found;
174
175 page = kmalloc(sizeof *page, GFP_KERNEL);
176 if (!page) {
177 err = -ENOMEM;
178 goto out;
179 }
180
181 page->user_virt = (virt & PAGE_MASK);
182 page->refcnt = 0;
183 page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
184 PAGE_SIZE, 0);
185 if (IS_ERR(page->umem)) {
186 err = PTR_ERR(page->umem);
187 kfree(page);
188 goto out;
189 }
190
191 list_add(&page->list, &context->db_page_list);
192
193found:
194 chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list);
195 db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK);
196 db->u.user_page = page;
197 ++page->refcnt;
198
199out:
200 mutex_unlock(&context->db_page_mutex);
201
202 return err;
203}
204
205void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db)
206{
207 mutex_lock(&context->db_page_mutex);
208
209 if (!--db->u.user_page->refcnt) {
210 list_del(&db->u.user_page->list);
211 ib_umem_release(db->u.user_page->umem);
212 kfree(db->u.user_page);
213 }
214
215 mutex_unlock(&context->db_page_mutex);
216}
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
new file mode 100644
index 000000000000..333091787c5f
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -0,0 +1,339 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h>
35
36#include <linux/mlx4/cmd.h>
37
38#include "mlx4_ib.h"
39
40enum {
41 MLX4_IB_VENDOR_CLASS1 = 0x9,
42 MLX4_IB_VENDOR_CLASS2 = 0xa
43};
44
45int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
46 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
47 void *in_mad, void *response_mad)
48{
49 struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
50 void *inbox;
51 int err;
52 u32 in_modifier = port;
53 u8 op_modifier = 0;
54
55 inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
56 if (IS_ERR(inmailbox))
57 return PTR_ERR(inmailbox);
58 inbox = inmailbox->buf;
59
60 outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
61 if (IS_ERR(outmailbox)) {
62 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
63 return PTR_ERR(outmailbox);
64 }
65
66 memcpy(inbox, in_mad, 256);
67
68 /*
69 * Key check traps can't be generated unless we have in_wc to
70 * tell us where to send the trap.
71 */
72 if (ignore_mkey || !in_wc)
73 op_modifier |= 0x1;
74 if (ignore_bkey || !in_wc)
75 op_modifier |= 0x2;
76
77 if (in_wc) {
78 struct {
79 __be32 my_qpn;
80 u32 reserved1;
81 __be32 rqpn;
82 u8 sl;
83 u8 g_path;
84 u16 reserved2[2];
85 __be16 pkey;
86 u32 reserved3[11];
87 u8 grh[40];
88 } *ext_info;
89
90 memset(inbox + 256, 0, 256);
91 ext_info = inbox + 256;
92
93 ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
94 ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
95 ext_info->sl = in_wc->sl << 4;
96 ext_info->g_path = in_wc->dlid_path_bits |
97 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
98 ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
99
100 if (in_grh)
101 memcpy(ext_info->grh, in_grh, 40);
102
103 op_modifier |= 0x4;
104
105 in_modifier |= in_wc->slid << 16;
106 }
107
108 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
109 in_modifier, op_modifier,
110 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
111
112 if (!err);
113 memcpy(response_mad, outmailbox->buf, 256);
114
115 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
116 mlx4_free_cmd_mailbox(dev->dev, outmailbox);
117
118 return err;
119}
120
121static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
122{
123 struct ib_ah *new_ah;
124 struct ib_ah_attr ah_attr;
125
126 if (!dev->send_agent[port_num - 1][0])
127 return;
128
129 memset(&ah_attr, 0, sizeof ah_attr);
130 ah_attr.dlid = lid;
131 ah_attr.sl = sl;
132 ah_attr.port_num = port_num;
133
134 new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
135 &ah_attr);
136 if (IS_ERR(new_ah))
137 return;
138
139 spin_lock(&dev->sm_lock);
140 if (dev->sm_ah[port_num - 1])
141 ib_destroy_ah(dev->sm_ah[port_num - 1]);
142 dev->sm_ah[port_num - 1] = new_ah;
143 spin_unlock(&dev->sm_lock);
144}
145
146/*
147 * Snoop SM MADs for port info and P_Key table sets, so we can
148 * synthesize LID change and P_Key change events.
149 */
150static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad)
151{
152 struct ib_event event;
153
154 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
155 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
156 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
157 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
158 struct ib_port_info *pinfo =
159 (struct ib_port_info *) ((struct ib_smp *) mad)->data;
160
161 update_sm_ah(to_mdev(ibdev), port_num,
162 be16_to_cpu(pinfo->sm_lid),
163 pinfo->neighbormtu_mastersmsl & 0xf);
164
165 event.device = ibdev;
166 event.element.port_num = port_num;
167
168 if(pinfo->clientrereg_resv_subnetto & 0x80)
169 event.event = IB_EVENT_CLIENT_REREGISTER;
170 else
171 event.event = IB_EVENT_LID_CHANGE;
172
173 ib_dispatch_event(&event);
174 }
175
176 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
177 event.device = ibdev;
178 event.event = IB_EVENT_PKEY_CHANGE;
179 event.element.port_num = port_num;
180 ib_dispatch_event(&event);
181 }
182 }
183}
184
185static void node_desc_override(struct ib_device *dev,
186 struct ib_mad *mad)
187{
188 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
189 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
190 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
191 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
192 spin_lock(&to_mdev(dev)->sm_lock);
193 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
194 spin_unlock(&to_mdev(dev)->sm_lock);
195 }
196}
197
198static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad)
199{
200 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
201 struct ib_mad_send_buf *send_buf;
202 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
203 int ret;
204
205 if (agent) {
206 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
207 IB_MGMT_MAD_DATA, GFP_ATOMIC);
208 /*
209 * We rely here on the fact that MLX QPs don't use the
210 * address handle after the send is posted (this is
211 * wrong following the IB spec strictly, but we know
212 * it's OK for our devices).
213 */
214 spin_lock(&dev->sm_lock);
215 memcpy(send_buf->mad, mad, sizeof *mad);
216 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
217 ret = ib_post_send_mad(send_buf, NULL);
218 else
219 ret = -EINVAL;
220 spin_unlock(&dev->sm_lock);
221
222 if (ret)
223 ib_free_send_mad(send_buf);
224 }
225}
226
227int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
228 struct ib_wc *in_wc, struct ib_grh *in_grh,
229 struct ib_mad *in_mad, struct ib_mad *out_mad)
230{
231 u16 slid;
232 int err;
233
234 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
235
236 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
237 forward_trap(to_mdev(ibdev), port_num, in_mad);
238 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
239 }
240
241 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
242 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
243 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
244 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
245 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
246 return IB_MAD_RESULT_SUCCESS;
247
248 /*
249 * Don't process SMInfo queries or vendor-specific
250 * MADs -- the SMA can't handle them.
251 */
252 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
253 ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
254 IB_SMP_ATTR_VENDOR_MASK))
255 return IB_MAD_RESULT_SUCCESS;
256 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
257 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
258 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) {
259 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
260 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
261 return IB_MAD_RESULT_SUCCESS;
262 } else
263 return IB_MAD_RESULT_SUCCESS;
264
265 err = mlx4_MAD_IFC(to_mdev(ibdev),
266 mad_flags & IB_MAD_IGNORE_MKEY,
267 mad_flags & IB_MAD_IGNORE_BKEY,
268 port_num, in_wc, in_grh, in_mad, out_mad);
269 if (err)
270 return IB_MAD_RESULT_FAILURE;
271
272 if (!out_mad->mad_hdr.status) {
273 smp_snoop(ibdev, port_num, in_mad);
274 node_desc_override(ibdev, out_mad);
275 }
276
277 /* set return bit in status of directed route responses */
278 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
279 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
280
281 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
282 /* no response for trap repress */
283 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
284
285 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
286}
287
288static void send_handler(struct ib_mad_agent *agent,
289 struct ib_mad_send_wc *mad_send_wc)
290{
291 ib_free_send_mad(mad_send_wc->send_buf);
292}
293
294int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
295{
296 struct ib_mad_agent *agent;
297 int p, q;
298 int ret;
299
300 for (p = 0; p < dev->dev->caps.num_ports; ++p)
301 for (q = 0; q <= 1; ++q) {
302 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
303 q ? IB_QPT_GSI : IB_QPT_SMI,
304 NULL, 0, send_handler,
305 NULL, NULL);
306 if (IS_ERR(agent)) {
307 ret = PTR_ERR(agent);
308 goto err;
309 }
310 dev->send_agent[p][q] = agent;
311 }
312
313 return 0;
314
315err:
316 for (p = 0; p < dev->dev->caps.num_ports; ++p)
317 for (q = 0; q <= 1; ++q)
318 if (dev->send_agent[p][q])
319 ib_unregister_mad_agent(dev->send_agent[p][q]);
320
321 return ret;
322}
323
324void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
325{
326 struct ib_mad_agent *agent;
327 int p, q;
328
329 for (p = 0; p < dev->dev->caps.num_ports; ++p) {
330 for (q = 0; q <= 1; ++q) {
331 agent = dev->send_agent[p][q];
332 dev->send_agent[p][q] = NULL;
333 ib_unregister_mad_agent(agent);
334 }
335
336 if (dev->sm_ah[p])
337 ib_destroy_ah(dev->sm_ah[p]);
338 }
339}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
new file mode 100644
index 000000000000..688ecb4c39f3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -0,0 +1,651 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <linux/init.h>
35#include <linux/errno.h>
36
37#include <rdma/ib_smi.h>
38#include <rdma/ib_user_verbs.h>
39
40#include <linux/mlx4/driver.h>
41#include <linux/mlx4/cmd.h>
42
43#include "mlx4_ib.h"
44#include "user.h"
45
46#define DRV_NAME "mlx4_ib"
47#define DRV_VERSION "0.01"
48#define DRV_RELDATE "May 1, 2006"
49
50MODULE_AUTHOR("Roland Dreier");
51MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
52MODULE_LICENSE("Dual BSD/GPL");
53MODULE_VERSION(DRV_VERSION);
54
55static const char mlx4_ib_version[] __devinitdata =
56 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
57 DRV_VERSION " (" DRV_RELDATE ")\n";
58
59static void init_query_mad(struct ib_smp *mad)
60{
61 mad->base_version = 1;
62 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
63 mad->class_version = 1;
64 mad->method = IB_MGMT_METHOD_GET;
65}
66
67static int mlx4_ib_query_device(struct ib_device *ibdev,
68 struct ib_device_attr *props)
69{
70 struct mlx4_ib_dev *dev = to_mdev(ibdev);
71 struct ib_smp *in_mad = NULL;
72 struct ib_smp *out_mad = NULL;
73 int err = -ENOMEM;
74
75 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
76 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
77 if (!in_mad || !out_mad)
78 goto out;
79
80 init_query_mad(in_mad);
81 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
82
83 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
84 if (err)
85 goto out;
86
87 memset(props, 0, sizeof *props);
88
89 props->fw_ver = dev->dev->caps.fw_ver;
90 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
91 IB_DEVICE_PORT_ACTIVE_EVENT |
92 IB_DEVICE_SYS_IMAGE_GUID |
93 IB_DEVICE_RC_RNR_NAK_GEN;
94 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
95 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
96 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
97 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
98 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
99 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
100 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
101 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
102
103 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
104 0xffffff;
105 props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
106 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
107 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
108
109 props->max_mr_size = ~0ull;
110 props->page_size_cap = dev->dev->caps.page_size_cap;
111 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
112 props->max_qp_wr = dev->dev->caps.max_wqes;
113 props->max_sge = min(dev->dev->caps.max_sq_sg,
114 dev->dev->caps.max_rq_sg);
115 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
116 props->max_cqe = dev->dev->caps.max_cqes;
117 props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
118 props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
119 props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
120 props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
121 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
122 props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
123 props->max_srq_wr = dev->dev->caps.max_srq_wqes;
124 props->max_srq_sge = dev->dev->caps.max_srq_sge;
125 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
126 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
127 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
128 props->max_pkeys = dev->dev->caps.pkey_table_len;
129 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
130 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
131 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
132 props->max_mcast_grp;
133 props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
134
135out:
136 kfree(in_mad);
137 kfree(out_mad);
138
139 return err;
140}
141
142static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
143 struct ib_port_attr *props)
144{
145 struct ib_smp *in_mad = NULL;
146 struct ib_smp *out_mad = NULL;
147 int err = -ENOMEM;
148
149 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
150 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
151 if (!in_mad || !out_mad)
152 goto out;
153
154 memset(props, 0, sizeof *props);
155
156 init_query_mad(in_mad);
157 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
158 in_mad->attr_mod = cpu_to_be32(port);
159
160 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
161 if (err)
162 goto out;
163
164 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
165 props->lmc = out_mad->data[34] & 0x7;
166 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
167 props->sm_sl = out_mad->data[36] & 0xf;
168 props->state = out_mad->data[32] & 0xf;
169 props->phys_state = out_mad->data[33] >> 4;
170 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
171 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len;
172 props->max_msg_sz = 0x80000000;
173 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len;
174 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
175 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
176 props->active_width = out_mad->data[31] & 0xf;
177 props->active_speed = out_mad->data[35] >> 4;
178 props->max_mtu = out_mad->data[41] & 0xf;
179 props->active_mtu = out_mad->data[36] >> 4;
180 props->subnet_timeout = out_mad->data[51] & 0x1f;
181 props->max_vl_num = out_mad->data[37] >> 4;
182 props->init_type_reply = out_mad->data[41] >> 4;
183
184out:
185 kfree(in_mad);
186 kfree(out_mad);
187
188 return err;
189}
190
191static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
192 union ib_gid *gid)
193{
194 struct ib_smp *in_mad = NULL;
195 struct ib_smp *out_mad = NULL;
196 int err = -ENOMEM;
197
198 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
199 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
200 if (!in_mad || !out_mad)
201 goto out;
202
203 init_query_mad(in_mad);
204 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
205 in_mad->attr_mod = cpu_to_be32(port);
206
207 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
208 if (err)
209 goto out;
210
211 memcpy(gid->raw, out_mad->data + 8, 8);
212
213 init_query_mad(in_mad);
214 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
215 in_mad->attr_mod = cpu_to_be32(index / 8);
216
217 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
218 if (err)
219 goto out;
220
221 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
222
223out:
224 kfree(in_mad);
225 kfree(out_mad);
226 return err;
227}
228
229static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
230 u16 *pkey)
231{
232 struct ib_smp *in_mad = NULL;
233 struct ib_smp *out_mad = NULL;
234 int err = -ENOMEM;
235
236 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
237 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
238 if (!in_mad || !out_mad)
239 goto out;
240
241 init_query_mad(in_mad);
242 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
243 in_mad->attr_mod = cpu_to_be32(index / 32);
244
245 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
246 if (err)
247 goto out;
248
249 *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
250
251out:
252 kfree(in_mad);
253 kfree(out_mad);
254 return err;
255}
256
257static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
258 struct ib_device_modify *props)
259{
260 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
261 return -EOPNOTSUPP;
262
263 if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
264 spin_lock(&to_mdev(ibdev)->sm_lock);
265 memcpy(ibdev->node_desc, props->node_desc, 64);
266 spin_unlock(&to_mdev(ibdev)->sm_lock);
267 }
268
269 return 0;
270}
271
272static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
273 u32 cap_mask)
274{
275 struct mlx4_cmd_mailbox *mailbox;
276 int err;
277
278 mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
279 if (IS_ERR(mailbox))
280 return PTR_ERR(mailbox);
281
282 memset(mailbox->buf, 0, 256);
283 *(u8 *) mailbox->buf = !!reset_qkey_viols << 6;
284 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
285
286 err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
287 MLX4_CMD_TIME_CLASS_B);
288
289 mlx4_free_cmd_mailbox(dev->dev, mailbox);
290 return err;
291}
292
293static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
294 struct ib_port_modify *props)
295{
296 struct ib_port_attr attr;
297 u32 cap_mask;
298 int err;
299
300 mutex_lock(&to_mdev(ibdev)->cap_mask_mutex);
301
302 err = mlx4_ib_query_port(ibdev, port, &attr);
303 if (err)
304 goto out;
305
306 cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
307 ~props->clr_port_cap_mask;
308
309 err = mlx4_SET_PORT(to_mdev(ibdev), port,
310 !!(mask & IB_PORT_RESET_QKEY_CNTR),
311 cap_mask);
312
313out:
314 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
315 return err;
316}
317
318static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
319 struct ib_udata *udata)
320{
321 struct mlx4_ib_dev *dev = to_mdev(ibdev);
322 struct mlx4_ib_ucontext *context;
323 struct mlx4_ib_alloc_ucontext_resp resp;
324 int err;
325
326 resp.qp_tab_size = dev->dev->caps.num_qps;
327 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
328 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
329
330 context = kmalloc(sizeof *context, GFP_KERNEL);
331 if (!context)
332 return ERR_PTR(-ENOMEM);
333
334 err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
335 if (err) {
336 kfree(context);
337 return ERR_PTR(err);
338 }
339
340 INIT_LIST_HEAD(&context->db_page_list);
341 mutex_init(&context->db_page_mutex);
342
343 err = ib_copy_to_udata(udata, &resp, sizeof resp);
344 if (err) {
345 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
346 kfree(context);
347 return ERR_PTR(-EFAULT);
348 }
349
350 return &context->ibucontext;
351}
352
353static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
354{
355 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
356
357 mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
358 kfree(context);
359
360 return 0;
361}
362
363static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
364{
365 struct mlx4_ib_dev *dev = to_mdev(context->device);
366
367 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
368 return -EINVAL;
369
370 if (vma->vm_pgoff == 0) {
371 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
372
373 if (io_remap_pfn_range(vma, vma->vm_start,
374 to_mucontext(context)->uar.pfn,
375 PAGE_SIZE, vma->vm_page_prot))
376 return -EAGAIN;
377 } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
378 /* FIXME want pgprot_writecombine() for BlueFlame pages */
379 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
380
381 if (io_remap_pfn_range(vma, vma->vm_start,
382 to_mucontext(context)->uar.pfn +
383 dev->dev->caps.num_uars,
384 PAGE_SIZE, vma->vm_page_prot))
385 return -EAGAIN;
386 } else
387 return -EINVAL;
388
389 return 0;
390}
391
392static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
393 struct ib_ucontext *context,
394 struct ib_udata *udata)
395{
396 struct mlx4_ib_pd *pd;
397 int err;
398
399 pd = kmalloc(sizeof *pd, GFP_KERNEL);
400 if (!pd)
401 return ERR_PTR(-ENOMEM);
402
403 err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
404 if (err) {
405 kfree(pd);
406 return ERR_PTR(err);
407 }
408
409 if (context)
410 if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
411 mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
412 kfree(pd);
413 return ERR_PTR(-EFAULT);
414 }
415
416 return &pd->ibpd;
417}
418
419static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
420{
421 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
422 kfree(pd);
423
424 return 0;
425}
426
427static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
428{
429 return mlx4_multicast_attach(to_mdev(ibqp->device)->dev,
430 &to_mqp(ibqp)->mqp, gid->raw);
431}
432
433static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
434{
435 return mlx4_multicast_detach(to_mdev(ibqp->device)->dev,
436 &to_mqp(ibqp)->mqp, gid->raw);
437}
438
439static int init_node_data(struct mlx4_ib_dev *dev)
440{
441 struct ib_smp *in_mad = NULL;
442 struct ib_smp *out_mad = NULL;
443 int err = -ENOMEM;
444
445 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
446 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
447 if (!in_mad || !out_mad)
448 goto out;
449
450 init_query_mad(in_mad);
451 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
452
453 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
454 if (err)
455 goto out;
456
457 memcpy(dev->ib_dev.node_desc, out_mad->data, 64);
458
459 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
460
461 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
462 if (err)
463 goto out;
464
465 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
466
467out:
468 kfree(in_mad);
469 kfree(out_mad);
470 return err;
471}
472
473static void *mlx4_ib_add(struct mlx4_dev *dev)
474{
475 struct mlx4_ib_dev *ibdev;
476
477 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
478 if (!ibdev) {
479 dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
480 return NULL;
481 }
482
483 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
484 goto err_dealloc;
485
486 if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
487 goto err_pd;
488
489 ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
490 if (!ibdev->uar_map)
491 goto err_uar;
492
493 INIT_LIST_HEAD(&ibdev->pgdir_list);
494 mutex_init(&ibdev->pgdir_mutex);
495
496 ibdev->dev = dev;
497
498 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
499 ibdev->ib_dev.owner = THIS_MODULE;
500 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA;
501 ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports;
502 ibdev->ib_dev.num_comp_vectors = 1;
503 ibdev->ib_dev.dma_device = &dev->pdev->dev;
504
505 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
506 ibdev->ib_dev.uverbs_cmd_mask =
507 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
508 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
509 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
510 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
511 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
512 (1ull << IB_USER_VERBS_CMD_REG_MR) |
513 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
514 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
515 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
516 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
517 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
518 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
519 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
520 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
521 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
522 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
523 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
524 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
525
526 ibdev->ib_dev.query_device = mlx4_ib_query_device;
527 ibdev->ib_dev.query_port = mlx4_ib_query_port;
528 ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
529 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
530 ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
531 ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
532 ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
533 ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
534 ibdev->ib_dev.mmap = mlx4_ib_mmap;
535 ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
536 ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
537 ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
538 ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
539 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
540 ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
541 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
542 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
543 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
544 ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
545 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
546 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
547 ibdev->ib_dev.post_send = mlx4_ib_post_send;
548 ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
549 ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
550 ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
551 ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
552 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
553 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
554 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
555 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
556 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
557 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
558 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
559
560 if (init_node_data(ibdev))
561 goto err_map;
562
563 spin_lock_init(&ibdev->sm_lock);
564 mutex_init(&ibdev->cap_mask_mutex);
565
566 if (ib_register_device(&ibdev->ib_dev))
567 goto err_map;
568
569 if (mlx4_ib_mad_init(ibdev))
570 goto err_reg;
571
572 return ibdev;
573
574err_reg:
575 ib_unregister_device(&ibdev->ib_dev);
576
577err_map:
578 iounmap(ibdev->uar_map);
579
580err_uar:
581 mlx4_uar_free(dev, &ibdev->priv_uar);
582
583err_pd:
584 mlx4_pd_free(dev, ibdev->priv_pdn);
585
586err_dealloc:
587 ib_dealloc_device(&ibdev->ib_dev);
588
589 return NULL;
590}
591
592static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
593{
594 struct mlx4_ib_dev *ibdev = ibdev_ptr;
595 int p;
596
597 for (p = 1; p <= dev->caps.num_ports; ++p)
598 mlx4_CLOSE_PORT(dev, p);
599
600 mlx4_ib_mad_cleanup(ibdev);
601 ib_unregister_device(&ibdev->ib_dev);
602 iounmap(ibdev->uar_map);
603 mlx4_uar_free(dev, &ibdev->priv_uar);
604 mlx4_pd_free(dev, ibdev->priv_pdn);
605 ib_dealloc_device(&ibdev->ib_dev);
606}
607
608static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
609 enum mlx4_dev_event event, int subtype,
610 int port)
611{
612 struct ib_event ibev;
613
614 switch (event) {
615 case MLX4_EVENT_TYPE_PORT_CHANGE:
616 ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ?
617 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
618 break;
619
620 case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR:
621 ibev.event = IB_EVENT_DEVICE_FATAL;
622 break;
623
624 default:
625 return;
626 }
627
628 ibev.device = ibdev_ptr;
629 ibev.element.port_num = port;
630
631 ib_dispatch_event(&ibev);
632}
633
634static struct mlx4_interface mlx4_ib_interface = {
635 .add = mlx4_ib_add,
636 .remove = mlx4_ib_remove,
637 .event = mlx4_ib_event
638};
639
640static int __init mlx4_ib_init(void)
641{
642 return mlx4_register_interface(&mlx4_ib_interface);
643}
644
645static void __exit mlx4_ib_cleanup(void)
646{
647 mlx4_unregister_interface(&mlx4_ib_interface);
648}
649
650module_init(mlx4_ib_init);
651module_exit(mlx4_ib_cleanup);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
new file mode 100644
index 000000000000..93dac71f3230
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -0,0 +1,285 @@
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX4_IB_H
34#define MLX4_IB_H
35
36#include <linux/compiler.h>
37#include <linux/list.h>
38
39#include <rdma/ib_verbs.h>
40#include <rdma/ib_umem.h>
41
42#include <linux/mlx4/device.h>
43#include <linux/mlx4/doorbell.h>
44
45enum {
46 MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4
47};
48
49struct mlx4_ib_db_pgdir;
50struct mlx4_ib_user_db_page;
51
52struct mlx4_ib_db {
53 __be32 *db;
54 union {
55 struct mlx4_ib_db_pgdir *pgdir;
56 struct mlx4_ib_user_db_page *user_page;
57 } u;
58 dma_addr_t dma;
59 int index;
60 int order;
61};
62
63struct mlx4_ib_ucontext {
64 struct ib_ucontext ibucontext;
65 struct mlx4_uar uar;
66 struct list_head db_page_list;
67 struct mutex db_page_mutex;
68};
69
70struct mlx4_ib_pd {
71 struct ib_pd ibpd;
72 u32 pdn;
73};
74
75struct mlx4_ib_cq_buf {
76 struct mlx4_buf buf;
77 struct mlx4_mtt mtt;
78};
79
80struct mlx4_ib_cq {
81 struct ib_cq ibcq;
82 struct mlx4_cq mcq;
83 struct mlx4_ib_cq_buf buf;
84 struct mlx4_ib_db db;
85 spinlock_t lock;
86 struct ib_umem *umem;
87};
88
89struct mlx4_ib_mr {
90 struct ib_mr ibmr;
91 struct mlx4_mr mmr;
92 struct ib_umem *umem;
93};
94
95struct mlx4_ib_wq {
96 u64 *wrid;
97 spinlock_t lock;
98 int max;
99 int max_gs;
100 int offset;
101 int wqe_shift;
102 unsigned head;
103 unsigned tail;
104};
105
106struct mlx4_ib_qp {
107 struct ib_qp ibqp;
108 struct mlx4_qp mqp;
109 struct mlx4_buf buf;
110
111 struct mlx4_ib_db db;
112 struct mlx4_ib_wq rq;
113
114 u32 doorbell_qpn;
115 __be32 sq_signal_bits;
116 struct mlx4_ib_wq sq;
117
118 struct ib_umem *umem;
119 struct mlx4_mtt mtt;
120 int buf_size;
121 struct mutex mutex;
122 u8 port;
123 u8 alt_port;
124 u8 atomic_rd_en;
125 u8 resp_depth;
126 u8 state;
127};
128
129struct mlx4_ib_srq {
130 struct ib_srq ibsrq;
131 struct mlx4_srq msrq;
132 struct mlx4_buf buf;
133 struct mlx4_ib_db db;
134 u64 *wrid;
135 spinlock_t lock;
136 int head;
137 int tail;
138 u16 wqe_ctr;
139 struct ib_umem *umem;
140 struct mlx4_mtt mtt;
141 struct mutex mutex;
142};
143
144struct mlx4_ib_ah {
145 struct ib_ah ibah;
146 struct mlx4_av av;
147};
148
149struct mlx4_ib_dev {
150 struct ib_device ib_dev;
151 struct mlx4_dev *dev;
152 void __iomem *uar_map;
153
154 struct list_head pgdir_list;
155 struct mutex pgdir_mutex;
156
157 struct mlx4_uar priv_uar;
158 u32 priv_pdn;
159 MLX4_DECLARE_DOORBELL_LOCK(uar_lock);
160
161 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
162 struct ib_ah *sm_ah[MLX4_MAX_PORTS];
163 spinlock_t sm_lock;
164
165 struct mutex cap_mask_mutex;
166};
167
168static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
169{
170 return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
171}
172
173static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
174{
175 return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext);
176}
177
178static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
179{
180 return container_of(ibpd, struct mlx4_ib_pd, ibpd);
181}
182
183static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
184{
185 return container_of(ibcq, struct mlx4_ib_cq, ibcq);
186}
187
188static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq)
189{
190 return container_of(mcq, struct mlx4_ib_cq, mcq);
191}
192
193static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr)
194{
195 return container_of(ibmr, struct mlx4_ib_mr, ibmr);
196}
197
198static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp)
199{
200 return container_of(ibqp, struct mlx4_ib_qp, ibqp);
201}
202
203static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp)
204{
205 return container_of(mqp, struct mlx4_ib_qp, mqp);
206}
207
208static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq)
209{
210 return container_of(ibsrq, struct mlx4_ib_srq, ibsrq);
211}
212
213static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq)
214{
215 return container_of(msrq, struct mlx4_ib_srq, msrq);
216}
217
218static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
219{
220 return container_of(ibah, struct mlx4_ib_ah, ibah);
221}
222
223int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order);
224void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db);
225int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
226 struct mlx4_ib_db *db);
227void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db);
228
229struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc);
230int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
231 struct ib_umem *umem);
232struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
233 u64 virt_addr, int access_flags,
234 struct ib_udata *udata);
235int mlx4_ib_dereg_mr(struct ib_mr *mr);
236
237struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector,
238 struct ib_ucontext *context,
239 struct ib_udata *udata);
240int mlx4_ib_destroy_cq(struct ib_cq *cq);
241int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
242int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
243void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
244void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
245
246struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
247int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
248int mlx4_ib_destroy_ah(struct ib_ah *ah);
249
250struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
251 struct ib_srq_init_attr *init_attr,
252 struct ib_udata *udata);
253int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
254 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
255int mlx4_ib_destroy_srq(struct ib_srq *srq);
256void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
257int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
258 struct ib_recv_wr **bad_wr);
259
260struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
261 struct ib_qp_init_attr *init_attr,
262 struct ib_udata *udata);
263int mlx4_ib_destroy_qp(struct ib_qp *qp);
264int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
265 int attr_mask, struct ib_udata *udata);
266int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
267 struct ib_send_wr **bad_wr);
268int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
269 struct ib_recv_wr **bad_wr);
270
271int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
272 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
273 void *in_mad, void *response_mad);
274int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
275 struct ib_wc *in_wc, struct ib_grh *in_grh,
276 struct ib_mad *in_mad, struct ib_mad *out_mad);
277int mlx4_ib_mad_init(struct mlx4_ib_dev *dev);
278void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev);
279
280static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
281{
282 return !!(ah->av.g_slid & 0x80);
283}
284
285#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
new file mode 100644
index 000000000000..85ae906f1d12
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -0,0 +1,184 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include "mlx4_ib.h"
34
35static u32 convert_access(int acc)
36{
37 return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) |
38 (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) |
39 (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) |
40 (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) |
41 MLX4_PERM_LOCAL_READ;
42}
43
44struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
45{
46 struct mlx4_ib_mr *mr;
47 int err;
48
49 mr = kmalloc(sizeof *mr, GFP_KERNEL);
50 if (!mr)
51 return ERR_PTR(-ENOMEM);
52
53 err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
54 ~0ull, convert_access(acc), 0, 0, &mr->mmr);
55 if (err)
56 goto err_free;
57
58 err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
59 if (err)
60 goto err_mr;
61
62 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
63 mr->umem = NULL;
64
65 return &mr->ibmr;
66
67err_mr:
68 mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
69
70err_free:
71 kfree(mr);
72
73 return ERR_PTR(err);
74}
75
76int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
77 struct ib_umem *umem)
78{
79 u64 *pages;
80 struct ib_umem_chunk *chunk;
81 int i, j, k;
82 int n;
83 int len;
84 int err = 0;
85
86 pages = (u64 *) __get_free_page(GFP_KERNEL);
87 if (!pages)
88 return -ENOMEM;
89
90 i = n = 0;
91
92 list_for_each_entry(chunk, &umem->chunk_list, list)
93 for (j = 0; j < chunk->nmap; ++j) {
94 len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift;
95 for (k = 0; k < len; ++k) {
96 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
97 umem->page_size * k;
98 /*
99 * Be friendly to WRITE_MTT firmware
100 * command, and pass it chunks of
101 * appropriate size.
102 */
103 if (i == PAGE_SIZE / sizeof (u64) - 2) {
104 err = mlx4_write_mtt(dev->dev, mtt, n,
105 i, pages);
106 if (err)
107 goto out;
108 n += i;
109 i = 0;
110 }
111 }
112 }
113
114 if (i)
115 err = mlx4_write_mtt(dev->dev, mtt, n, i, pages);
116
117out:
118 free_page((unsigned long) pages);
119 return err;
120}
121
122struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
123 u64 virt_addr, int access_flags,
124 struct ib_udata *udata)
125{
126 struct mlx4_ib_dev *dev = to_mdev(pd->device);
127 struct mlx4_ib_mr *mr;
128 int shift;
129 int err;
130 int n;
131
132 mr = kmalloc(sizeof *mr, GFP_KERNEL);
133 if (!mr)
134 return ERR_PTR(-ENOMEM);
135
136 mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags);
137 if (IS_ERR(mr->umem)) {
138 err = PTR_ERR(mr->umem);
139 goto err_free;
140 }
141
142 n = ib_umem_page_count(mr->umem);
143 shift = ilog2(mr->umem->page_size);
144
145 err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
146 convert_access(access_flags), n, shift, &mr->mmr);
147 if (err)
148 goto err_umem;
149
150 err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
151 if (err)
152 goto err_mr;
153
154 err = mlx4_mr_enable(dev->dev, &mr->mmr);
155 if (err)
156 goto err_mr;
157
158 mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
159
160 return &mr->ibmr;
161
162err_mr:
163 mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);
164
165err_umem:
166 ib_umem_release(mr->umem);
167
168err_free:
169 kfree(mr);
170
171 return ERR_PTR(err);
172}
173
174int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
175{
176 struct mlx4_ib_mr *mr = to_mmr(ibmr);
177
178 mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
179 if (mr->umem)
180 ib_umem_release(mr->umem);
181 kfree(mr);
182
183 return 0;
184}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
new file mode 100644
index 000000000000..5cd706908450
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -0,0 +1,1294 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_cache.h>
34#include <rdma/ib_pack.h>
35
36#include <linux/mlx4/qp.h>
37
38#include "mlx4_ib.h"
39#include "user.h"
40
41enum {
42 MLX4_IB_ACK_REQ_FREQ = 8,
43};
44
45enum {
46 MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83,
47 MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f
48};
49
50enum {
51 /*
52 * Largest possible UD header: send with GRH and immediate data.
53 */
54 MLX4_IB_UD_HEADER_SIZE = 72
55};
56
57struct mlx4_ib_sqp {
58 struct mlx4_ib_qp qp;
59 int pkey_index;
60 u32 qkey;
61 u32 send_psn;
62 struct ib_ud_header ud_header;
63 u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
64};
65
66static const __be32 mlx4_ib_opcode[] = {
67 [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND),
68 [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
69 [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
70 [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
71 [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ),
72 [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
73 [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
74};
75
76static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
77{
78 return container_of(mqp, struct mlx4_ib_sqp, qp);
79}
80
81static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
82{
83 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
84 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
85}
86
87static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
88{
89 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
90 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
91}
92
93static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
94{
95 if (qp->buf.nbufs == 1)
96 return qp->buf.u.direct.buf + offset;
97 else
98 return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf +
99 (offset & (PAGE_SIZE - 1));
100}
101
102static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n)
103{
104 return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
105}
106
107static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
108{
109 return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
110}
111
112static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
113{
114 struct ib_event event;
115 struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
116
117 if (type == MLX4_EVENT_TYPE_PATH_MIG)
118 to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
119
120 if (ibqp->event_handler) {
121 event.device = ibqp->device;
122 event.element.qp = ibqp;
123 switch (type) {
124 case MLX4_EVENT_TYPE_PATH_MIG:
125 event.event = IB_EVENT_PATH_MIG;
126 break;
127 case MLX4_EVENT_TYPE_COMM_EST:
128 event.event = IB_EVENT_COMM_EST;
129 break;
130 case MLX4_EVENT_TYPE_SQ_DRAINED:
131 event.event = IB_EVENT_SQ_DRAINED;
132 break;
133 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
134 event.event = IB_EVENT_QP_LAST_WQE_REACHED;
135 break;
136 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
137 event.event = IB_EVENT_QP_FATAL;
138 break;
139 case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
140 event.event = IB_EVENT_PATH_MIG_ERR;
141 break;
142 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
143 event.event = IB_EVENT_QP_REQ_ERR;
144 break;
145 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
146 event.event = IB_EVENT_QP_ACCESS_ERR;
147 break;
148 default:
149 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
150 "on QP %06x\n", type, qp->qpn);
151 return;
152 }
153
154 ibqp->event_handler(&event, ibqp->qp_context);
155 }
156}
157
158static int send_wqe_overhead(enum ib_qp_type type)
159{
160 /*
161 * UD WQEs must have a datagram segment.
162 * RC and UC WQEs might have a remote address segment.
163 * MLX WQEs need two extra inline data segments (for the UD
164 * header and space for the ICRC).
165 */
166 switch (type) {
167 case IB_QPT_UD:
168 return sizeof (struct mlx4_wqe_ctrl_seg) +
169 sizeof (struct mlx4_wqe_datagram_seg);
170 case IB_QPT_UC:
171 return sizeof (struct mlx4_wqe_ctrl_seg) +
172 sizeof (struct mlx4_wqe_raddr_seg);
173 case IB_QPT_RC:
174 return sizeof (struct mlx4_wqe_ctrl_seg) +
175 sizeof (struct mlx4_wqe_atomic_seg) +
176 sizeof (struct mlx4_wqe_raddr_seg);
177 case IB_QPT_SMI:
178 case IB_QPT_GSI:
179 return sizeof (struct mlx4_wqe_ctrl_seg) +
180 ALIGN(MLX4_IB_UD_HEADER_SIZE +
181 sizeof (struct mlx4_wqe_inline_seg),
182 sizeof (struct mlx4_wqe_data_seg)) +
183 ALIGN(4 +
184 sizeof (struct mlx4_wqe_inline_seg),
185 sizeof (struct mlx4_wqe_data_seg));
186 default:
187 return sizeof (struct mlx4_wqe_ctrl_seg);
188 }
189}
190
191static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
192 enum ib_qp_type type, struct mlx4_ib_qp *qp)
193{
194 /* Sanity check QP size before proceeding */
195 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
196 cap->max_recv_wr > dev->dev->caps.max_wqes ||
197 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
198 cap->max_recv_sge > dev->dev->caps.max_rq_sg ||
199 cap->max_inline_data + send_wqe_overhead(type) +
200 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
201 return -EINVAL;
202
203 /*
204 * For MLX transport we need 2 extra S/G entries:
205 * one for the header and one for the checksum at the end
206 */
207 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
208 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
209 return -EINVAL;
210
211 qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0;
212 qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0;
213
214 qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge *
215 sizeof (struct mlx4_wqe_data_seg)));
216 qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg);
217
218 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge *
219 sizeof (struct mlx4_wqe_data_seg),
220 cap->max_inline_data +
221 sizeof (struct mlx4_wqe_inline_seg)) +
222 send_wqe_overhead(type)));
223 qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) /
224 sizeof (struct mlx4_wqe_data_seg);
225
226 qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) +
227 (qp->sq.max << qp->sq.wqe_shift);
228 if (qp->rq.wqe_shift > qp->sq.wqe_shift) {
229 qp->rq.offset = 0;
230 qp->sq.offset = qp->rq.max << qp->rq.wqe_shift;
231 } else {
232 qp->rq.offset = qp->sq.max << qp->sq.wqe_shift;
233 qp->sq.offset = 0;
234 }
235
236 cap->max_send_wr = qp->sq.max;
237 cap->max_recv_wr = qp->rq.max;
238 cap->max_send_sge = qp->sq.max_gs;
239 cap->max_recv_sge = qp->rq.max_gs;
240 cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) -
241 sizeof (struct mlx4_wqe_inline_seg);
242
243 return 0;
244}
245
246static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
247 struct ib_qp_init_attr *init_attr,
248 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
249{
250 struct mlx4_wqe_ctrl_seg *ctrl;
251 int err;
252 int i;
253
254 mutex_init(&qp->mutex);
255 spin_lock_init(&qp->sq.lock);
256 spin_lock_init(&qp->rq.lock);
257
258 qp->state = IB_QPS_RESET;
259 qp->atomic_rd_en = 0;
260 qp->resp_depth = 0;
261
262 qp->rq.head = 0;
263 qp->rq.tail = 0;
264 qp->sq.head = 0;
265 qp->sq.tail = 0;
266
267 err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp);
268 if (err)
269 goto err;
270
271 if (pd->uobject) {
272 struct mlx4_ib_create_qp ucmd;
273
274 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
275 err = -EFAULT;
276 goto err;
277 }
278
279 qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
280 qp->buf_size, 0);
281 if (IS_ERR(qp->umem)) {
282 err = PTR_ERR(qp->umem);
283 goto err;
284 }
285
286 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem),
287 ilog2(qp->umem->page_size), &qp->mtt);
288 if (err)
289 goto err_buf;
290
291 err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem);
292 if (err)
293 goto err_mtt;
294
295 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
296 ucmd.db_addr, &qp->db);
297 if (err)
298 goto err_mtt;
299 } else {
300 err = mlx4_ib_db_alloc(dev, &qp->db, 0);
301 if (err)
302 goto err;
303
304 *qp->db.db = 0;
305
306 if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
307 err = -ENOMEM;
308 goto err_db;
309 }
310
311 err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
312 &qp->mtt);
313 if (err)
314 goto err_buf;
315
316 err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
317 if (err)
318 goto err_mtt;
319
320 for (i = 0; i < qp->sq.max; ++i) {
321 ctrl = get_send_wqe(qp, i);
322 ctrl->owner_opcode = cpu_to_be32(1 << 31);
323 }
324
325 qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL);
326 qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL);
327
328 if (!qp->sq.wrid || !qp->rq.wrid) {
329 err = -ENOMEM;
330 goto err_wrid;
331 }
332
333 /* We don't support inline sends for kernel QPs (yet) */
334 init_attr->cap.max_inline_data = 0;
335 }
336
337 err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp);
338 if (err)
339 goto err_wrid;
340
341 /*
342 * Hardware wants QPN written in big-endian order (after
343 * shifting) for send doorbell. Precompute this value to save
344 * a little bit when posting sends.
345 */
346 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
347
348 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
349 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
350 else
351 qp->sq_signal_bits = 0;
352
353 qp->mqp.event = mlx4_ib_qp_event;
354
355 return 0;
356
357err_wrid:
358 if (pd->uobject)
359 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
360 else {
361 kfree(qp->sq.wrid);
362 kfree(qp->rq.wrid);
363 }
364
365err_mtt:
366 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
367
368err_buf:
369 if (pd->uobject)
370 ib_umem_release(qp->umem);
371 else
372 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
373
374err_db:
375 if (!pd->uobject)
376 mlx4_ib_db_free(dev, &qp->db);
377
378err:
379 return err;
380}
381
382static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state)
383{
384 switch (state) {
385 case IB_QPS_RESET: return MLX4_QP_STATE_RST;
386 case IB_QPS_INIT: return MLX4_QP_STATE_INIT;
387 case IB_QPS_RTR: return MLX4_QP_STATE_RTR;
388 case IB_QPS_RTS: return MLX4_QP_STATE_RTS;
389 case IB_QPS_SQD: return MLX4_QP_STATE_SQD;
390 case IB_QPS_SQE: return MLX4_QP_STATE_SQER;
391 case IB_QPS_ERR: return MLX4_QP_STATE_ERR;
392 default: return -1;
393 }
394}
395
396static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
397{
398 if (send_cq == recv_cq)
399 spin_lock_irq(&send_cq->lock);
400 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
401 spin_lock_irq(&send_cq->lock);
402 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
403 } else {
404 spin_lock_irq(&recv_cq->lock);
405 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
406 }
407}
408
409static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq)
410{
411 if (send_cq == recv_cq)
412 spin_unlock_irq(&send_cq->lock);
413 else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) {
414 spin_unlock(&recv_cq->lock);
415 spin_unlock_irq(&send_cq->lock);
416 } else {
417 spin_unlock(&send_cq->lock);
418 spin_unlock_irq(&recv_cq->lock);
419 }
420}
421
422static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
423 int is_user)
424{
425 struct mlx4_ib_cq *send_cq, *recv_cq;
426
427 if (qp->state != IB_QPS_RESET)
428 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
429 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
430 printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
431 qp->mqp.qpn);
432
433 send_cq = to_mcq(qp->ibqp.send_cq);
434 recv_cq = to_mcq(qp->ibqp.recv_cq);
435
436 mlx4_ib_lock_cqs(send_cq, recv_cq);
437
438 if (!is_user) {
439 __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
440 qp->ibqp.srq ? to_msrq(qp->ibqp.srq): NULL);
441 if (send_cq != recv_cq)
442 __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
443 }
444
445 mlx4_qp_remove(dev->dev, &qp->mqp);
446
447 mlx4_ib_unlock_cqs(send_cq, recv_cq);
448
449 mlx4_qp_free(dev->dev, &qp->mqp);
450 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
451
452 if (is_user) {
453 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
454 &qp->db);
455 ib_umem_release(qp->umem);
456 } else {
457 kfree(qp->sq.wrid);
458 kfree(qp->rq.wrid);
459 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
460 mlx4_ib_db_free(dev, &qp->db);
461 }
462}
463
464struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
465 struct ib_qp_init_attr *init_attr,
466 struct ib_udata *udata)
467{
468 struct mlx4_ib_dev *dev = to_mdev(pd->device);
469 struct mlx4_ib_sqp *sqp;
470 struct mlx4_ib_qp *qp;
471 int err;
472
473 switch (init_attr->qp_type) {
474 case IB_QPT_RC:
475 case IB_QPT_UC:
476 case IB_QPT_UD:
477 {
478 qp = kmalloc(sizeof *qp, GFP_KERNEL);
479 if (!qp)
480 return ERR_PTR(-ENOMEM);
481
482 err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
483 if (err) {
484 kfree(qp);
485 return ERR_PTR(err);
486 }
487
488 qp->ibqp.qp_num = qp->mqp.qpn;
489
490 break;
491 }
492 case IB_QPT_SMI:
493 case IB_QPT_GSI:
494 {
495 /* Userspace is not allowed to create special QPs: */
496 if (pd->uobject)
497 return ERR_PTR(-EINVAL);
498
499 sqp = kmalloc(sizeof *sqp, GFP_KERNEL);
500 if (!sqp)
501 return ERR_PTR(-ENOMEM);
502
503 qp = &sqp->qp;
504
505 err = create_qp_common(dev, pd, init_attr, udata,
506 dev->dev->caps.sqp_start +
507 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
508 init_attr->port_num - 1,
509 qp);
510 if (err) {
511 kfree(sqp);
512 return ERR_PTR(err);
513 }
514
515 qp->port = init_attr->port_num;
516 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
517
518 break;
519 }
520 default:
521 /* Don't support raw QPs */
522 return ERR_PTR(-EINVAL);
523 }
524
525 return &qp->ibqp;
526}
527
528int mlx4_ib_destroy_qp(struct ib_qp *qp)
529{
530 struct mlx4_ib_dev *dev = to_mdev(qp->device);
531 struct mlx4_ib_qp *mqp = to_mqp(qp);
532
533 if (is_qp0(dev, mqp))
534 mlx4_CLOSE_PORT(dev->dev, mqp->port);
535
536 destroy_qp_common(dev, mqp, !!qp->pd->uobject);
537
538 if (is_sqp(dev, mqp))
539 kfree(to_msqp(mqp));
540 else
541 kfree(mqp);
542
543 return 0;
544}
545
546static void init_port(struct mlx4_ib_dev *dev, int port)
547{
548 struct mlx4_init_port_param param;
549 int err;
550
551 memset(&param, 0, sizeof param);
552
553 param.port_width_cap = dev->dev->caps.port_width_cap;
554 param.vl_cap = dev->dev->caps.vl_cap;
555 param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap);
556 param.max_gid = dev->dev->caps.gid_table_len;
557 param.max_pkey = dev->dev->caps.pkey_table_len;
558
559 err = mlx4_INIT_PORT(dev->dev, &param, port);
560 if (err)
561 printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err);
562}
563
564static int to_mlx4_st(enum ib_qp_type type)
565{
566 switch (type) {
567 case IB_QPT_RC: return MLX4_QP_ST_RC;
568 case IB_QPT_UC: return MLX4_QP_ST_UC;
569 case IB_QPT_UD: return MLX4_QP_ST_UD;
570 case IB_QPT_SMI:
571 case IB_QPT_GSI: return MLX4_QP_ST_MLX;
572 default: return -1;
573 }
574}
575
576static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr,
577 int attr_mask)
578{
579 u8 dest_rd_atomic;
580 u32 access_flags;
581 u32 hw_access_flags = 0;
582
583 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
584 dest_rd_atomic = attr->max_dest_rd_atomic;
585 else
586 dest_rd_atomic = qp->resp_depth;
587
588 if (attr_mask & IB_QP_ACCESS_FLAGS)
589 access_flags = attr->qp_access_flags;
590 else
591 access_flags = qp->atomic_rd_en;
592
593 if (!dest_rd_atomic)
594 access_flags &= IB_ACCESS_REMOTE_WRITE;
595
596 if (access_flags & IB_ACCESS_REMOTE_READ)
597 hw_access_flags |= MLX4_QP_BIT_RRE;
598 if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
599 hw_access_flags |= MLX4_QP_BIT_RAE;
600 if (access_flags & IB_ACCESS_REMOTE_WRITE)
601 hw_access_flags |= MLX4_QP_BIT_RWE;
602
603 return cpu_to_be32(hw_access_flags);
604}
605
606static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr,
607 int attr_mask)
608{
609 if (attr_mask & IB_QP_PKEY_INDEX)
610 sqp->pkey_index = attr->pkey_index;
611 if (attr_mask & IB_QP_QKEY)
612 sqp->qkey = attr->qkey;
613 if (attr_mask & IB_QP_SQ_PSN)
614 sqp->send_psn = attr->sq_psn;
615}
616
617static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
618{
619 path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
620}
621
622static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah,
623 struct mlx4_qp_path *path, u8 port)
624{
625 path->grh_mylmc = ah->src_path_bits & 0x7f;
626 path->rlid = cpu_to_be16(ah->dlid);
627 if (ah->static_rate) {
628 path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET;
629 while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
630 !(1 << path->static_rate & dev->dev->caps.stat_rate_support))
631 --path->static_rate;
632 } else
633 path->static_rate = 0;
634 path->counter_index = 0xff;
635
636 if (ah->ah_flags & IB_AH_GRH) {
637 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) {
638 printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
639 ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1);
640 return -1;
641 }
642
643 path->grh_mylmc |= 1 << 7;
644 path->mgid_index = ah->grh.sgid_index;
645 path->hop_limit = ah->grh.hop_limit;
646 path->tclass_flowlabel =
647 cpu_to_be32((ah->grh.traffic_class << 20) |
648 (ah->grh.flow_label));
649 memcpy(path->rgid, ah->grh.dgid.raw, 16);
650 }
651
652 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
653 ((port - 1) << 6) | ((ah->sl & 0xf) << 2);
654
655 return 0;
656}
657
658int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
659 int attr_mask, struct ib_udata *udata)
660{
661 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
662 struct mlx4_ib_qp *qp = to_mqp(ibqp);
663 struct mlx4_qp_context *context;
664 enum mlx4_qp_optpar optpar = 0;
665 enum ib_qp_state cur_state, new_state;
666 int sqd_event;
667 int err = -EINVAL;
668
669 context = kzalloc(sizeof *context, GFP_KERNEL);
670 if (!context)
671 return -ENOMEM;
672
673 mutex_lock(&qp->mutex);
674
675 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
676 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
677
678 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
679 goto out;
680
681 if ((attr_mask & IB_QP_PKEY_INDEX) &&
682 attr->pkey_index >= dev->dev->caps.pkey_table_len) {
683 goto out;
684 }
685
686 if ((attr_mask & IB_QP_PORT) &&
687 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
688 goto out;
689 }
690
691 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
692 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
693 goto out;
694 }
695
696 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
697 attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) {
698 goto out;
699 }
700
701 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
702 (to_mlx4_st(ibqp->qp_type) << 16));
703 context->flags |= cpu_to_be32(1 << 8); /* DE? */
704
705 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
706 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
707 else {
708 optpar |= MLX4_QP_OPTPAR_PM_STATE;
709 switch (attr->path_mig_state) {
710 case IB_MIG_MIGRATED:
711 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
712 break;
713 case IB_MIG_REARM:
714 context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11);
715 break;
716 case IB_MIG_ARMED:
717 context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11);
718 break;
719 }
720 }
721
722 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
723 ibqp->qp_type == IB_QPT_UD)
724 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
725 else if (attr_mask & IB_QP_PATH_MTU) {
726 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
727 printk(KERN_ERR "path MTU (%u) is invalid\n",
728 attr->path_mtu);
729 return -EINVAL;
730 }
731 context->mtu_msgmax = (attr->path_mtu << 5) | 31;
732 }
733
734 if (qp->rq.max)
735 context->rq_size_stride = ilog2(qp->rq.max) << 3;
736 context->rq_size_stride |= qp->rq.wqe_shift - 4;
737
738 if (qp->sq.max)
739 context->sq_size_stride = ilog2(qp->sq.max) << 3;
740 context->sq_size_stride |= qp->sq.wqe_shift - 4;
741
742 if (qp->ibqp.uobject)
743 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
744 else
745 context->usr_page = cpu_to_be32(dev->priv_uar.index);
746
747 if (attr_mask & IB_QP_DEST_QPN)
748 context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
749
750 if (attr_mask & IB_QP_PORT) {
751 if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD &&
752 !(attr_mask & IB_QP_AV)) {
753 mlx4_set_sched(&context->pri_path, attr->port_num);
754 optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE;
755 }
756 }
757
758 if (attr_mask & IB_QP_PKEY_INDEX) {
759 context->pri_path.pkey_index = attr->pkey_index;
760 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
761 }
762
763 if (attr_mask & IB_QP_RNR_RETRY) {
764 context->params1 |= cpu_to_be32(attr->rnr_retry << 13);
765 optpar |= MLX4_QP_OPTPAR_RNR_RETRY;
766 }
767
768 if (attr_mask & IB_QP_AV) {
769 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
770 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) {
771 err = -EINVAL;
772 goto out;
773 }
774
775 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
776 MLX4_QP_OPTPAR_SCHED_QUEUE);
777 }
778
779 if (attr_mask & IB_QP_TIMEOUT) {
780 context->pri_path.ackto = attr->timeout << 3;
781 optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT;
782 }
783
784 if (attr_mask & IB_QP_ALT_PATH) {
785 if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len)
786 return -EINVAL;
787
788 if (attr->alt_port_num == 0 ||
789 attr->alt_port_num > dev->dev->caps.num_ports)
790 return -EINVAL;
791
792 if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
793 attr->alt_port_num))
794 return -EINVAL;
795
796 context->alt_path.pkey_index = attr->alt_pkey_index;
797 context->alt_path.ackto = attr->alt_timeout << 3;
798 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
799 }
800
801 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
802 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
803 if (attr_mask & IB_QP_RETRY_CNT) {
804 context->params1 |= cpu_to_be32(attr->retry_cnt << 16);
805 optpar |= MLX4_QP_OPTPAR_RETRY_COUNT;
806 }
807
808 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
809 if (attr->max_rd_atomic)
810 context->params1 |=
811 cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
812 optpar |= MLX4_QP_OPTPAR_SRA_MAX;
813 }
814
815 if (attr_mask & IB_QP_SQ_PSN)
816 context->next_send_psn = cpu_to_be32(attr->sq_psn);
817
818 context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
819
820 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
821 if (attr->max_dest_rd_atomic)
822 context->params2 |=
823 cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
824 optpar |= MLX4_QP_OPTPAR_RRA_MAX;
825 }
826
827 if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
828 context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask);
829 optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
830 }
831
832 if (ibqp->srq)
833 context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
834
835 if (attr_mask & IB_QP_MIN_RNR_TIMER) {
836 context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
837 optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT;
838 }
839 if (attr_mask & IB_QP_RQ_PSN)
840 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
841
842 context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
843
844 if (attr_mask & IB_QP_QKEY) {
845 context->qkey = cpu_to_be32(attr->qkey);
846 optpar |= MLX4_QP_OPTPAR_Q_KEY;
847 }
848
849 if (ibqp->srq)
850 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
851
852 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
853 context->db_rec_addr = cpu_to_be64(qp->db.dma);
854
855 if (cur_state == IB_QPS_INIT &&
856 new_state == IB_QPS_RTR &&
857 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
858 ibqp->qp_type == IB_QPT_UD)) {
859 context->pri_path.sched_queue = (qp->port - 1) << 6;
860 if (is_qp0(dev, qp))
861 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
862 else
863 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
864 }
865
866 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
867 attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
868 sqd_event = 1;
869 else
870 sqd_event = 0;
871
872 err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state),
873 to_mlx4_state(new_state), context, optpar,
874 sqd_event, &qp->mqp);
875 if (err)
876 goto out;
877
878 qp->state = new_state;
879
880 if (attr_mask & IB_QP_ACCESS_FLAGS)
881 qp->atomic_rd_en = attr->qp_access_flags;
882 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
883 qp->resp_depth = attr->max_dest_rd_atomic;
884 if (attr_mask & IB_QP_PORT)
885 qp->port = attr->port_num;
886 if (attr_mask & IB_QP_ALT_PATH)
887 qp->alt_port = attr->alt_port_num;
888
889 if (is_sqp(dev, qp))
890 store_sqp_attrs(to_msqp(qp), attr, attr_mask);
891
892 /*
893 * If we moved QP0 to RTR, bring the IB link up; if we moved
894 * QP0 to RESET or ERROR, bring the link back down.
895 */
896 if (is_qp0(dev, qp)) {
897 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
898 init_port(dev, qp->port);
899
900 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
901 (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
902 mlx4_CLOSE_PORT(dev->dev, qp->port);
903 }
904
905 /*
906 * If we moved a kernel QP to RESET, clean up all old CQ
907 * entries and reinitialize the QP.
908 */
909 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
910 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
911 ibqp->srq ? to_msrq(ibqp->srq): NULL);
912 if (ibqp->send_cq != ibqp->recv_cq)
913 mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
914
915 qp->rq.head = 0;
916 qp->rq.tail = 0;
917 qp->sq.head = 0;
918 qp->sq.tail = 0;
919 *qp->db.db = 0;
920 }
921
922out:
923 mutex_unlock(&qp->mutex);
924 kfree(context);
925 return err;
926}
927
928static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
929 void *wqe)
930{
931 struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
932 struct mlx4_wqe_mlx_seg *mlx = wqe;
933 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
934 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
935 u16 pkey;
936 int send_size;
937 int header_size;
938 int i;
939
940 send_size = 0;
941 for (i = 0; i < wr->num_sge; ++i)
942 send_size += wr->sg_list[i].length;
943
944 ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header);
945
946 sqp->ud_header.lrh.service_level =
947 be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28;
948 sqp->ud_header.lrh.destination_lid = ah->av.dlid;
949 sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f);
950 if (mlx4_ib_ah_grh_present(ah)) {
951 sqp->ud_header.grh.traffic_class =
952 (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff;
953 sqp->ud_header.grh.flow_label =
954 ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
955 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24,
956 ah->av.gid_index, &sqp->ud_header.grh.source_gid);
957 memcpy(sqp->ud_header.grh.destination_gid.raw,
958 ah->av.dgid, 16);
959 }
960
961 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
962 mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) |
963 (sqp->ud_header.lrh.destination_lid ==
964 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
965 (sqp->ud_header.lrh.service_level << 8));
966 mlx->rlid = sqp->ud_header.lrh.destination_lid;
967
968 switch (wr->opcode) {
969 case IB_WR_SEND:
970 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
971 sqp->ud_header.immediate_present = 0;
972 break;
973 case IB_WR_SEND_WITH_IMM:
974 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
975 sqp->ud_header.immediate_present = 1;
976 sqp->ud_header.immediate_data = wr->imm_data;
977 break;
978 default:
979 return -EINVAL;
980 }
981
982 sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0;
983 if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
984 sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
985 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
986 if (!sqp->qp.ibqp.qp_num)
987 ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
988 else
989 ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
990 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
991 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
992 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
993 sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
994 sqp->qkey : wr->wr.ud.remote_qkey);
995 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
996
997 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
998
999 if (0) {
1000 printk(KERN_ERR "built UD header of size %d:\n", header_size);
1001 for (i = 0; i < header_size / 4; ++i) {
1002 if (i % 8 == 0)
1003 printk(" [%02x] ", i * 4);
1004 printk(" %08x",
1005 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1006 if ((i + 1) % 8 == 0)
1007 printk("\n");
1008 }
1009 printk("\n");
1010 }
1011
1012 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1013 memcpy(inl + 1, sqp->header_buf, header_size);
1014
1015 return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1016}
1017
1018static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
1019{
1020 unsigned cur;
1021 struct mlx4_ib_cq *cq;
1022
1023 cur = wq->head - wq->tail;
1024 if (likely(cur + nreq < wq->max))
1025 return 0;
1026
1027 cq = to_mcq(ib_cq);
1028 spin_lock(&cq->lock);
1029 cur = wq->head - wq->tail;
1030 spin_unlock(&cq->lock);
1031
1032 return cur + nreq >= wq->max;
1033}
1034
1035int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1036 struct ib_send_wr **bad_wr)
1037{
1038 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1039 void *wqe;
1040 struct mlx4_wqe_ctrl_seg *ctrl;
1041 unsigned long flags;
1042 int nreq;
1043 int err = 0;
1044 int ind;
1045 int size;
1046 int i;
1047
1048 spin_lock_irqsave(&qp->rq.lock, flags);
1049
1050 ind = qp->sq.head;
1051
1052 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1053 if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
1054 err = -ENOMEM;
1055 *bad_wr = wr;
1056 goto out;
1057 }
1058
1059 if (unlikely(wr->num_sge > qp->sq.max_gs)) {
1060 err = -EINVAL;
1061 *bad_wr = wr;
1062 goto out;
1063 }
1064
1065 ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1));
1066 qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id;
1067
1068 ctrl->srcrb_flags =
1069 (wr->send_flags & IB_SEND_SIGNALED ?
1070 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
1071 (wr->send_flags & IB_SEND_SOLICITED ?
1072 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
1073 qp->sq_signal_bits;
1074
1075 if (wr->opcode == IB_WR_SEND_WITH_IMM ||
1076 wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
1077 ctrl->imm = wr->imm_data;
1078 else
1079 ctrl->imm = 0;
1080
1081 wqe += sizeof *ctrl;
1082 size = sizeof *ctrl / 16;
1083
1084 switch (ibqp->qp_type) {
1085 case IB_QPT_RC:
1086 case IB_QPT_UC:
1087 switch (wr->opcode) {
1088 case IB_WR_ATOMIC_CMP_AND_SWP:
1089 case IB_WR_ATOMIC_FETCH_AND_ADD:
1090 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1091 cpu_to_be64(wr->wr.atomic.remote_addr);
1092 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1093 cpu_to_be32(wr->wr.atomic.rkey);
1094 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1095
1096 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1097
1098 if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
1099 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1100 cpu_to_be64(wr->wr.atomic.swap);
1101 ((struct mlx4_wqe_atomic_seg *) wqe)->compare =
1102 cpu_to_be64(wr->wr.atomic.compare_add);
1103 } else {
1104 ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add =
1105 cpu_to_be64(wr->wr.atomic.compare_add);
1106 ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0;
1107 }
1108
1109 wqe += sizeof (struct mlx4_wqe_atomic_seg);
1110 size += (sizeof (struct mlx4_wqe_raddr_seg) +
1111 sizeof (struct mlx4_wqe_atomic_seg)) / 16;
1112
1113 break;
1114
1115 case IB_WR_RDMA_READ:
1116 case IB_WR_RDMA_WRITE:
1117 case IB_WR_RDMA_WRITE_WITH_IMM:
1118 ((struct mlx4_wqe_raddr_seg *) wqe)->raddr =
1119 cpu_to_be64(wr->wr.rdma.remote_addr);
1120 ((struct mlx4_wqe_raddr_seg *) wqe)->rkey =
1121 cpu_to_be32(wr->wr.rdma.rkey);
1122 ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0;
1123
1124 wqe += sizeof (struct mlx4_wqe_raddr_seg);
1125 size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
1126
1127 break;
1128
1129 default:
1130 /* No extra segments required for sends */
1131 break;
1132 }
1133 break;
1134
1135 case IB_QPT_UD:
1136 memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av,
1137 &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
1138 ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn =
1139 cpu_to_be32(wr->wr.ud.remote_qpn);
1140 ((struct mlx4_wqe_datagram_seg *) wqe)->qkey =
1141 cpu_to_be32(wr->wr.ud.remote_qkey);
1142
1143 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1144 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
1145 break;
1146
1147 case IB_QPT_SMI:
1148 case IB_QPT_GSI:
1149 err = build_mlx_header(to_msqp(qp), wr, ctrl);
1150 if (err < 0) {
1151 *bad_wr = wr;
1152 goto out;
1153 }
1154 wqe += err;
1155 size += err / 16;
1156
1157 err = 0;
1158 break;
1159
1160 default:
1161 break;
1162 }
1163
1164 for (i = 0; i < wr->num_sge; ++i) {
1165 ((struct mlx4_wqe_data_seg *) wqe)->byte_count =
1166 cpu_to_be32(wr->sg_list[i].length);
1167 ((struct mlx4_wqe_data_seg *) wqe)->lkey =
1168 cpu_to_be32(wr->sg_list[i].lkey);
1169 ((struct mlx4_wqe_data_seg *) wqe)->addr =
1170 cpu_to_be64(wr->sg_list[i].addr);
1171
1172 wqe += sizeof (struct mlx4_wqe_data_seg);
1173 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1174 }
1175
1176 /* Add one more inline data segment for ICRC for MLX sends */
1177 if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
1178 ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
1179 cpu_to_be32((1 << 31) | 4);
1180 ((u32 *) wqe)[1] = 0;
1181 wqe += sizeof (struct mlx4_wqe_data_seg);
1182 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1183 }
1184
1185 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
1186 MLX4_WQE_CTRL_FENCE : 0) | size;
1187
1188 /*
1189 * Make sure descriptor is fully written before
1190 * setting ownership bit (because HW can start
1191 * executing as soon as we do).
1192 */
1193 wmb();
1194
1195 if (wr->opcode < 0 || wr->opcode > ARRAY_SIZE(mlx4_ib_opcode)) {
1196 err = -EINVAL;
1197 goto out;
1198 }
1199
1200 ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
1201 (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0);
1202
1203 ++ind;
1204 }
1205
1206out:
1207 if (likely(nreq)) {
1208 qp->sq.head += nreq;
1209
1210 /*
1211 * Make sure that descriptors are written before
1212 * doorbell record.
1213 */
1214 wmb();
1215
1216 writel(qp->doorbell_qpn,
1217 to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL);
1218
1219 /*
1220 * Make sure doorbells don't leak out of SQ spinlock
1221 * and reach the HCA out of order.
1222 */
1223 mmiowb();
1224 }
1225
1226 spin_unlock_irqrestore(&qp->rq.lock, flags);
1227
1228 return err;
1229}
1230
1231int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1232 struct ib_recv_wr **bad_wr)
1233{
1234 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1235 struct mlx4_wqe_data_seg *scat;
1236 unsigned long flags;
1237 int err = 0;
1238 int nreq;
1239 int ind;
1240 int i;
1241
1242 spin_lock_irqsave(&qp->rq.lock, flags);
1243
1244 ind = qp->rq.head & (qp->rq.max - 1);
1245
1246 for (nreq = 0; wr; ++nreq, wr = wr->next) {
1247 if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.send_cq)) {
1248 err = -ENOMEM;
1249 *bad_wr = wr;
1250 goto out;
1251 }
1252
1253 if (unlikely(wr->num_sge > qp->rq.max_gs)) {
1254 err = -EINVAL;
1255 *bad_wr = wr;
1256 goto out;
1257 }
1258
1259 scat = get_recv_wqe(qp, ind);
1260
1261 for (i = 0; i < wr->num_sge; ++i) {
1262 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
1263 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
1264 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
1265 }
1266
1267 if (i < qp->rq.max_gs) {
1268 scat[i].byte_count = 0;
1269 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
1270 scat[i].addr = 0;
1271 }
1272
1273 qp->rq.wrid[ind] = wr->wr_id;
1274
1275 ind = (ind + 1) & (qp->rq.max - 1);
1276 }
1277
1278out:
1279 if (likely(nreq)) {
1280 qp->rq.head += nreq;
1281
1282 /*
1283 * Make sure that descriptors are written before
1284 * doorbell record.
1285 */
1286 wmb();
1287
1288 *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff);
1289 }
1290
1291 spin_unlock_irqrestore(&qp->rq.lock, flags);
1292
1293 return err;
1294}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
new file mode 100644
index 000000000000..42ab4a801d6a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -0,0 +1,334 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/mlx4/qp.h>
34#include <linux/mlx4/srq.h>
35
36#include "mlx4_ib.h"
37#include "user.h"
38
39static void *get_wqe(struct mlx4_ib_srq *srq, int n)
40{
41 int offset = n << srq->msrq.wqe_shift;
42
43 if (srq->buf.nbufs == 1)
44 return srq->buf.u.direct.buf + offset;
45 else
46 return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf +
47 (offset & (PAGE_SIZE - 1));
48}
49
50static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
51{
52 struct ib_event event;
53 struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;
54
55 if (ibsrq->event_handler) {
56 event.device = ibsrq->device;
57 event.element.srq = ibsrq;
58 switch (type) {
59 case MLX4_EVENT_TYPE_SRQ_LIMIT:
60 event.event = IB_EVENT_SRQ_LIMIT_REACHED;
61 break;
62 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
63 event.event = IB_EVENT_SRQ_ERR;
64 break;
65 default:
66 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
67 "on SRQ %06x\n", type, srq->srqn);
68 return;
69 }
70
71 ibsrq->event_handler(&event, ibsrq->srq_context);
72 }
73}
74
75struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
76 struct ib_srq_init_attr *init_attr,
77 struct ib_udata *udata)
78{
79 struct mlx4_ib_dev *dev = to_mdev(pd->device);
80 struct mlx4_ib_srq *srq;
81 struct mlx4_wqe_srq_next_seg *next;
82 int desc_size;
83 int buf_size;
84 int err;
85 int i;
86
87 /* Sanity check SRQ size before proceeding */
88 if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes ||
89 init_attr->attr.max_sge > dev->dev->caps.max_srq_sge)
90 return ERR_PTR(-EINVAL);
91
92 srq = kmalloc(sizeof *srq, GFP_KERNEL);
93 if (!srq)
94 return ERR_PTR(-ENOMEM);
95
96 mutex_init(&srq->mutex);
97 spin_lock_init(&srq->lock);
98 srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
99 srq->msrq.max_gs = init_attr->attr.max_sge;
100
101 desc_size = max(32UL,
102 roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) +
103 srq->msrq.max_gs *
104 sizeof (struct mlx4_wqe_data_seg)));
105 srq->msrq.wqe_shift = ilog2(desc_size);
106
107 buf_size = srq->msrq.max * desc_size;
108
109 if (pd->uobject) {
110 struct mlx4_ib_create_srq ucmd;
111
112 if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
113 err = -EFAULT;
114 goto err_srq;
115 }
116
117 srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
118 buf_size, 0);
119 if (IS_ERR(srq->umem)) {
120 err = PTR_ERR(srq->umem);
121 goto err_srq;
122 }
123
124 err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
125 ilog2(srq->umem->page_size), &srq->mtt);
126 if (err)
127 goto err_buf;
128
129 err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem);
130 if (err)
131 goto err_mtt;
132
133 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
134 ucmd.db_addr, &srq->db);
135 if (err)
136 goto err_mtt;
137 } else {
138 err = mlx4_ib_db_alloc(dev, &srq->db, 0);
139 if (err)
140 goto err_srq;
141
142 *srq->db.db = 0;
143
144 if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
145 err = -ENOMEM;
146 goto err_db;
147 }
148
149 srq->head = 0;
150 srq->tail = srq->msrq.max - 1;
151 srq->wqe_ctr = 0;
152
153 for (i = 0; i < srq->msrq.max; ++i) {
154 next = get_wqe(srq, i);
155 next->next_wqe_index =
156 cpu_to_be16((i + 1) & (srq->msrq.max - 1));
157 }
158
159 err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift,
160 &srq->mtt);
161 if (err)
162 goto err_buf;
163
164 err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
165 if (err)
166 goto err_mtt;
167
168 srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
169 if (!srq->wrid) {
170 err = -ENOMEM;
171 goto err_mtt;
172 }
173 }
174
175 err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
176 srq->db.dma, &srq->msrq);
177 if (err)
178 goto err_wrid;
179
180 srq->msrq.event = mlx4_ib_srq_event;
181
182 if (pd->uobject)
183 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
184 err = -EFAULT;
185 goto err_wrid;
186 }
187
188 init_attr->attr.max_wr = srq->msrq.max - 1;
189
190 return &srq->ibsrq;
191
192err_wrid:
193 if (pd->uobject)
194 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
195 else
196 kfree(srq->wrid);
197
198err_mtt:
199 mlx4_mtt_cleanup(dev->dev, &srq->mtt);
200
201err_buf:
202 if (pd->uobject)
203 ib_umem_release(srq->umem);
204 else
205 mlx4_buf_free(dev->dev, buf_size, &srq->buf);
206
207err_db:
208 if (!pd->uobject)
209 mlx4_ib_db_free(dev, &srq->db);
210
211err_srq:
212 kfree(srq);
213
214 return ERR_PTR(err);
215}
216
217int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
218 enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
219{
220 struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
221 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
222 int ret;
223
224 /* We don't support resizing SRQs (yet?) */
225 if (attr_mask & IB_SRQ_MAX_WR)
226 return -EINVAL;
227
228 if (attr_mask & IB_SRQ_LIMIT) {
229 if (attr->srq_limit >= srq->msrq.max)
230 return -EINVAL;
231
232 mutex_lock(&srq->mutex);
233 ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit);
234 mutex_unlock(&srq->mutex);
235
236 if (ret)
237 return ret;
238 }
239
240 return 0;
241}
242
243int mlx4_ib_destroy_srq(struct ib_srq *srq)
244{
245 struct mlx4_ib_dev *dev = to_mdev(srq->device);
246 struct mlx4_ib_srq *msrq = to_msrq(srq);
247
248 mlx4_srq_free(dev->dev, &msrq->msrq);
249 mlx4_mtt_cleanup(dev->dev, &msrq->mtt);
250
251 if (srq->uobject) {
252 mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
253 ib_umem_release(msrq->umem);
254 } else {
255 kfree(msrq->wrid);
256 mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift,
257 &msrq->buf);
258 mlx4_ib_db_free(dev, &msrq->db);
259 }
260
261 kfree(msrq);
262
263 return 0;
264}
265
266void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
267{
268 struct mlx4_wqe_srq_next_seg *next;
269
270 /* always called with interrupts disabled. */
271 spin_lock(&srq->lock);
272
273 next = get_wqe(srq, srq->tail);
274 next->next_wqe_index = cpu_to_be16(wqe_index);
275 srq->tail = wqe_index;
276
277 spin_unlock(&srq->lock);
278}
279
280int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
281 struct ib_recv_wr **bad_wr)
282{
283 struct mlx4_ib_srq *srq = to_msrq(ibsrq);
284 struct mlx4_wqe_srq_next_seg *next;
285 struct mlx4_wqe_data_seg *scat;
286 unsigned long flags;
287 int err = 0;
288 int nreq;
289 int i;
290
291 spin_lock_irqsave(&srq->lock, flags);
292
293 for (nreq = 0; wr; ++nreq, wr = wr->next) {
294 if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
295 err = -EINVAL;
296 *bad_wr = wr;
297 break;
298 }
299
300 srq->wrid[srq->head] = wr->wr_id;
301
302 next = get_wqe(srq, srq->head);
303 srq->head = be16_to_cpu(next->next_wqe_index);
304 scat = (struct mlx4_wqe_data_seg *) (next + 1);
305
306 for (i = 0; i < wr->num_sge; ++i) {
307 scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
308 scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey);
309 scat[i].addr = cpu_to_be64(wr->sg_list[i].addr);
310 }
311
312 if (i < srq->msrq.max_gs) {
313 scat[i].byte_count = 0;
314 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
315 scat[i].addr = 0;
316 }
317 }
318
319 if (likely(nreq)) {
320 srq->wqe_ctr += nreq;
321
322 /*
323 * Make sure that descriptors are written before
324 * doorbell record.
325 */
326 wmb();
327
328 *srq->db.db = cpu_to_be32(srq->wqe_ctr);
329 }
330
331 spin_unlock_irqrestore(&srq->lock, flags);
332
333 return err;
334}
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
new file mode 100644
index 000000000000..5b8eddc9fa83
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#ifndef MLX4_IB_USER_H
34#define MLX4_IB_USER_H
35
36#include <linux/types.h>
37
38/*
39 * Increment this value if any changes that break userspace ABI
40 * compatibility are made.
41 */
42#define MLX4_IB_UVERBS_ABI_VERSION 1
43
44/*
45 * Make sure that all structs defined in this file remain laid out so
46 * that they pack the same way on 32-bit and 64-bit architectures (to
47 * avoid incompatibility between 32-bit userspace and 64-bit kernels).
48 * In particular do not use pointer types -- pass pointers in __u64
49 * instead.
50 */
51
52struct mlx4_ib_alloc_ucontext_resp {
53 __u32 qp_tab_size;
54 __u16 bf_reg_size;
55 __u16 bf_regs_per_page;
56};
57
58struct mlx4_ib_alloc_pd_resp {
59 __u32 pdn;
60 __u32 reserved;
61};
62
63struct mlx4_ib_create_cq {
64 __u64 buf_addr;
65 __u64 db_addr;
66};
67
68struct mlx4_ib_create_cq_resp {
69 __u32 cqn;
70 __u32 reserved;
71};
72
73struct mlx4_ib_resize_cq {
74 __u64 buf_addr;
75};
76
77struct mlx4_ib_create_srq {
78 __u64 buf_addr;
79 __u64 db_addr;
80};
81
82struct mlx4_ib_create_srq_resp {
83 __u32 srqn;
84 __u32 reserved;
85};
86
87struct mlx4_ib_create_qp {
88 __u64 buf_addr;
89 __u64 db_addr;
90};
91
92#endif /* MLX4_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1c05486c3c68..6bcde1cb9688 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
37 */ 37 */
38 38
39#include <rdma/ib_smi.h> 39#include <rdma/ib_smi.h>
40#include <rdma/ib_umem.h>
40#include <rdma/ib_user_verbs.h> 41#include <rdma/ib_user_verbs.h>
41#include <linux/mm.h> 42#include <linux/mm.h>
42 43
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
908 return ERR_PTR(err); 909 return ERR_PTR(err);
909 } 910 }
910 911
912 mr->umem = NULL;
913
911 return &mr->ibmr; 914 return &mr->ibmr;
912} 915}
913 916
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
1003 } 1006 }
1004 1007
1005 kfree(page_list); 1008 kfree(page_list);
1009 mr->umem = NULL;
1010
1006 return &mr->ibmr; 1011 return &mr->ibmr;
1007} 1012}
1008 1013
1009static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, 1014static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1010 int acc, struct ib_udata *udata) 1015 u64 virt, int acc, struct ib_udata *udata)
1011{ 1016{
1012 struct mthca_dev *dev = to_mdev(pd->device); 1017 struct mthca_dev *dev = to_mdev(pd->device);
1013 struct ib_umem_chunk *chunk; 1018 struct ib_umem_chunk *chunk;
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1018 int err = 0; 1023 int err = 0;
1019 int write_mtt_size; 1024 int write_mtt_size;
1020 1025
1021 shift = ffs(region->page_size) - 1;
1022
1023 mr = kmalloc(sizeof *mr, GFP_KERNEL); 1026 mr = kmalloc(sizeof *mr, GFP_KERNEL);
1024 if (!mr) 1027 if (!mr)
1025 return ERR_PTR(-ENOMEM); 1028 return ERR_PTR(-ENOMEM);
1026 1029
1030 mr->umem = ib_umem_get(pd->uobject->context, start, length, acc);
1031 if (IS_ERR(mr->umem)) {
1032 err = PTR_ERR(mr->umem);
1033 goto err;
1034 }
1035
1036 shift = ffs(mr->umem->page_size) - 1;
1037
1027 n = 0; 1038 n = 0;
1028 list_for_each_entry(chunk, &region->chunk_list, list) 1039 list_for_each_entry(chunk, &mr->umem->chunk_list, list)
1029 n += chunk->nents; 1040 n += chunk->nents;
1030 1041
1031 mr->mtt = mthca_alloc_mtt(dev, n); 1042 mr->mtt = mthca_alloc_mtt(dev, n);
1032 if (IS_ERR(mr->mtt)) { 1043 if (IS_ERR(mr->mtt)) {
1033 err = PTR_ERR(mr->mtt); 1044 err = PTR_ERR(mr->mtt);
1034 goto err; 1045 goto err_umem;
1035 } 1046 }
1036 1047
1037 pages = (u64 *) __get_free_page(GFP_KERNEL); 1048 pages = (u64 *) __get_free_page(GFP_KERNEL);
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
1044 1055
1045 write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); 1056 write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages));
1046 1057
1047 list_for_each_entry(chunk, &region->chunk_list, list) 1058 list_for_each_entry(chunk, &mr->umem->chunk_list, list)
1048 for (j = 0; j < chunk->nmap; ++j) { 1059 for (j = 0; j < chunk->nmap; ++j) {
1049 len = sg_dma_len(&chunk->page_list[j]) >> shift; 1060 len = sg_dma_len(&chunk->page_list[j]) >> shift;
1050 for (k = 0; k < len; ++k) { 1061 for (k = 0; k < len; ++k) {
1051 pages[i++] = sg_dma_address(&chunk->page_list[j]) + 1062 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
1052 region->page_size * k; 1063 mr->umem->page_size * k;
1053 /* 1064 /*
1054 * Be friendly to write_mtt and pass it chunks 1065 * Be friendly to write_mtt and pass it chunks
1055 * of appropriate size. 1066 * of appropriate size.
@@ -1071,8 +1082,8 @@ mtt_done:
1071 if (err) 1082 if (err)
1072 goto err_mtt; 1083 goto err_mtt;
1073 1084
1074 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, 1085 err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length,
1075 region->length, convert_access(acc), mr); 1086 convert_access(acc), mr);
1076 1087
1077 if (err) 1088 if (err)
1078 goto err_mtt; 1089 goto err_mtt;
@@ -1082,6 +1093,9 @@ mtt_done:
1082err_mtt: 1093err_mtt:
1083 mthca_free_mtt(dev, mr->mtt); 1094 mthca_free_mtt(dev, mr->mtt);
1084 1095
1096err_umem:
1097 ib_umem_release(mr->umem);
1098
1085err: 1099err:
1086 kfree(mr); 1100 kfree(mr);
1087 return ERR_PTR(err); 1101 return ERR_PTR(err);
@@ -1090,8 +1104,12 @@ err:
1090static int mthca_dereg_mr(struct ib_mr *mr) 1104static int mthca_dereg_mr(struct ib_mr *mr)
1091{ 1105{
1092 struct mthca_mr *mmr = to_mmr(mr); 1106 struct mthca_mr *mmr = to_mmr(mr);
1107
1093 mthca_free_mr(to_mdev(mr->device), mmr); 1108 mthca_free_mr(to_mdev(mr->device), mmr);
1109 if (mmr->umem)
1110 ib_umem_release(mmr->umem);
1094 kfree(mmr); 1111 kfree(mmr);
1112
1095 return 0; 1113 return 0;
1096} 1114}
1097 1115
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 1d266ac2e094..262616c8ebb6 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -73,6 +73,7 @@ struct mthca_mtt;
73 73
74struct mthca_mr { 74struct mthca_mr {
75 struct ib_mr ibmr; 75 struct ib_mr ibmr;
76 struct ib_umem *umem;
76 struct mthca_mtt *mtt; 77 struct mthca_mtt *mtt;
77}; 78};
78 79