Diffstat (limited to 'drivers/infiniband')
31 files changed, 4309 insertions, 194 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 66b36de9fa6f..37deaae49190 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -29,6 +29,11 @@ config INFINIBAND_USER_ACCESS
29 | libibverbs, libibcm and a hardware driver library from | 29 | libibverbs, libibcm and a hardware driver library from |
30 | <http://www.openib.org>. | 30 | <http://www.openib.org>. |
31 | 31 | ||
32 | config INFINIBAND_USER_MEM | ||
33 | bool | ||
34 | depends on INFINIBAND_USER_ACCESS != n | ||
35 | default y | ||
36 | |||
32 | config INFINIBAND_ADDR_TRANS | 37 | config INFINIBAND_ADDR_TRANS |
33 | bool | 38 | bool |
34 | depends on INFINIBAND && INET | 39 | depends on INFINIBAND && INET |
@@ -40,6 +45,8 @@ source "drivers/infiniband/hw/ehca/Kconfig"
40 | source "drivers/infiniband/hw/amso1100/Kconfig" | 45 | source "drivers/infiniband/hw/amso1100/Kconfig" |
41 | source "drivers/infiniband/hw/cxgb3/Kconfig" | 46 | source "drivers/infiniband/hw/cxgb3/Kconfig" |
42 | 47 | ||
48 | source "drivers/infiniband/hw/mlx4/Kconfig" | ||
49 | |||
43 | source "drivers/infiniband/ulp/ipoib/Kconfig" | 50 | source "drivers/infiniband/ulp/ipoib/Kconfig" |
44 | 51 | ||
45 | source "drivers/infiniband/ulp/srp/Kconfig" | 52 | source "drivers/infiniband/ulp/srp/Kconfig" |
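The new INFINIBAND_USER_MEM symbol has no prompt, so it is never shown to the user: `depends on INFINIBAND_USER_ACCESS != n` plus `default y` simply switches it on whenever userspace verbs access is enabled (built-in or modular), which in turn pulls umem.o into ib_core via the core Makefile change below.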
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index da2066c4f22c..75f325e40b54 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
4 | obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ | 4 | obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/ |
5 | obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ | 5 | obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/ |
6 | obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ | 6 | obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/ |
7 | obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ | ||
7 | obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ | 8 | obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ |
8 | obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ | 9 | obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ |
9 | obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ | 10 | obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ |
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 189e5d4b9b17..cb1ab3ea4998 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
9 | 9 | ||
10 | ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ | 10 | ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ |
11 | device.o fmr_pool.o cache.o | 11 | device.o fmr_pool.o cache.o |
12 | ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o | ||
12 | 13 | ||
13 | ib_mad-y := mad.o smi.o agent.o mad_rmpp.o | 14 | ib_mad-y := mad.o smi.o agent.o mad_rmpp.o |
14 | 15 | ||
@@ -28,5 +29,4 @@ ib_umad-y := user_mad.o
28 | 29 | ||
29 | ib_ucm-y := ucm.o | 30 | ib_ucm-y := ucm.o |
30 | 31 | ||
31 | ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o \ | 32 | ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o |
32 | uverbs_marshall.o | ||
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7fabb425b033..592c90aa3183 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -613,6 +613,8 @@ static void __exit ib_core_cleanup(void)
613 | { | 613 | { |
614 | ib_cache_cleanup(); | 614 | ib_cache_cleanup(); |
615 | ib_sysfs_cleanup(); | 615 | ib_sysfs_cleanup(); |
616 | /* Make sure that any pending umem accounting work is done. */ | ||
617 | flush_scheduled_work(); | ||
616 | } | 618 | } |
617 | 619 | ||
618 | module_init(ib_core_init); | 620 | module_init(ib_core_init); |
diff --git a/drivers/infiniband/core/uverbs_mem.c b/drivers/infiniband/core/umem.c
index c95fe952abd5..f32ca5fbb26b 100644
--- a/drivers/infiniband/core/uverbs_mem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,13 +39,6 @@
39 | 39 | ||
40 | #include "uverbs.h" | 40 | #include "uverbs.h" |
41 | 41 | ||
42 | struct ib_umem_account_work { | ||
43 | struct work_struct work; | ||
44 | struct mm_struct *mm; | ||
45 | unsigned long diff; | ||
46 | }; | ||
47 | |||
48 | |||
49 | static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) | 42 | static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) |
50 | { | 43 | { |
51 | struct ib_umem_chunk *chunk, *tmp; | 44 | struct ib_umem_chunk *chunk, *tmp; |
@@ -64,35 +57,56 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
64 | } | 57 | } |
65 | } | 58 | } |
66 | 59 | ||
67 | int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, | 60 | /** |
68 | void *addr, size_t size, int write) | 61 | * ib_umem_get - Pin and DMA map userspace memory. |
62 | * @context: userspace context to pin memory for | ||
63 | * @addr: userspace virtual address to start at | ||
64 | * @size: length of region to pin | ||
65 | * @access: IB_ACCESS_xxx flags for memory being pinned | ||
66 | */ | ||
67 | struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, | ||
68 | size_t size, int access) | ||
69 | { | 69 | { |
70 | struct ib_umem *umem; | ||
70 | struct page **page_list; | 71 | struct page **page_list; |
71 | struct ib_umem_chunk *chunk; | 72 | struct ib_umem_chunk *chunk; |
72 | unsigned long locked; | 73 | unsigned long locked; |
73 | unsigned long lock_limit; | 74 | unsigned long lock_limit; |
74 | unsigned long cur_base; | 75 | unsigned long cur_base; |
75 | unsigned long npages; | 76 | unsigned long npages; |
76 | int ret = 0; | 77 | int ret; |
77 | int off; | 78 | int off; |
78 | int i; | 79 | int i; |
79 | 80 | ||
80 | if (!can_do_mlock()) | 81 | if (!can_do_mlock()) |
81 | return -EPERM; | 82 | return ERR_PTR(-EPERM); |
82 | 83 | ||
83 | page_list = (struct page **) __get_free_page(GFP_KERNEL); | 84 | umem = kmalloc(sizeof *umem, GFP_KERNEL); |
84 | if (!page_list) | 85 | if (!umem) |
85 | return -ENOMEM; | 86 | return ERR_PTR(-ENOMEM); |
87 | |||
88 | umem->context = context; | ||
89 | umem->length = size; | ||
90 | umem->offset = addr & ~PAGE_MASK; | ||
91 | umem->page_size = PAGE_SIZE; | ||
92 | /* | ||
93 | * We ask for writable memory if any access flags other than | ||
94 | * "remote read" are set. "Local write" and "remote write" | ||
95 | * obviously require write access. "Remote atomic" can do | ||
96 | * things like fetch and add, which will modify memory, and | ||
97 | * "MW bind" can change permissions by binding a window. | ||
98 | */ | ||
99 | umem->writable = !!(access & ~IB_ACCESS_REMOTE_READ); | ||
86 | 100 | ||
87 | mem->user_base = (unsigned long) addr; | 101 | INIT_LIST_HEAD(&umem->chunk_list); |
88 | mem->length = size; | ||
89 | mem->offset = (unsigned long) addr & ~PAGE_MASK; | ||
90 | mem->page_size = PAGE_SIZE; | ||
91 | mem->writable = write; | ||
92 | 102 | ||
93 | INIT_LIST_HEAD(&mem->chunk_list); | 103 | page_list = (struct page **) __get_free_page(GFP_KERNEL); |
104 | if (!page_list) { | ||
105 | kfree(umem); | ||
106 | return ERR_PTR(-ENOMEM); | ||
107 | } | ||
94 | 108 | ||
95 | npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; | 109 | npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT; |
96 | 110 | ||
97 | down_write(¤t->mm->mmap_sem); | 111 | down_write(¤t->mm->mmap_sem); |
98 | 112 | ||
@@ -104,13 +118,13 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
104 | goto out; | 118 | goto out; |
105 | } | 119 | } |
106 | 120 | ||
107 | cur_base = (unsigned long) addr & PAGE_MASK; | 121 | cur_base = addr & PAGE_MASK; |
108 | 122 | ||
109 | while (npages) { | 123 | while (npages) { |
110 | ret = get_user_pages(current, current->mm, cur_base, | 124 | ret = get_user_pages(current, current->mm, cur_base, |
111 | min_t(int, npages, | 125 | min_t(int, npages, |
112 | PAGE_SIZE / sizeof (struct page *)), | 126 | PAGE_SIZE / sizeof (struct page *)), |
113 | 1, !write, page_list, NULL); | 127 | 1, !umem->writable, page_list, NULL); |
114 | 128 | ||
115 | if (ret < 0) | 129 | if (ret < 0) |
116 | goto out; | 130 | goto out; |
@@ -136,7 +150,7 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
136 | chunk->page_list[i].length = PAGE_SIZE; | 150 | chunk->page_list[i].length = PAGE_SIZE; |
137 | } | 151 | } |
138 | 152 | ||
139 | chunk->nmap = ib_dma_map_sg(dev, | 153 | chunk->nmap = ib_dma_map_sg(context->device, |
140 | &chunk->page_list[0], | 154 | &chunk->page_list[0], |
141 | chunk->nents, | 155 | chunk->nents, |
142 | DMA_BIDIRECTIONAL); | 156 | DMA_BIDIRECTIONAL); |
@@ -151,75 +165,94 @@ int ib_umem_get(struct ib_device *dev, struct ib_umem *mem,
151 | 165 | ||
152 | ret -= chunk->nents; | 166 | ret -= chunk->nents; |
153 | off += chunk->nents; | 167 | off += chunk->nents; |
154 | list_add_tail(&chunk->list, &mem->chunk_list); | 168 | list_add_tail(&chunk->list, &umem->chunk_list); |
155 | } | 169 | } |
156 | 170 | ||
157 | ret = 0; | 171 | ret = 0; |
158 | } | 172 | } |
159 | 173 | ||
160 | out: | 174 | out: |
161 | if (ret < 0) | 175 | if (ret < 0) { |
162 | __ib_umem_release(dev, mem, 0); | 176 | __ib_umem_release(context->device, umem, 0); |
163 | else | 177 | kfree(umem); |
178 | } else | ||
164 | current->mm->locked_vm = locked; | 179 | current->mm->locked_vm = locked; |
165 | 180 | ||
166 | up_write(¤t->mm->mmap_sem); | 181 | up_write(¤t->mm->mmap_sem); |
167 | free_page((unsigned long) page_list); | 182 | free_page((unsigned long) page_list); |
168 | 183 | ||
169 | return ret; | 184 | return ret < 0 ? ERR_PTR(ret) : umem; |
170 | } | 185 | } |
186 | EXPORT_SYMBOL(ib_umem_get); | ||
171 | 187 | ||
172 | void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) | 188 | static void ib_umem_account(struct work_struct *work) |
173 | { | 189 | { |
174 | __ib_umem_release(dev, umem, 1); | 190 | struct ib_umem *umem = container_of(work, struct ib_umem, work); |
175 | |||
176 | down_write(¤t->mm->mmap_sem); | ||
177 | current->mm->locked_vm -= | ||
178 | PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; | ||
179 | up_write(¤t->mm->mmap_sem); | ||
180 | } | ||
181 | 191 | ||
182 | static void ib_umem_account(struct work_struct *_work) | 192 | down_write(&umem->mm->mmap_sem); |
183 | { | 193 | umem->mm->locked_vm -= umem->diff; |
184 | struct ib_umem_account_work *work = | 194 | up_write(&umem->mm->mmap_sem); |
185 | container_of(_work, struct ib_umem_account_work, work); | 195 | mmput(umem->mm); |
186 | 196 | kfree(umem); | |
187 | down_write(&work->mm->mmap_sem); | ||
188 | work->mm->locked_vm -= work->diff; | ||
189 | up_write(&work->mm->mmap_sem); | ||
190 | mmput(work->mm); | ||
191 | kfree(work); | ||
192 | } | 197 | } |
193 | 198 | ||
194 | void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) | 199 | /** |
200 | * ib_umem_release - release memory pinned with ib_umem_get | ||
201 | * @umem: umem struct to release | ||
202 | */ | ||
203 | void ib_umem_release(struct ib_umem *umem) | ||
195 | { | 204 | { |
196 | struct ib_umem_account_work *work; | 205 | struct ib_ucontext *context = umem->context; |
197 | struct mm_struct *mm; | 206 | struct mm_struct *mm; |
207 | unsigned long diff; | ||
198 | 208 | ||
199 | __ib_umem_release(dev, umem, 1); | 209 | __ib_umem_release(umem->context->device, umem, 1); |
200 | 210 | ||
201 | mm = get_task_mm(current); | 211 | mm = get_task_mm(current); |
202 | if (!mm) | 212 | if (!mm) |
203 | return; | 213 | return; |
204 | 214 | ||
215 | diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; | ||
216 | |||
205 | /* | 217 | /* |
206 | * We may be called with the mm's mmap_sem already held. This | 218 | * We may be called with the mm's mmap_sem already held. This |
207 | * can happen when a userspace munmap() is the call that drops | 219 | * can happen when a userspace munmap() is the call that drops |
208 | * the last reference to our file and calls our release | 220 | * the last reference to our file and calls our release |
209 | * method. If there are memory regions to destroy, we'll end | 221 | * method. If there are memory regions to destroy, we'll end |
210 | * up here and not be able to take the mmap_sem. Therefore we | 222 | * up here and not be able to take the mmap_sem. In that case |
211 | * defer the vm_locked accounting to the system workqueue. | 223 | * we defer the vm_locked accounting to the system workqueue. |
212 | */ | 224 | */ |
225 | if (context->closing && !down_write_trylock(&mm->mmap_sem)) { | ||
226 | INIT_WORK(&umem->work, ib_umem_account); | ||
227 | umem->mm = mm; | ||
228 | umem->diff = diff; | ||
213 | 229 | ||
214 | work = kmalloc(sizeof *work, GFP_KERNEL); | 230 | schedule_work(&umem->work); |
215 | if (!work) { | ||
216 | mmput(mm); | ||
217 | return; | 231 | return; |
218 | } | 232 | } else |
233 | down_write(&mm->mmap_sem); | ||
234 | |||
235 | current->mm->locked_vm -= diff; | ||
236 | up_write(&mm->mmap_sem); | ||
237 | mmput(mm); | ||
238 | kfree(umem); | ||
239 | } | ||
240 | EXPORT_SYMBOL(ib_umem_release); | ||
241 | |||
242 | int ib_umem_page_count(struct ib_umem *umem) | ||
243 | { | ||
244 | struct ib_umem_chunk *chunk; | ||
245 | int shift; | ||
246 | int i; | ||
247 | int n; | ||
248 | |||
249 | shift = ilog2(umem->page_size); | ||
219 | 250 | ||
220 | INIT_WORK(&work->work, ib_umem_account); | 251 | n = 0; |
221 | work->mm = mm; | 252 | list_for_each_entry(chunk, &umem->chunk_list, list) |
222 | work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; | 253 | for (i = 0; i < chunk->nmap; ++i) |
254 | n += sg_dma_len(&chunk->page_list[i]) >> shift; | ||
223 | 255 | ||
224 | schedule_work(&work->work); | 256 | return n; |
225 | } | 257 | } |
258 | EXPORT_SYMBOL(ib_umem_page_count); | ||
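Taken together, the umem.c changes turn the old uverbs-private pinning code into an exported API (ib_umem_get(), ib_umem_release(), ib_umem_page_count()) that low-level drivers call directly. A minimal sketch of the consumer pattern the driver conversions below follow; my_count_hca_pages() is a hypothetical helper, not part of this commit:

#include <rdma/ib_umem.h>

/*
 * Hypothetical helper (assumed name, not from this commit): pin a user
 * region, count the HCA pages it maps, and unpin it again.  A real
 * driver keeps the umem pinned until its dereg_mr method runs.
 */
static int my_count_hca_pages(struct ib_pd *pd, u64 start, u64 length,
			      int access)
{
	struct ib_umem *umem;
	struct ib_umem_chunk *chunk;
	int shift, i, n = 0;

	umem = ib_umem_get(pd->uobject->context, start, length, access);
	if (IS_ERR(umem))
		return PTR_ERR(umem);

	shift = ffs(umem->page_size) - 1;

	/* Same walk as the new ib_umem_page_count() helper. */
	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (i = 0; i < chunk->nmap; ++i)
			n += sg_dma_len(&chunk->page_list[i]) >> shift;

	ib_umem_release(umem);
	return n;
}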
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 102a59c033ff..c33546f9e961 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -45,6 +45,7 @@
45 | #include <linux/completion.h> | 45 | #include <linux/completion.h> |
46 | 46 | ||
47 | #include <rdma/ib_verbs.h> | 47 | #include <rdma/ib_verbs.h> |
48 | #include <rdma/ib_umem.h> | ||
48 | #include <rdma/ib_user_verbs.h> | 49 | #include <rdma/ib_user_verbs.h> |
49 | 50 | ||
50 | /* | 51 | /* |
@@ -163,11 +164,6 @@ void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
163 | void ib_uverbs_event_handler(struct ib_event_handler *handler, | 164 | void ib_uverbs_event_handler(struct ib_event_handler *handler, |
164 | struct ib_event *event); | 165 | struct ib_event *event); |
165 | 166 | ||
166 | int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, | ||
167 | void *addr, size_t size, int write); | ||
168 | void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); | ||
169 | void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); | ||
170 | |||
171 | #define IB_UVERBS_DECLARE_CMD(name) \ | 167 | #define IB_UVERBS_DECLARE_CMD(name) \ |
172 | ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ | 168 | ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ |
173 | const char __user *buf, int in_len, \ | 169 | const char __user *buf, int in_len, \ |
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index bab66769be14..01d70084aebe 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1,6 +1,6 @@
1 | /* | 1 | /* |
2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. | 2 | * Copyright (c) 2005 Topspin Communications. All rights reserved. |
3 | * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. | 3 | * Copyright (c) 2005, 2006, 2007 Cisco Systems. All rights reserved. |
4 | * Copyright (c) 2005 PathScale, Inc. All rights reserved. | 4 | * Copyright (c) 2005 PathScale, Inc. All rights reserved. |
5 | * Copyright (c) 2006 Mellanox Technologies. All rights reserved. | 5 | * Copyright (c) 2006 Mellanox Technologies. All rights reserved. |
6 | * | 6 | * |
@@ -295,6 +295,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
295 | INIT_LIST_HEAD(&ucontext->qp_list); | 295 | INIT_LIST_HEAD(&ucontext->qp_list); |
296 | INIT_LIST_HEAD(&ucontext->srq_list); | 296 | INIT_LIST_HEAD(&ucontext->srq_list); |
297 | INIT_LIST_HEAD(&ucontext->ah_list); | 297 | INIT_LIST_HEAD(&ucontext->ah_list); |
298 | ucontext->closing = 0; | ||
298 | 299 | ||
299 | resp.num_comp_vectors = file->device->num_comp_vectors; | 300 | resp.num_comp_vectors = file->device->num_comp_vectors; |
300 | 301 | ||
@@ -573,7 +574,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
573 | struct ib_uverbs_reg_mr cmd; | 574 | struct ib_uverbs_reg_mr cmd; |
574 | struct ib_uverbs_reg_mr_resp resp; | 575 | struct ib_uverbs_reg_mr_resp resp; |
575 | struct ib_udata udata; | 576 | struct ib_udata udata; |
576 | struct ib_umem_object *obj; | 577 | struct ib_uobject *uobj; |
577 | struct ib_pd *pd; | 578 | struct ib_pd *pd; |
578 | struct ib_mr *mr; | 579 | struct ib_mr *mr; |
579 | int ret; | 580 | int ret; |
@@ -599,35 +600,21 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
599 | !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) | 600 | !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) |
600 | return -EINVAL; | 601 | return -EINVAL; |
601 | 602 | ||
602 | obj = kmalloc(sizeof *obj, GFP_KERNEL); | 603 | uobj = kmalloc(sizeof *uobj, GFP_KERNEL); |
603 | if (!obj) | 604 | if (!uobj) |
604 | return -ENOMEM; | 605 | return -ENOMEM; |
605 | 606 | ||
606 | init_uobj(&obj->uobject, 0, file->ucontext, &mr_lock_key); | 607 | init_uobj(uobj, 0, file->ucontext, &mr_lock_key); |
607 | down_write(&obj->uobject.mutex); | 608 | down_write(&uobj->mutex); |
608 | |||
609 | /* | ||
610 | * We ask for writable memory if any access flags other than | ||
611 | * "remote read" are set. "Local write" and "remote write" | ||
612 | * obviously require write access. "Remote atomic" can do | ||
613 | * things like fetch and add, which will modify memory, and | ||
614 | * "MW bind" can change permissions by binding a window. | ||
615 | */ | ||
616 | ret = ib_umem_get(file->device->ib_dev, &obj->umem, | ||
617 | (void *) (unsigned long) cmd.start, cmd.length, | ||
618 | !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); | ||
619 | if (ret) | ||
620 | goto err_free; | ||
621 | |||
622 | obj->umem.virt_base = cmd.hca_va; | ||
623 | 609 | ||
624 | pd = idr_read_pd(cmd.pd_handle, file->ucontext); | 610 | pd = idr_read_pd(cmd.pd_handle, file->ucontext); |
625 | if (!pd) { | 611 | if (!pd) { |
626 | ret = -EINVAL; | 612 | ret = -EINVAL; |
627 | goto err_release; | 613 | goto err_free; |
628 | } | 614 | } |
629 | 615 | ||
630 | mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); | 616 | mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va, |
617 | cmd.access_flags, &udata); | ||
631 | if (IS_ERR(mr)) { | 618 | if (IS_ERR(mr)) { |
632 | ret = PTR_ERR(mr); | 619 | ret = PTR_ERR(mr); |
633 | goto err_put; | 620 | goto err_put; |
@@ -635,19 +622,19 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
635 | 622 | ||
636 | mr->device = pd->device; | 623 | mr->device = pd->device; |
637 | mr->pd = pd; | 624 | mr->pd = pd; |
638 | mr->uobject = &obj->uobject; | 625 | mr->uobject = uobj; |
639 | atomic_inc(&pd->usecnt); | 626 | atomic_inc(&pd->usecnt); |
640 | atomic_set(&mr->usecnt, 0); | 627 | atomic_set(&mr->usecnt, 0); |
641 | 628 | ||
642 | obj->uobject.object = mr; | 629 | uobj->object = mr; |
643 | ret = idr_add_uobj(&ib_uverbs_mr_idr, &obj->uobject); | 630 | ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj); |
644 | if (ret) | 631 | if (ret) |
645 | goto err_unreg; | 632 | goto err_unreg; |
646 | 633 | ||
647 | memset(&resp, 0, sizeof resp); | 634 | memset(&resp, 0, sizeof resp); |
648 | resp.lkey = mr->lkey; | 635 | resp.lkey = mr->lkey; |
649 | resp.rkey = mr->rkey; | 636 | resp.rkey = mr->rkey; |
650 | resp.mr_handle = obj->uobject.id; | 637 | resp.mr_handle = uobj->id; |
651 | 638 | ||
652 | if (copy_to_user((void __user *) (unsigned long) cmd.response, | 639 | if (copy_to_user((void __user *) (unsigned long) cmd.response, |
653 | &resp, sizeof resp)) { | 640 | &resp, sizeof resp)) { |
@@ -658,17 +645,17 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
658 | put_pd_read(pd); | 645 | put_pd_read(pd); |
659 | 646 | ||
660 | mutex_lock(&file->mutex); | 647 | mutex_lock(&file->mutex); |
661 | list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); | 648 | list_add_tail(&uobj->list, &file->ucontext->mr_list); |
662 | mutex_unlock(&file->mutex); | 649 | mutex_unlock(&file->mutex); |
663 | 650 | ||
664 | obj->uobject.live = 1; | 651 | uobj->live = 1; |
665 | 652 | ||
666 | up_write(&obj->uobject.mutex); | 653 | up_write(&uobj->mutex); |
667 | 654 | ||
668 | return in_len; | 655 | return in_len; |
669 | 656 | ||
670 | err_copy: | 657 | err_copy: |
671 | idr_remove_uobj(&ib_uverbs_mr_idr, &obj->uobject); | 658 | idr_remove_uobj(&ib_uverbs_mr_idr, uobj); |
672 | 659 | ||
673 | err_unreg: | 660 | err_unreg: |
674 | ib_dereg_mr(mr); | 661 | ib_dereg_mr(mr); |
@@ -676,11 +663,8 @@ err_unreg:
676 | err_put: | 663 | err_put: |
677 | put_pd_read(pd); | 664 | put_pd_read(pd); |
678 | 665 | ||
679 | err_release: | ||
680 | ib_umem_release(file->device->ib_dev, &obj->umem); | ||
681 | |||
682 | err_free: | 666 | err_free: |
683 | put_uobj_write(&obj->uobject); | 667 | put_uobj_write(uobj); |
684 | return ret; | 668 | return ret; |
685 | } | 669 | } |
686 | 670 | ||
@@ -691,7 +675,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
691 | struct ib_uverbs_dereg_mr cmd; | 675 | struct ib_uverbs_dereg_mr cmd; |
692 | struct ib_mr *mr; | 676 | struct ib_mr *mr; |
693 | struct ib_uobject *uobj; | 677 | struct ib_uobject *uobj; |
694 | struct ib_umem_object *memobj; | ||
695 | int ret = -EINVAL; | 678 | int ret = -EINVAL; |
696 | 679 | ||
697 | if (copy_from_user(&cmd, buf, sizeof cmd)) | 680 | if (copy_from_user(&cmd, buf, sizeof cmd)) |
@@ -701,8 +684,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
701 | if (!uobj) | 684 | if (!uobj) |
702 | return -EINVAL; | 685 | return -EINVAL; |
703 | 686 | ||
704 | memobj = container_of(uobj, struct ib_umem_object, uobject); | 687 | mr = uobj->object; |
705 | mr = uobj->object; | ||
706 | 688 | ||
707 | ret = ib_dereg_mr(mr); | 689 | ret = ib_dereg_mr(mr); |
708 | if (!ret) | 690 | if (!ret) |
@@ -719,8 +701,6 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
719 | list_del(&uobj->list); | 701 | list_del(&uobj->list); |
720 | mutex_unlock(&file->mutex); | 702 | mutex_unlock(&file->mutex); |
721 | 703 | ||
722 | ib_umem_release(file->device->ib_dev, &memobj->umem); | ||
723 | |||
724 | put_uobj(uobj); | 704 | put_uobj(uobj); |
725 | 705 | ||
726 | return in_len; | 706 | return in_len; |
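Note the changed provider contract: ib_uverbs_reg_mr() no longer pins memory and hands the driver a ready-made struct ib_umem; it passes the raw start/length/HCA virtual address straight through, and the driver pins the region itself with ib_umem_get(). The reg_user_mr method in struct ib_device therefore now takes (signature as implied by the driver conversions below; the ib_verbs.h change itself is outside this drivers/infiniband diffstat):

	struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length,
				     u64 virt_addr, int mr_access_flags,
				     struct ib_udata *udata);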
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d44e54799651..14d7ccd89195 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -183,6 +183,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
183 | if (!context) | 183 | if (!context) |
184 | return 0; | 184 | return 0; |
185 | 185 | ||
186 | context->closing = 1; | ||
187 | |||
186 | list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { | 188 | list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) { |
187 | struct ib_ah *ah = uobj->object; | 189 | struct ib_ah *ah = uobj->object; |
188 | 190 | ||
@@ -230,16 +232,10 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
230 | 232 | ||
231 | list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { | 233 | list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { |
232 | struct ib_mr *mr = uobj->object; | 234 | struct ib_mr *mr = uobj->object; |
233 | struct ib_device *mrdev = mr->device; | ||
234 | struct ib_umem_object *memobj; | ||
235 | 235 | ||
236 | idr_remove_uobj(&ib_uverbs_mr_idr, uobj); | 236 | idr_remove_uobj(&ib_uverbs_mr_idr, uobj); |
237 | ib_dereg_mr(mr); | 237 | ib_dereg_mr(mr); |
238 | 238 | kfree(uobj); | |
239 | memobj = container_of(uobj, struct ib_umem_object, uobject); | ||
240 | ib_umem_release_on_close(mrdev, &memobj->umem); | ||
241 | |||
242 | kfree(memobj); | ||
243 | } | 239 | } |
244 | 240 | ||
245 | list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { | 241 | list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { |
@@ -906,7 +902,6 @@ static void __exit ib_uverbs_cleanup(void)
906 | unregister_filesystem(&uverbs_event_fs); | 902 | unregister_filesystem(&uverbs_event_fs); |
907 | class_destroy(uverbs_class); | 903 | class_destroy(uverbs_class); |
908 | unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); | 904 | unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); |
909 | flush_scheduled_work(); | ||
910 | idr_destroy(&ib_uverbs_pd_idr); | 905 | idr_destroy(&ib_uverbs_pd_idr); |
911 | idr_destroy(&ib_uverbs_mr_idr); | 906 | idr_destroy(&ib_uverbs_mr_idr); |
912 | idr_destroy(&ib_uverbs_mw_idr); | 907 | idr_destroy(&ib_uverbs_mw_idr); |
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 109166223c09..997cf1530762 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -56,6 +56,7 @@
56 | #include <asm/byteorder.h> | 56 | #include <asm/byteorder.h> |
57 | 57 | ||
58 | #include <rdma/ib_smi.h> | 58 | #include <rdma/ib_smi.h> |
59 | #include <rdma/ib_umem.h> | ||
59 | #include <rdma/ib_user_verbs.h> | 60 | #include <rdma/ib_user_verbs.h> |
60 | #include "c2.h" | 61 | #include "c2.h" |
61 | #include "c2_provider.h" | 62 | #include "c2_provider.h" |
@@ -396,6 +397,7 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
396 | } | 397 | } |
397 | 398 | ||
398 | mr->pd = to_c2pd(ib_pd); | 399 | mr->pd = to_c2pd(ib_pd); |
400 | mr->umem = NULL; | ||
399 | pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " | 401 | pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, " |
400 | "*iova_start %llx, first pa %llx, last pa %llx\n", | 402 | "*iova_start %llx, first pa %llx, last pa %llx\n", |
401 | __FUNCTION__, page_shift, pbl_depth, total_len, | 403 | __FUNCTION__, page_shift, pbl_depth, total_len, |
@@ -428,8 +430,8 @@ static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
428 | return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); | 430 | return c2_reg_phys_mr(pd, &bl, 1, acc, &kva); |
429 | } | 431 | } |
430 | 432 | ||
431 | static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | 433 | static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
432 | int acc, struct ib_udata *udata) | 434 | u64 virt, int acc, struct ib_udata *udata) |
433 | { | 435 | { |
434 | u64 *pages; | 436 | u64 *pages; |
435 | u64 kva = 0; | 437 | u64 kva = 0; |
@@ -441,15 +443,23 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
441 | struct c2_mr *c2mr; | 443 | struct c2_mr *c2mr; |
442 | 444 | ||
443 | pr_debug("%s:%u\n", __FUNCTION__, __LINE__); | 445 | pr_debug("%s:%u\n", __FUNCTION__, __LINE__); |
444 | shift = ffs(region->page_size) - 1; | ||
445 | 446 | ||
446 | c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); | 447 | c2mr = kmalloc(sizeof(*c2mr), GFP_KERNEL); |
447 | if (!c2mr) | 448 | if (!c2mr) |
448 | return ERR_PTR(-ENOMEM); | 449 | return ERR_PTR(-ENOMEM); |
449 | c2mr->pd = c2pd; | 450 | c2mr->pd = c2pd; |
450 | 451 | ||
452 | c2mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); | ||
453 | if (IS_ERR(c2mr->umem)) { | ||
454 | err = PTR_ERR(c2mr->umem); | ||
455 | kfree(c2mr); | ||
456 | return ERR_PTR(err); | ||
457 | } | ||
458 | |||
459 | shift = ffs(c2mr->umem->page_size) - 1; | ||
460 | |||
451 | n = 0; | 461 | n = 0; |
452 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 462 | list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) |
453 | n += chunk->nents; | 463 | n += chunk->nents; |
454 | 464 | ||
455 | pages = kmalloc(n * sizeof(u64), GFP_KERNEL); | 465 | pages = kmalloc(n * sizeof(u64), GFP_KERNEL); |
@@ -459,35 +469,34 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
459 | } | 469 | } |
460 | 470 | ||
461 | i = 0; | 471 | i = 0; |
462 | list_for_each_entry(chunk, ®ion->chunk_list, list) { | 472 | list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) { |
463 | for (j = 0; j < chunk->nmap; ++j) { | 473 | for (j = 0; j < chunk->nmap; ++j) { |
464 | len = sg_dma_len(&chunk->page_list[j]) >> shift; | 474 | len = sg_dma_len(&chunk->page_list[j]) >> shift; |
465 | for (k = 0; k < len; ++k) { | 475 | for (k = 0; k < len; ++k) { |
466 | pages[i++] = | 476 | pages[i++] = |
467 | sg_dma_address(&chunk->page_list[j]) + | 477 | sg_dma_address(&chunk->page_list[j]) + |
468 | (region->page_size * k); | 478 | (c2mr->umem->page_size * k); |
469 | } | 479 | } |
470 | } | 480 | } |
471 | } | 481 | } |
472 | 482 | ||
473 | kva = (u64)region->virt_base; | 483 | kva = virt; |
474 | err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), | 484 | err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), |
475 | pages, | 485 | pages, |
476 | region->page_size, | 486 | c2mr->umem->page_size, |
477 | i, | 487 | i, |
478 | region->length, | 488 | length, |
479 | region->offset, | 489 | c2mr->umem->offset, |
480 | &kva, | 490 | &kva, |
481 | c2_convert_access(acc), | 491 | c2_convert_access(acc), |
482 | c2mr); | 492 | c2mr); |
483 | kfree(pages); | 493 | kfree(pages); |
484 | if (err) { | 494 | if (err) |
485 | kfree(c2mr); | 495 | goto err; |
486 | return ERR_PTR(err); | ||
487 | } | ||
488 | return &c2mr->ibmr; | 496 | return &c2mr->ibmr; |
489 | 497 | ||
490 | err: | 498 | err: |
499 | ib_umem_release(c2mr->umem); | ||
491 | kfree(c2mr); | 500 | kfree(c2mr); |
492 | return ERR_PTR(err); | 501 | return ERR_PTR(err); |
493 | } | 502 | } |
@@ -502,8 +511,11 @@ static int c2_dereg_mr(struct ib_mr *ib_mr)
502 | err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); | 511 | err = c2_stag_dealloc(to_c2dev(ib_mr->device), ib_mr->lkey); |
503 | if (err) | 512 | if (err) |
504 | pr_debug("c2_stag_dealloc failed: %d\n", err); | 513 | pr_debug("c2_stag_dealloc failed: %d\n", err); |
505 | else | 514 | else { |
515 | if (mr->umem) | ||
516 | ib_umem_release(mr->umem); | ||
506 | kfree(mr); | 517 | kfree(mr); |
518 | } | ||
507 | 519 | ||
508 | return err; | 520 | return err; |
509 | } | 521 | } |
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.h b/drivers/infiniband/hw/amso1100/c2_provider.h
index fc906223220f..1076df2ee96a 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.h
+++ b/drivers/infiniband/hw/amso1100/c2_provider.h
@@ -73,6 +73,7 @@ struct c2_pd {
73 | struct c2_mr { | 73 | struct c2_mr { |
74 | struct ib_mr ibmr; | 74 | struct ib_mr ibmr; |
75 | struct c2_pd *pd; | 75 | struct c2_pd *pd; |
76 | struct ib_umem *umem; | ||
76 | }; | 77 | }; |
77 | 78 | ||
78 | struct c2_av; | 79 | struct c2_av; |
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index a891493fd340..e7c2c3948037 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -47,6 +47,7 @@
47 | #include <rdma/iw_cm.h> | 47 | #include <rdma/iw_cm.h> |
48 | #include <rdma/ib_verbs.h> | 48 | #include <rdma/ib_verbs.h> |
49 | #include <rdma/ib_smi.h> | 49 | #include <rdma/ib_smi.h> |
50 | #include <rdma/ib_umem.h> | ||
50 | #include <rdma/ib_user_verbs.h> | 51 | #include <rdma/ib_user_verbs.h> |
51 | 52 | ||
52 | #include "cxio_hal.h" | 53 | #include "cxio_hal.h" |
@@ -443,6 +444,8 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
443 | remove_handle(rhp, &rhp->mmidr, mmid); | 444 | remove_handle(rhp, &rhp->mmidr, mmid); |
444 | if (mhp->kva) | 445 | if (mhp->kva) |
445 | kfree((void *) (unsigned long) mhp->kva); | 446 | kfree((void *) (unsigned long) mhp->kva); |
447 | if (mhp->umem) | ||
448 | ib_umem_release(mhp->umem); | ||
446 | PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); | 449 | PDBG("%s mmid 0x%x ptr %p\n", __FUNCTION__, mmid, mhp); |
447 | kfree(mhp); | 450 | kfree(mhp); |
448 | return 0; | 451 | return 0; |
@@ -577,8 +580,8 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
577 | } | 580 | } |
578 | 581 | ||
579 | 582 | ||
580 | static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | 583 | static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
581 | int acc, struct ib_udata *udata) | 584 | u64 virt, int acc, struct ib_udata *udata) |
582 | { | 585 | { |
583 | __be64 *pages; | 586 | __be64 *pages; |
584 | int shift, n, len; | 587 | int shift, n, len; |
@@ -591,7 +594,6 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
591 | struct iwch_reg_user_mr_resp uresp; | 594 | struct iwch_reg_user_mr_resp uresp; |
592 | 595 | ||
593 | PDBG("%s ib_pd %p\n", __FUNCTION__, pd); | 596 | PDBG("%s ib_pd %p\n", __FUNCTION__, pd); |
594 | shift = ffs(region->page_size) - 1; | ||
595 | 597 | ||
596 | php = to_iwch_pd(pd); | 598 | php = to_iwch_pd(pd); |
597 | rhp = php->rhp; | 599 | rhp = php->rhp; |
@@ -599,8 +601,17 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
599 | if (!mhp) | 601 | if (!mhp) |
600 | return ERR_PTR(-ENOMEM); | 602 | return ERR_PTR(-ENOMEM); |
601 | 603 | ||
604 | mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc); | ||
605 | if (IS_ERR(mhp->umem)) { | ||
606 | err = PTR_ERR(mhp->umem); | ||
607 | kfree(mhp); | ||
608 | return ERR_PTR(err); | ||
609 | } | ||
610 | |||
611 | shift = ffs(mhp->umem->page_size) - 1; | ||
612 | |||
602 | n = 0; | 613 | n = 0; |
603 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 614 | list_for_each_entry(chunk, &mhp->umem->chunk_list, list) |
604 | n += chunk->nents; | 615 | n += chunk->nents; |
605 | 616 | ||
606 | pages = kmalloc(n * sizeof(u64), GFP_KERNEL); | 617 | pages = kmalloc(n * sizeof(u64), GFP_KERNEL); |
@@ -611,13 +622,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
611 | 622 | ||
612 | i = n = 0; | 623 | i = n = 0; |
613 | 624 | ||
614 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 625 | list_for_each_entry(chunk, &mhp->umem->chunk_list, list) |
615 | for (j = 0; j < chunk->nmap; ++j) { | 626 | for (j = 0; j < chunk->nmap; ++j) { |
616 | len = sg_dma_len(&chunk->page_list[j]) >> shift; | 627 | len = sg_dma_len(&chunk->page_list[j]) >> shift; |
617 | for (k = 0; k < len; ++k) { | 628 | for (k = 0; k < len; ++k) { |
618 | pages[i++] = cpu_to_be64(sg_dma_address( | 629 | pages[i++] = cpu_to_be64(sg_dma_address( |
619 | &chunk->page_list[j]) + | 630 | &chunk->page_list[j]) + |
620 | region->page_size * k); | 631 | mhp->umem->page_size * k); |
621 | } | 632 | } |
622 | } | 633 | } |
623 | 634 | ||
@@ -625,9 +636,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
625 | mhp->attr.pdid = php->pdid; | 636 | mhp->attr.pdid = php->pdid; |
626 | mhp->attr.zbva = 0; | 637 | mhp->attr.zbva = 0; |
627 | mhp->attr.perms = iwch_ib_to_tpt_access(acc); | 638 | mhp->attr.perms = iwch_ib_to_tpt_access(acc); |
628 | mhp->attr.va_fbo = region->virt_base; | 639 | mhp->attr.va_fbo = virt; |
629 | mhp->attr.page_size = shift - 12; | 640 | mhp->attr.page_size = shift - 12; |
630 | mhp->attr.len = (u32) region->length; | 641 | mhp->attr.len = (u32) length; |
631 | mhp->attr.pbl_size = i; | 642 | mhp->attr.pbl_size = i; |
632 | err = iwch_register_mem(rhp, php, mhp, shift, pages); | 643 | err = iwch_register_mem(rhp, php, mhp, shift, pages); |
633 | kfree(pages); | 644 | kfree(pages); |
@@ -650,6 +661,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
650 | return &mhp->ibmr; | 661 | return &mhp->ibmr; |
651 | 662 | ||
652 | err: | 663 | err: |
664 | ib_umem_release(mhp->umem); | ||
653 | kfree(mhp); | 665 | kfree(mhp); |
654 | return ERR_PTR(err); | 666 | return ERR_PTR(err); |
655 | } | 667 | } |
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 93bcc56756bd..48833f3f3bd0 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -73,6 +73,7 @@ struct tpt_attributes {
73 | 73 | ||
74 | struct iwch_mr { | 74 | struct iwch_mr { |
75 | struct ib_mr ibmr; | 75 | struct ib_mr ibmr; |
76 | struct ib_umem *umem; | ||
76 | struct iwch_dev *rhp; | 77 | struct iwch_dev *rhp; |
77 | u64 kva; | 78 | u64 kva; |
78 | struct tpt_attributes attr; | 79 | struct tpt_attributes attr; |
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index 10fb8fbafa0c..f64d42b08674 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -176,6 +176,7 @@ struct ehca_mr {
176 | struct ib_mr ib_mr; /* must always be first in ehca_mr */ | 176 | struct ib_mr ib_mr; /* must always be first in ehca_mr */ |
177 | struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ | 177 | struct ib_fmr ib_fmr; /* must always be first in ehca_mr */ |
178 | } ib; | 178 | } ib; |
179 | struct ib_umem *umem; | ||
179 | spinlock_t mrlock; | 180 | spinlock_t mrlock; |
180 | 181 | ||
181 | enum ehca_mr_flag flags; | 182 | enum ehca_mr_flag flags; |
diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
index e14b029332c8..37e7fe0908cf 100644
--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
+++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
@@ -78,8 +78,7 @@ struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
78 | int num_phys_buf, | 78 | int num_phys_buf, |
79 | int mr_access_flags, u64 *iova_start); | 79 | int mr_access_flags, u64 *iova_start); |
80 | 80 | ||
81 | struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, | 81 | struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, |
82 | struct ib_umem *region, | ||
83 | int mr_access_flags, struct ib_udata *udata); | 82 | int mr_access_flags, struct ib_udata *udata); |
84 | 83 | ||
85 | int ehca_rereg_phys_mr(struct ib_mr *mr, | 84 | int ehca_rereg_phys_mr(struct ib_mr *mr, |
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index d22ab563633f..84c5bb498563 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -39,6 +39,8 @@
39 | * POSSIBILITY OF SUCH DAMAGE. | 39 | * POSSIBILITY OF SUCH DAMAGE. |
40 | */ | 40 | */ |
41 | 41 | ||
42 | #include <rdma/ib_umem.h> | ||
43 | |||
42 | #include <asm/current.h> | 44 | #include <asm/current.h> |
43 | 45 | ||
44 | #include "ehca_iverbs.h" | 46 | #include "ehca_iverbs.h" |
@@ -238,10 +240,8 @@ reg_phys_mr_exit0:
238 | 240 | ||
239 | /*----------------------------------------------------------------------*/ | 241 | /*----------------------------------------------------------------------*/ |
240 | 242 | ||
241 | struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, | 243 | struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, |
242 | struct ib_umem *region, | 244 | int mr_access_flags, struct ib_udata *udata) |
243 | int mr_access_flags, | ||
244 | struct ib_udata *udata) | ||
245 | { | 245 | { |
246 | struct ib_mr *ib_mr; | 246 | struct ib_mr *ib_mr; |
247 | struct ehca_mr *e_mr; | 247 | struct ehca_mr *e_mr; |
@@ -257,11 +257,7 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
257 | ehca_gen_err("bad pd=%p", pd); | 257 | ehca_gen_err("bad pd=%p", pd); |
258 | return ERR_PTR(-EFAULT); | 258 | return ERR_PTR(-EFAULT); |
259 | } | 259 | } |
260 | if (!region) { | 260 | |
261 | ehca_err(pd->device, "bad input values: region=%p", region); | ||
262 | ib_mr = ERR_PTR(-EINVAL); | ||
263 | goto reg_user_mr_exit0; | ||
264 | } | ||
265 | if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && | 261 | if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) && |
266 | !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || | 262 | !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) || |
267 | ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && | 263 | ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) && |
@@ -275,17 +271,10 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
275 | ib_mr = ERR_PTR(-EINVAL); | 271 | ib_mr = ERR_PTR(-EINVAL); |
276 | goto reg_user_mr_exit0; | 272 | goto reg_user_mr_exit0; |
277 | } | 273 | } |
278 | if (region->page_size != PAGE_SIZE) { | ||
279 | ehca_err(pd->device, "page size not supported, " | ||
280 | "region->page_size=%x", region->page_size); | ||
281 | ib_mr = ERR_PTR(-EINVAL); | ||
282 | goto reg_user_mr_exit0; | ||
283 | } | ||
284 | 274 | ||
285 | if ((region->length == 0) || | 275 | if (length == 0 || virt + length < virt) { |
286 | ((region->virt_base + region->length) < region->virt_base)) { | ||
287 | ehca_err(pd->device, "bad input values: length=%lx " | 276 | ehca_err(pd->device, "bad input values: length=%lx " |
288 | "virt_base=%lx", region->length, region->virt_base); | 277 | "virt_base=%lx", length, virt); |
289 | ib_mr = ERR_PTR(-EINVAL); | 278 | ib_mr = ERR_PTR(-EINVAL); |
290 | goto reg_user_mr_exit0; | 279 | goto reg_user_mr_exit0; |
291 | } | 280 | } |
@@ -297,40 +286,55 @@ struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd,
297 | goto reg_user_mr_exit0; | 286 | goto reg_user_mr_exit0; |
298 | } | 287 | } |
299 | 288 | ||
289 | e_mr->umem = ib_umem_get(pd->uobject->context, start, length, | ||
290 | mr_access_flags); | ||
291 | if (IS_ERR(e_mr->umem)) { | ||
292 | ib_mr = (void *) e_mr->umem; | ||
293 | goto reg_user_mr_exit1; | ||
294 | } | ||
295 | |||
296 | if (e_mr->umem->page_size != PAGE_SIZE) { | ||
297 | ehca_err(pd->device, "page size not supported, " | ||
298 | "e_mr->umem->page_size=%x", e_mr->umem->page_size); | ||
299 | ib_mr = ERR_PTR(-EINVAL); | ||
300 | goto reg_user_mr_exit2; | ||
301 | } | ||
302 | |||
300 | /* determine number of MR pages */ | 303 | /* determine number of MR pages */ |
301 | num_pages_mr = (((region->virt_base % PAGE_SIZE) + region->length + | 304 | num_pages_mr = (((virt % PAGE_SIZE) + length + PAGE_SIZE - 1) / |
302 | PAGE_SIZE - 1) / PAGE_SIZE); | 305 | PAGE_SIZE); |
303 | num_pages_4k = (((region->virt_base % EHCA_PAGESIZE) + region->length + | 306 | num_pages_4k = (((virt % EHCA_PAGESIZE) + length + EHCA_PAGESIZE - 1) / |
304 | EHCA_PAGESIZE - 1) / EHCA_PAGESIZE); | 307 | EHCA_PAGESIZE); |
305 | 308 | ||
306 | /* register MR on HCA */ | 309 | /* register MR on HCA */ |
307 | pginfo.type = EHCA_MR_PGI_USER; | 310 | pginfo.type = EHCA_MR_PGI_USER; |
308 | pginfo.num_pages = num_pages_mr; | 311 | pginfo.num_pages = num_pages_mr; |
309 | pginfo.num_4k = num_pages_4k; | 312 | pginfo.num_4k = num_pages_4k; |
310 | pginfo.region = region; | 313 | pginfo.region = e_mr->umem; |
311 | pginfo.next_4k = region->offset / EHCA_PAGESIZE; | 314 | pginfo.next_4k = e_mr->umem->offset / EHCA_PAGESIZE; |
312 | pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, | 315 | pginfo.next_chunk = list_prepare_entry(pginfo.next_chunk, |
313 | (®ion->chunk_list), | 316 | (&e_mr->umem->chunk_list), |
314 | list); | 317 | list); |
315 | 318 | ||
316 | ret = ehca_reg_mr(shca, e_mr, (u64*)region->virt_base, | 319 | ret = ehca_reg_mr(shca, e_mr, (u64*) virt, length, mr_access_flags, e_pd, |
317 | region->length, mr_access_flags, e_pd, &pginfo, | 320 | &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); |
318 | &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey); | ||
319 | if (ret) { | 321 | if (ret) { |
320 | ib_mr = ERR_PTR(ret); | 322 | ib_mr = ERR_PTR(ret); |
321 | goto reg_user_mr_exit1; | 323 | goto reg_user_mr_exit2; |
322 | } | 324 | } |
323 | 325 | ||
324 | /* successful registration of all pages */ | 326 | /* successful registration of all pages */ |
325 | return &e_mr->ib.ib_mr; | 327 | return &e_mr->ib.ib_mr; |
326 | 328 | ||
329 | reg_user_mr_exit2: | ||
330 | ib_umem_release(e_mr->umem); | ||
327 | reg_user_mr_exit1: | 331 | reg_user_mr_exit1: |
328 | ehca_mr_delete(e_mr); | 332 | ehca_mr_delete(e_mr); |
329 | reg_user_mr_exit0: | 333 | reg_user_mr_exit0: |
330 | if (IS_ERR(ib_mr)) | 334 | if (IS_ERR(ib_mr)) |
331 | ehca_err(pd->device, "rc=%lx pd=%p region=%p mr_access_flags=%x" | 335 | ehca_err(pd->device, "rc=%lx pd=%p mr_access_flags=%x" |
332 | " udata=%p", | 336 | " udata=%p", |
333 | PTR_ERR(ib_mr), pd, region, mr_access_flags, udata); | 337 | PTR_ERR(ib_mr), pd, mr_access_flags, udata); |
334 | return ib_mr; | 338 | return ib_mr; |
335 | } /* end ehca_reg_user_mr() */ | 339 | } /* end ehca_reg_user_mr() */ |
336 | 340 | ||
@@ -596,6 +600,9 @@ int ehca_dereg_mr(struct ib_mr *mr)
596 | goto dereg_mr_exit0; | 600 | goto dereg_mr_exit0; |
597 | } | 601 | } |
598 | 602 | ||
603 | if (e_mr->umem) | ||
604 | ib_umem_release(e_mr->umem); | ||
605 | |||
599 | /* successful deregistration */ | 606 | /* successful deregistration */ |
600 | ehca_mr_delete(e_mr); | 607 | ehca_mr_delete(e_mr); |
601 | 608 | ||
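The rewritten page-count arithmetic is the usual round-up of a possibly misaligned region: with hypothetical values virt = 0x1234 and length = 0x3000 (and PAGE_SIZE = 0x1000), num_pages_mr = (0x234 + 0x3000 + 0xfff) / 0x1000 = 4, because the region straddles four pages even though it is only three pages long.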
diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 31e70732e369..bdeef8d4f279 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c
@@ -31,6 +31,7 @@
31 | * SOFTWARE. | 31 | * SOFTWARE. |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #include <rdma/ib_umem.h> | ||
34 | #include <rdma/ib_pack.h> | 35 | #include <rdma/ib_pack.h> |
35 | #include <rdma/ib_smi.h> | 36 | #include <rdma/ib_smi.h> |
36 | 37 | ||
@@ -147,6 +148,7 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
147 | mr->mr.offset = 0; | 148 | mr->mr.offset = 0; |
148 | mr->mr.access_flags = acc; | 149 | mr->mr.access_flags = acc; |
149 | mr->mr.max_segs = num_phys_buf; | 150 | mr->mr.max_segs = num_phys_buf; |
151 | mr->umem = NULL; | ||
150 | 152 | ||
151 | m = 0; | 153 | m = 0; |
152 | n = 0; | 154 | n = 0; |
@@ -170,46 +172,56 @@ bail:
170 | /** | 172 | /** |
171 | * ipath_reg_user_mr - register a userspace memory region | 173 | * ipath_reg_user_mr - register a userspace memory region |
172 | * @pd: protection domain for this memory region | 174 | * @pd: protection domain for this memory region |
173 | * @region: the user memory region | 175 | * @start: starting userspace address |
176 | * @length: length of region to register | ||
177 | * @virt_addr: virtual address to use (from HCA's point of view) | ||
174 | * @mr_access_flags: access flags for this memory region | 178 | * @mr_access_flags: access flags for this memory region |
175 | * @udata: unused by the InfiniPath driver | 179 | * @udata: unused by the InfiniPath driver |
176 | * | 180 | * |
177 | * Returns the memory region on success, otherwise returns an errno. | 181 | * Returns the memory region on success, otherwise returns an errno. |
178 | */ | 182 | */ |
179 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | 183 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
180 | int mr_access_flags, struct ib_udata *udata) | 184 | u64 virt_addr, int mr_access_flags, |
185 | struct ib_udata *udata) | ||
181 | { | 186 | { |
182 | struct ipath_mr *mr; | 187 | struct ipath_mr *mr; |
188 | struct ib_umem *umem; | ||
183 | struct ib_umem_chunk *chunk; | 189 | struct ib_umem_chunk *chunk; |
184 | int n, m, i; | 190 | int n, m, i; |
185 | struct ib_mr *ret; | 191 | struct ib_mr *ret; |
186 | 192 | ||
187 | if (region->length == 0) { | 193 | if (length == 0) { |
188 | ret = ERR_PTR(-EINVAL); | 194 | ret = ERR_PTR(-EINVAL); |
189 | goto bail; | 195 | goto bail; |
190 | } | 196 | } |
191 | 197 | ||
198 | umem = ib_umem_get(pd->uobject->context, start, length, mr_access_flags); | ||
199 | if (IS_ERR(umem)) | ||
200 | return (void *) umem; | ||
201 | |||
192 | n = 0; | 202 | n = 0; |
193 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 203 | list_for_each_entry(chunk, &umem->chunk_list, list) |
194 | n += chunk->nents; | 204 | n += chunk->nents; |
195 | 205 | ||
196 | mr = alloc_mr(n, &to_idev(pd->device)->lk_table); | 206 | mr = alloc_mr(n, &to_idev(pd->device)->lk_table); |
197 | if (!mr) { | 207 | if (!mr) { |
198 | ret = ERR_PTR(-ENOMEM); | 208 | ret = ERR_PTR(-ENOMEM); |
209 | ib_umem_release(umem); | ||
199 | goto bail; | 210 | goto bail; |
200 | } | 211 | } |
201 | 212 | ||
202 | mr->mr.pd = pd; | 213 | mr->mr.pd = pd; |
203 | mr->mr.user_base = region->user_base; | 214 | mr->mr.user_base = start; |
204 | mr->mr.iova = region->virt_base; | 215 | mr->mr.iova = virt_addr; |
205 | mr->mr.length = region->length; | 216 | mr->mr.length = length; |
206 | mr->mr.offset = region->offset; | 217 | mr->mr.offset = umem->offset; |
207 | mr->mr.access_flags = mr_access_flags; | 218 | mr->mr.access_flags = mr_access_flags; |
208 | mr->mr.max_segs = n; | 219 | mr->mr.max_segs = n; |
220 | mr->umem = umem; | ||
209 | 221 | ||
210 | m = 0; | 222 | m = 0; |
211 | n = 0; | 223 | n = 0; |
212 | list_for_each_entry(chunk, ®ion->chunk_list, list) { | 224 | list_for_each_entry(chunk, &umem->chunk_list, list) { |
213 | for (i = 0; i < chunk->nents; i++) { | 225 | for (i = 0; i < chunk->nents; i++) { |
214 | void *vaddr; | 226 | void *vaddr; |
215 | 227 | ||
@@ -219,7 +231,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
219 | goto bail; | 231 | goto bail; |
220 | } | 232 | } |
221 | mr->mr.map[m]->segs[n].vaddr = vaddr; | 233 | mr->mr.map[m]->segs[n].vaddr = vaddr; |
222 | mr->mr.map[m]->segs[n].length = region->page_size; | 234 | mr->mr.map[m]->segs[n].length = umem->page_size; |
223 | n++; | 235 | n++; |
224 | if (n == IPATH_SEGSZ) { | 236 | if (n == IPATH_SEGSZ) { |
225 | m++; | 237 | m++; |
@@ -253,6 +265,10 @@ int ipath_dereg_mr(struct ib_mr *ibmr)
253 | i--; | 265 | i--; |
254 | kfree(mr->mr.map[i]); | 266 | kfree(mr->mr.map[i]); |
255 | } | 267 | } |
268 | |||
269 | if (mr->umem) | ||
270 | ib_umem_release(mr->umem); | ||
271 | |||
256 | kfree(mr); | 272 | kfree(mr); |
257 | return 0; | 273 | return 0; |
258 | } | 274 | } |
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 7064fc222727..088b837ebea8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -251,6 +251,7 @@ struct ipath_sge {
251 | /* Memory region */ | 251 | /* Memory region */ |
252 | struct ipath_mr { | 252 | struct ipath_mr { |
253 | struct ib_mr ibmr; | 253 | struct ib_mr ibmr; |
254 | struct ib_umem *umem; | ||
254 | struct ipath_mregion mr; /* must be last */ | 255 | struct ipath_mregion mr; /* must be last */ |
255 | }; | 256 | }; |
256 | 257 | ||
@@ -751,8 +752,8 @@ struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
751 | struct ib_phys_buf *buffer_list, | 752 | struct ib_phys_buf *buffer_list, |
752 | int num_phys_buf, int acc, u64 *iova_start); | 753 | int num_phys_buf, int acc, u64 *iova_start); |
753 | 754 | ||
754 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | 755 | struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
755 | int mr_access_flags, | 756 | u64 virt_addr, int mr_access_flags, |
756 | struct ib_udata *udata); | 757 | struct ib_udata *udata); |
757 | 758 | ||
758 | int ipath_dereg_mr(struct ib_mr *ibmr); | 759 | int ipath_dereg_mr(struct ib_mr *ibmr); |
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
new file mode 100644
index 000000000000..b8912cdb9663
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -0,0 +1,9 @@
1 | config MLX4_INFINIBAND | ||
2 | tristate "Mellanox ConnectX HCA support" | ||
3 | depends on INFINIBAND | ||
4 | select MLX4_CORE | ||
5 | ---help--- | ||
6 | This driver provides low-level InfiniBand support for | ||
7 | Mellanox ConnectX PCI Express host channel adapters (HCAs). | ||
8 | This is required to use InfiniBand protocols such as | ||
9 | IP-over-IB or SRP with these devices. | ||
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
new file mode 100644
index 000000000000..70f09c7826da
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -0,0 +1,3 @@
1 | obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o | ||
2 | |||
3 | mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o | ||
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
new file mode 100644
index 000000000000..c75ac9463e20
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -0,0 +1,100 @@
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "mlx4_ib.h" | ||
34 | |||
35 | struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) | ||
36 | { | ||
37 | struct mlx4_dev *dev = to_mdev(pd->device)->dev; | ||
38 | struct mlx4_ib_ah *ah; | ||
39 | |||
40 | ah = kmalloc(sizeof *ah, GFP_ATOMIC); | ||
41 | if (!ah) | ||
42 | return ERR_PTR(-ENOMEM); | ||
43 | |||
44 | memset(&ah->av, 0, sizeof ah->av); | ||
45 | |||
46 | ah->av.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); | ||
47 | ah->av.g_slid = ah_attr->src_path_bits; | ||
48 | ah->av.dlid = cpu_to_be16(ah_attr->dlid); | ||
49 | if (ah_attr->static_rate) { | ||
50 | ah->av.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET; | ||
51 | while (ah->av.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && | ||
52 | !(1 << ah->av.stat_rate & dev->caps.stat_rate_support)) | ||
53 | --ah->av.stat_rate; | ||
54 | } | ||
55 | ah->av.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); | ||
56 | if (ah_attr->ah_flags & IB_AH_GRH) { | ||
57 | ah->av.g_slid |= 0x80; | ||
58 | ah->av.gid_index = ah_attr->grh.sgid_index; | ||
59 | ah->av.hop_limit = ah_attr->grh.hop_limit; | ||
60 | ah->av.sl_tclass_flowlabel |= | ||
61 | cpu_to_be32((ah_attr->grh.traffic_class << 20) | | ||
62 | ah_attr->grh.flow_label); | ||
63 | memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16); | ||
64 | } | ||
65 | |||
66 | return &ah->ibah; | ||
67 | } | ||
68 | |||
69 | int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) | ||
70 | { | ||
71 | struct mlx4_ib_ah *ah = to_mah(ibah); | ||
72 | |||
73 | memset(ah_attr, 0, sizeof *ah_attr); | ||
74 | ah_attr->dlid = be16_to_cpu(ah->av.dlid); | ||
75 | ah_attr->sl = be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; | ||
76 | ah_attr->port_num = be32_to_cpu(ah->av.port_pd) >> 24; | ||
77 | if (ah->av.stat_rate) | ||
78 | ah_attr->static_rate = ah->av.stat_rate - MLX4_STAT_RATE_OFFSET; | ||
79 | ah_attr->src_path_bits = ah->av.g_slid & 0x7F; | ||
80 | |||
81 | if (mlx4_ib_ah_grh_present(ah)) { | ||
82 | ah_attr->ah_flags = IB_AH_GRH; | ||
83 | |||
84 | ah_attr->grh.traffic_class = | ||
85 | be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20; | ||
86 | ah_attr->grh.flow_label = | ||
87 | be32_to_cpu(ah->av.sl_tclass_flowlabel) & 0xfffff; | ||
88 | ah_attr->grh.hop_limit = ah->av.hop_limit; | ||
89 | ah_attr->grh.sgid_index = ah->av.gid_index; | ||
90 | memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, 16); | ||
91 | } | ||
92 | |||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | int mlx4_ib_destroy_ah(struct ib_ah *ah) | ||
97 | { | ||
98 | kfree(to_mah(ah)); | ||
99 | return 0; | ||
100 | } | ||
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c new file mode 100644 index 000000000000..b2a290c6703a --- /dev/null +++ b/drivers/infiniband/hw/mlx4/cq.c | |||
@@ -0,0 +1,525 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/mlx4/cq.h> | ||
34 | #include <linux/mlx4/qp.h> | ||
35 | |||
36 | #include "mlx4_ib.h" | ||
37 | #include "user.h" | ||
38 | |||
39 | static void mlx4_ib_cq_comp(struct mlx4_cq *cq) | ||
40 | { | ||
41 | struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; | ||
42 | ibcq->comp_handler(ibcq, ibcq->cq_context); | ||
43 | } | ||
44 | |||
45 | static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) | ||
46 | { | ||
47 | struct ib_event event; | ||
48 | struct ib_cq *ibcq; | ||
49 | |||
50 | if (type != MLX4_EVENT_TYPE_CQ_ERROR) { | ||
51 | printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " | ||
52 | "on CQ %06x\n", type, cq->cqn); | ||
53 | return; | ||
54 | } | ||
55 | |||
56 | ibcq = &to_mibcq(cq)->ibcq; | ||
57 | if (ibcq->event_handler) { | ||
58 | event.device = ibcq->device; | ||
59 | event.event = IB_EVENT_CQ_ERR; | ||
60 | event.element.cq = ibcq; | ||
61 | ibcq->event_handler(&event, ibcq->cq_context); | ||
62 | } | ||
63 | } | ||
64 | |||
65 | static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) | ||
66 | { | ||
67 | int offset = n * sizeof (struct mlx4_cqe); | ||
68 | |||
69 | if (buf->buf.nbufs == 1) | ||
70 | return buf->buf.u.direct.buf + offset; | ||
71 | else | ||
72 | return buf->buf.u.page_list[offset >> PAGE_SHIFT].buf + | ||
73 | (offset & (PAGE_SIZE - 1)); | ||
74 | } | ||
75 | |||
76 | static void *get_cqe(struct mlx4_ib_cq *cq, int n) | ||
77 | { | ||
78 | return get_cqe_from_buf(&cq->buf, n); | ||
79 | } | ||
80 | |||
81 | static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) | ||
82 | { | ||
83 | struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); | ||
84 | |||
85 | return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ | ||
86 | !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; | ||
87 | } | ||
88 | |||
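Editor's note: get_sw_cqe() above implements the usual Mellanox ownership protocol. Hardware flips the CQE owner bit each time it wraps the ring, so an entry belongs to software only while that bit's parity matches the pass parity carried in bit (cqe + 1) of the consumer index. A standalone sketch of the parity test (not driver code; 0x80 stands in for MLX4_CQE_OWNER_MASK, and the CQ is assumed to have 128 entries, i.e. cqe == 127):

    #include <stdio.h>

    #define CQE_OWNER_BIT 0x80   /* stand-in for MLX4_CQE_OWNER_MASK */

    static int cqe_is_sw(unsigned owner_sr_opcode, unsigned cons_index,
                         unsigned cqe_mask)   /* cq->ibcq.cqe, e.g. 127 */
    {
        unsigned hw_pass = !!(owner_sr_opcode & CQE_OWNER_BIT);
        unsigned sw_pass = !!(cons_index & (cqe_mask + 1));

        return hw_pass == sw_pass;   /* the driver returns cqe vs. NULL */
    }

    int main(void)
    {
        /* First pass, index 5: HW wrote owner bit 0, so software owns it. */
        printf("%d\n", cqe_is_sw(0x00, 5, 127));             /* 1 */
        /* Second pass, index 130: HW has toggled the owner bit to 1.      */
        printf("%d\n", cqe_is_sw(CQE_OWNER_BIT, 130, 127));  /* 1 */
        /* A stale first-pass entry seen on the second pass: not ours yet. */
        printf("%d\n", cqe_is_sw(0x00, 130, 127));           /* 0 */
        return 0;
    }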
89 | static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) | ||
90 | { | ||
91 | return get_sw_cqe(cq, cq->mcq.cons_index); | ||
92 | } | ||
93 | |||
94 | struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, | ||
95 | struct ib_ucontext *context, | ||
96 | struct ib_udata *udata) | ||
97 | { | ||
98 | struct mlx4_ib_dev *dev = to_mdev(ibdev); | ||
99 | struct mlx4_ib_cq *cq; | ||
100 | struct mlx4_uar *uar; | ||
101 | int buf_size; | ||
102 | int err; | ||
103 | |||
104 | if (entries < 1 || entries > dev->dev->caps.max_cqes) | ||
105 | return ERR_PTR(-EINVAL); | ||
106 | |||
107 | cq = kmalloc(sizeof *cq, GFP_KERNEL); | ||
108 | if (!cq) | ||
109 | return ERR_PTR(-ENOMEM); | ||
110 | |||
111 | entries = roundup_pow_of_two(entries + 1); | ||
112 | cq->ibcq.cqe = entries - 1; | ||
113 | buf_size = entries * sizeof (struct mlx4_cqe); | ||
114 | spin_lock_init(&cq->lock); | ||
115 | |||
116 | if (context) { | ||
117 | struct mlx4_ib_create_cq ucmd; | ||
118 | |||
119 | if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { | ||
120 | err = -EFAULT; | ||
121 | goto err_cq; | ||
122 | } | ||
123 | |||
124 | cq->umem = ib_umem_get(context, ucmd.buf_addr, buf_size, | ||
125 | IB_ACCESS_LOCAL_WRITE); | ||
126 | if (IS_ERR(cq->umem)) { | ||
127 | err = PTR_ERR(cq->umem); | ||
128 | goto err_cq; | ||
129 | } | ||
130 | |||
131 | err = mlx4_mtt_init(dev->dev, ib_umem_page_count(cq->umem), | ||
132 | ilog2(cq->umem->page_size), &cq->buf.mtt); | ||
133 | if (err) | ||
134 | goto err_buf; | ||
135 | |||
136 | err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->umem); | ||
137 | if (err) | ||
138 | goto err_mtt; | ||
139 | |||
140 | err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr, | ||
141 | &cq->db); | ||
142 | if (err) | ||
143 | goto err_mtt; | ||
144 | |||
145 | uar = &to_mucontext(context)->uar; | ||
146 | } else { | ||
147 | err = mlx4_ib_db_alloc(dev, &cq->db, 1); | ||
148 | if (err) | ||
149 | goto err_cq; | ||
150 | |||
151 | cq->mcq.set_ci_db = cq->db.db; | ||
152 | cq->mcq.arm_db = cq->db.db + 1; | ||
153 | *cq->mcq.set_ci_db = 0; | ||
154 | *cq->mcq.arm_db = 0; | ||
155 | |||
156 | if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &cq->buf.buf)) { | ||
157 | err = -ENOMEM; | ||
158 | goto err_db; | ||
159 | } | ||
160 | |||
161 | err = mlx4_mtt_init(dev->dev, cq->buf.buf.npages, cq->buf.buf.page_shift, | ||
162 | &cq->buf.mtt); | ||
163 | if (err) | ||
164 | goto err_buf; | ||
165 | |||
166 | err = mlx4_buf_write_mtt(dev->dev, &cq->buf.mtt, &cq->buf.buf); | ||
167 | if (err) | ||
168 | goto err_mtt; | ||
169 | |||
170 | uar = &dev->priv_uar; | ||
171 | } | ||
172 | |||
173 | err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, | ||
174 | cq->db.dma, &cq->mcq); | ||
175 | if (err) | ||
176 | goto err_dbmap; | ||
177 | |||
178 | cq->mcq.comp = mlx4_ib_cq_comp; | ||
179 | cq->mcq.event = mlx4_ib_cq_event; | ||
180 | |||
181 | if (context) | ||
182 | if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { | ||
183 | err = -EFAULT; | ||
184 | goto err_dbmap; | ||
185 | } | ||
186 | |||
187 | return &cq->ibcq; | ||
188 | |||
189 | err_dbmap: | ||
190 | if (context) | ||
191 | mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); | ||
192 | |||
193 | err_mtt: | ||
194 | mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); | ||
195 | |||
196 | err_buf: | ||
197 | if (context) | ||
198 | ib_umem_release(cq->umem); | ||
199 | else | ||
200 | mlx4_buf_free(dev->dev, entries * sizeof (struct mlx4_cqe), | ||
201 | &cq->buf.buf); | ||
202 | |||
203 | err_db: | ||
204 | if (!context) | ||
205 | mlx4_ib_db_free(dev, &cq->db); | ||
206 | |||
207 | err_cq: | ||
208 | kfree(cq); | ||
209 | |||
210 | return ERR_PTR(err); | ||
211 | } | ||
212 | |||
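Editor's note: mlx4_ib_create_cq() rounds the requested depth up to a power of two after adding one spare entry, then reports one less than the allocation, so ibcq.cqe doubles as the ring index mask. A minimal sketch of the arithmetic (roundup_pow_of_two_u is a userspace stand-in for the kernel helper):

    #include <stdio.h>

    static unsigned roundup_pow_of_two_u(unsigned x)  /* userspace stand-in */
    {
        unsigned r = 1;

        while (r < x)
            r <<= 1;
        return r;
    }

    int main(void)
    {
        unsigned requested = 100;
        unsigned entries = roundup_pow_of_two_u(requested + 1);  /* 128 */

        printf("allocate %u CQEs, report ibcq.cqe = %u\n",
               entries, entries - 1);  /* 127 also serves as the index mask */
        return 0;
    }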
213 | int mlx4_ib_destroy_cq(struct ib_cq *cq) | ||
214 | { | ||
215 | struct mlx4_ib_dev *dev = to_mdev(cq->device); | ||
216 | struct mlx4_ib_cq *mcq = to_mcq(cq); | ||
217 | |||
218 | mlx4_cq_free(dev->dev, &mcq->mcq); | ||
219 | mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); | ||
220 | |||
221 | if (cq->uobject) { | ||
222 | mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db); | ||
223 | ib_umem_release(mcq->umem); | ||
224 | } else { | ||
225 | mlx4_buf_free(dev->dev, (cq->cqe + 1) * sizeof (struct mlx4_cqe), | ||
226 | &mcq->buf.buf); | ||
227 | mlx4_ib_db_free(dev, &mcq->db); | ||
228 | } | ||
229 | |||
230 | kfree(mcq); | ||
231 | |||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | static void dump_cqe(void *cqe) | ||
236 | { | ||
237 | __be32 *buf = cqe; | ||
238 | |||
239 | printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", | ||
240 | be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), | ||
241 | be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), | ||
242 | be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); | ||
243 | } | ||
244 | |||
245 | static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, | ||
246 | struct ib_wc *wc) | ||
247 | { | ||
248 | if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { | ||
249 | printk(KERN_DEBUG "local QP operation err " | ||
250 | "(QPN %06x, WQE index %x, vendor syndrome %02x, " | ||
251 | "opcode = %02x)\n", | ||
252 | be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), | ||
253 | cqe->vendor_err_syndrome, | ||
254 | cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); | ||
255 | dump_cqe(cqe); | ||
256 | } | ||
257 | |||
258 | switch (cqe->syndrome) { | ||
259 | case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: | ||
260 | wc->status = IB_WC_LOC_LEN_ERR; | ||
261 | break; | ||
262 | case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: | ||
263 | wc->status = IB_WC_LOC_QP_OP_ERR; | ||
264 | break; | ||
265 | case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: | ||
266 | wc->status = IB_WC_LOC_PROT_ERR; | ||
267 | break; | ||
268 | case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: | ||
269 | wc->status = IB_WC_WR_FLUSH_ERR; | ||
270 | break; | ||
271 | case MLX4_CQE_SYNDROME_MW_BIND_ERR: | ||
272 | wc->status = IB_WC_MW_BIND_ERR; | ||
273 | break; | ||
274 | case MLX4_CQE_SYNDROME_BAD_RESP_ERR: | ||
275 | wc->status = IB_WC_BAD_RESP_ERR; | ||
276 | break; | ||
277 | case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: | ||
278 | wc->status = IB_WC_LOC_ACCESS_ERR; | ||
279 | break; | ||
280 | case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: | ||
281 | wc->status = IB_WC_REM_INV_REQ_ERR; | ||
282 | break; | ||
283 | case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: | ||
284 | wc->status = IB_WC_REM_ACCESS_ERR; | ||
285 | break; | ||
286 | case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: | ||
287 | wc->status = IB_WC_REM_OP_ERR; | ||
288 | break; | ||
289 | case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: | ||
290 | wc->status = IB_WC_RETRY_EXC_ERR; | ||
291 | break; | ||
292 | case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: | ||
293 | wc->status = IB_WC_RNR_RETRY_EXC_ERR; | ||
294 | break; | ||
295 | case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: | ||
296 | wc->status = IB_WC_REM_ABORT_ERR; | ||
297 | break; | ||
298 | default: | ||
299 | wc->status = IB_WC_GENERAL_ERR; | ||
300 | break; | ||
301 | } | ||
302 | |||
303 | wc->vendor_err = cqe->vendor_err_syndrome; | ||
304 | } | ||
305 | |||
306 | static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, | ||
307 | struct mlx4_ib_qp **cur_qp, | ||
308 | struct ib_wc *wc) | ||
309 | { | ||
310 | struct mlx4_cqe *cqe; | ||
311 | struct mlx4_qp *mqp; | ||
312 | struct mlx4_ib_wq *wq; | ||
313 | struct mlx4_ib_srq *srq; | ||
314 | int is_send; | ||
315 | int is_error; | ||
316 | u16 wqe_ctr; | ||
317 | |||
318 | cqe = next_cqe_sw(cq); | ||
319 | if (!cqe) | ||
320 | return -EAGAIN; | ||
321 | |||
322 | ++cq->mcq.cons_index; | ||
323 | |||
324 | /* | ||
325 | * Make sure we read CQ entry contents after we've checked the | ||
326 | * ownership bit. | ||
327 | */ | ||
328 | rmb(); | ||
329 | |||
330 | is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; | ||
331 | is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == | ||
332 | MLX4_CQE_OPCODE_ERROR; | ||
333 | |||
334 | if (!*cur_qp || | ||
335 | (be32_to_cpu(cqe->my_qpn) & 0xffffff) != (*cur_qp)->mqp.qpn) { | ||
336 | /* | ||
337 | * We do not have to take the QP table lock here, | ||
338 | * because CQs will be locked while QPs are removed | ||
339 | * from the table. | ||
340 | */ | ||
341 | mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, | ||
342 | be32_to_cpu(cqe->my_qpn)); | ||
343 | if (unlikely(!mqp)) { | ||
344 | printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n", | ||
345 | cq->mcq.cqn, be32_to_cpu(cqe->my_qpn) & 0xffffff); | ||
346 | return -EINVAL; | ||
347 | } | ||
348 | |||
349 | *cur_qp = to_mibqp(mqp); | ||
350 | } | ||
351 | |||
352 | wc->qp = &(*cur_qp)->ibqp; | ||
353 | |||
354 | if (is_send) { | ||
355 | wq = &(*cur_qp)->sq; | ||
356 | wqe_ctr = be16_to_cpu(cqe->wqe_index); | ||
357 | wq->tail += wqe_ctr - (u16) wq->tail; | ||
358 | wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; | ||
359 | ++wq->tail; | ||
360 | } else if ((*cur_qp)->ibqp.srq) { | ||
361 | srq = to_msrq((*cur_qp)->ibqp.srq); | ||
362 | wqe_ctr = be16_to_cpu(cqe->wqe_index); | ||
363 | wc->wr_id = srq->wrid[wqe_ctr]; | ||
364 | mlx4_ib_free_srq_wqe(srq, wqe_ctr); | ||
365 | } else { | ||
366 | wq = &(*cur_qp)->rq; | ||
367 | wc->wr_id = wq->wrid[wq->tail & (wq->max - 1)]; | ||
368 | ++wq->tail; | ||
369 | } | ||
370 | |||
371 | if (unlikely(is_error)) { | ||
372 | mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); | ||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | wc->status = IB_WC_SUCCESS; | ||
377 | |||
378 | if (is_send) { | ||
379 | wc->wc_flags = 0; | ||
380 | switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { | ||
381 | case MLX4_OPCODE_RDMA_WRITE_IMM: | ||
382 | wc->wc_flags |= IB_WC_WITH_IMM; /* fall through */ | ||
383 | case MLX4_OPCODE_RDMA_WRITE: | ||
384 | wc->opcode = IB_WC_RDMA_WRITE; | ||
385 | break; | ||
386 | case MLX4_OPCODE_SEND_IMM: | ||
387 | wc->wc_flags |= IB_WC_WITH_IMM; /* fall through */ | ||
388 | case MLX4_OPCODE_SEND: | ||
389 | wc->opcode = IB_WC_SEND; | ||
390 | break; | ||
391 | case MLX4_OPCODE_RDMA_READ: | ||
392 | wc->opcode = IB_WC_RDMA_READ; | ||
393 | wc->byte_len = be32_to_cpu(cqe->byte_cnt); | ||
394 | break; | ||
395 | case MLX4_OPCODE_ATOMIC_CS: | ||
396 | wc->opcode = IB_WC_COMP_SWAP; | ||
397 | wc->byte_len = 8; | ||
398 | break; | ||
399 | case MLX4_OPCODE_ATOMIC_FA: | ||
400 | wc->opcode = IB_WC_FETCH_ADD; | ||
401 | wc->byte_len = 8; | ||
402 | break; | ||
403 | case MLX4_OPCODE_BIND_MW: | ||
404 | wc->opcode = IB_WC_BIND_MW; | ||
405 | break; | ||
406 | } | ||
407 | } else { | ||
408 | wc->byte_len = be32_to_cpu(cqe->byte_cnt); | ||
409 | |||
410 | switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { | ||
411 | case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: | ||
412 | wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; | ||
413 | wc->wc_flags = IB_WC_WITH_IMM; | ||
414 | wc->imm_data = cqe->immed_rss_invalid; | ||
415 | break; | ||
416 | case MLX4_RECV_OPCODE_SEND: | ||
417 | wc->opcode = IB_WC_RECV; | ||
418 | wc->wc_flags = 0; | ||
419 | break; | ||
420 | case MLX4_RECV_OPCODE_SEND_IMM: | ||
421 | wc->opcode = IB_WC_RECV; | ||
422 | wc->wc_flags = IB_WC_WITH_IMM; | ||
423 | wc->imm_data = cqe->immed_rss_invalid; | ||
424 | break; | ||
425 | } | ||
426 | |||
427 | wc->slid = be16_to_cpu(cqe->rlid); | ||
428 | wc->sl = cqe->sl >> 4; | ||
429 | wc->src_qp = be32_to_cpu(cqe->g_mlpath_rqpn) & 0xffffff; | ||
430 | wc->dlid_path_bits = (be32_to_cpu(cqe->g_mlpath_rqpn) >> 24) & 0x7f; | ||
431 | wc->wc_flags |= be32_to_cpu(cqe->g_mlpath_rqpn) & 0x80000000 ? | ||
432 | IB_WC_GRH : 0; | ||
433 | wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) >> 16; | ||
434 | } | ||
435 | |||
436 | return 0; | ||
437 | } | ||
438 | |||
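Editor's note: in the send-completion path of mlx4_ib_poll_one() above, a single CQE can retire several unsignaled WQEs, so wq->tail is snapped forward to the CQE's 16-bit WQE counter. Truncating the subtraction to u16 makes the gap come out right across counter wraparound; the driver's expression differs only by a full-width carry, which the (wq->max - 1) index mask discards. A standalone sketch (not driver code):

    #include <stdio.h>

    int main(void)
    {
        unsigned int tail = 0xfffe;          /* wq->tail, full width        */
        unsigned short wqe_ctr = 0x0001;     /* 16-bit counter from the CQE */
        unsigned short gap = wqe_ctr - (unsigned short) tail;  /* 3, mod 2^16 */

        tail += gap;
        printf("tail caught up to %#x (skipped %u unsignaled WQEs)\n",
               tail, gap - 1);               /* 0x10001, 2 skipped */
        return 0;
    }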
439 | int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) | ||
440 | { | ||
441 | struct mlx4_ib_cq *cq = to_mcq(ibcq); | ||
442 | struct mlx4_ib_qp *cur_qp = NULL; | ||
443 | unsigned long flags; | ||
444 | int npolled; | ||
445 | int err = 0; | ||
446 | |||
447 | spin_lock_irqsave(&cq->lock, flags); | ||
448 | |||
449 | for (npolled = 0; npolled < num_entries; ++npolled) { | ||
450 | err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); | ||
451 | if (err) | ||
452 | break; | ||
453 | } | ||
454 | |||
455 | if (npolled) | ||
456 | mlx4_cq_set_ci(&cq->mcq); | ||
457 | |||
458 | spin_unlock_irqrestore(&cq->lock, flags); | ||
459 | |||
460 | if (err == 0 || err == -EAGAIN) | ||
461 | return npolled; | ||
462 | else | ||
463 | return err; | ||
464 | } | ||
465 | |||
466 | int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) | ||
467 | { | ||
468 | mlx4_cq_arm(&to_mcq(ibcq)->mcq, | ||
469 | (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? | ||
470 | MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT, | ||
471 | to_mdev(ibcq->device)->uar_map, | ||
472 | MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock)); | ||
473 | |||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) | ||
478 | { | ||
479 | u32 prod_index; | ||
480 | int nfreed = 0; | ||
481 | struct mlx4_cqe *cqe; | ||
482 | |||
483 | /* | ||
484 | * First we need to find the current producer index, so we | ||
485 | * know where to start cleaning from. It doesn't matter if HW | ||
486 | * adds new entries after this loop -- the QP we're worried | ||
487 | * about is already in RESET, so the new entries won't come | ||
488 | * from our QP and therefore don't need to be checked. | ||
489 | */ | ||
490 | for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index) | ||
491 | if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) | ||
492 | break; | ||
493 | |||
494 | /* | ||
495 | * Now sweep backwards through the CQ, removing CQ entries | ||
496 | * that match our QP by copying older entries on top of them. | ||
497 | */ | ||
498 | while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { | ||
499 | cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); | ||
500 | if ((be32_to_cpu(cqe->my_qpn) & 0xffffff) == qpn) { | ||
501 | if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) | ||
502 | mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); | ||
503 | ++nfreed; | ||
504 | } else if (nfreed) | ||
505 | memcpy(get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe), | ||
506 | cqe, sizeof *cqe); | ||
507 | } | ||
508 | |||
509 | if (nfreed) { | ||
510 | cq->mcq.cons_index += nfreed; | ||
511 | /* | ||
512 | * Make sure update of buffer contents is done before | ||
513 | * updating consumer index. | ||
514 | */ | ||
515 | wmb(); | ||
516 | mlx4_cq_set_ci(&cq->mcq); | ||
517 | } | ||
518 | } | ||
519 | |||
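Editor's note: the backward sweep in __mlx4_ib_cq_clean() above compacts the ring in place. Each surviving entry slides up by the number of doomed entries seen so far, and the consumer index then jumps over the hole. A flattened sketch with an 8-entry ring and QPN 11 as the doomed QP (not driver code; the driver additionally wraps every index with & cq->ibcq.cqe):

    #include <stdio.h>

    int main(void)
    {
        /* Entries cons..prod-1 are valid; QPN 11 belongs to the reset QP. */
        int qpn[8] = { 11, 22, 11, 33, 0, 0, 0, 0 };
        int cons = 0, prod = 4, nfreed = 0, i;

        for (i = prod - 1; i >= cons; --i) {
            if (qpn[i] == 11)
                ++nfreed;                    /* drop this entry        */
            else if (nfreed)
                qpn[i + nfreed] = qpn[i];    /* slide survivor upward  */
        }
        cons += nfreed;                      /* skip over the hole     */

        for (i = cons; i < prod; ++i)
            printf("%d ", qpn[i]);           /* prints: 22 33          */
        printf("(consumer index advanced by %d)\n", nfreed);
        return 0;
    }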
520 | void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) | ||
521 | { | ||
522 | spin_lock_irq(&cq->lock); | ||
523 | __mlx4_ib_cq_clean(cq, qpn, srq); | ||
524 | spin_unlock_irq(&cq->lock); | ||
525 | } | ||
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c new file mode 100644 index 000000000000..1c36087aef14 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/doorbell.c | |||
@@ -0,0 +1,216 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include "mlx4_ib.h" | ||
36 | |||
37 | struct mlx4_ib_db_pgdir { | ||
38 | struct list_head list; | ||
39 | DECLARE_BITMAP(order0, MLX4_IB_DB_PER_PAGE); | ||
40 | DECLARE_BITMAP(order1, MLX4_IB_DB_PER_PAGE / 2); | ||
41 | unsigned long *bits[2]; | ||
42 | __be32 *db_page; | ||
43 | dma_addr_t db_dma; | ||
44 | }; | ||
45 | |||
46 | static struct mlx4_ib_db_pgdir *mlx4_ib_alloc_db_pgdir(struct mlx4_ib_dev *dev) | ||
47 | { | ||
48 | struct mlx4_ib_db_pgdir *pgdir; | ||
49 | |||
50 | pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL); | ||
51 | if (!pgdir) | ||
52 | return NULL; | ||
53 | |||
54 | bitmap_fill(pgdir->order1, MLX4_IB_DB_PER_PAGE / 2); | ||
55 | pgdir->bits[0] = pgdir->order0; | ||
56 | pgdir->bits[1] = pgdir->order1; | ||
57 | pgdir->db_page = dma_alloc_coherent(dev->ib_dev.dma_device, | ||
58 | PAGE_SIZE, &pgdir->db_dma, | ||
59 | GFP_KERNEL); | ||
60 | if (!pgdir->db_page) { | ||
61 | kfree(pgdir); | ||
62 | return NULL; | ||
63 | } | ||
64 | |||
65 | return pgdir; | ||
66 | } | ||
67 | |||
68 | static int mlx4_ib_alloc_db_from_pgdir(struct mlx4_ib_db_pgdir *pgdir, | ||
69 | struct mlx4_ib_db *db, int order) | ||
70 | { | ||
71 | int o; | ||
72 | int i; | ||
73 | |||
74 | for (o = order; o <= 1; ++o) { | ||
75 | i = find_first_bit(pgdir->bits[o], MLX4_IB_DB_PER_PAGE >> o); | ||
76 | if (i < MLX4_IB_DB_PER_PAGE >> o) | ||
77 | goto found; | ||
78 | } | ||
79 | |||
80 | return -ENOMEM; | ||
81 | |||
82 | found: | ||
83 | clear_bit(i, pgdir->bits[o]); | ||
84 | |||
85 | i <<= o; | ||
86 | |||
87 | if (o > order) | ||
88 | set_bit(i ^ 1, pgdir->bits[order]); | ||
89 | |||
90 | db->u.pgdir = pgdir; | ||
91 | db->index = i; | ||
92 | db->db = pgdir->db_page + db->index; | ||
93 | db->dma = pgdir->db_dma + db->index * 4; | ||
94 | db->order = order; | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
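Editor's note: the pgdir allocator above is a two-order buddy system over one DMA page of doorbell records. An order-1 request takes an aligned pair, an order-0 request splits a pair and parks the buddy (i ^ 1) on the order-0 free list, and mlx4_ib_db_free() below re-merges buddies on release. A userspace sketch of the allocation half (not driver code; 128 is an assumed stand-in for MLX4_IB_DB_PER_PAGE, and plain char arrays stand in for the bitmaps):

    #include <stdio.h>
    #include <string.h>

    #define DB_PER_PAGE 128                 /* assumed stand-in */

    static char order0[DB_PER_PAGE];        /* 1 = free single doorbell */
    static char order1[DB_PER_PAGE / 2];    /* 1 = free aligned pair    */

    static int alloc_db(int order)
    {
        int o, i;

        for (o = order; o <= 1; ++o) {
            int n = DB_PER_PAGE >> o;
            char *bits = o ? order1 : order0;

            for (i = 0; i < n; ++i)
                if (bits[i])
                    goto found;
        }
        return -1;

    found:
        (o ? order1 : order0)[i] = 0;
        i <<= o;
        if (o > order)
            order0[i ^ 1] = 1;              /* park the buddy at order 0 */
        return i;                           /* doorbell index in the page */
    }

    int main(void)
    {
        memset(order1, 1, sizeof order1);   /* fresh page: all pairs free */

        printf("%d\n", alloc_db(0));        /* 0: splits pair {0,1}       */
        printf("%d\n", alloc_db(0));        /* 1: reuses the parked buddy */
        printf("%d\n", alloc_db(1));        /* 2: next aligned pair       */
        return 0;
    }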
99 | int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order) | ||
100 | { | ||
101 | struct mlx4_ib_db_pgdir *pgdir; | ||
102 | int ret = 0; | ||
103 | |||
104 | mutex_lock(&dev->pgdir_mutex); | ||
105 | |||
106 | list_for_each_entry(pgdir, &dev->pgdir_list, list) | ||
107 | if (!mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)) | ||
108 | goto out; | ||
109 | |||
110 | pgdir = mlx4_ib_alloc_db_pgdir(dev); | ||
111 | if (!pgdir) { | ||
112 | ret = -ENOMEM; | ||
113 | goto out; | ||
114 | } | ||
115 | |||
116 | list_add(&pgdir->list, &dev->pgdir_list); | ||
117 | |||
118 | /* This should never fail -- we just allocated an empty page: */ | ||
119 | WARN_ON(mlx4_ib_alloc_db_from_pgdir(pgdir, db, order)); | ||
120 | |||
121 | out: | ||
122 | mutex_unlock(&dev->pgdir_mutex); | ||
123 | |||
124 | return ret; | ||
125 | } | ||
126 | |||
127 | void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db) | ||
128 | { | ||
129 | int o; | ||
130 | int i; | ||
131 | |||
132 | mutex_lock(&dev->pgdir_mutex); | ||
133 | |||
134 | o = db->order; | ||
135 | i = db->index; | ||
136 | |||
137 | if (db->order == 0 && test_bit(i ^ 1, db->u.pgdir->order0)) { | ||
138 | clear_bit(i ^ 1, db->u.pgdir->order0); | ||
139 | ++o; | ||
140 | } | ||
141 | |||
142 | i >>= o; | ||
143 | set_bit(i, db->u.pgdir->bits[o]); | ||
144 | |||
145 | if (bitmap_full(db->u.pgdir->order1, MLX4_IB_DB_PER_PAGE / 2)) { | ||
146 | dma_free_coherent(dev->ib_dev.dma_device, PAGE_SIZE, | ||
147 | db->u.pgdir->db_page, db->u.pgdir->db_dma); | ||
148 | list_del(&db->u.pgdir->list); | ||
149 | kfree(db->u.pgdir); | ||
150 | } | ||
151 | |||
152 | mutex_unlock(&dev->pgdir_mutex); | ||
153 | } | ||
154 | |||
155 | struct mlx4_ib_user_db_page { | ||
156 | struct list_head list; | ||
157 | struct ib_umem *umem; | ||
158 | unsigned long user_virt; | ||
159 | int refcnt; | ||
160 | }; | ||
161 | |||
162 | int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, | ||
163 | struct mlx4_ib_db *db) | ||
164 | { | ||
165 | struct mlx4_ib_user_db_page *page; | ||
166 | struct ib_umem_chunk *chunk; | ||
167 | int err = 0; | ||
168 | |||
169 | mutex_lock(&context->db_page_mutex); | ||
170 | |||
171 | list_for_each_entry(page, &context->db_page_list, list) | ||
172 | if (page->user_virt == (virt & PAGE_MASK)) | ||
173 | goto found; | ||
174 | |||
175 | page = kmalloc(sizeof *page, GFP_KERNEL); | ||
176 | if (!page) { | ||
177 | err = -ENOMEM; | ||
178 | goto out; | ||
179 | } | ||
180 | |||
181 | page->user_virt = (virt & PAGE_MASK); | ||
182 | page->refcnt = 0; | ||
183 | page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK, | ||
184 | PAGE_SIZE, 0); | ||
185 | if (IS_ERR(page->umem)) { | ||
186 | err = PTR_ERR(page->umem); | ||
187 | kfree(page); | ||
188 | goto out; | ||
189 | } | ||
190 | |||
191 | list_add(&page->list, &context->db_page_list); | ||
192 | |||
193 | found: | ||
194 | chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); | ||
195 | db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); | ||
196 | db->u.user_page = page; | ||
197 | ++page->refcnt; | ||
198 | |||
199 | out: | ||
200 | mutex_unlock(&context->db_page_mutex); | ||
201 | |||
202 | return err; | ||
203 | } | ||
204 | |||
205 | void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db) | ||
206 | { | ||
207 | mutex_lock(&context->db_page_mutex); | ||
208 | |||
209 | if (!--db->u.user_page->refcnt) { | ||
210 | list_del(&db->u.user_page->list); | ||
211 | ib_umem_release(db->u.user_page->umem); | ||
212 | kfree(db->u.user_page); | ||
213 | } | ||
214 | |||
215 | mutex_unlock(&context->db_page_mutex); | ||
216 | } | ||
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c new file mode 100644 index 000000000000..333091787c5f --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mad.c | |||
@@ -0,0 +1,339 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_mad.h> | ||
34 | #include <rdma/ib_smi.h> | ||
35 | |||
36 | #include <linux/mlx4/cmd.h> | ||
37 | |||
38 | #include "mlx4_ib.h" | ||
39 | |||
40 | enum { | ||
41 | MLX4_IB_VENDOR_CLASS1 = 0x9, | ||
42 | MLX4_IB_VENDOR_CLASS2 = 0xa | ||
43 | }; | ||
44 | |||
45 | int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, | ||
46 | int port, struct ib_wc *in_wc, struct ib_grh *in_grh, | ||
47 | void *in_mad, void *response_mad) | ||
48 | { | ||
49 | struct mlx4_cmd_mailbox *inmailbox, *outmailbox; | ||
50 | void *inbox; | ||
51 | int err; | ||
52 | u32 in_modifier = port; | ||
53 | u8 op_modifier = 0; | ||
54 | |||
55 | inmailbox = mlx4_alloc_cmd_mailbox(dev->dev); | ||
56 | if (IS_ERR(inmailbox)) | ||
57 | return PTR_ERR(inmailbox); | ||
58 | inbox = inmailbox->buf; | ||
59 | |||
60 | outmailbox = mlx4_alloc_cmd_mailbox(dev->dev); | ||
61 | if (IS_ERR(outmailbox)) { | ||
62 | mlx4_free_cmd_mailbox(dev->dev, inmailbox); | ||
63 | return PTR_ERR(outmailbox); | ||
64 | } | ||
65 | |||
66 | memcpy(inbox, in_mad, 256); | ||
67 | |||
68 | /* | ||
69 | * Key check traps can't be generated unless we have in_wc to | ||
70 | * tell us where to send the trap. | ||
71 | */ | ||
72 | if (ignore_mkey || !in_wc) | ||
73 | op_modifier |= 0x1; | ||
74 | if (ignore_bkey || !in_wc) | ||
75 | op_modifier |= 0x2; | ||
76 | |||
77 | if (in_wc) { | ||
78 | struct { | ||
79 | __be32 my_qpn; | ||
80 | u32 reserved1; | ||
81 | __be32 rqpn; | ||
82 | u8 sl; | ||
83 | u8 g_path; | ||
84 | u16 reserved2[2]; | ||
85 | __be16 pkey; | ||
86 | u32 reserved3[11]; | ||
87 | u8 grh[40]; | ||
88 | } *ext_info; | ||
89 | |||
90 | memset(inbox + 256, 0, 256); | ||
91 | ext_info = inbox + 256; | ||
92 | |||
93 | ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num); | ||
94 | ext_info->rqpn = cpu_to_be32(in_wc->src_qp); | ||
95 | ext_info->sl = in_wc->sl << 4; | ||
96 | ext_info->g_path = in_wc->dlid_path_bits | | ||
97 | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); | ||
98 | ext_info->pkey = cpu_to_be16(in_wc->pkey_index); | ||
99 | |||
100 | if (in_grh) | ||
101 | memcpy(ext_info->grh, in_grh, 40); | ||
102 | |||
103 | op_modifier |= 0x4; | ||
104 | |||
105 | in_modifier |= in_wc->slid << 16; | ||
106 | } | ||
107 | |||
108 | err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, | ||
109 | in_modifier, op_modifier, | ||
110 | MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); | ||
111 | |||
112 | if (!err) | ||
113 | memcpy(response_mad, outmailbox->buf, 256); | ||
114 | |||
115 | mlx4_free_cmd_mailbox(dev->dev, inmailbox); | ||
116 | mlx4_free_cmd_mailbox(dev->dev, outmailbox); | ||
117 | |||
118 | return err; | ||
119 | } | ||
120 | |||
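Editor's note: as read from mlx4_MAD_IFC() above (an inference from this function, not a documented firmware interface), the MAD_IFC op_modifier bits break down as follows; the names are hypothetical:

    enum {
        MAD_IFC_IGNORE_MKEY = 1 << 0,  /* suppress M_Key check             */
        MAD_IFC_IGNORE_BKEY = 1 << 1,  /* suppress B_Key check             */
        MAD_IFC_WC_INFO     = 1 << 2,  /* WC/GRH block present at inbox+256 */
    };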
121 | static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl) | ||
122 | { | ||
123 | struct ib_ah *new_ah; | ||
124 | struct ib_ah_attr ah_attr; | ||
125 | |||
126 | if (!dev->send_agent[port_num - 1][0]) | ||
127 | return; | ||
128 | |||
129 | memset(&ah_attr, 0, sizeof ah_attr); | ||
130 | ah_attr.dlid = lid; | ||
131 | ah_attr.sl = sl; | ||
132 | ah_attr.port_num = port_num; | ||
133 | |||
134 | new_ah = ib_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, | ||
135 | &ah_attr); | ||
136 | if (IS_ERR(new_ah)) | ||
137 | return; | ||
138 | |||
139 | spin_lock(&dev->sm_lock); | ||
140 | if (dev->sm_ah[port_num - 1]) | ||
141 | ib_destroy_ah(dev->sm_ah[port_num - 1]); | ||
142 | dev->sm_ah[port_num - 1] = new_ah; | ||
143 | spin_unlock(&dev->sm_lock); | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * Snoop SM MADs for port info and P_Key table sets, so we can | ||
148 | * synthesize LID change and P_Key change events. | ||
149 | */ | ||
150 | static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad) | ||
151 | { | ||
152 | struct ib_event event; | ||
153 | |||
154 | if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || | ||
155 | mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && | ||
156 | mad->mad_hdr.method == IB_MGMT_METHOD_SET) { | ||
157 | if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) { | ||
158 | struct ib_port_info *pinfo = | ||
159 | (struct ib_port_info *) ((struct ib_smp *) mad)->data; | ||
160 | |||
161 | update_sm_ah(to_mdev(ibdev), port_num, | ||
162 | be16_to_cpu(pinfo->sm_lid), | ||
163 | pinfo->neighbormtu_mastersmsl & 0xf); | ||
164 | |||
165 | event.device = ibdev; | ||
166 | event.element.port_num = port_num; | ||
167 | |||
168 | if (pinfo->clientrereg_resv_subnetto & 0x80) | ||
169 | event.event = IB_EVENT_CLIENT_REREGISTER; | ||
170 | else | ||
171 | event.event = IB_EVENT_LID_CHANGE; | ||
172 | |||
173 | ib_dispatch_event(&event); | ||
174 | } | ||
175 | |||
176 | if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) { | ||
177 | event.device = ibdev; | ||
178 | event.event = IB_EVENT_PKEY_CHANGE; | ||
179 | event.element.port_num = port_num; | ||
180 | ib_dispatch_event(&event); | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | |||
185 | static void node_desc_override(struct ib_device *dev, | ||
186 | struct ib_mad *mad) | ||
187 | { | ||
188 | if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || | ||
189 | mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && | ||
190 | mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && | ||
191 | mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { | ||
192 | spin_lock(&to_mdev(dev)->sm_lock); | ||
193 | memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); | ||
194 | spin_unlock(&to_mdev(dev)->sm_lock); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *mad) | ||
199 | { | ||
200 | int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED; | ||
201 | struct ib_mad_send_buf *send_buf; | ||
202 | struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; | ||
203 | int ret; | ||
204 | |||
205 | if (agent) { | ||
206 | send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, | ||
207 | IB_MGMT_MAD_DATA, GFP_ATOMIC); | ||
208 | /* | ||
209 | * We rely here on the fact that MLX QPs don't use the | ||
210 | * address handle after the send is posted (this is | ||
211 | * wrong following the IB spec strictly, but we know | ||
212 | * it's OK for our devices). | ||
213 | */ | ||
214 | spin_lock(&dev->sm_lock); | ||
215 | memcpy(send_buf->mad, mad, sizeof *mad); | ||
216 | if ((send_buf->ah = dev->sm_ah[port_num - 1])) | ||
217 | ret = ib_post_send_mad(send_buf, NULL); | ||
218 | else | ||
219 | ret = -EINVAL; | ||
220 | spin_unlock(&dev->sm_lock); | ||
221 | |||
222 | if (ret) | ||
223 | ib_free_send_mad(send_buf); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, | ||
228 | struct ib_wc *in_wc, struct ib_grh *in_grh, | ||
229 | struct ib_mad *in_mad, struct ib_mad *out_mad) | ||
230 | { | ||
231 | u16 slid; | ||
232 | int err; | ||
233 | |||
234 | slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); | ||
235 | |||
236 | if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { | ||
237 | forward_trap(to_mdev(ibdev), port_num, in_mad); | ||
238 | return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; | ||
239 | } | ||
240 | |||
241 | if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || | ||
242 | in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { | ||
243 | if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && | ||
244 | in_mad->mad_hdr.method != IB_MGMT_METHOD_SET && | ||
245 | in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS) | ||
246 | return IB_MAD_RESULT_SUCCESS; | ||
247 | |||
248 | /* | ||
249 | * Don't process SMInfo queries or vendor-specific | ||
250 | * MADs -- the SMA can't handle them. | ||
251 | */ | ||
252 | if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || | ||
253 | ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == | ||
254 | IB_SMP_ATTR_VENDOR_MASK)) | ||
255 | return IB_MAD_RESULT_SUCCESS; | ||
256 | } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || | ||
257 | in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || | ||
258 | in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2) { | ||
259 | if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET && | ||
260 | in_mad->mad_hdr.method != IB_MGMT_METHOD_SET) | ||
261 | return IB_MAD_RESULT_SUCCESS; | ||
262 | } else | ||
263 | return IB_MAD_RESULT_SUCCESS; | ||
264 | |||
265 | err = mlx4_MAD_IFC(to_mdev(ibdev), | ||
266 | mad_flags & IB_MAD_IGNORE_MKEY, | ||
267 | mad_flags & IB_MAD_IGNORE_BKEY, | ||
268 | port_num, in_wc, in_grh, in_mad, out_mad); | ||
269 | if (err) | ||
270 | return IB_MAD_RESULT_FAILURE; | ||
271 | |||
272 | if (!out_mad->mad_hdr.status) { | ||
273 | smp_snoop(ibdev, port_num, in_mad); | ||
274 | node_desc_override(ibdev, out_mad); | ||
275 | } | ||
276 | |||
277 | /* set return bit in status of directed route responses */ | ||
278 | if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) | ||
279 | out_mad->mad_hdr.status |= cpu_to_be16(1 << 15); | ||
280 | |||
281 | if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) | ||
282 | /* no response for trap repress */ | ||
283 | return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; | ||
284 | |||
285 | return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; | ||
286 | } | ||
287 | |||
288 | static void send_handler(struct ib_mad_agent *agent, | ||
289 | struct ib_mad_send_wc *mad_send_wc) | ||
290 | { | ||
291 | ib_free_send_mad(mad_send_wc->send_buf); | ||
292 | } | ||
293 | |||
294 | int mlx4_ib_mad_init(struct mlx4_ib_dev *dev) | ||
295 | { | ||
296 | struct ib_mad_agent *agent; | ||
297 | int p, q; | ||
298 | int ret; | ||
299 | |||
300 | for (p = 0; p < dev->dev->caps.num_ports; ++p) | ||
301 | for (q = 0; q <= 1; ++q) { | ||
302 | agent = ib_register_mad_agent(&dev->ib_dev, p + 1, | ||
303 | q ? IB_QPT_GSI : IB_QPT_SMI, | ||
304 | NULL, 0, send_handler, | ||
305 | NULL, NULL); | ||
306 | if (IS_ERR(agent)) { | ||
307 | ret = PTR_ERR(agent); | ||
308 | goto err; | ||
309 | } | ||
310 | dev->send_agent[p][q] = agent; | ||
311 | } | ||
312 | |||
313 | return 0; | ||
314 | |||
315 | err: | ||
316 | for (p = 0; p < dev->dev->caps.num_ports; ++p) | ||
317 | for (q = 0; q <= 1; ++q) | ||
318 | if (dev->send_agent[p][q]) | ||
319 | ib_unregister_mad_agent(dev->send_agent[p][q]); | ||
320 | |||
321 | return ret; | ||
322 | } | ||
323 | |||
324 | void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) | ||
325 | { | ||
326 | struct ib_mad_agent *agent; | ||
327 | int p, q; | ||
328 | |||
329 | for (p = 0; p < dev->dev->caps.num_ports; ++p) { | ||
330 | for (q = 0; q <= 1; ++q) { | ||
331 | agent = dev->send_agent[p][q]; | ||
332 | dev->send_agent[p][q] = NULL; | ||
333 | ib_unregister_mad_agent(agent); | ||
334 | } | ||
335 | |||
336 | if (dev->sm_ah[p]) | ||
337 | ib_destroy_ah(dev->sm_ah[p]); | ||
338 | } | ||
339 | } | ||
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c new file mode 100644 index 000000000000..688ecb4c39f3 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/main.c | |||
@@ -0,0 +1,651 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/module.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/errno.h> | ||
36 | |||
37 | #include <rdma/ib_smi.h> | ||
38 | #include <rdma/ib_user_verbs.h> | ||
39 | |||
40 | #include <linux/mlx4/driver.h> | ||
41 | #include <linux/mlx4/cmd.h> | ||
42 | |||
43 | #include "mlx4_ib.h" | ||
44 | #include "user.h" | ||
45 | |||
46 | #define DRV_NAME "mlx4_ib" | ||
47 | #define DRV_VERSION "0.01" | ||
48 | #define DRV_RELDATE "May 1, 2006" | ||
49 | |||
50 | MODULE_AUTHOR("Roland Dreier"); | ||
51 | MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); | ||
52 | MODULE_LICENSE("Dual BSD/GPL"); | ||
53 | MODULE_VERSION(DRV_VERSION); | ||
54 | |||
55 | static const char mlx4_ib_version[] __devinitdata = | ||
56 | DRV_NAME ": Mellanox ConnectX InfiniBand driver v" | ||
57 | DRV_VERSION " (" DRV_RELDATE ")\n"; | ||
58 | |||
59 | static void init_query_mad(struct ib_smp *mad) | ||
60 | { | ||
61 | mad->base_version = 1; | ||
62 | mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; | ||
63 | mad->class_version = 1; | ||
64 | mad->method = IB_MGMT_METHOD_GET; | ||
65 | } | ||
66 | |||
67 | static int mlx4_ib_query_device(struct ib_device *ibdev, | ||
68 | struct ib_device_attr *props) | ||
69 | { | ||
70 | struct mlx4_ib_dev *dev = to_mdev(ibdev); | ||
71 | struct ib_smp *in_mad = NULL; | ||
72 | struct ib_smp *out_mad = NULL; | ||
73 | int err = -ENOMEM; | ||
74 | |||
75 | in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); | ||
76 | out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); | ||
77 | if (!in_mad || !out_mad) | ||
78 | goto out; | ||
79 | |||
80 | init_query_mad(in_mad); | ||
81 | in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; | ||
82 | |||
83 | err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); | ||
84 | if (err) | ||
85 | goto out; | ||
86 | |||
87 | memset(props, 0, sizeof *props); | ||
88 | |||
89 | props->fw_ver = dev->dev->caps.fw_ver; | ||
90 | props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | | ||
91 | IB_DEVICE_PORT_ACTIVE_EVENT | | ||
92 | IB_DEVICE_SYS_IMAGE_GUID | | ||
93 | IB_DEVICE_RC_RNR_NAK_GEN; | ||
94 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) | ||
95 | props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; | ||
96 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) | ||
97 | props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; | ||
98 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM) | ||
99 | props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; | ||
100 | if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) | ||
101 | props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; | ||
102 | |||
103 | props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & | ||
104 | 0xffffff; | ||
105 | props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); | ||
106 | props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); | ||
107 | memcpy(&props->sys_image_guid, out_mad->data + 4, 8); | ||
108 | |||
109 | props->max_mr_size = ~0ull; | ||
110 | props->page_size_cap = dev->dev->caps.page_size_cap; | ||
111 | props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; | ||
112 | props->max_qp_wr = dev->dev->caps.max_wqes; | ||
113 | props->max_sge = min(dev->dev->caps.max_sq_sg, | ||
114 | dev->dev->caps.max_rq_sg); | ||
115 | props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; | ||
116 | props->max_cqe = dev->dev->caps.max_cqes; | ||
117 | props->max_mr = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws; | ||
118 | props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds; | ||
119 | props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma; | ||
120 | props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma; | ||
121 | props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; | ||
122 | props->max_srq = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs; | ||
123 | props->max_srq_wr = dev->dev->caps.max_srq_wqes; | ||
124 | props->max_srq_sge = dev->dev->caps.max_srq_sge; | ||
125 | props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; | ||
126 | props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? | ||
127 | IB_ATOMIC_HCA : IB_ATOMIC_NONE; | ||
128 | props->max_pkeys = dev->dev->caps.pkey_table_len; | ||
129 | props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; | ||
130 | props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; | ||
131 | props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * | ||
132 | props->max_mcast_grp; | ||
133 | props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; | ||
134 | |||
135 | out: | ||
136 | kfree(in_mad); | ||
137 | kfree(out_mad); | ||
138 | |||
139 | return err; | ||
140 | } | ||
141 | |||
142 | static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, | ||
143 | struct ib_port_attr *props) | ||
144 | { | ||
145 | struct ib_smp *in_mad = NULL; | ||
146 | struct ib_smp *out_mad = NULL; | ||
147 | int err = -ENOMEM; | ||
148 | |||
149 | in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); | ||
150 | out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); | ||
151 | if (!in_mad || !out_mad) | ||
152 | goto out; | ||
153 | |||
154 | memset(props, 0, sizeof *props); | ||
155 | |||
156 | init_query_mad(in_mad); | ||
157 | in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; | ||
158 | in_mad->attr_mod = cpu_to_be32(port); | ||
159 | |||
160 | err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); | ||
161 | if (err) | ||
162 | goto out; | ||
163 | |||
164 | props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); | ||
165 | props->lmc = out_mad->data[34] & 0x7; | ||
166 | props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); | ||
167 | props->sm_sl = out_mad->data[36] & 0xf; | ||
168 | props->state = out_mad->data[32] & 0xf; | ||
169 | props->phys_state = out_mad->data[33] >> 4; | ||
170 | props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); | ||
171 | props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len; | ||
172 | props->max_msg_sz = 0x80000000; | ||
173 | props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len; | ||
174 | props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); | ||
175 | props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); | ||
176 | props->active_width = out_mad->data[31] & 0xf; | ||
177 | props->active_speed = out_mad->data[35] >> 4; | ||
178 | props->max_mtu = out_mad->data[41] & 0xf; | ||
179 | props->active_mtu = out_mad->data[36] >> 4; | ||
180 | props->subnet_timeout = out_mad->data[51] & 0x1f; | ||
181 | props->max_vl_num = out_mad->data[37] >> 4; | ||
182 | props->init_type_reply = out_mad->data[41] >> 4; | ||
183 | |||
184 | out: | ||
185 | kfree(in_mad); | ||
186 | kfree(out_mad); | ||
187 | |||
188 | return err; | ||
189 | } | ||
190 | |||
191 | static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, | ||
192 | union ib_gid *gid) | ||
193 | { | ||
194 | struct ib_smp *in_mad = NULL; | ||
195 | struct ib_smp *out_mad = NULL; | ||
196 | int err = -ENOMEM; | ||
197 | |||
198 | in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); | ||
199 | out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); | ||
200 | if (!in_mad || !out_mad) | ||
201 | goto out; | ||
202 | |||
203 | init_query_mad(in_mad); | ||
204 | in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; | ||
205 | in_mad->attr_mod = cpu_to_be32(port); | ||
206 | |||
207 | err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); | ||
208 | if (err) | ||
209 | goto out; | ||
210 | |||
211 | memcpy(gid->raw, out_mad->data + 8, 8); | ||
212 | |||
213 | init_query_mad(in_mad); | ||
214 | in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; | ||
215 | in_mad->attr_mod = cpu_to_be32(index / 8); | ||
216 | |||
217 | err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); | ||
218 | if (err) | ||
219 | goto out; | ||
220 | |||
221 | memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); | ||
222 | |||
223 | out: | ||
224 | kfree(in_mad); | ||
225 | kfree(out_mad); | ||
226 | return err; | ||
227 | } | ||
228 | |||
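Editor's note: mlx4_ib_query_gid() above stitches each GID together from two SMPs: bytes 8..15 of PortInfo supply the subnet prefix, and the GuidInfo block selected by attr_mod = index / 8 supplies the GUID at slot index % 8. A tiny sketch of the assembly with made-up prefix and GUID bytes (not driver code):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        unsigned char portinfo[64] = { 0 }, guidinfo[64] = { 0 }, gid[16];
        int index = 10, i;              /* GID table slot to fetch */

        /* Made-up SMP payloads: prefix in PortInfo, GUID in a GuidInfo slot. */
        memcpy(portinfo + 8, "\xfe\x80\x00\x00\x00\x00\x00\x00", 8);
        memcpy(guidinfo + (index % 8) * 8, "\x00\x02\xc9\x03\x00\x01\x02\x03", 8);

        /* attr_mod for the GuidInfo query would be index / 8 == 1. */
        memcpy(gid,     portinfo + 8, 8);                /* subnet prefix */
        memcpy(gid + 8, guidinfo + (index % 8) * 8, 8);  /* port GUID     */

        for (i = 0; i < 16; ++i)
            printf("%02x%s", gid[i], i == 15 ? "\n" : (i % 2 ? ":" : ""));
        return 0;
    }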
229 | static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, | ||
230 | u16 *pkey) | ||
231 | { | ||
232 | struct ib_smp *in_mad = NULL; | ||
233 | struct ib_smp *out_mad = NULL; | ||
234 | int err = -ENOMEM; | ||
235 | |||
236 | in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); | ||
237 | out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); | ||
238 | if (!in_mad || !out_mad) | ||
239 | goto out; | ||
240 | |||
241 | init_query_mad(in_mad); | ||
242 | in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; | ||
243 | in_mad->attr_mod = cpu_to_be32(index / 32); | ||
244 | |||
245 | err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); | ||
246 | if (err) | ||
247 | goto out; | ||
248 | |||
249 | *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); | ||
250 | |||
251 | out: | ||
252 | kfree(in_mad); | ||
253 | kfree(out_mad); | ||
254 | return err; | ||
255 | } | ||
256 | |||
257 | static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, | ||
258 | struct ib_device_modify *props) | ||
259 | { | ||
260 | if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) | ||
261 | return -EOPNOTSUPP; | ||
262 | |||
263 | if (mask & IB_DEVICE_MODIFY_NODE_DESC) { | ||
264 | spin_lock(&to_mdev(ibdev)->sm_lock); | ||
265 | memcpy(ibdev->node_desc, props->node_desc, 64); | ||
266 | spin_unlock(&to_mdev(ibdev)->sm_lock); | ||
267 | } | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, | ||
273 | u32 cap_mask) | ||
274 | { | ||
275 | struct mlx4_cmd_mailbox *mailbox; | ||
276 | int err; | ||
277 | |||
278 | mailbox = mlx4_alloc_cmd_mailbox(dev->dev); | ||
279 | if (IS_ERR(mailbox)) | ||
280 | return PTR_ERR(mailbox); | ||
281 | |||
282 | memset(mailbox->buf, 0, 256); | ||
283 | *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; | ||
284 | ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); | ||
285 | |||
286 | err = mlx4_cmd(dev->dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT, | ||
287 | MLX4_CMD_TIME_CLASS_B); | ||
288 | |||
289 | mlx4_free_cmd_mailbox(dev->dev, mailbox); | ||
290 | return err; | ||
291 | } | ||
292 | |||
293 | static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, | ||
294 | struct ib_port_modify *props) | ||
295 | { | ||
296 | struct ib_port_attr attr; | ||
297 | u32 cap_mask; | ||
298 | int err; | ||
299 | |||
300 | mutex_lock(&to_mdev(ibdev)->cap_mask_mutex); | ||
301 | |||
302 | err = mlx4_ib_query_port(ibdev, port, &attr); | ||
303 | if (err) | ||
304 | goto out; | ||
305 | |||
306 | cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & | ||
307 | ~props->clr_port_cap_mask; | ||
308 | |||
309 | err = mlx4_SET_PORT(to_mdev(ibdev), port, | ||
310 | !!(mask & IB_PORT_RESET_QKEY_CNTR), | ||
311 | cap_mask); | ||
312 | |||
313 | out: | ||
314 | mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); | ||
315 | return err; | ||
316 | } | ||
317 | |||
318 | static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, | ||
319 | struct ib_udata *udata) | ||
320 | { | ||
321 | struct mlx4_ib_dev *dev = to_mdev(ibdev); | ||
322 | struct mlx4_ib_ucontext *context; | ||
323 | struct mlx4_ib_alloc_ucontext_resp resp; | ||
324 | int err; | ||
325 | |||
326 | resp.qp_tab_size = dev->dev->caps.num_qps; | ||
327 | resp.bf_reg_size = dev->dev->caps.bf_reg_size; | ||
328 | resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; | ||
329 | |||
330 | context = kmalloc(sizeof *context, GFP_KERNEL); | ||
331 | if (!context) | ||
332 | return ERR_PTR(-ENOMEM); | ||
333 | |||
334 | err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); | ||
335 | if (err) { | ||
336 | kfree(context); | ||
337 | return ERR_PTR(err); | ||
338 | } | ||
339 | |||
340 | INIT_LIST_HEAD(&context->db_page_list); | ||
341 | mutex_init(&context->db_page_mutex); | ||
342 | |||
343 | err = ib_copy_to_udata(udata, &resp, sizeof resp); | ||
344 | if (err) { | ||
345 | mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); | ||
346 | kfree(context); | ||
347 | return ERR_PTR(-EFAULT); | ||
348 | } | ||
349 | |||
350 | return &context->ibucontext; | ||
351 | } | ||
352 | |||
353 | static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) | ||
354 | { | ||
355 | struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); | ||
356 | |||
357 | mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); | ||
358 | kfree(context); | ||
359 | |||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) | ||
364 | { | ||
365 | struct mlx4_ib_dev *dev = to_mdev(context->device); | ||
366 | |||
367 | if (vma->vm_end - vma->vm_start != PAGE_SIZE) | ||
368 | return -EINVAL; | ||
369 | |||
370 | if (vma->vm_pgoff == 0) { | ||
371 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
372 | |||
373 | if (io_remap_pfn_range(vma, vma->vm_start, | ||
374 | to_mucontext(context)->uar.pfn, | ||
375 | PAGE_SIZE, vma->vm_page_prot)) | ||
376 | return -EAGAIN; | ||
377 | } else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) { | ||
378 | /* FIXME want pgprot_writecombine() for BlueFlame pages */ | ||
379 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | ||
380 | |||
381 | if (io_remap_pfn_range(vma, vma->vm_start, | ||
382 | to_mucontext(context)->uar.pfn + | ||
383 | dev->dev->caps.num_uars, | ||
384 | PAGE_SIZE, vma->vm_page_prot)) | ||
385 | return -EAGAIN; | ||
386 | } else | ||
387 | return -EINVAL; | ||
388 | |||
389 | return 0; | ||
390 | } | ||
391 | |||
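Editor's note: mlx4_ib_mmap() above dispatches on the page offset: offset 0 serves the context's UAR doorbell page, and offset 1 serves the BlueFlame page when the device has BlueFlame registers. A hedged userspace sketch of the calling side, where fd is an assumed already-open uverbs device fd (real consumers go through libibverbs rather than raw mmap):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static void *map_doorbell_page(int fd, int page_index) /* 0 = UAR, 1 = BF */
    {
        long psz = sysconf(_SC_PAGESIZE);

        /* The kernel side sees vm_pgoff == page_index and checks the size. */
        return mmap(NULL, psz, PROT_WRITE, MAP_SHARED,
                    fd, (off_t) page_index * psz);
    }

    int main(void)
    {
        int fd = -1;    /* assumed: an open /dev/infiniband/uverbsN fd */
        void *uar = map_doorbell_page(fd, 0);

        if (uar == MAP_FAILED)
            perror("mmap uar");
        return 0;
    }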
392 | static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev, | ||
393 | struct ib_ucontext *context, | ||
394 | struct ib_udata *udata) | ||
395 | { | ||
396 | struct mlx4_ib_pd *pd; | ||
397 | int err; | ||
398 | |||
399 | pd = kmalloc(sizeof *pd, GFP_KERNEL); | ||
400 | if (!pd) | ||
401 | return ERR_PTR(-ENOMEM); | ||
402 | |||
403 | err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn); | ||
404 | if (err) { | ||
405 | kfree(pd); | ||
406 | return ERR_PTR(err); | ||
407 | } | ||
408 | |||
409 | if (context) | ||
410 | if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) { | ||
411 | mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn); | ||
412 | kfree(pd); | ||
413 | return ERR_PTR(-EFAULT); | ||
414 | } | ||
415 | |||
416 | return &pd->ibpd; | ||
417 | } | ||
418 | |||
419 | static int mlx4_ib_dealloc_pd(struct ib_pd *pd) | ||
420 | { | ||
421 | mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); | ||
422 | kfree(pd); | ||
423 | |||
424 | return 0; | ||
425 | } | ||
426 | |||
427 | static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) | ||
428 | { | ||
429 | return mlx4_multicast_attach(to_mdev(ibqp->device)->dev, | ||
430 | &to_mqp(ibqp)->mqp, gid->raw); | ||
431 | } | ||
432 | |||
433 | static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) | ||
434 | { | ||
435 | return mlx4_multicast_detach(to_mdev(ibqp->device)->dev, | ||
436 | &to_mqp(ibqp)->mqp, gid->raw); | ||
437 | } | ||
438 | |||
439 | static int init_node_data(struct mlx4_ib_dev *dev) | ||
440 | { | ||
441 | struct ib_smp *in_mad = NULL; | ||
442 | struct ib_smp *out_mad = NULL; | ||
443 | int err = -ENOMEM; | ||
444 | |||
445 | in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); | ||
446 | out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); | ||
447 | if (!in_mad || !out_mad) | ||
448 | goto out; | ||
449 | |||
450 | init_query_mad(in_mad); | ||
451 | in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; | ||
452 | |||
453 | err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); | ||
454 | if (err) | ||
455 | goto out; | ||
456 | |||
457 | memcpy(dev->ib_dev.node_desc, out_mad->data, 64); | ||
458 | |||
459 | in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; | ||
460 | |||
461 | err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); | ||
462 | if (err) | ||
463 | goto out; | ||
464 | |||
465 | memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); | ||
466 | |||
467 | out: | ||
468 | kfree(in_mad); | ||
469 | kfree(out_mad); | ||
470 | return err; | ||
471 | } | ||
472 | |||
473 | static void *mlx4_ib_add(struct mlx4_dev *dev) | ||
474 | { | ||
475 | struct mlx4_ib_dev *ibdev; | ||
476 | |||
477 | ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); | ||
478 | if (!ibdev) { | ||
479 | dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); | ||
480 | return NULL; | ||
481 | } | ||
482 | |||
483 | if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) | ||
484 | goto err_dealloc; | ||
485 | |||
486 | if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) | ||
487 | goto err_pd; | ||
488 | |||
489 | ibdev->uar_map = ioremap(ibdev->priv_uar.pfn << PAGE_SHIFT, PAGE_SIZE); | ||
490 | if (!ibdev->uar_map) | ||
491 | goto err_uar; | ||
492 | |||
493 | INIT_LIST_HEAD(&ibdev->pgdir_list); | ||
494 | mutex_init(&ibdev->pgdir_mutex); | ||
495 | |||
496 | ibdev->dev = dev; | ||
497 | |||
498 | strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); | ||
499 | ibdev->ib_dev.owner = THIS_MODULE; | ||
500 | ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; | ||
501 | ibdev->ib_dev.phys_port_cnt = dev->caps.num_ports; | ||
502 | ibdev->ib_dev.num_comp_vectors = 1; | ||
503 | ibdev->ib_dev.dma_device = &dev->pdev->dev; | ||
504 | |||
505 | ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; | ||
506 | ibdev->ib_dev.uverbs_cmd_mask = | ||
507 | (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | | ||
508 | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | | ||
509 | (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | | ||
510 | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | | ||
511 | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | | ||
512 | (1ull << IB_USER_VERBS_CMD_REG_MR) | | ||
513 | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | | ||
514 | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | | ||
515 | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | | ||
516 | (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | | ||
517 | (1ull << IB_USER_VERBS_CMD_CREATE_QP) | | ||
518 | (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | | ||
519 | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | | ||
520 | (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | | ||
521 | (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | | ||
522 | (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | | ||
523 | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | | ||
524 | (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); | ||
525 | |||
526 | ibdev->ib_dev.query_device = mlx4_ib_query_device; | ||
527 | ibdev->ib_dev.query_port = mlx4_ib_query_port; | ||
528 | ibdev->ib_dev.query_gid = mlx4_ib_query_gid; | ||
529 | ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; | ||
530 | ibdev->ib_dev.modify_device = mlx4_ib_modify_device; | ||
531 | ibdev->ib_dev.modify_port = mlx4_ib_modify_port; | ||
532 | ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; | ||
533 | ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; | ||
534 | ibdev->ib_dev.mmap = mlx4_ib_mmap; | ||
535 | ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; | ||
536 | ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; | ||
537 | ibdev->ib_dev.create_ah = mlx4_ib_create_ah; | ||
538 | ibdev->ib_dev.query_ah = mlx4_ib_query_ah; | ||
539 | ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; | ||
540 | ibdev->ib_dev.create_srq = mlx4_ib_create_srq; | ||
541 | ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; | ||
542 | ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; | ||
543 | ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; | ||
544 | ibdev->ib_dev.create_qp = mlx4_ib_create_qp; | ||
545 | ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; | ||
546 | ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; | ||
547 | ibdev->ib_dev.post_send = mlx4_ib_post_send; | ||
548 | ibdev->ib_dev.post_recv = mlx4_ib_post_recv; | ||
549 | ibdev->ib_dev.create_cq = mlx4_ib_create_cq; | ||
550 | ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; | ||
551 | ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; | ||
552 | ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; | ||
553 | ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; | ||
554 | ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; | ||
555 | ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; | ||
556 | ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; | ||
557 | ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; | ||
558 | ibdev->ib_dev.process_mad = mlx4_ib_process_mad; | ||
559 | |||
560 | if (init_node_data(ibdev)) | ||
561 | goto err_map; | ||
562 | |||
563 | spin_lock_init(&ibdev->sm_lock); | ||
564 | mutex_init(&ibdev->cap_mask_mutex); | ||
565 | |||
566 | if (ib_register_device(&ibdev->ib_dev)) | ||
567 | goto err_map; | ||
568 | |||
569 | if (mlx4_ib_mad_init(ibdev)) | ||
570 | goto err_reg; | ||
571 | |||
572 | return ibdev; | ||
573 | |||
574 | err_reg: | ||
575 | ib_unregister_device(&ibdev->ib_dev); | ||
576 | |||
577 | err_map: | ||
578 | iounmap(ibdev->uar_map); | ||
579 | |||
580 | err_uar: | ||
581 | mlx4_uar_free(dev, &ibdev->priv_uar); | ||
582 | |||
583 | err_pd: | ||
584 | mlx4_pd_free(dev, ibdev->priv_pdn); | ||
585 | |||
586 | err_dealloc: | ||
587 | ib_dealloc_device(&ibdev->ib_dev); | ||
588 | |||
589 | return NULL; | ||
590 | } | ||
591 | |||
592 | static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) | ||
593 | { | ||
594 | struct mlx4_ib_dev *ibdev = ibdev_ptr; | ||
595 | int p; | ||
596 | |||
597 | for (p = 1; p <= dev->caps.num_ports; ++p) | ||
598 | mlx4_CLOSE_PORT(dev, p); | ||
599 | |||
600 | mlx4_ib_mad_cleanup(ibdev); | ||
601 | ib_unregister_device(&ibdev->ib_dev); | ||
602 | iounmap(ibdev->uar_map); | ||
603 | mlx4_uar_free(dev, &ibdev->priv_uar); | ||
604 | mlx4_pd_free(dev, ibdev->priv_pdn); | ||
605 | ib_dealloc_device(&ibdev->ib_dev); | ||
606 | } | ||
607 | |||
608 | static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, | ||
609 | enum mlx4_dev_event event, int subtype, | ||
610 | int port) | ||
611 | { | ||
612 | struct ib_event ibev; | ||
613 | |||
614 | switch (event) { | ||
615 | case MLX4_EVENT_TYPE_PORT_CHANGE: | ||
616 | ibev.event = subtype == MLX4_PORT_CHANGE_SUBTYPE_ACTIVE ? | ||
617 | IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; | ||
618 | break; | ||
619 | |||
620 | case MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR: | ||
621 | ibev.event = IB_EVENT_DEVICE_FATAL; | ||
622 | break; | ||
623 | |||
624 | default: | ||
625 | return; | ||
626 | } | ||
627 | |||
628 | ibev.device = ibdev_ptr; | ||
629 | ibev.element.port_num = port; | ||
630 | |||
631 | ib_dispatch_event(&ibev); | ||
632 | } | ||
633 | |||
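The events dispatched here fan out to whatever handlers ULPs have registered with the IB core. A hedged sketch of the consumer side, using the stock ib_event_handler machinery; the callback body is illustrative:

	static void my_event_cb(struct ib_event_handler *handler,
				struct ib_event *event)
	{
		/* e.g. react to IB_EVENT_PORT_ACTIVE / IB_EVENT_PORT_ERR
		 * raised by mlx4_ib_event() above */
	}

	static struct ib_event_handler ev_handler;

	static void watch_device(struct ib_device *device)
	{
		INIT_IB_EVENT_HANDLER(&ev_handler, device, my_event_cb);
		ib_register_event_handler(&ev_handler);
	}
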
634 | static struct mlx4_interface mlx4_ib_interface = { | ||
635 | .add = mlx4_ib_add, | ||
636 | .remove = mlx4_ib_remove, | ||
637 | .event = mlx4_ib_event | ||
638 | }; | ||
639 | |||
640 | static int __init mlx4_ib_init(void) | ||
641 | { | ||
642 | return mlx4_register_interface(&mlx4_ib_interface); | ||
643 | } | ||
644 | |||
645 | static void __exit mlx4_ib_cleanup(void) | ||
646 | { | ||
647 | mlx4_unregister_interface(&mlx4_ib_interface); | ||
648 | } | ||
649 | |||
650 | module_init(mlx4_ib_init); | ||
651 | module_exit(mlx4_ib_cleanup); | ||
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h new file mode 100644 index 000000000000..93dac71f3230 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006, 2007 Cisco Systems. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef MLX4_IB_H | ||
34 | #define MLX4_IB_H | ||
35 | |||
36 | #include <linux/compiler.h> | ||
37 | #include <linux/list.h> | ||
38 | |||
39 | #include <rdma/ib_verbs.h> | ||
40 | #include <rdma/ib_umem.h> | ||
41 | |||
42 | #include <linux/mlx4/device.h> | ||
43 | #include <linux/mlx4/doorbell.h> | ||
44 | |||
45 | enum { | ||
46 | MLX4_IB_DB_PER_PAGE = PAGE_SIZE / 4 | ||
47 | }; | ||
48 | |||
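Each doorbell record is a single __be32 (see struct mlx4_ib_db below), so MLX4_IB_DB_PER_PAGE is simply PAGE_SIZE / sizeof(__be32): with 4 KB pages, one pgdir page holds 4096 / 4 = 1024 doorbell records.
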
49 | struct mlx4_ib_db_pgdir; | ||
50 | struct mlx4_ib_user_db_page; | ||
51 | |||
52 | struct mlx4_ib_db { | ||
53 | __be32 *db; | ||
54 | union { | ||
55 | struct mlx4_ib_db_pgdir *pgdir; | ||
56 | struct mlx4_ib_user_db_page *user_page; | ||
57 | } u; | ||
58 | dma_addr_t dma; | ||
59 | int index; | ||
60 | int order; | ||
61 | }; | ||
62 | |||
63 | struct mlx4_ib_ucontext { | ||
64 | struct ib_ucontext ibucontext; | ||
65 | struct mlx4_uar uar; | ||
66 | struct list_head db_page_list; | ||
67 | struct mutex db_page_mutex; | ||
68 | }; | ||
69 | |||
70 | struct mlx4_ib_pd { | ||
71 | struct ib_pd ibpd; | ||
72 | u32 pdn; | ||
73 | }; | ||
74 | |||
75 | struct mlx4_ib_cq_buf { | ||
76 | struct mlx4_buf buf; | ||
77 | struct mlx4_mtt mtt; | ||
78 | }; | ||
79 | |||
80 | struct mlx4_ib_cq { | ||
81 | struct ib_cq ibcq; | ||
82 | struct mlx4_cq mcq; | ||
83 | struct mlx4_ib_cq_buf buf; | ||
84 | struct mlx4_ib_db db; | ||
85 | spinlock_t lock; | ||
86 | struct ib_umem *umem; | ||
87 | }; | ||
88 | |||
89 | struct mlx4_ib_mr { | ||
90 | struct ib_mr ibmr; | ||
91 | struct mlx4_mr mmr; | ||
92 | struct ib_umem *umem; | ||
93 | }; | ||
94 | |||
95 | struct mlx4_ib_wq { | ||
96 | u64 *wrid; | ||
97 | spinlock_t lock; | ||
98 | int max; | ||
99 | int max_gs; | ||
100 | int offset; | ||
101 | int wqe_shift; | ||
102 | unsigned head; | ||
103 | unsigned tail; | ||
104 | }; | ||
105 | |||
106 | struct mlx4_ib_qp { | ||
107 | struct ib_qp ibqp; | ||
108 | struct mlx4_qp mqp; | ||
109 | struct mlx4_buf buf; | ||
110 | |||
111 | struct mlx4_ib_db db; | ||
112 | struct mlx4_ib_wq rq; | ||
113 | |||
114 | u32 doorbell_qpn; | ||
115 | __be32 sq_signal_bits; | ||
116 | struct mlx4_ib_wq sq; | ||
117 | |||
118 | struct ib_umem *umem; | ||
119 | struct mlx4_mtt mtt; | ||
120 | int buf_size; | ||
121 | struct mutex mutex; | ||
122 | u8 port; | ||
123 | u8 alt_port; | ||
124 | u8 atomic_rd_en; | ||
125 | u8 resp_depth; | ||
126 | u8 state; | ||
127 | }; | ||
128 | |||
129 | struct mlx4_ib_srq { | ||
130 | struct ib_srq ibsrq; | ||
131 | struct mlx4_srq msrq; | ||
132 | struct mlx4_buf buf; | ||
133 | struct mlx4_ib_db db; | ||
134 | u64 *wrid; | ||
135 | spinlock_t lock; | ||
136 | int head; | ||
137 | int tail; | ||
138 | u16 wqe_ctr; | ||
139 | struct ib_umem *umem; | ||
140 | struct mlx4_mtt mtt; | ||
141 | struct mutex mutex; | ||
142 | }; | ||
143 | |||
144 | struct mlx4_ib_ah { | ||
145 | struct ib_ah ibah; | ||
146 | struct mlx4_av av; | ||
147 | }; | ||
148 | |||
149 | struct mlx4_ib_dev { | ||
150 | struct ib_device ib_dev; | ||
151 | struct mlx4_dev *dev; | ||
152 | void __iomem *uar_map; | ||
153 | |||
154 | struct list_head pgdir_list; | ||
155 | struct mutex pgdir_mutex; | ||
156 | |||
157 | struct mlx4_uar priv_uar; | ||
158 | u32 priv_pdn; | ||
159 | MLX4_DECLARE_DOORBELL_LOCK(uar_lock); | ||
160 | |||
161 | struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; | ||
162 | struct ib_ah *sm_ah[MLX4_MAX_PORTS]; | ||
163 | spinlock_t sm_lock; | ||
164 | |||
165 | struct mutex cap_mask_mutex; | ||
166 | }; | ||
167 | |||
168 | static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) | ||
169 | { | ||
170 | return container_of(ibdev, struct mlx4_ib_dev, ib_dev); | ||
171 | } | ||
172 | |||
173 | static inline struct mlx4_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext) | ||
174 | { | ||
175 | return container_of(ibucontext, struct mlx4_ib_ucontext, ibucontext); | ||
176 | } | ||
177 | |||
178 | static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd) | ||
179 | { | ||
180 | return container_of(ibpd, struct mlx4_ib_pd, ibpd); | ||
181 | } | ||
182 | |||
183 | static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) | ||
184 | { | ||
185 | return container_of(ibcq, struct mlx4_ib_cq, ibcq); | ||
186 | } | ||
187 | |||
188 | static inline struct mlx4_ib_cq *to_mibcq(struct mlx4_cq *mcq) | ||
189 | { | ||
190 | return container_of(mcq, struct mlx4_ib_cq, mcq); | ||
191 | } | ||
192 | |||
193 | static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) | ||
194 | { | ||
195 | return container_of(ibmr, struct mlx4_ib_mr, ibmr); | ||
196 | } | ||
197 | |||
198 | static inline struct mlx4_ib_qp *to_mqp(struct ib_qp *ibqp) | ||
199 | { | ||
200 | return container_of(ibqp, struct mlx4_ib_qp, ibqp); | ||
201 | } | ||
202 | |||
203 | static inline struct mlx4_ib_qp *to_mibqp(struct mlx4_qp *mqp) | ||
204 | { | ||
205 | return container_of(mqp, struct mlx4_ib_qp, mqp); | ||
206 | } | ||
207 | |||
208 | static inline struct mlx4_ib_srq *to_msrq(struct ib_srq *ibsrq) | ||
209 | { | ||
210 | return container_of(ibsrq, struct mlx4_ib_srq, ibsrq); | ||
211 | } | ||
212 | |||
213 | static inline struct mlx4_ib_srq *to_mibsrq(struct mlx4_srq *msrq) | ||
214 | { | ||
215 | return container_of(msrq, struct mlx4_ib_srq, msrq); | ||
216 | } | ||
217 | |||
218 | static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) | ||
219 | { | ||
220 | return container_of(ibah, struct mlx4_ib_ah, ibah); | ||
221 | } | ||
222 | |||
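All of the to_m*() accessors above are the same container_of() idiom: the IB core passes around pointers to the embedded generic structure, and the driver walks back to the private wrapper that contains it. Reduced to a self-contained illustration with made-up types:

	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct generic { int id; };

	struct wrapper {
		int		priv;	/* driver-private state */
		struct generic	gen;	/* embedded generic object */
	};

	static struct wrapper *to_wrapper(struct generic *g)
	{
		return container_of(g, struct wrapper, gen);
	}
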
223 | int mlx4_ib_db_alloc(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db, int order); | ||
224 | void mlx4_ib_db_free(struct mlx4_ib_dev *dev, struct mlx4_ib_db *db); | ||
225 | int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, | ||
226 | struct mlx4_ib_db *db); | ||
227 | void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_ib_db *db); | ||
228 | |||
229 | struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc); | ||
230 | int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, | ||
231 | struct ib_umem *umem); | ||
232 | struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | ||
233 | u64 virt_addr, int access_flags, | ||
234 | struct ib_udata *udata); | ||
235 | int mlx4_ib_dereg_mr(struct ib_mr *mr); | ||
236 | |||
237 | struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector, | ||
238 | struct ib_ucontext *context, | ||
239 | struct ib_udata *udata); | ||
240 | int mlx4_ib_destroy_cq(struct ib_cq *cq); | ||
241 | int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); | ||
242 | int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); | ||
243 | void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); | ||
244 | void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); | ||
245 | |||
246 | struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); | ||
247 | int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); | ||
248 | int mlx4_ib_destroy_ah(struct ib_ah *ah); | ||
249 | |||
250 | struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, | ||
251 | struct ib_srq_init_attr *init_attr, | ||
252 | struct ib_udata *udata); | ||
253 | int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, | ||
254 | enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); | ||
255 | int mlx4_ib_destroy_srq(struct ib_srq *srq); | ||
256 | void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); | ||
257 | int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, | ||
258 | struct ib_recv_wr **bad_wr); | ||
259 | |||
260 | struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, | ||
261 | struct ib_qp_init_attr *init_attr, | ||
262 | struct ib_udata *udata); | ||
263 | int mlx4_ib_destroy_qp(struct ib_qp *qp); | ||
264 | int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
265 | int attr_mask, struct ib_udata *udata); | ||
266 | int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | ||
267 | struct ib_send_wr **bad_wr); | ||
268 | int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, | ||
269 | struct ib_recv_wr **bad_wr); | ||
270 | |||
271 | int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, | ||
272 | int port, struct ib_wc *in_wc, struct ib_grh *in_grh, | ||
273 | void *in_mad, void *response_mad); | ||
274 | int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, | ||
275 | struct ib_wc *in_wc, struct ib_grh *in_grh, | ||
276 | struct ib_mad *in_mad, struct ib_mad *out_mad); | ||
277 | int mlx4_ib_mad_init(struct mlx4_ib_dev *dev); | ||
278 | void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev); | ||
279 | |||
280 | static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) | ||
281 | { | ||
282 | return !!(ah->av.g_slid & 0x80); | ||
283 | } | ||
284 | |||
285 | #endif /* MLX4_IB_H */ | ||
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c new file mode 100644 index 000000000000..85ae906f1d12 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/mr.c | |||
@@ -0,0 +1,184 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include "mlx4_ib.h" | ||
34 | |||
35 | static u32 convert_access(int acc) | ||
36 | { | ||
37 | return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | | ||
38 | (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | | ||
39 | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | | ||
40 | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | | ||
41 | MLX4_PERM_LOCAL_READ; | ||
42 | } | ||
43 | |||
44 | struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) | ||
45 | { | ||
46 | struct mlx4_ib_mr *mr; | ||
47 | int err; | ||
48 | |||
49 | mr = kmalloc(sizeof *mr, GFP_KERNEL); | ||
50 | if (!mr) | ||
51 | return ERR_PTR(-ENOMEM); | ||
52 | |||
53 | err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, | ||
54 | ~0ull, convert_access(acc), 0, 0, &mr->mmr); | ||
55 | if (err) | ||
56 | goto err_free; | ||
57 | |||
58 | err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); | ||
59 | if (err) | ||
60 | goto err_mr; | ||
61 | |||
62 | mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; | ||
63 | mr->umem = NULL; | ||
64 | |||
65 | return &mr->ibmr; | ||
66 | |||
67 | err_mr: | ||
68 | mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); | ||
69 | |||
70 | err_free: | ||
71 | kfree(mr); | ||
72 | |||
73 | return ERR_PTR(err); | ||
74 | } | ||
75 | |||
76 | int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, | ||
77 | struct ib_umem *umem) | ||
78 | { | ||
79 | u64 *pages; | ||
80 | struct ib_umem_chunk *chunk; | ||
81 | int i, j, k; | ||
82 | int n; | ||
83 | int len; | ||
84 | int err = 0; | ||
85 | |||
86 | pages = (u64 *) __get_free_page(GFP_KERNEL); | ||
87 | if (!pages) | ||
88 | return -ENOMEM; | ||
89 | |||
90 | i = n = 0; | ||
91 | |||
92 | list_for_each_entry(chunk, &umem->chunk_list, list) | ||
93 | for (j = 0; j < chunk->nmap; ++j) { | ||
94 | len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; | ||
95 | for (k = 0; k < len; ++k) { | ||
96 | pages[i++] = sg_dma_address(&chunk->page_list[j]) + | ||
97 | umem->page_size * k; | ||
98 | /* | ||
99 | * Be friendly to WRITE_MTT firmware | ||
100 | * command, and pass it chunks of | ||
101 | * appropriate size. | ||
102 | */ | ||
103 | if (i == PAGE_SIZE / sizeof (u64) - 2) { | ||
104 | err = mlx4_write_mtt(dev->dev, mtt, n, | ||
105 | i, pages); | ||
106 | if (err) | ||
107 | goto out; | ||
108 | n += i; | ||
109 | i = 0; | ||
110 | } | ||
111 | } | ||
112 | } | ||
113 | |||
114 | if (i) | ||
115 | err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); | ||
116 | |||
117 | out: | ||
118 | free_page((unsigned long) pages); | ||
119 | return err; | ||
120 | } | ||
121 | |||
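The bound PAGE_SIZE / sizeof(u64) - 2 keeps each WRITE_MTT within one page of entries while, presumably, leaving two u64 slots for the command's own header fields; on 4 KB pages that is 512 - 2 = 510 entries per firmware call. The flush-when-full pattern, boiled down to standalone C where flush() stands in for mlx4_write_mtt():

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SZ   4096
	#define MAX_BATCH (PAGE_SZ / sizeof(uint64_t) - 2)  /* 510 */

	/* stand-in for the firmware call: write 'count' entries at 'start' */
	static int flush(int start, int count, const uint64_t *buf)
	{
		(void)buf;
		printf("WRITE_MTT start=%d count=%d\n", start, count);
		return 0;
	}

	int main(void)
	{
		uint64_t pages[MAX_BATCH];
		int i = 0, n = 0;

		for (int addr = 0; addr < 2000; ++addr) {
			pages[i++] = (uint64_t)addr << 12;
			if (i == MAX_BATCH) {	/* batch full: flush it */
				flush(n, i, pages);
				n += i;
				i = 0;
			}
		}
		if (i)				/* flush the tail */
			flush(n, i, pages);
		return 0;
	}
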
122 | struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, | ||
123 | u64 virt_addr, int access_flags, | ||
124 | struct ib_udata *udata) | ||
125 | { | ||
126 | struct mlx4_ib_dev *dev = to_mdev(pd->device); | ||
127 | struct mlx4_ib_mr *mr; | ||
128 | int shift; | ||
129 | int err; | ||
130 | int n; | ||
131 | |||
132 | mr = kmalloc(sizeof *mr, GFP_KERNEL); | ||
133 | if (!mr) | ||
134 | return ERR_PTR(-ENOMEM); | ||
135 | |||
136 | mr->umem = ib_umem_get(pd->uobject->context, start, length, access_flags); | ||
137 | if (IS_ERR(mr->umem)) { | ||
138 | err = PTR_ERR(mr->umem); | ||
139 | goto err_free; | ||
140 | } | ||
141 | |||
142 | n = ib_umem_page_count(mr->umem); | ||
143 | shift = ilog2(mr->umem->page_size); | ||
144 | |||
145 | err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, | ||
146 | convert_access(access_flags), n, shift, &mr->mmr); | ||
147 | if (err) | ||
148 | goto err_umem; | ||
149 | |||
150 | err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); | ||
151 | if (err) | ||
152 | goto err_mr; | ||
153 | |||
154 | err = mlx4_mr_enable(dev->dev, &mr->mmr); | ||
155 | if (err) | ||
156 | goto err_mr; | ||
157 | |||
158 | mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; | ||
159 | |||
160 | return &mr->ibmr; | ||
161 | |||
162 | err_mr: | ||
163 | mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); | ||
164 | |||
165 | err_umem: | ||
166 | ib_umem_release(mr->umem); | ||
167 | |||
168 | err_free: | ||
169 | kfree(mr); | ||
170 | |||
171 | return ERR_PTR(err); | ||
172 | } | ||
173 | |||
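This is the kernel half of user memory registration; the request normally originates in libibverbs and travels through the uverbs REG_MR command into mlx4_ib_reg_user_mr() above. A sketch of the userspace side, assuming an already-allocated pd and a buffer buf of len bytes:

	#include <stddef.h>
	#include <infiniband/verbs.h>

	struct ibv_mr *register_buffer(struct ibv_pd *pd, void *buf, size_t len)
	{
		return ibv_reg_mr(pd, buf, len,
				  IBV_ACCESS_LOCAL_WRITE |
				  IBV_ACCESS_REMOTE_READ |
				  IBV_ACCESS_REMOTE_WRITE);
	}
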
174 | int mlx4_ib_dereg_mr(struct ib_mr *ibmr) | ||
175 | { | ||
176 | struct mlx4_ib_mr *mr = to_mmr(ibmr); | ||
177 | |||
178 | mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); | ||
179 | if (mr->umem) | ||
180 | ib_umem_release(mr->umem); | ||
181 | kfree(mr); | ||
182 | |||
183 | return 0; | ||
184 | } | ||
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c new file mode 100644 index 000000000000..5cd706908450 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/qp.c | |||
@@ -0,0 +1,1294 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <rdma/ib_cache.h> | ||
34 | #include <rdma/ib_pack.h> | ||
35 | |||
36 | #include <linux/mlx4/qp.h> | ||
37 | |||
38 | #include "mlx4_ib.h" | ||
39 | #include "user.h" | ||
40 | |||
41 | enum { | ||
42 | MLX4_IB_ACK_REQ_FREQ = 8, | ||
43 | }; | ||
44 | |||
45 | enum { | ||
46 | MLX4_IB_DEFAULT_SCHED_QUEUE = 0x83, | ||
47 | MLX4_IB_DEFAULT_QP0_SCHED_QUEUE = 0x3f | ||
48 | }; | ||
49 | |||
50 | enum { | ||
51 | /* | ||
52 | * Largest possible UD header: send with GRH and immediate data. | ||
53 | */ | ||
54 | MLX4_IB_UD_HEADER_SIZE = 72 | ||
55 | }; | ||
56 | |||
57 | struct mlx4_ib_sqp { | ||
58 | struct mlx4_ib_qp qp; | ||
59 | int pkey_index; | ||
60 | u32 qkey; | ||
61 | u32 send_psn; | ||
62 | struct ib_ud_header ud_header; | ||
63 | u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; | ||
64 | }; | ||
65 | |||
66 | static const __be32 mlx4_ib_opcode[] = { | ||
67 | [IB_WR_SEND] = __constant_cpu_to_be32(MLX4_OPCODE_SEND), | ||
68 | [IB_WR_SEND_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM), | ||
69 | [IB_WR_RDMA_WRITE] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE), | ||
70 | [IB_WR_RDMA_WRITE_WITH_IMM] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM), | ||
71 | [IB_WR_RDMA_READ] = __constant_cpu_to_be32(MLX4_OPCODE_RDMA_READ), | ||
72 | [IB_WR_ATOMIC_CMP_AND_SWP] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_CS), | ||
73 | [IB_WR_ATOMIC_FETCH_AND_ADD] = __constant_cpu_to_be32(MLX4_OPCODE_ATOMIC_FA), | ||
74 | }; | ||
75 | |||
76 | static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) | ||
77 | { | ||
78 | return container_of(mqp, struct mlx4_ib_sqp, qp); | ||
79 | } | ||
80 | |||
81 | static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) | ||
82 | { | ||
83 | return qp->mqp.qpn >= dev->dev->caps.sqp_start && | ||
84 | qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; | ||
85 | } | ||
86 | |||
87 | static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) | ||
88 | { | ||
89 | return qp->mqp.qpn >= dev->dev->caps.sqp_start && | ||
90 | qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; | ||
91 | } | ||
92 | |||
93 | static void *get_wqe(struct mlx4_ib_qp *qp, int offset) | ||
94 | { | ||
95 | if (qp->buf.nbufs == 1) | ||
96 | return qp->buf.u.direct.buf + offset; | ||
97 | else | ||
98 | return qp->buf.u.page_list[offset >> PAGE_SHIFT].buf + | ||
99 | (offset & (PAGE_SIZE - 1)); | ||
100 | } | ||
101 | |||
102 | static void *get_recv_wqe(struct mlx4_ib_qp *qp, int n) | ||
103 | { | ||
104 | return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); | ||
105 | } | ||
106 | |||
107 | static void *get_send_wqe(struct mlx4_ib_qp *qp, int n) | ||
108 | { | ||
109 | return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); | ||
110 | } | ||
111 | |||
112 | static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) | ||
113 | { | ||
114 | struct ib_event event; | ||
115 | struct ib_qp *ibqp = &to_mibqp(qp)->ibqp; | ||
116 | |||
117 | if (type == MLX4_EVENT_TYPE_PATH_MIG) | ||
118 | to_mibqp(qp)->port = to_mibqp(qp)->alt_port; | ||
119 | |||
120 | if (ibqp->event_handler) { | ||
121 | event.device = ibqp->device; | ||
122 | event.element.qp = ibqp; | ||
123 | switch (type) { | ||
124 | case MLX4_EVENT_TYPE_PATH_MIG: | ||
125 | event.event = IB_EVENT_PATH_MIG; | ||
126 | break; | ||
127 | case MLX4_EVENT_TYPE_COMM_EST: | ||
128 | event.event = IB_EVENT_COMM_EST; | ||
129 | break; | ||
130 | case MLX4_EVENT_TYPE_SQ_DRAINED: | ||
131 | event.event = IB_EVENT_SQ_DRAINED; | ||
132 | break; | ||
133 | case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE: | ||
134 | event.event = IB_EVENT_QP_LAST_WQE_REACHED; | ||
135 | break; | ||
136 | case MLX4_EVENT_TYPE_WQ_CATAS_ERROR: | ||
137 | event.event = IB_EVENT_QP_FATAL; | ||
138 | break; | ||
139 | case MLX4_EVENT_TYPE_PATH_MIG_FAILED: | ||
140 | event.event = IB_EVENT_PATH_MIG_ERR; | ||
141 | break; | ||
142 | case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR: | ||
143 | event.event = IB_EVENT_QP_REQ_ERR; | ||
144 | break; | ||
145 | case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR: | ||
146 | event.event = IB_EVENT_QP_ACCESS_ERR; | ||
147 | break; | ||
148 | default: | ||
149 | printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " | ||
150 | "on QP %06x\n", type, qp->qpn); | ||
151 | return; | ||
152 | } | ||
153 | |||
154 | ibqp->event_handler(&event, ibqp->qp_context); | ||
155 | } | ||
156 | } | ||
157 | |||
158 | static int send_wqe_overhead(enum ib_qp_type type) | ||
159 | { | ||
160 | /* | ||
161 | * UD WQEs must have a datagram segment. | ||
162 | * RC and UC WQEs might have a remote address segment. | ||
163 | * MLX WQEs need two extra inline data segments (for the UD | ||
164 | * header and space for the ICRC). | ||
165 | */ | ||
166 | switch (type) { | ||
167 | case IB_QPT_UD: | ||
168 | return sizeof (struct mlx4_wqe_ctrl_seg) + | ||
169 | sizeof (struct mlx4_wqe_datagram_seg); | ||
170 | case IB_QPT_UC: | ||
171 | return sizeof (struct mlx4_wqe_ctrl_seg) + | ||
172 | sizeof (struct mlx4_wqe_raddr_seg); | ||
173 | case IB_QPT_RC: | ||
174 | return sizeof (struct mlx4_wqe_ctrl_seg) + | ||
175 | sizeof (struct mlx4_wqe_atomic_seg) + | ||
176 | sizeof (struct mlx4_wqe_raddr_seg); | ||
177 | case IB_QPT_SMI: | ||
178 | case IB_QPT_GSI: | ||
179 | return sizeof (struct mlx4_wqe_ctrl_seg) + | ||
180 | ALIGN(MLX4_IB_UD_HEADER_SIZE + | ||
181 | sizeof (struct mlx4_wqe_inline_seg), | ||
182 | sizeof (struct mlx4_wqe_data_seg)) + | ||
183 | ALIGN(4 + | ||
184 | sizeof (struct mlx4_wqe_inline_seg), | ||
185 | sizeof (struct mlx4_wqe_data_seg)); | ||
186 | default: | ||
187 | return sizeof (struct mlx4_wqe_ctrl_seg); | ||
188 | } | ||
189 | } | ||
190 | |||
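Plugging in the usual sizes of these segments (assumed here: ctrl 16 bytes, datagram 48, remote-address 16, atomic 16, data 16, and a 4-byte inline header), the overheads come out to 64 bytes for UD, 32 for UC, 48 for RC, and for the MLX transport 16 + ALIGN(72 + 4, 16) + ALIGN(4 + 4, 16) = 16 + 80 + 16 = 112 bytes.
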
191 | static int set_qp_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, | ||
192 | enum ib_qp_type type, struct mlx4_ib_qp *qp) | ||
193 | { | ||
194 | /* Sanity check QP size before proceeding */ | ||
195 | if (cap->max_send_wr > dev->dev->caps.max_wqes || | ||
196 | cap->max_recv_wr > dev->dev->caps.max_wqes || | ||
197 | cap->max_send_sge > dev->dev->caps.max_sq_sg || | ||
198 | cap->max_recv_sge > dev->dev->caps.max_rq_sg || | ||
199 | cap->max_inline_data + send_wqe_overhead(type) + | ||
200 | sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) | ||
201 | return -EINVAL; | ||
202 | |||
203 | /* | ||
204 | * For MLX transport we need 2 extra S/G entries: | ||
205 | * one for the header and one for the checksum at the end | ||
206 | */ | ||
207 | if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && | ||
208 | cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) | ||
209 | return -EINVAL; | ||
210 | |||
211 | qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; | ||
212 | qp->sq.max = cap->max_send_wr ? roundup_pow_of_two(cap->max_send_wr) : 0; | ||
213 | |||
214 | qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge * | ||
215 | sizeof (struct mlx4_wqe_data_seg))); | ||
216 | qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg); | ||
217 | |||
218 | qp->sq.wqe_shift = ilog2(roundup_pow_of_two(max(cap->max_send_sge * | ||
219 | sizeof (struct mlx4_wqe_data_seg), | ||
220 | cap->max_inline_data + | ||
221 | sizeof (struct mlx4_wqe_inline_seg)) + | ||
222 | send_wqe_overhead(type))); | ||
223 | qp->sq.max_gs = ((1 << qp->sq.wqe_shift) - send_wqe_overhead(type)) / | ||
224 | sizeof (struct mlx4_wqe_data_seg); | ||
225 | |||
226 | qp->buf_size = (qp->rq.max << qp->rq.wqe_shift) + | ||
227 | (qp->sq.max << qp->sq.wqe_shift); | ||
228 | if (qp->rq.wqe_shift > qp->sq.wqe_shift) { | ||
229 | qp->rq.offset = 0; | ||
230 | qp->sq.offset = qp->rq.max << qp->rq.wqe_shift; | ||
231 | } else { | ||
232 | qp->rq.offset = qp->sq.max << qp->sq.wqe_shift; | ||
233 | qp->sq.offset = 0; | ||
234 | } | ||
235 | |||
236 | cap->max_send_wr = qp->sq.max; | ||
237 | cap->max_recv_wr = qp->rq.max; | ||
238 | cap->max_send_sge = qp->sq.max_gs; | ||
239 | cap->max_recv_sge = qp->rq.max_gs; | ||
240 | cap->max_inline_data = (1 << qp->sq.wqe_shift) - send_wqe_overhead(type) - | ||
241 | sizeof (struct mlx4_wqe_inline_seg); | ||
242 | |||
243 | return 0; | ||
244 | } | ||
245 | |||
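A concrete sizing walk-through, assuming the usual 16-byte mlx4_wqe_data_seg: requesting max_recv_sge = 4 gives a receive stride of roundup_pow_of_two(4 * 16) = 64 bytes, hence rq.wqe_shift = 6 and max_gs = 64 / 16 = 4; requesting 5 SGEs rounds the stride up to 128 bytes and the caller gets max_recv_sge = 8 back. The offset logic at the end of the function places whichever queue has the larger stride first in the buffer, so both queues stay naturally aligned to their stride.
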
246 | static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, | ||
247 | struct ib_qp_init_attr *init_attr, | ||
248 | struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) | ||
249 | { | ||
250 | struct mlx4_wqe_ctrl_seg *ctrl; | ||
251 | int err; | ||
252 | int i; | ||
253 | |||
254 | mutex_init(&qp->mutex); | ||
255 | spin_lock_init(&qp->sq.lock); | ||
256 | spin_lock_init(&qp->rq.lock); | ||
257 | |||
258 | qp->state = IB_QPS_RESET; | ||
259 | qp->atomic_rd_en = 0; | ||
260 | qp->resp_depth = 0; | ||
261 | |||
262 | qp->rq.head = 0; | ||
263 | qp->rq.tail = 0; | ||
264 | qp->sq.head = 0; | ||
265 | qp->sq.tail = 0; | ||
266 | |||
267 | err = set_qp_size(dev, &init_attr->cap, init_attr->qp_type, qp); | ||
268 | if (err) | ||
269 | goto err; | ||
270 | |||
271 | if (pd->uobject) { | ||
272 | struct mlx4_ib_create_qp ucmd; | ||
273 | |||
274 | if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { | ||
275 | err = -EFAULT; | ||
276 | goto err; | ||
277 | } | ||
278 | |||
279 | qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, | ||
280 | qp->buf_size, 0); | ||
281 | if (IS_ERR(qp->umem)) { | ||
282 | err = PTR_ERR(qp->umem); | ||
283 | goto err; | ||
284 | } | ||
285 | |||
286 | err = mlx4_mtt_init(dev->dev, ib_umem_page_count(qp->umem), | ||
287 | ilog2(qp->umem->page_size), &qp->mtt); | ||
288 | if (err) | ||
289 | goto err_buf; | ||
290 | |||
291 | err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); | ||
292 | if (err) | ||
293 | goto err_mtt; | ||
294 | |||
295 | err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), | ||
296 | ucmd.db_addr, &qp->db); | ||
297 | if (err) | ||
298 | goto err_mtt; | ||
299 | } else { | ||
300 | err = mlx4_ib_db_alloc(dev, &qp->db, 0); | ||
301 | if (err) | ||
302 | goto err; | ||
303 | |||
304 | *qp->db.db = 0; | ||
305 | |||
306 | if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) { | ||
307 | err = -ENOMEM; | ||
308 | goto err_db; | ||
309 | } | ||
310 | |||
311 | err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift, | ||
312 | &qp->mtt); | ||
313 | if (err) | ||
314 | goto err_buf; | ||
315 | |||
316 | err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf); | ||
317 | if (err) | ||
318 | goto err_mtt; | ||
319 | |||
320 | for (i = 0; i < qp->sq.max; ++i) { | ||
321 | ctrl = get_send_wqe(qp, i); | ||
322 | ctrl->owner_opcode = cpu_to_be32(1 << 31); | ||
323 | } | ||
324 | |||
325 | qp->sq.wrid = kmalloc(qp->sq.max * sizeof (u64), GFP_KERNEL); | ||
326 | qp->rq.wrid = kmalloc(qp->rq.max * sizeof (u64), GFP_KERNEL); | ||
327 | |||
328 | if (!qp->sq.wrid || !qp->rq.wrid) { | ||
329 | err = -ENOMEM; | ||
330 | goto err_wrid; | ||
331 | } | ||
332 | |||
333 | /* We don't support inline sends for kernel QPs (yet) */ | ||
334 | init_attr->cap.max_inline_data = 0; | ||
335 | } | ||
336 | |||
337 | err = mlx4_qp_alloc(dev->dev, sqpn, &qp->mqp); | ||
338 | if (err) | ||
339 | goto err_wrid; | ||
340 | |||
341 | /* | ||
342 | * Hardware wants QPN written in big-endian order (after | ||
343 | * shifting) for send doorbell. Precompute this value to save | ||
344 | * a little bit when posting sends. | ||
345 | */ | ||
346 | qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); | ||
347 | |||
348 | if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) | ||
349 | qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); | ||
350 | else | ||
351 | qp->sq_signal_bits = 0; | ||
352 | |||
353 | qp->mqp.event = mlx4_ib_qp_event; | ||
354 | |||
355 | return 0; | ||
356 | |||
357 | err_wrid: | ||
358 | if (pd->uobject) | ||
359 | mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); | ||
360 | else { | ||
361 | kfree(qp->sq.wrid); | ||
362 | kfree(qp->rq.wrid); | ||
363 | } | ||
364 | |||
365 | err_mtt: | ||
366 | mlx4_mtt_cleanup(dev->dev, &qp->mtt); | ||
367 | |||
368 | err_buf: | ||
369 | if (pd->uobject) | ||
370 | ib_umem_release(qp->umem); | ||
371 | else | ||
372 | mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); | ||
373 | |||
374 | err_db: | ||
375 | if (!pd->uobject) | ||
376 | mlx4_ib_db_free(dev, &qp->db); | ||
377 | |||
378 | err: | ||
379 | return err; | ||
380 | } | ||
381 | |||
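To see what the doorbell_qpn precomputation buys: on a little-endian CPU, storing swab32(qpn << 8) writes the 24-bit QPN to the doorbell register in the big-endian byte order the hardware expects, with no per-send swapping. A standalone check:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t swab32(uint32_t x)
	{
		return (x >> 24) | ((x >> 8) & 0xff00) |
		       ((x << 8) & 0xff0000) | (x << 24);
	}

	int main(void)
	{
		uint32_t qpn = 0x0012ab;

		/* 0x0012ab00 -> 0x00ab1200: bytes land big-endian first */
		printf("doorbell value: %#010x\n", (unsigned)swab32(qpn << 8));
		return 0;
	}
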
382 | static enum mlx4_qp_state to_mlx4_state(enum ib_qp_state state) | ||
383 | { | ||
384 | switch (state) { | ||
385 | case IB_QPS_RESET: return MLX4_QP_STATE_RST; | ||
386 | case IB_QPS_INIT: return MLX4_QP_STATE_INIT; | ||
387 | case IB_QPS_RTR: return MLX4_QP_STATE_RTR; | ||
388 | case IB_QPS_RTS: return MLX4_QP_STATE_RTS; | ||
389 | case IB_QPS_SQD: return MLX4_QP_STATE_SQD; | ||
390 | case IB_QPS_SQE: return MLX4_QP_STATE_SQER; | ||
391 | case IB_QPS_ERR: return MLX4_QP_STATE_ERR; | ||
392 | default: return -1; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) | ||
397 | { | ||
398 | if (send_cq == recv_cq) | ||
399 | spin_lock_irq(&send_cq->lock); | ||
400 | else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { | ||
401 | spin_lock_irq(&send_cq->lock); | ||
402 | spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); | ||
403 | } else { | ||
404 | spin_lock_irq(&recv_cq->lock); | ||
405 | spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); | ||
406 | } | ||
407 | } | ||
408 | |||
409 | static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq) | ||
410 | { | ||
411 | if (send_cq == recv_cq) | ||
412 | spin_unlock_irq(&send_cq->lock); | ||
413 | else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { | ||
414 | spin_unlock(&recv_cq->lock); | ||
415 | spin_unlock_irq(&send_cq->lock); | ||
416 | } else { | ||
417 | spin_unlock(&send_cq->lock); | ||
418 | spin_unlock_irq(&recv_cq->lock); | ||
419 | } | ||
420 | } | ||
421 | |||
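The CQN comparison establishes a single global acquisition order for any pair of CQ locks, which is the standard cure for AB-BA deadlock when two locks must be held together but different callers may name them in either order. The same discipline in plain pthreads, ordering by address instead of CQN:

	#include <pthread.h>

	static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
	{
		if (a == b) {
			pthread_mutex_lock(a);
		} else if (a < b) {	/* always take the "lower" lock first */
			pthread_mutex_lock(a);
			pthread_mutex_lock(b);
		} else {
			pthread_mutex_lock(b);
			pthread_mutex_lock(a);
		}
	}
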
422 | static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, | ||
423 | int is_user) | ||
424 | { | ||
425 | struct mlx4_ib_cq *send_cq, *recv_cq; | ||
426 | |||
427 | if (qp->state != IB_QPS_RESET) | ||
428 | if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), | ||
429 | MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) | ||
430 | printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n", | ||
431 | qp->mqp.qpn); | ||
432 | |||
433 | send_cq = to_mcq(qp->ibqp.send_cq); | ||
434 | recv_cq = to_mcq(qp->ibqp.recv_cq); | ||
435 | |||
436 | mlx4_ib_lock_cqs(send_cq, recv_cq); | ||
437 | |||
438 | if (!is_user) { | ||
439 | __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, | ||
440 | qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); | ||
441 | if (send_cq != recv_cq) | ||
442 | __mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); | ||
443 | } | ||
444 | |||
445 | mlx4_qp_remove(dev->dev, &qp->mqp); | ||
446 | |||
447 | mlx4_ib_unlock_cqs(send_cq, recv_cq); | ||
448 | |||
449 | mlx4_qp_free(dev->dev, &qp->mqp); | ||
450 | mlx4_mtt_cleanup(dev->dev, &qp->mtt); | ||
451 | |||
452 | if (is_user) { | ||
453 | mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), | ||
454 | &qp->db); | ||
455 | ib_umem_release(qp->umem); | ||
456 | } else { | ||
457 | kfree(qp->sq.wrid); | ||
458 | kfree(qp->rq.wrid); | ||
459 | mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); | ||
460 | mlx4_ib_db_free(dev, &qp->db); | ||
461 | } | ||
462 | } | ||
463 | |||
464 | struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, | ||
465 | struct ib_qp_init_attr *init_attr, | ||
466 | struct ib_udata *udata) | ||
467 | { | ||
468 | struct mlx4_ib_dev *dev = to_mdev(pd->device); | ||
469 | struct mlx4_ib_sqp *sqp; | ||
470 | struct mlx4_ib_qp *qp; | ||
471 | int err; | ||
472 | |||
473 | switch (init_attr->qp_type) { | ||
474 | case IB_QPT_RC: | ||
475 | case IB_QPT_UC: | ||
476 | case IB_QPT_UD: | ||
477 | { | ||
478 | qp = kmalloc(sizeof *qp, GFP_KERNEL); | ||
479 | if (!qp) | ||
480 | return ERR_PTR(-ENOMEM); | ||
481 | |||
482 | err = create_qp_common(dev, pd, init_attr, udata, 0, qp); | ||
483 | if (err) { | ||
484 | kfree(qp); | ||
485 | return ERR_PTR(err); | ||
486 | } | ||
487 | |||
488 | qp->ibqp.qp_num = qp->mqp.qpn; | ||
489 | |||
490 | break; | ||
491 | } | ||
492 | case IB_QPT_SMI: | ||
493 | case IB_QPT_GSI: | ||
494 | { | ||
495 | /* Userspace is not allowed to create special QPs: */ | ||
496 | if (pd->uobject) | ||
497 | return ERR_PTR(-EINVAL); | ||
498 | |||
499 | sqp = kmalloc(sizeof *sqp, GFP_KERNEL); | ||
500 | if (!sqp) | ||
501 | return ERR_PTR(-ENOMEM); | ||
502 | |||
503 | qp = &sqp->qp; | ||
504 | |||
505 | err = create_qp_common(dev, pd, init_attr, udata, | ||
506 | dev->dev->caps.sqp_start + | ||
507 | (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + | ||
508 | init_attr->port_num - 1, | ||
509 | qp); | ||
510 | if (err) { | ||
511 | kfree(sqp); | ||
512 | return ERR_PTR(err); | ||
513 | } | ||
514 | |||
515 | qp->port = init_attr->port_num; | ||
516 | qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; | ||
517 | |||
518 | break; | ||
519 | } | ||
520 | default: | ||
521 | /* Don't support raw QPs */ | ||
522 | return ERR_PTR(-EINVAL); | ||
523 | } | ||
524 | |||
525 | return &qp->ibqp; | ||
526 | } | ||
527 | |||
528 | int mlx4_ib_destroy_qp(struct ib_qp *qp) | ||
529 | { | ||
530 | struct mlx4_ib_dev *dev = to_mdev(qp->device); | ||
531 | struct mlx4_ib_qp *mqp = to_mqp(qp); | ||
532 | |||
533 | if (is_qp0(dev, mqp)) | ||
534 | mlx4_CLOSE_PORT(dev->dev, mqp->port); | ||
535 | |||
536 | destroy_qp_common(dev, mqp, !!qp->pd->uobject); | ||
537 | |||
538 | if (is_sqp(dev, mqp)) | ||
539 | kfree(to_msqp(mqp)); | ||
540 | else | ||
541 | kfree(mqp); | ||
542 | |||
543 | return 0; | ||
544 | } | ||
545 | |||
546 | static void init_port(struct mlx4_ib_dev *dev, int port) | ||
547 | { | ||
548 | struct mlx4_init_port_param param; | ||
549 | int err; | ||
550 | |||
551 | memset(¶m, 0, sizeof param); | ||
552 | |||
553 | param.port_width_cap = dev->dev->caps.port_width_cap; | ||
554 | param.vl_cap = dev->dev->caps.vl_cap; | ||
555 | param.mtu = ib_mtu_enum_to_int(dev->dev->caps.mtu_cap); | ||
556 | param.max_gid = dev->dev->caps.gid_table_len; | ||
557 | param.max_pkey = dev->dev->caps.pkey_table_len; | ||
558 | |||
559 | err = mlx4_INIT_PORT(dev->dev, ¶m, port); | ||
560 | if (err) | ||
561 | printk(KERN_WARNING "INIT_PORT failed, return code %d.\n", err); | ||
562 | } | ||
563 | |||
564 | static int to_mlx4_st(enum ib_qp_type type) | ||
565 | { | ||
566 | switch (type) { | ||
567 | case IB_QPT_RC: return MLX4_QP_ST_RC; | ||
568 | case IB_QPT_UC: return MLX4_QP_ST_UC; | ||
569 | case IB_QPT_UD: return MLX4_QP_ST_UD; | ||
570 | case IB_QPT_SMI: | ||
571 | case IB_QPT_GSI: return MLX4_QP_ST_MLX; | ||
572 | default: return -1; | ||
573 | } | ||
574 | } | ||
575 | |||
576 | static __be32 to_mlx4_access_flags(struct mlx4_ib_qp *qp, struct ib_qp_attr *attr, | ||
577 | int attr_mask) | ||
578 | { | ||
579 | u8 dest_rd_atomic; | ||
580 | u32 access_flags; | ||
581 | u32 hw_access_flags = 0; | ||
582 | |||
583 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | ||
584 | dest_rd_atomic = attr->max_dest_rd_atomic; | ||
585 | else | ||
586 | dest_rd_atomic = qp->resp_depth; | ||
587 | |||
588 | if (attr_mask & IB_QP_ACCESS_FLAGS) | ||
589 | access_flags = attr->qp_access_flags; | ||
590 | else | ||
591 | access_flags = qp->atomic_rd_en; | ||
592 | |||
593 | if (!dest_rd_atomic) | ||
594 | access_flags &= IB_ACCESS_REMOTE_WRITE; | ||
595 | |||
596 | if (access_flags & IB_ACCESS_REMOTE_READ) | ||
597 | hw_access_flags |= MLX4_QP_BIT_RRE; | ||
598 | if (access_flags & IB_ACCESS_REMOTE_ATOMIC) | ||
599 | hw_access_flags |= MLX4_QP_BIT_RAE; | ||
600 | if (access_flags & IB_ACCESS_REMOTE_WRITE) | ||
601 | hw_access_flags |= MLX4_QP_BIT_RWE; | ||
602 | |||
603 | return cpu_to_be32(hw_access_flags); | ||
604 | } | ||
605 | |||
606 | static void store_sqp_attrs(struct mlx4_ib_sqp *sqp, struct ib_qp_attr *attr, | ||
607 | int attr_mask) | ||
608 | { | ||
609 | if (attr_mask & IB_QP_PKEY_INDEX) | ||
610 | sqp->pkey_index = attr->pkey_index; | ||
611 | if (attr_mask & IB_QP_QKEY) | ||
612 | sqp->qkey = attr->qkey; | ||
613 | if (attr_mask & IB_QP_SQ_PSN) | ||
614 | sqp->send_psn = attr->sq_psn; | ||
615 | } | ||
616 | |||
617 | static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port) | ||
618 | { | ||
619 | path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6); | ||
620 | } | ||
621 | |||
622 | static int mlx4_set_path(struct mlx4_ib_dev *dev, struct ib_ah_attr *ah, | ||
623 | struct mlx4_qp_path *path, u8 port) | ||
624 | { | ||
625 | path->grh_mylmc = ah->src_path_bits & 0x7f; | ||
626 | path->rlid = cpu_to_be16(ah->dlid); | ||
627 | if (ah->static_rate) { | ||
628 | path->static_rate = ah->static_rate + MLX4_STAT_RATE_OFFSET; | ||
629 | while (path->static_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET && | ||
630 | !(1 << path->static_rate & dev->dev->caps.stat_rate_support)) | ||
631 | --path->static_rate; | ||
632 | } else | ||
633 | path->static_rate = 0; | ||
634 | path->counter_index = 0xff; | ||
635 | |||
636 | if (ah->ah_flags & IB_AH_GRH) { | ||
637 | if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len) { | ||
638 | printk(KERN_ERR "sgid_index (%u) too large. max is %d\n", | ||
639 | ah->grh.sgid_index, dev->dev->caps.gid_table_len - 1); | ||
640 | return -1; | ||
641 | } | ||
642 | |||
643 | path->grh_mylmc |= 1 << 7; | ||
644 | path->mgid_index = ah->grh.sgid_index; | ||
645 | path->hop_limit = ah->grh.hop_limit; | ||
646 | path->tclass_flowlabel = | ||
647 | cpu_to_be32((ah->grh.traffic_class << 20) | | ||
648 | (ah->grh.flow_label)); | ||
649 | memcpy(path->rgid, ah->grh.dgid.raw, 16); | ||
650 | } | ||
651 | |||
652 | path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | | ||
653 | ((port - 1) << 6) | ((ah->sl & 0xf) << 2); | ||
654 | |||
655 | return 0; | ||
656 | } | ||
657 | |||
658 | int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, | ||
659 | int attr_mask, struct ib_udata *udata) | ||
660 | { | ||
661 | struct mlx4_ib_dev *dev = to_mdev(ibqp->device); | ||
662 | struct mlx4_ib_qp *qp = to_mqp(ibqp); | ||
663 | struct mlx4_qp_context *context; | ||
664 | enum mlx4_qp_optpar optpar = 0; | ||
665 | enum ib_qp_state cur_state, new_state; | ||
666 | int sqd_event; | ||
667 | int err = -EINVAL; | ||
668 | |||
669 | context = kzalloc(sizeof *context, GFP_KERNEL); | ||
670 | if (!context) | ||
671 | return -ENOMEM; | ||
672 | |||
673 | mutex_lock(&qp->mutex); | ||
674 | |||
675 | cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; | ||
676 | new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; | ||
677 | |||
678 | if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) | ||
679 | goto out; | ||
680 | |||
681 | if ((attr_mask & IB_QP_PKEY_INDEX) && | ||
682 | attr->pkey_index >= dev->dev->caps.pkey_table_len) { | ||
683 | goto out; | ||
684 | } | ||
685 | |||
686 | if ((attr_mask & IB_QP_PORT) && | ||
687 | (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { | ||
688 | goto out; | ||
689 | } | ||
690 | |||
691 | if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && | ||
692 | attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { | ||
693 | goto out; | ||
694 | } | ||
695 | |||
696 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && | ||
697 | attr->max_dest_rd_atomic > 1 << dev->dev->caps.max_qp_dest_rdma) { | ||
698 | goto out; | ||
699 | } | ||
700 | |||
701 | context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | | ||
702 | (to_mlx4_st(ibqp->qp_type) << 16)); | ||
703 | context->flags |= cpu_to_be32(1 << 8); /* DE? */ | ||
704 | |||
705 | if (!(attr_mask & IB_QP_PATH_MIG_STATE)) | ||
706 | context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); | ||
707 | else { | ||
708 | optpar |= MLX4_QP_OPTPAR_PM_STATE; | ||
709 | switch (attr->path_mig_state) { | ||
710 | case IB_MIG_MIGRATED: | ||
711 | context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); | ||
712 | break; | ||
713 | case IB_MIG_REARM: | ||
714 | context->flags |= cpu_to_be32(MLX4_QP_PM_REARM << 11); | ||
715 | break; | ||
716 | case IB_MIG_ARMED: | ||
717 | context->flags |= cpu_to_be32(MLX4_QP_PM_ARMED << 11); | ||
718 | break; | ||
719 | } | ||
720 | } | ||
721 | |||
722 | if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || | ||
723 | ibqp->qp_type == IB_QPT_UD) | ||
724 | context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; | ||
725 | else if (attr_mask & IB_QP_PATH_MTU) { | ||
726 | if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { | ||
727 | printk(KERN_ERR "path MTU (%u) is invalid\n", | ||
728 | attr->path_mtu); | ||
729 | goto out; | ||
730 | } | ||
731 | context->mtu_msgmax = (attr->path_mtu << 5) | 31; | ||
732 | } | ||
733 | |||
734 | if (qp->rq.max) | ||
735 | context->rq_size_stride = ilog2(qp->rq.max) << 3; | ||
736 | context->rq_size_stride |= qp->rq.wqe_shift - 4; | ||
737 | |||
738 | if (qp->sq.max) | ||
739 | context->sq_size_stride = ilog2(qp->sq.max) << 3; | ||
740 | context->sq_size_stride |= qp->sq.wqe_shift - 4; | ||
741 | |||
742 | if (qp->ibqp.uobject) | ||
743 | context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); | ||
744 | else | ||
745 | context->usr_page = cpu_to_be32(dev->priv_uar.index); | ||
746 | |||
747 | if (attr_mask & IB_QP_DEST_QPN) | ||
748 | context->remote_qpn = cpu_to_be32(attr->dest_qp_num); | ||
749 | |||
750 | if (attr_mask & IB_QP_PORT) { | ||
751 | if (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD && | ||
752 | !(attr_mask & IB_QP_AV)) { | ||
753 | mlx4_set_sched(&context->pri_path, attr->port_num); | ||
754 | optpar |= MLX4_QP_OPTPAR_SCHED_QUEUE; | ||
755 | } | ||
756 | } | ||
757 | |||
758 | if (attr_mask & IB_QP_PKEY_INDEX) { | ||
759 | context->pri_path.pkey_index = attr->pkey_index; | ||
760 | optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; | ||
761 | } | ||
762 | |||
763 | if (attr_mask & IB_QP_RNR_RETRY) { | ||
764 | context->params1 |= cpu_to_be32(attr->rnr_retry << 13); | ||
765 | optpar |= MLX4_QP_OPTPAR_RNR_RETRY; | ||
766 | } | ||
767 | |||
768 | if (attr_mask & IB_QP_AV) { | ||
769 | if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, | ||
770 | attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) { | ||
771 | err = -EINVAL; | ||
772 | goto out; | ||
773 | } | ||
774 | |||
775 | optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | | ||
776 | MLX4_QP_OPTPAR_SCHED_QUEUE); | ||
777 | } | ||
778 | |||
779 | if (attr_mask & IB_QP_TIMEOUT) { | ||
780 | context->pri_path.ackto = attr->timeout << 3; | ||
781 | optpar |= MLX4_QP_OPTPAR_ACK_TIMEOUT; | ||
782 | } | ||
783 | |||
784 | if (attr_mask & IB_QP_ALT_PATH) { | ||
785 | if (attr->alt_pkey_index >= dev->dev->caps.pkey_table_len) | ||
786 | goto out; | ||
787 | |||
788 | if (attr->alt_port_num == 0 || | ||
789 | attr->alt_port_num > dev->dev->caps.num_ports) | ||
790 | goto out; | ||
791 | |||
792 | if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path, | ||
793 | attr->alt_port_num)) | ||
794 | goto out; | ||
795 | |||
796 | context->alt_path.pkey_index = attr->alt_pkey_index; | ||
797 | context->alt_path.ackto = attr->alt_timeout << 3; | ||
798 | optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; | ||
799 | } | ||
800 | |||
801 | context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); | ||
802 | context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); | ||
803 | if (attr_mask & IB_QP_RETRY_CNT) { | ||
804 | context->params1 |= cpu_to_be32(attr->retry_cnt << 16); | ||
805 | optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; | ||
806 | } | ||
807 | |||
808 | if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { | ||
809 | if (attr->max_rd_atomic) | ||
810 | context->params1 |= | ||
811 | cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21); | ||
812 | optpar |= MLX4_QP_OPTPAR_SRA_MAX; | ||
813 | } | ||
814 | |||
815 | if (attr_mask & IB_QP_SQ_PSN) | ||
816 | context->next_send_psn = cpu_to_be32(attr->sq_psn); | ||
817 | |||
818 | context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn); | ||
819 | |||
820 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { | ||
821 | if (attr->max_dest_rd_atomic) | ||
822 | context->params2 |= | ||
823 | cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21); | ||
824 | optpar |= MLX4_QP_OPTPAR_RRA_MAX; | ||
825 | } | ||
826 | |||
827 | if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) { | ||
828 | context->params2 |= to_mlx4_access_flags(qp, attr, attr_mask); | ||
829 | optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE; | ||
830 | } | ||
831 | |||
832 | if (ibqp->srq) | ||
833 | context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC); | ||
834 | |||
835 | if (attr_mask & IB_QP_MIN_RNR_TIMER) { | ||
836 | context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); | ||
837 | optpar |= MLX4_QP_OPTPAR_RNR_TIMEOUT; | ||
838 | } | ||
839 | if (attr_mask & IB_QP_RQ_PSN) | ||
840 | context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); | ||
841 | |||
842 | context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn); | ||
843 | |||
844 | if (attr_mask & IB_QP_QKEY) { | ||
845 | context->qkey = cpu_to_be32(attr->qkey); | ||
846 | optpar |= MLX4_QP_OPTPAR_Q_KEY; | ||
847 | } | ||
848 | |||
849 | if (ibqp->srq) | ||
850 | context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); | ||
851 | |||
852 | if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) | ||
853 | context->db_rec_addr = cpu_to_be64(qp->db.dma); | ||
854 | |||
855 | if (cur_state == IB_QPS_INIT && | ||
856 | new_state == IB_QPS_RTR && | ||
857 | (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || | ||
858 | ibqp->qp_type == IB_QPT_UD)) { | ||
859 | context->pri_path.sched_queue = (qp->port - 1) << 6; | ||
860 | if (is_qp0(dev, qp)) | ||
861 | context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; | ||
862 | else | ||
863 | context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; | ||
864 | } | ||
865 | |||
866 | if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && | ||
867 | attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) | ||
868 | sqd_event = 1; | ||
869 | else | ||
870 | sqd_event = 0; | ||
871 | |||
872 | err = mlx4_qp_modify(dev->dev, &qp->mtt, to_mlx4_state(cur_state), | ||
873 | to_mlx4_state(new_state), context, optpar, | ||
874 | sqd_event, &qp->mqp); | ||
875 | if (err) | ||
876 | goto out; | ||
877 | |||
878 | qp->state = new_state; | ||
879 | |||
880 | if (attr_mask & IB_QP_ACCESS_FLAGS) | ||
881 | qp->atomic_rd_en = attr->qp_access_flags; | ||
882 | if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) | ||
883 | qp->resp_depth = attr->max_dest_rd_atomic; | ||
884 | if (attr_mask & IB_QP_PORT) | ||
885 | qp->port = attr->port_num; | ||
886 | if (attr_mask & IB_QP_ALT_PATH) | ||
887 | qp->alt_port = attr->alt_port_num; | ||
888 | |||
889 | if (is_sqp(dev, qp)) | ||
890 | store_sqp_attrs(to_msqp(qp), attr, attr_mask); | ||
891 | |||
892 | /* | ||
893 | * If we moved QP0 to RTR, bring the IB link up; if we moved | ||
894 | * QP0 to RESET or ERROR, bring the link back down. | ||
895 | */ | ||
896 | if (is_qp0(dev, qp)) { | ||
897 | if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) | ||
898 | init_port(dev, qp->port); | ||
899 | |||
900 | if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && | ||
901 | (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR)) | ||
902 | mlx4_CLOSE_PORT(dev->dev, qp->port); | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | * If we moved a kernel QP to RESET, clean up all old CQ | ||
907 | * entries and reinitialize the QP. | ||
908 | */ | ||
909 | if (new_state == IB_QPS_RESET && !ibqp->uobject) { | ||
910 | mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn, | ||
911 | ibqp->srq ? to_msrq(ibqp->srq) : NULL); | ||
912 | if (ibqp->send_cq != ibqp->recv_cq) | ||
913 | mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL); | ||
914 | |||
915 | qp->rq.head = 0; | ||
916 | qp->rq.tail = 0; | ||
917 | qp->sq.head = 0; | ||
918 | qp->sq.tail = 0; | ||
919 | *qp->db.db = 0; | ||
920 | } | ||
921 | |||
922 | out: | ||
923 | mutex_unlock(&qp->mutex); | ||
924 | kfree(context); | ||
925 | return err; | ||
926 | } | ||
927 | |||
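The two fls() expressions above encode attr->max_rd_atomic and attr->max_dest_rd_atomic as log2 fields in the QP context, rounding up to the next power of two (a depth of 0 leaves the field clear). A minimal userspace sketch of that arithmetic, with fls() reimplemented locally since the kernel helper is not available here:

	#include <stdio.h>

	/* Userspace stand-in for the kernel's fls(): highest set bit, 1-based. */
	static int fls_sketch(unsigned int x)
	{
		int r = 0;

		while (x) {
			++r;
			x >>= 1;
		}
		return r;
	}

	int main(void)
	{
		unsigned int depth;

		/* depth 1 -> 0, depth 2 -> 1, depth 3 -> 2 (rounds up to 4), ... */
		for (depth = 1; depth <= 8; ++depth)
			printf("max_rd_atomic %u -> log2 field %d\n",
			       depth, fls_sketch(depth - 1));
		return 0;
	}
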
928 | static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, | ||
929 | void *wqe) | ||
930 | { | ||
931 | struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev; | ||
932 | struct mlx4_wqe_mlx_seg *mlx = wqe; | ||
933 | struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; | ||
934 | struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); | ||
935 | u16 pkey; | ||
936 | int send_size; | ||
937 | int header_size; | ||
938 | int i; | ||
939 | |||
940 | send_size = 0; | ||
941 | for (i = 0; i < wr->num_sge; ++i) | ||
942 | send_size += wr->sg_list[i].length; | ||
943 | |||
944 | ib_ud_header_init(send_size, mlx4_ib_ah_grh_present(ah), &sqp->ud_header); | ||
945 | |||
946 | sqp->ud_header.lrh.service_level = | ||
947 | be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 28; | ||
948 | sqp->ud_header.lrh.destination_lid = ah->av.dlid; | ||
949 | sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.g_slid & 0x7f); | ||
950 | if (mlx4_ib_ah_grh_present(ah)) { | ||
951 | sqp->ud_header.grh.traffic_class = | ||
952 | (be32_to_cpu(ah->av.sl_tclass_flowlabel) >> 20) & 0xff; | ||
953 | sqp->ud_header.grh.flow_label = | ||
954 | ah->av.sl_tclass_flowlabel & cpu_to_be32(0xfffff); | ||
955 | ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.port_pd) >> 24, | ||
956 | ah->av.gid_index, &sqp->ud_header.grh.source_gid); | ||
957 | memcpy(sqp->ud_header.grh.destination_gid.raw, | ||
958 | ah->av.dgid, 16); | ||
959 | } | ||
960 | |||
961 | mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); | ||
962 | mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | | ||
963 | (sqp->ud_header.lrh.destination_lid == | ||
964 | IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | | ||
965 | (sqp->ud_header.lrh.service_level << 8)); | ||
966 | mlx->rlid = sqp->ud_header.lrh.destination_lid; | ||
967 | |||
968 | switch (wr->opcode) { | ||
969 | case IB_WR_SEND: | ||
970 | sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; | ||
971 | sqp->ud_header.immediate_present = 0; | ||
972 | break; | ||
973 | case IB_WR_SEND_WITH_IMM: | ||
974 | sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE; | ||
975 | sqp->ud_header.immediate_present = 1; | ||
976 | sqp->ud_header.immediate_data = wr->imm_data; | ||
977 | break; | ||
978 | default: | ||
979 | return -EINVAL; | ||
980 | } | ||
981 | |||
982 | sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; | ||
983 | if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) | ||
984 | sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; | ||
985 | sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); | ||
986 | if (!sqp->qp.ibqp.qp_num) | ||
987 | ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey); | ||
988 | else | ||
989 | ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey); | ||
990 | sqp->ud_header.bth.pkey = cpu_to_be16(pkey); | ||
991 | sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); | ||
992 | sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); | ||
993 | sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? | ||
994 | sqp->qkey : wr->wr.ud.remote_qkey); | ||
995 | sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); | ||
996 | |||
997 | header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); | ||
998 | |||
999 | if (0) { /* flip to 1 to hex-dump built UD headers */ | ||
1000 | printk(KERN_ERR "built UD header of size %d:\n", header_size); | ||
1001 | for (i = 0; i < header_size / 4; ++i) { | ||
1002 | if (i % 8 == 0) | ||
1003 | printk(" [%02x] ", i * 4); | ||
1004 | printk(" %08x", | ||
1005 | be32_to_cpu(((__be32 *) sqp->header_buf)[i])); | ||
1006 | if ((i + 1) % 8 == 0) | ||
1007 | printk("\n"); | ||
1008 | } | ||
1009 | printk("\n"); | ||
1010 | } | ||
1011 | |||
1012 | inl->byte_count = cpu_to_be32(1 << 31 | header_size); | ||
1013 | memcpy(inl + 1, sqp->header_buf, header_size); | ||
1014 | |||
1015 | return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); | ||
1016 | } | ||
1017 | |||
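build_mlx_header() returns the number of WQE bytes the packed UD header consumes: bit 31 of byte_count marks the segment as inline data rather than a scatter pointer, and the total is padded to the 16-byte units that the control segment's size field counts in. A standalone sketch of that accounting (the 40-byte header and the 4-byte segment header are illustrative stand-ins):

	#include <stdint.h>
	#include <stdio.h>

	#define ALIGN16(x) (((x) + 15) & ~15)	/* like the kernel's ALIGN(x, 16) */

	int main(void)
	{
		int header_size = 40;				/* e.g. LRH + BTH + DETH */
		uint32_t byte_count = (1u << 31) | header_size;	/* bit 31 = inline */

		printf("inline byte_count word: 0x%08x\n", byte_count);
		printf("WQE bytes consumed:     %d\n",
		       ALIGN16(4 /* inline seg header */ + header_size));
		return 0;
	}
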
1018 | static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq) | ||
1019 | { | ||
1020 | unsigned cur; | ||
1021 | struct mlx4_ib_cq *cq; | ||
1022 | |||
1023 | cur = wq->head - wq->tail; | ||
1024 | if (likely(cur + nreq < wq->max)) | ||
1025 | return 0; | ||
1026 | |||
1027 | cq = to_mcq(ib_cq); | ||
1028 | spin_lock(&cq->lock); | ||
1029 | cur = wq->head - wq->tail; | ||
1030 | spin_unlock(&cq->lock); | ||
1031 | |||
1032 | return cur + nreq >= wq->max; | ||
1033 | } | ||
1034 | |||
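mlx4_wq_overflow() relies on head and tail being free-running unsigned counters: head - tail is the number of outstanding WQEs even after the counters wrap, because unsigned subtraction is modular. The CQ lock is taken only on the slow path so that a racing completion poll which advances the tail is observed before failing the post. A two-line demonstration of the wraparound property:

	#include <stdio.h>

	int main(void)
	{
		unsigned int head = 5;			/* has wrapped past UINT_MAX */
		unsigned int tail = 0xfffffffeu;	/* has not wrapped yet */

		printf("outstanding WQEs = %u\n", head - tail);	/* prints 7 */
		return 0;
	}
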
1035 | int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, | ||
1036 | struct ib_send_wr **bad_wr) | ||
1037 | { | ||
1038 | struct mlx4_ib_qp *qp = to_mqp(ibqp); | ||
1039 | void *wqe; | ||
1040 | struct mlx4_wqe_ctrl_seg *ctrl; | ||
1041 | unsigned long flags; | ||
1042 | int nreq; | ||
1043 | int err = 0; | ||
1044 | int ind; | ||
1045 | int size; | ||
1046 | int i; | ||
1047 | |||
1048 | spin_lock_irqsave(&qp->rq.lock, flags); | ||
1049 | |||
1050 | ind = qp->sq.head; | ||
1051 | |||
1052 | for (nreq = 0; wr; ++nreq, wr = wr->next) { | ||
1053 | if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { | ||
1054 | err = -ENOMEM; | ||
1055 | *bad_wr = wr; | ||
1056 | goto out; | ||
1057 | } | ||
1058 | |||
1059 | if (unlikely(wr->num_sge > qp->sq.max_gs)) { | ||
1060 | err = -EINVAL; | ||
1061 | *bad_wr = wr; | ||
1062 | goto out; | ||
1063 | } | ||
1064 | |||
1065 | ctrl = wqe = get_send_wqe(qp, ind & (qp->sq.max - 1)); | ||
1066 | qp->sq.wrid[ind & (qp->sq.max - 1)] = wr->wr_id; | ||
1067 | |||
1068 | ctrl->srcrb_flags = | ||
1069 | (wr->send_flags & IB_SEND_SIGNALED ? | ||
1070 | cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) | | ||
1071 | (wr->send_flags & IB_SEND_SOLICITED ? | ||
1072 | cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) | | ||
1073 | qp->sq_signal_bits; | ||
1074 | |||
1075 | if (wr->opcode == IB_WR_SEND_WITH_IMM || | ||
1076 | wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) | ||
1077 | ctrl->imm = wr->imm_data; | ||
1078 | else | ||
1079 | ctrl->imm = 0; | ||
1080 | |||
1081 | wqe += sizeof *ctrl; | ||
1082 | size = sizeof *ctrl / 16; | ||
1083 | |||
1084 | switch (ibqp->qp_type) { | ||
1085 | case IB_QPT_RC: | ||
1086 | case IB_QPT_UC: | ||
1087 | switch (wr->opcode) { | ||
1088 | case IB_WR_ATOMIC_CMP_AND_SWP: | ||
1089 | case IB_WR_ATOMIC_FETCH_AND_ADD: | ||
1090 | ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = | ||
1091 | cpu_to_be64(wr->wr.atomic.remote_addr); | ||
1092 | ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = | ||
1093 | cpu_to_be32(wr->wr.atomic.rkey); | ||
1094 | ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; | ||
1095 | |||
1096 | wqe += sizeof (struct mlx4_wqe_raddr_seg); | ||
1097 | |||
1098 | if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) { | ||
1099 | ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = | ||
1100 | cpu_to_be64(wr->wr.atomic.swap); | ||
1101 | ((struct mlx4_wqe_atomic_seg *) wqe)->compare = | ||
1102 | cpu_to_be64(wr->wr.atomic.compare_add); | ||
1103 | } else { | ||
1104 | ((struct mlx4_wqe_atomic_seg *) wqe)->swap_add = | ||
1105 | cpu_to_be64(wr->wr.atomic.compare_add); | ||
1106 | ((struct mlx4_wqe_atomic_seg *) wqe)->compare = 0; | ||
1107 | } | ||
1108 | |||
1109 | wqe += sizeof (struct mlx4_wqe_atomic_seg); | ||
1110 | size += (sizeof (struct mlx4_wqe_raddr_seg) + | ||
1111 | sizeof (struct mlx4_wqe_atomic_seg)) / 16; | ||
1112 | |||
1113 | break; | ||
1114 | |||
1115 | case IB_WR_RDMA_READ: | ||
1116 | case IB_WR_RDMA_WRITE: | ||
1117 | case IB_WR_RDMA_WRITE_WITH_IMM: | ||
1118 | ((struct mlx4_wqe_raddr_seg *) wqe)->raddr = | ||
1119 | cpu_to_be64(wr->wr.rdma.remote_addr); | ||
1120 | ((struct mlx4_wqe_raddr_seg *) wqe)->rkey = | ||
1121 | cpu_to_be32(wr->wr.rdma.rkey); | ||
1122 | ((struct mlx4_wqe_raddr_seg *) wqe)->reserved = 0; | ||
1123 | |||
1124 | wqe += sizeof (struct mlx4_wqe_raddr_seg); | ||
1125 | size += sizeof (struct mlx4_wqe_raddr_seg) / 16; | ||
1126 | |||
1127 | break; | ||
1128 | |||
1129 | default: | ||
1130 | /* No extra segments required for sends */ | ||
1131 | break; | ||
1132 | } | ||
1133 | break; | ||
1134 | |||
1135 | case IB_QPT_UD: | ||
1136 | memcpy(((struct mlx4_wqe_datagram_seg *) wqe)->av, | ||
1137 | &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); | ||
1138 | ((struct mlx4_wqe_datagram_seg *) wqe)->dqpn = | ||
1139 | cpu_to_be32(wr->wr.ud.remote_qpn); | ||
1140 | ((struct mlx4_wqe_datagram_seg *) wqe)->qkey = | ||
1141 | cpu_to_be32(wr->wr.ud.remote_qkey); | ||
1142 | |||
1143 | wqe += sizeof (struct mlx4_wqe_datagram_seg); | ||
1144 | size += sizeof (struct mlx4_wqe_datagram_seg) / 16; | ||
1145 | break; | ||
1146 | |||
1147 | case IB_QPT_SMI: | ||
1148 | case IB_QPT_GSI: | ||
1149 | err = build_mlx_header(to_msqp(qp), wr, ctrl); | ||
1150 | if (err < 0) { | ||
1151 | *bad_wr = wr; | ||
1152 | goto out; | ||
1153 | } | ||
1154 | wqe += err; | ||
1155 | size += err / 16; | ||
1156 | |||
1157 | err = 0; | ||
1158 | break; | ||
1159 | |||
1160 | default: | ||
1161 | break; | ||
1162 | } | ||
1163 | |||
1164 | for (i = 0; i < wr->num_sge; ++i) { | ||
1165 | ((struct mlx4_wqe_data_seg *) wqe)->byte_count = | ||
1166 | cpu_to_be32(wr->sg_list[i].length); | ||
1167 | ((struct mlx4_wqe_data_seg *) wqe)->lkey = | ||
1168 | cpu_to_be32(wr->sg_list[i].lkey); | ||
1169 | ((struct mlx4_wqe_data_seg *) wqe)->addr = | ||
1170 | cpu_to_be64(wr->sg_list[i].addr); | ||
1171 | |||
1172 | wqe += sizeof (struct mlx4_wqe_data_seg); | ||
1173 | size += sizeof (struct mlx4_wqe_data_seg) / 16; | ||
1174 | } | ||
1175 | |||
1176 | /* Add an extra inline data segment to hold the ICRC for MLX sends */ | ||
1177 | if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) { | ||
1178 | ((struct mlx4_wqe_inline_seg *) wqe)->byte_count = | ||
1179 | cpu_to_be32((1 << 31) | 4); | ||
1180 | ((u32 *) wqe)[1] = 0; | ||
1181 | wqe += sizeof (struct mlx4_wqe_data_seg); | ||
1182 | size += sizeof (struct mlx4_wqe_data_seg) / 16; | ||
1183 | } | ||
1184 | |||
1185 | ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? | ||
1186 | MLX4_WQE_CTRL_FENCE : 0) | size; | ||
1187 | |||
1188 | /* | ||
1189 | * Make sure descriptor is fully written before | ||
1190 | * setting ownership bit (because HW can start | ||
1191 | * executing as soon as we do). | ||
1192 | */ | ||
1193 | wmb(); | ||
1194 | |||
1195 | if (unlikely(wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode))) { | ||
1196 | err = -EINVAL; | ||
1197 | goto out; | ||
1198 | } | ||
1199 | |||
1200 | ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] | | ||
1201 | (ind & qp->sq.max ? cpu_to_be32(1 << 31) : 0); | ||
1202 | |||
1203 | ++ind; | ||
1204 | } | ||
1205 | |||
1206 | out: | ||
1207 | if (likely(nreq)) { | ||
1208 | qp->sq.head += nreq; | ||
1209 | |||
1210 | /* | ||
1211 | * Make sure that descriptors are written before | ||
1212 | * doorbell record. | ||
1213 | */ | ||
1214 | wmb(); | ||
1215 | |||
1216 | writel(qp->doorbell_qpn, | ||
1217 | to_mdev(ibqp->device)->uar_map + MLX4_SEND_DOORBELL); | ||
1218 | |||
1219 | /* | ||
1220 | * Make sure doorbells don't leak out of SQ spinlock | ||
1221 | * and reach the HCA out of order. | ||
1222 | */ | ||
1223 | mmiowb(); | ||
1224 | } | ||
1225 | |||
1226 | spin_unlock_irqrestore(&qp->rq.lock, flags); | ||
1227 | |||
1228 | return err; | ||
1229 | } | ||
1230 | |||
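The owner_opcode assignment above derives bit 31 from ind & qp->sq.max: because sq.max is a power of two, that bit flips each time the free-running index completes a pass through the ring, which lets the HCA distinguish a freshly written descriptor from a stale one left over from the previous lap. A sketch of the alternation:

	#include <stdio.h>

	int main(void)
	{
		unsigned int max = 8;	/* ring size, a power of two */
		unsigned int ind;

		for (ind = 0; ind < 3 * max; ++ind)
			printf("ind %2u -> slot %u, owner bit %u\n",
			       ind, ind & (max - 1), !!(ind & max));
		return 0;
	}
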
1231 | int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, | ||
1232 | struct ib_recv_wr **bad_wr) | ||
1233 | { | ||
1234 | struct mlx4_ib_qp *qp = to_mqp(ibqp); | ||
1235 | struct mlx4_wqe_data_seg *scat; | ||
1236 | unsigned long flags; | ||
1237 | int err = 0; | ||
1238 | int nreq; | ||
1239 | int ind; | ||
1240 | int i; | ||
1241 | |||
1242 | spin_lock_irqsave(&qp->rq.lock, flags); | ||
1243 | |||
1244 | ind = qp->rq.head & (qp->rq.max - 1); | ||
1245 | |||
1246 | for (nreq = 0; wr; ++nreq, wr = wr->next) { | ||
1247 | if (mlx4_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) { | ||
1248 | err = -ENOMEM; | ||
1249 | *bad_wr = wr; | ||
1250 | goto out; | ||
1251 | } | ||
1252 | |||
1253 | if (unlikely(wr->num_sge > qp->rq.max_gs)) { | ||
1254 | err = -EINVAL; | ||
1255 | *bad_wr = wr; | ||
1256 | goto out; | ||
1257 | } | ||
1258 | |||
1259 | scat = get_recv_wqe(qp, ind); | ||
1260 | |||
1261 | for (i = 0; i < wr->num_sge; ++i) { | ||
1262 | scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); | ||
1263 | scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); | ||
1264 | scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); | ||
1265 | } | ||
1266 | |||
1267 | if (i < qp->rq.max_gs) { | ||
1268 | scat[i].byte_count = 0; | ||
1269 | scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); | ||
1270 | scat[i].addr = 0; | ||
1271 | } | ||
1272 | |||
1273 | qp->rq.wrid[ind] = wr->wr_id; | ||
1274 | |||
1275 | ind = (ind + 1) & (qp->rq.max - 1); | ||
1276 | } | ||
1277 | |||
1278 | out: | ||
1279 | if (likely(nreq)) { | ||
1280 | qp->rq.head += nreq; | ||
1281 | |||
1282 | /* | ||
1283 | * Make sure that descriptors are written before | ||
1284 | * doorbell record. | ||
1285 | */ | ||
1286 | wmb(); | ||
1287 | |||
1288 | *qp->db.db = cpu_to_be32(qp->rq.head & 0xffff); | ||
1289 | } | ||
1290 | |||
1291 | spin_unlock_irqrestore(&qp->rq.lock, flags); | ||
1292 | |||
1293 | return err; | ||
1294 | } | ||
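The scatter list written just above is terminated, whenever it is shorter than max_gs, by a zero-length entry whose lkey is MLX4_INVALID_LKEY, so the HCA stops parsing entries without needing an explicit count. A sketch of that pattern (the struct shape and the sentinel value are illustrative stand-ins):

	#include <stdint.h>

	#define INVALID_LKEY_SKETCH 0x100	/* stand-in for MLX4_INVALID_LKEY */

	struct data_seg {			/* shape of mlx4_wqe_data_seg */
		uint32_t byte_count;
		uint32_t lkey;
		uint64_t addr;
	};

	/* Terminate a scatter list that used fewer than max_gs entries. */
	static void terminate_sg_list(struct data_seg *scat, int used, int max_gs)
	{
		if (used < max_gs) {
			scat[used].byte_count = 0;
			scat[used].lkey = INVALID_LKEY_SKETCH;	/* sentinel */
			scat[used].addr = 0;
		}
	}
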
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c new file mode 100644 index 000000000000..42ab4a801d6a --- /dev/null +++ b/drivers/infiniband/hw/mlx4/srq.c | |||
@@ -0,0 +1,334 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #include <linux/mlx4/qp.h> | ||
34 | #include <linux/mlx4/srq.h> | ||
35 | |||
36 | #include "mlx4_ib.h" | ||
37 | #include "user.h" | ||
38 | |||
39 | static void *get_wqe(struct mlx4_ib_srq *srq, int n) | ||
40 | { | ||
41 | int offset = n << srq->msrq.wqe_shift; | ||
42 | |||
43 | if (srq->buf.nbufs == 1) | ||
44 | return srq->buf.u.direct.buf + offset; | ||
45 | else | ||
46 | return srq->buf.u.page_list[offset >> PAGE_SHIFT].buf + | ||
47 | (offset & (PAGE_SIZE - 1)); | ||
48 | } | ||
49 | |||
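get_wqe() turns a WQE index into an address with offset = n << wqe_shift; when the queue buffer could not be allocated contiguously, that offset is split into a page index and an offset within the page. A sketch of the split, assuming 4 KB pages and 64-byte WQEs:

	#include <stdio.h>

	int main(void)
	{
		int wqe_shift = 6;		/* ilog2(64): 64-byte WQE stride */
		int n = 100;			/* WQE index */
		int offset = n << wqe_shift;	/* byte offset into the queue */

		printf("page %d, offset in page %d\n",
		       offset >> 12, offset & (4096 - 1));
		return 0;
	}
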
50 | static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type) | ||
51 | { | ||
52 | struct ib_event event; | ||
53 | struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq; | ||
54 | |||
55 | if (ibsrq->event_handler) { | ||
56 | event.device = ibsrq->device; | ||
57 | event.element.srq = ibsrq; | ||
58 | switch (type) { | ||
59 | case MLX4_EVENT_TYPE_SRQ_LIMIT: | ||
60 | event.event = IB_EVENT_SRQ_LIMIT_REACHED; | ||
61 | break; | ||
62 | case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: | ||
63 | event.event = IB_EVENT_SRQ_ERR; | ||
64 | break; | ||
65 | default: | ||
66 | printk(KERN_WARNING "mlx4_ib: Unexpected event type %d " | ||
67 | "on SRQ %06x\n", type, srq->srqn); | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | ibsrq->event_handler(&event, ibsrq->srq_context); | ||
72 | } | ||
73 | } | ||
74 | |||
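mlx4_ib_srq_event() translates the two hardware SRQ events into the ib_event codes a verbs consumer registered for. A sketch of the consumer side that would receive them (my_srq_event is a hypothetical handler, installed through ib_srq_init_attr's event_handler field):

	#include <linux/kernel.h>
	#include <rdma/ib_verbs.h>

	/* Hypothetical consumer handler for SRQ async events. */
	static void my_srq_event(struct ib_event *event, void *srq_context)
	{
		if (event->event == IB_EVENT_SRQ_LIMIT_REACHED)
			printk(KERN_INFO "SRQ low-water mark hit, repost buffers\n");
		else if (event->event == IB_EVENT_SRQ_ERR)
			printk(KERN_ERR "SRQ in catastrophic error state\n");
	}
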
75 | struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, | ||
76 | struct ib_srq_init_attr *init_attr, | ||
77 | struct ib_udata *udata) | ||
78 | { | ||
79 | struct mlx4_ib_dev *dev = to_mdev(pd->device); | ||
80 | struct mlx4_ib_srq *srq; | ||
81 | struct mlx4_wqe_srq_next_seg *next; | ||
82 | int desc_size; | ||
83 | int buf_size; | ||
84 | int err; | ||
85 | int i; | ||
86 | |||
87 | /* Sanity check SRQ size before proceeding */ | ||
88 | if (init_attr->attr.max_wr >= dev->dev->caps.max_srq_wqes || | ||
89 | init_attr->attr.max_sge > dev->dev->caps.max_srq_sge) | ||
90 | return ERR_PTR(-EINVAL); | ||
91 | |||
92 | srq = kmalloc(sizeof *srq, GFP_KERNEL); | ||
93 | if (!srq) | ||
94 | return ERR_PTR(-ENOMEM); | ||
95 | |||
96 | mutex_init(&srq->mutex); | ||
97 | spin_lock_init(&srq->lock); | ||
98 | srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1); | ||
99 | srq->msrq.max_gs = init_attr->attr.max_sge; | ||
100 | |||
101 | desc_size = max(32UL, | ||
102 | roundup_pow_of_two(sizeof (struct mlx4_wqe_srq_next_seg) + | ||
103 | srq->msrq.max_gs * | ||
104 | sizeof (struct mlx4_wqe_data_seg))); | ||
105 | srq->msrq.wqe_shift = ilog2(desc_size); | ||
106 | |||
107 | buf_size = srq->msrq.max * desc_size; | ||
108 | |||
109 | if (pd->uobject) { | ||
110 | struct mlx4_ib_create_srq ucmd; | ||
111 | |||
112 | if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { | ||
113 | err = -EFAULT; | ||
114 | goto err_srq; | ||
115 | } | ||
116 | |||
117 | srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, | ||
118 | buf_size, 0); | ||
119 | if (IS_ERR(srq->umem)) { | ||
120 | err = PTR_ERR(srq->umem); | ||
121 | goto err_srq; | ||
122 | } | ||
123 | |||
124 | err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), | ||
125 | ilog2(srq->umem->page_size), &srq->mtt); | ||
126 | if (err) | ||
127 | goto err_buf; | ||
128 | |||
129 | err = mlx4_ib_umem_write_mtt(dev, &srq->mtt, srq->umem); | ||
130 | if (err) | ||
131 | goto err_mtt; | ||
132 | |||
133 | err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), | ||
134 | ucmd.db_addr, &srq->db); | ||
135 | if (err) | ||
136 | goto err_mtt; | ||
137 | } else { | ||
138 | err = mlx4_ib_db_alloc(dev, &srq->db, 0); | ||
139 | if (err) | ||
140 | goto err_srq; | ||
141 | |||
142 | *srq->db.db = 0; | ||
143 | |||
144 | if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) { | ||
145 | err = -ENOMEM; | ||
146 | goto err_db; | ||
147 | } | ||
148 | |||
149 | srq->head = 0; | ||
150 | srq->tail = srq->msrq.max - 1; | ||
151 | srq->wqe_ctr = 0; | ||
152 | |||
153 | for (i = 0; i < srq->msrq.max; ++i) { | ||
154 | next = get_wqe(srq, i); | ||
155 | next->next_wqe_index = | ||
156 | cpu_to_be16((i + 1) & (srq->msrq.max - 1)); | ||
157 | } | ||
158 | |||
159 | err = mlx4_mtt_init(dev->dev, srq->buf.npages, srq->buf.page_shift, | ||
160 | &srq->mtt); | ||
161 | if (err) | ||
162 | goto err_buf; | ||
163 | |||
164 | err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf); | ||
165 | if (err) | ||
166 | goto err_mtt; | ||
167 | |||
168 | srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL); | ||
169 | if (!srq->wrid) { | ||
170 | err = -ENOMEM; | ||
171 | goto err_mtt; | ||
172 | } | ||
173 | } | ||
174 | |||
175 | err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt, | ||
176 | srq->db.dma, &srq->msrq); | ||
177 | if (err) | ||
178 | goto err_wrid; | ||
179 | |||
180 | srq->msrq.event = mlx4_ib_srq_event; | ||
181 | |||
182 | if (pd->uobject) | ||
183 | if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { | ||
184 | err = -EFAULT; | ||
185 | goto err_wrid; | ||
186 | } | ||
187 | |||
188 | init_attr->attr.max_wr = srq->msrq.max - 1; | ||
189 | |||
190 | return &srq->ibsrq; | ||
191 | |||
192 | err_wrid: | ||
193 | if (pd->uobject) | ||
194 | mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); | ||
195 | else | ||
196 | kfree(srq->wrid); | ||
197 | |||
198 | err_mtt: | ||
199 | mlx4_mtt_cleanup(dev->dev, &srq->mtt); | ||
200 | |||
201 | err_buf: | ||
202 | if (pd->uobject) | ||
203 | ib_umem_release(srq->umem); | ||
204 | else | ||
205 | mlx4_buf_free(dev->dev, buf_size, &srq->buf); | ||
206 | |||
207 | err_db: | ||
208 | if (!pd->uobject) | ||
209 | mlx4_ib_db_free(dev, &srq->db); | ||
210 | |||
211 | err_srq: | ||
212 | kfree(srq); | ||
213 | |||
214 | return ERR_PTR(err); | ||
215 | } | ||
216 | |||
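The sizing logic above keeps one WQE in reserve (max_wr + 1) and rounds the ring up to a power of two so that index masking and the wqe_shift addressing work; the attr.max_wr written back to the caller accounts for the reserved slot. A userspace sketch of the same computation, with a local stand-in for the kernel helper:

	#include <stdio.h>

	static unsigned int roundup_p2(unsigned int n)	/* like roundup_pow_of_two() */
	{
		unsigned int r = 1;

		while (r < n)
			r <<= 1;
		return r;
	}

	int main(void)
	{
		unsigned int max_wr = 100;		/* requested by the consumer */
		unsigned int max = roundup_p2(max_wr + 1);

		printf("ring holds %u WQEs, reported max_wr = %u\n", max, max - 1);
		return 0;
	}
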
217 | int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, | ||
218 | enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) | ||
219 | { | ||
220 | struct mlx4_ib_dev *dev = to_mdev(ibsrq->device); | ||
221 | struct mlx4_ib_srq *srq = to_msrq(ibsrq); | ||
222 | int ret; | ||
223 | |||
224 | /* We don't support resizing SRQs (yet?) */ | ||
225 | if (attr_mask & IB_SRQ_MAX_WR) | ||
226 | return -EINVAL; | ||
227 | |||
228 | if (attr_mask & IB_SRQ_LIMIT) { | ||
229 | if (attr->srq_limit >= srq->msrq.max) | ||
230 | return -EINVAL; | ||
231 | |||
232 | mutex_lock(&srq->mutex); | ||
233 | ret = mlx4_srq_arm(dev->dev, &srq->msrq, attr->srq_limit); | ||
234 | mutex_unlock(&srq->mutex); | ||
235 | |||
236 | if (ret) | ||
237 | return ret; | ||
238 | } | ||
239 | |||
240 | return 0; | ||
241 | } | ||
242 | |||
243 | int mlx4_ib_destroy_srq(struct ib_srq *srq) | ||
244 | { | ||
245 | struct mlx4_ib_dev *dev = to_mdev(srq->device); | ||
246 | struct mlx4_ib_srq *msrq = to_msrq(srq); | ||
247 | |||
248 | mlx4_srq_free(dev->dev, &msrq->msrq); | ||
249 | mlx4_mtt_cleanup(dev->dev, &msrq->mtt); | ||
250 | |||
251 | if (srq->uobject) { | ||
252 | mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); | ||
253 | ib_umem_release(msrq->umem); | ||
254 | } else { | ||
255 | kfree(msrq->wrid); | ||
256 | mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, | ||
257 | &msrq->buf); | ||
258 | mlx4_ib_db_free(dev, &msrq->db); | ||
259 | } | ||
260 | |||
261 | kfree(msrq); | ||
262 | |||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) | ||
267 | { | ||
268 | struct mlx4_wqe_srq_next_seg *next; | ||
269 | |||
270 | /* always called with interrupts disabled. */ | ||
271 | spin_lock(&srq->lock); | ||
272 | |||
273 | next = get_wqe(srq, srq->tail); | ||
274 | next->next_wqe_index = cpu_to_be16(wqe_index); | ||
275 | srq->tail = wqe_index; | ||
276 | |||
277 | spin_unlock(&srq->lock); | ||
278 | } | ||
279 | |||
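SRQ WQEs are chained into a free list through their next_wqe_index fields (initialized as a ring in mlx4_ib_create_srq); mlx4_ib_free_srq_wqe() splices a completed WQE back in behind the current tail. A sketch of the splice on a four-entry list:

	#include <stdio.h>

	#define NWQE 4

	int main(void)
	{
		int next[NWQE] = { 1, 2, 3, 0 };	/* initial ring, as in create_srq */
		int tail = NWQE - 1;
		int completed = 1;			/* index reported back by a CQE */

		next[tail] = completed;			/* link it in behind the tail */
		tail = completed;			/* it becomes the new tail */

		printf("tail is now WQE %d\n", tail);
		return 0;
	}
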
280 | int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, | ||
281 | struct ib_recv_wr **bad_wr) | ||
282 | { | ||
283 | struct mlx4_ib_srq *srq = to_msrq(ibsrq); | ||
284 | struct mlx4_wqe_srq_next_seg *next; | ||
285 | struct mlx4_wqe_data_seg *scat; | ||
286 | unsigned long flags; | ||
287 | int err = 0; | ||
288 | int nreq; | ||
289 | int i; | ||
290 | |||
291 | spin_lock_irqsave(&srq->lock, flags); | ||
292 | |||
293 | for (nreq = 0; wr; ++nreq, wr = wr->next) { | ||
294 | if (unlikely(wr->num_sge > srq->msrq.max_gs)) { | ||
295 | err = -EINVAL; | ||
296 | *bad_wr = wr; | ||
297 | break; | ||
298 | } | ||
299 | |||
300 | srq->wrid[srq->head] = wr->wr_id; | ||
301 | |||
302 | next = get_wqe(srq, srq->head); | ||
303 | srq->head = be16_to_cpu(next->next_wqe_index); | ||
304 | scat = (struct mlx4_wqe_data_seg *) (next + 1); | ||
305 | |||
306 | for (i = 0; i < wr->num_sge; ++i) { | ||
307 | scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length); | ||
308 | scat[i].lkey = cpu_to_be32(wr->sg_list[i].lkey); | ||
309 | scat[i].addr = cpu_to_be64(wr->sg_list[i].addr); | ||
310 | } | ||
311 | |||
312 | if (i < srq->msrq.max_gs) { | ||
313 | scat[i].byte_count = 0; | ||
314 | scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); | ||
315 | scat[i].addr = 0; | ||
316 | } | ||
317 | } | ||
318 | |||
319 | if (likely(nreq)) { | ||
320 | srq->wqe_ctr += nreq; | ||
321 | |||
322 | /* | ||
323 | * Make sure that descriptors are written before | ||
324 | * doorbell record. | ||
325 | */ | ||
326 | wmb(); | ||
327 | |||
328 | *srq->db.db = cpu_to_be32(srq->wqe_ctr); | ||
329 | } | ||
330 | |||
331 | spin_unlock_irqrestore(&srq->lock, flags); | ||
332 | |||
333 | return err; | ||
334 | } | ||
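All three post paths end with the same producer idiom: bump a software counter, issue wmb() so the descriptor stores are globally visible first, then publish the counter to a doorbell record in coherent memory that the HCA reads, instead of a per-WQE MMIO write. A generic sketch of that idiom (the names and the barrier stand-in are illustrative):

	#include <stdint.h>

	struct ring {
		uint32_t counter;		/* software producer counter */
		volatile uint32_t *db_rec;	/* doorbell record in DMA memory */
	};

	static inline void wmb_sketch(void)
	{
		__sync_synchronize();		/* stand-in for the kernel's wmb() */
	}

	static void publish(struct ring *r, int nreq)
	{
		r->counter += nreq;
		wmb_sketch();			/* descriptors before doorbell */
		*r->db_rec = r->counter & 0xffff;	/* HW compares low 16 bits */
	}
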
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h new file mode 100644 index 000000000000..5b8eddc9fa83 --- /dev/null +++ b/drivers/infiniband/hw/mlx4/user.h | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | */ | ||
32 | |||
33 | #ifndef MLX4_IB_USER_H | ||
34 | #define MLX4_IB_USER_H | ||
35 | |||
36 | #include <linux/types.h> | ||
37 | |||
38 | /* | ||
39 | * Increment this value if any changes that break userspace ABI | ||
40 | * compatibility are made. | ||
41 | */ | ||
42 | #define MLX4_IB_UVERBS_ABI_VERSION 1 | ||
43 | |||
44 | /* | ||
45 | * Make sure that all structs defined in this file remain laid out so | ||
46 | * that they pack the same way on 32-bit and 64-bit architectures (to | ||
47 | * avoid incompatibility between 32-bit userspace and 64-bit kernels). | ||
48 | * In particular do not use pointer types -- pass pointers in __u64 | ||
49 | * instead. | ||
50 | */ | ||
51 | |||
52 | struct mlx4_ib_alloc_ucontext_resp { | ||
53 | __u32 qp_tab_size; | ||
54 | __u16 bf_reg_size; | ||
55 | __u16 bf_regs_per_page; | ||
56 | }; | ||
57 | |||
58 | struct mlx4_ib_alloc_pd_resp { | ||
59 | __u32 pdn; | ||
60 | __u32 reserved; | ||
61 | }; | ||
62 | |||
63 | struct mlx4_ib_create_cq { | ||
64 | __u64 buf_addr; | ||
65 | __u64 db_addr; | ||
66 | }; | ||
67 | |||
68 | struct mlx4_ib_create_cq_resp { | ||
69 | __u32 cqn; | ||
70 | __u32 reserved; | ||
71 | }; | ||
72 | |||
73 | struct mlx4_ib_resize_cq { | ||
74 | __u64 buf_addr; | ||
75 | }; | ||
76 | |||
77 | struct mlx4_ib_create_srq { | ||
78 | __u64 buf_addr; | ||
79 | __u64 db_addr; | ||
80 | }; | ||
81 | |||
82 | struct mlx4_ib_create_srq_resp { | ||
83 | __u32 srqn; | ||
84 | __u32 reserved; | ||
85 | }; | ||
86 | |||
87 | struct mlx4_ib_create_qp { | ||
88 | __u64 buf_addr; | ||
89 | __u64 db_addr; | ||
90 | }; | ||
91 | |||
92 | #endif /* MLX4_IB_USER_H */ | ||
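Every struct above follows the layout rule stated in the header comment: only fixed-width __u32/__u64 members, pointers passed as __u64, and explicit reserved words padding each struct to a multiple of 8 bytes, so sizeof and member offsets agree between 32-bit userspace and a 64-bit kernel. A hypothetical command struct written to the same rule:

	#include <linux/types.h>

	struct example_create_foo {	/* hypothetical, for illustration only */
		__u64 buf_addr;		/* userspace pointer, cast to __u64 */
		__u32 flags;
		__u32 reserved;		/* explicit pad: sizeof is 16 everywhere */
	};
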
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 1c05486c3c68..6bcde1cb9688 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c | |||
@@ -37,6 +37,7 @@ | |||
37 | */ | 37 | */ |
38 | 38 | ||
39 | #include <rdma/ib_smi.h> | 39 | #include <rdma/ib_smi.h> |
40 | #include <rdma/ib_umem.h> | ||
40 | #include <rdma/ib_user_verbs.h> | 41 | #include <rdma/ib_user_verbs.h> |
41 | #include <linux/mm.h> | 42 | #include <linux/mm.h> |
42 | 43 | ||
@@ -908,6 +909,8 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc) | |||
908 | return ERR_PTR(err); | 909 | return ERR_PTR(err); |
909 | } | 910 | } |
910 | 911 | ||
912 | mr->umem = NULL; | ||
913 | |||
911 | return &mr->ibmr; | 914 | return &mr->ibmr; |
912 | } | 915 | } |
913 | 916 | ||
@@ -1003,11 +1006,13 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, | |||
1003 | } | 1006 | } |
1004 | 1007 | ||
1005 | kfree(page_list); | 1008 | kfree(page_list); |
1009 | mr->umem = NULL; | ||
1010 | |||
1006 | return &mr->ibmr; | 1011 | return &mr->ibmr; |
1007 | } | 1012 | } |
1008 | 1013 | ||
1009 | static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | 1014 | static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, |
1010 | int acc, struct ib_udata *udata) | 1015 | u64 virt, int acc, struct ib_udata *udata) |
1011 | { | 1016 | { |
1012 | struct mthca_dev *dev = to_mdev(pd->device); | 1017 | struct mthca_dev *dev = to_mdev(pd->device); |
1013 | struct ib_umem_chunk *chunk; | 1018 | struct ib_umem_chunk *chunk; |
@@ -1018,20 +1023,26 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | |||
1018 | int err = 0; | 1023 | int err = 0; |
1019 | int write_mtt_size; | 1024 | int write_mtt_size; |
1020 | 1025 | ||
1021 | shift = ffs(region->page_size) - 1; | ||
1022 | |||
1023 | mr = kmalloc(sizeof *mr, GFP_KERNEL); | 1026 | mr = kmalloc(sizeof *mr, GFP_KERNEL); |
1024 | if (!mr) | 1027 | if (!mr) |
1025 | return ERR_PTR(-ENOMEM); | 1028 | return ERR_PTR(-ENOMEM); |
1026 | 1029 | ||
1030 | mr->umem = ib_umem_get(pd->uobject->context, start, length, acc); | ||
1031 | if (IS_ERR(mr->umem)) { | ||
1032 | err = PTR_ERR(mr->umem); | ||
1033 | goto err; | ||
1034 | } | ||
1035 | |||
1036 | shift = ffs(mr->umem->page_size) - 1; | ||
1037 | |||
1027 | n = 0; | 1038 | n = 0; |
1028 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 1039 | list_for_each_entry(chunk, &mr->umem->chunk_list, list) |
1029 | n += chunk->nents; | 1040 | n += chunk->nents; |
1030 | 1041 | ||
1031 | mr->mtt = mthca_alloc_mtt(dev, n); | 1042 | mr->mtt = mthca_alloc_mtt(dev, n); |
1032 | if (IS_ERR(mr->mtt)) { | 1043 | if (IS_ERR(mr->mtt)) { |
1033 | err = PTR_ERR(mr->mtt); | 1044 | err = PTR_ERR(mr->mtt); |
1034 | goto err; | 1045 | goto err_umem; |
1035 | } | 1046 | } |
1036 | 1047 | ||
1037 | pages = (u64 *) __get_free_page(GFP_KERNEL); | 1048 | pages = (u64 *) __get_free_page(GFP_KERNEL); |
@@ -1044,12 +1055,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, | |||
1044 | 1055 | ||
1045 | write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); | 1056 | write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); |
1046 | 1057 | ||
1047 | list_for_each_entry(chunk, ®ion->chunk_list, list) | 1058 | list_for_each_entry(chunk, &mr->umem->chunk_list, list) |
1048 | for (j = 0; j < chunk->nmap; ++j) { | 1059 | for (j = 0; j < chunk->nmap; ++j) { |
1049 | len = sg_dma_len(&chunk->page_list[j]) >> shift; | 1060 | len = sg_dma_len(&chunk->page_list[j]) >> shift; |
1050 | for (k = 0; k < len; ++k) { | 1061 | for (k = 0; k < len; ++k) { |
1051 | pages[i++] = sg_dma_address(&chunk->page_list[j]) + | 1062 | pages[i++] = sg_dma_address(&chunk->page_list[j]) + |
1052 | region->page_size * k; | 1063 | mr->umem->page_size * k; |
1053 | /* | 1064 | /* |
1054 | * Be friendly to write_mtt and pass it chunks | 1065 | * Be friendly to write_mtt and pass it chunks |
1055 | * of appropriate size. | 1066 | * of appropriate size. |
@@ -1071,8 +1082,8 @@ mtt_done: | |||
1071 | if (err) | 1082 | if (err) |
1072 | goto err_mtt; | 1083 | goto err_mtt; |
1073 | 1084 | ||
1074 | err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, | 1085 | err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, virt, length, |
1075 | region->length, convert_access(acc), mr); | 1086 | convert_access(acc), mr); |
1076 | 1087 | ||
1077 | if (err) | 1088 | if (err) |
1078 | goto err_mtt; | 1089 | goto err_mtt; |
@@ -1082,6 +1093,9 @@ mtt_done: | |||
1082 | err_mtt: | 1093 | err_mtt: |
1083 | mthca_free_mtt(dev, mr->mtt); | 1094 | mthca_free_mtt(dev, mr->mtt); |
1084 | 1095 | ||
1096 | err_umem: | ||
1097 | ib_umem_release(mr->umem); | ||
1098 | |||
1085 | err: | 1099 | err: |
1086 | kfree(mr); | 1100 | kfree(mr); |
1087 | return ERR_PTR(err); | 1101 | return ERR_PTR(err); |
@@ -1090,8 +1104,12 @@ err: | |||
1090 | static int mthca_dereg_mr(struct ib_mr *mr) | 1104 | static int mthca_dereg_mr(struct ib_mr *mr) |
1091 | { | 1105 | { |
1092 | struct mthca_mr *mmr = to_mmr(mr); | 1106 | struct mthca_mr *mmr = to_mmr(mr); |
1107 | |||
1093 | mthca_free_mr(to_mdev(mr->device), mmr); | 1108 | mthca_free_mr(to_mdev(mr->device), mmr); |
1109 | if (mmr->umem) | ||
1110 | ib_umem_release(mmr->umem); | ||
1094 | kfree(mmr); | 1111 | kfree(mmr); |
1112 | |||
1095 | return 0; | 1113 | return 0; |
1096 | } | 1114 | } |
1097 | 1115 | ||
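The mthca hunks above move memory pinning from the uverbs core into the driver: reg_user_mr now takes (start, length, virt, acc) and calls ib_umem_get() itself, while the DMA and phys MR paths set umem to NULL so that mthca_dereg_mr() knows there is nothing to release. A sketch of the resulting ownership pattern (my_mr and my_dereg_mr are illustrative names, not part of the patch):

	#include <linux/slab.h>
	#include <rdma/ib_umem.h>

	struct my_mr {				/* illustrative, like mthca_mr */
		struct ib_mr ibmr;
		struct ib_umem *umem;		/* NULL unless reg_user_mr pinned pages */
	};

	static int my_dereg_mr(struct my_mr *mr)
	{
		/* Free HW state first (elided), then drop the pinned user pages. */
		if (mr->umem)
			ib_umem_release(mr->umem);
		kfree(mr);
		return 0;
	}
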
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 1d266ac2e094..262616c8ebb6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h | |||
@@ -73,6 +73,7 @@ struct mthca_mtt; | |||
73 | 73 | ||
74 | struct mthca_mr { | 74 | struct mthca_mr { |
75 | struct ib_mr ibmr; | 75 | struct ib_mr ibmr; |
76 | struct ib_umem *umem; | ||
76 | struct mthca_mtt *mtt; | 77 | struct mthca_mtt *mtt; |
77 | }; | 78 | }; |
78 | 79 | ||