Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c            |    2
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c            |   64
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.c       |  250
-rw-r--r--  drivers/infiniband/hw/ehca/ehca_irq.h       |    6
-rw-r--r--  drivers/infiniband/hw/mlx4/Makefile         |    2
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c     |  688
-rw-r--r--  drivers/infiniband/hw/mlx4/cm.c             |  437
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c             |   31
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c            | 1573
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c           |  273
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c            | 1254
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h        |  341
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c             |  660
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c          |  794
-rw-r--r--  drivers/infiniband/hw/nes/nes.h             |   15
-rw-r--r--  drivers/infiniband/hw/nes/nes_cm.c          |   34
-rw-r--r--  drivers/infiniband/hw/nes/nes_hw.c          |   10
-rw-r--r--  drivers/infiniband/hw/nes/nes_nic.c         |   45
-rw-r--r--  drivers/infiniband/hw/nes/nes_utils.c       |    2
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c       |   14
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c |    8
-rw-r--r--  drivers/infiniband/hw/qib/qib.h             |    2
-rw-r--r--  drivers/infiniband/hw/qib/qib_common.h      |   14
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c      |    3
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c          |    4
-rw-r--r--  drivers/infiniband/hw/qib/qib_keys.c        |    5
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c         |    3
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c        |    2
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c       |    2
29 files changed, 6125 insertions, 413 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 51f42061dae9..6cfd4d8fd0bd 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1361,11 +1361,11 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	struct tid_info *t = dev->rdev.lldi.tids;
 
 	ep = lookup_tid(t, tid);
-	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	if (!ep) {
 		printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
 		return 0;
 	}
+	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
 	case ABORTING:
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 45aedf1d9338..05bfe53bff64 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -137,19 +137,25 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		return -ENOMEM;
 
 	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
-	if (!wq->rq.qid)
-		goto err1;
+	if (!wq->rq.qid) {
+		ret = -ENOMEM;
+		goto free_sq_qid;
+	}
 
 	if (!user) {
 		wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
 				 GFP_KERNEL);
-		if (!wq->sq.sw_sq)
-			goto err2;
+		if (!wq->sq.sw_sq) {
+			ret = -ENOMEM;
+			goto free_rq_qid;
+		}
 
 		wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
 				 GFP_KERNEL);
-		if (!wq->rq.sw_rq)
-			goto err3;
+		if (!wq->rq.sw_rq) {
+			ret = -ENOMEM;
+			goto free_sw_sq;
+		}
 	}
 
 	/*
@@ -157,15 +163,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	 */
 	wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
 	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
-	if (!wq->rq.rqt_hwaddr)
-		goto err4;
+	if (!wq->rq.rqt_hwaddr) {
+		ret = -ENOMEM;
+		goto free_sw_rq;
+	}
 
 	if (user) {
-		if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq))
-			goto err5;
+		ret = alloc_oc_sq(rdev, &wq->sq);
+		if (ret)
+			goto free_hwaddr;
+
+		ret = alloc_host_sq(rdev, &wq->sq);
+		if (ret)
+			goto free_sq;
 	} else
-		if (alloc_host_sq(rdev, &wq->sq))
-			goto err5;
+		ret = alloc_host_sq(rdev, &wq->sq);
+	if (ret)
+		goto free_hwaddr;
 	memset(wq->sq.queue, 0, wq->sq.memsize);
 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
 
@@ -173,7 +187,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 				    wq->rq.memsize, &(wq->rq.dma_addr),
 				    GFP_KERNEL);
 	if (!wq->rq.queue)
-		goto err6;
+		goto free_sq;
 	PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
 		__func__, wq->sq.queue,
 		(unsigned long long)virt_to_phys(wq->sq.queue),
@@ -201,7 +215,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
-		goto err7;
+		goto free_dma;
 	}
 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
 
@@ -266,33 +280,33 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	ret = c4iw_ofld_send(rdev, skb);
 	if (ret)
-		goto err7;
+		goto free_dma;
 	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
 	if (ret)
-		goto err7;
+		goto free_dma;
 
 	PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
 	     __func__, wq->sq.qid, wq->rq.qid, wq->db,
 	     (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
 
 	return 0;
-err7:
+free_dma:
 	dma_free_coherent(&(rdev->lldi.pdev->dev),
 			  wq->rq.memsize, wq->rq.queue,
 			  dma_unmap_addr(&wq->rq, mapping));
-err6:
+free_sq:
 	dealloc_sq(rdev, &wq->sq);
-err5:
+free_hwaddr:
 	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
-err4:
+free_sw_rq:
 	kfree(wq->rq.sw_rq);
-err3:
+free_sw_sq:
 	kfree(wq->sq.sw_sq);
-err2:
+free_rq_qid:
 	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
-err1:
+free_sq_qid:
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
-	return -ENOMEM;
+	return ret;
 }
 
 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
@@ -1155,7 +1169,7 @@ static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
 	 */
 	if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
 	    (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
-		writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
+		writel(QID(qid) | PIDX(inc), qhp->wq.db);
 		break;
 	}
 	set_current_state(TASK_UNINTERRUPTIBLE);
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 53589000fd07..8615d7cf7e01 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -42,6 +42,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/smpboot.h>
 
 #include "ehca_classes.h"
 #include "ehca_irq.h"
@@ -652,7 +653,7 @@ void ehca_tasklet_eq(unsigned long data)
 	ehca_process_eq((struct ehca_shca*)data, 1);
 }
 
-static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
+static int find_next_online_cpu(struct ehca_comp_pool *pool)
 {
 	int cpu;
 	unsigned long flags;
@@ -662,17 +663,20 @@ static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
 		ehca_dmp(cpu_online_mask, cpumask_size(), "");
 
 	spin_lock_irqsave(&pool->last_cpu_lock, flags);
-	cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
-	if (cpu >= nr_cpu_ids)
-		cpu = cpumask_first(cpu_online_mask);
-	pool->last_cpu = cpu;
+	do {
+		cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_first(cpu_online_mask);
+		pool->last_cpu = cpu;
+	} while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
 	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
 	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
-			      struct ehca_cpu_comp_task *cct)
+			      struct ehca_cpu_comp_task *cct,
+			      struct task_struct *thread)
 {
 	unsigned long flags;
 
@@ -683,7 +687,7 @@ static void __queue_comp_task(struct ehca_cq *__cq,
 		__cq->nr_callbacks++;
 		list_add_tail(&__cq->entry, &cct->cq_list);
 		cct->cq_jobs++;
-		wake_up(&cct->wait_queue);
+		wake_up_process(thread);
 	} else
 		__cq->nr_callbacks++;
 
@@ -695,6 +699,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
 {
 	int cpu_id;
 	struct ehca_cpu_comp_task *cct;
+	struct task_struct *thread;
 	int cq_jobs;
 	unsigned long flags;
 
@@ -702,7 +707,8 @@ static void queue_comp_task(struct ehca_cq *__cq)
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-	BUG_ON(!cct);
+	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+	BUG_ON(!cct || !thread);
 
 	spin_lock_irqsave(&cct->task_lock, flags);
 	cq_jobs = cct->cq_jobs;
@@ -710,28 +716,25 @@ static void queue_comp_task(struct ehca_cq *__cq)
 	if (cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
-		BUG_ON(!cct);
+		thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id);
+		BUG_ON(!cct || !thread);
 	}
-
-	__queue_comp_task(__cq, cct);
+	__queue_comp_task(__cq, cct, thread);
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task *cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags);
+		spin_unlock_irq(&cct->task_lock);
 
 		comp_event_callback(cq);
 		if (atomic_dec_and_test(&cq->nr_events))
 			wake_up(&cq->wait_completion);
 
-		spin_lock_irqsave(&cct->task_lock, flags);
+		spin_lock_irq(&cct->task_lock);
 		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
 		if (!cq->nr_callbacks) {
@@ -740,159 +743,76 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct)
 		}
 		spin_unlock(&cq->task_lock);
 	}
-
-	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
-static int comp_task(void *__cct)
+static void comp_task_park(unsigned int cpu)
 {
-	struct ehca_cpu_comp_task *cct = __cct;
-	int cql_empty;
-	DECLARE_WAITQUEUE(wait, current);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		add_wait_queue(&cct->wait_queue, &wait);
-
-		spin_lock_irq(&cct->task_lock);
-		cql_empty = list_empty(&cct->cq_list);
-		spin_unlock_irq(&cct->task_lock);
-		if (cql_empty)
-			schedule();
-		else
-			__set_current_state(TASK_RUNNING);
-
-		remove_wait_queue(&cct->wait_queue, &wait);
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	struct ehca_cpu_comp_task *target;
+	struct task_struct *thread;
+	struct ehca_cq *cq, *tmp;
+	LIST_HEAD(list);
 
 	spin_lock_irq(&cct->task_lock);
-	cql_empty = list_empty(&cct->cq_list);
-	spin_unlock_irq(&cct->task_lock);
-	if (!cql_empty)
-		run_comp_task(__cct);
+	cct->cq_jobs = 0;
+	cct->active = 0;
+	list_splice_init(&cct->cq_list, &list);
+	spin_unlock_irq(&cct->task_lock);
 
-		set_current_state(TASK_INTERRUPTIBLE);
+	cpu = find_next_online_cpu(pool);
+	target = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+	thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu);
+	spin_lock_irq(&target->task_lock);
+	list_for_each_entry_safe(cq, tmp, &list, entry) {
+		list_del(&cq->entry);
+		__queue_comp_task(cq, target, thread);
 	}
-	__set_current_state(TASK_RUNNING);
-
-	return 0;
-}
-
-static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
-					    int cpu)
-{
-	struct ehca_cpu_comp_task *cct;
-
-	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	spin_lock_init(&cct->task_lock);
-	INIT_LIST_HEAD(&cct->cq_list);
-	init_waitqueue_head(&cct->wait_queue);
-	cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu),
-					   "ehca_comp/%d", cpu);
-
-	return cct->task;
+	spin_unlock_irq(&target->task_lock);
 }
 
-static void destroy_comp_task(struct ehca_comp_pool *pool,
-			      int cpu)
+static void comp_task_stop(unsigned int cpu, bool online)
 {
-	struct ehca_cpu_comp_task *cct;
-	struct task_struct *task;
-	unsigned long flags_cct;
-
-	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
 
-	task = cct->task;
-	cct->task = NULL;
+	spin_lock_irq(&cct->task_lock);
 	cct->cq_jobs = 0;
-
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	if (task)
-		kthread_stop(task);
+	cct->active = 0;
+	WARN_ON(!list_empty(&cct->cq_list));
+	spin_unlock_irq(&cct->task_lock);
 }
 
-static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
+static int comp_task_should_run(unsigned int cpu)
 {
 	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-	LIST_HEAD(list);
-	struct ehca_cq *cq;
-	unsigned long flags_cct;
-
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-
-	list_splice_init(&cct->cq_list, &list);
-
-	while (!list_empty(&list)) {
-		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-
-		list_del(&cq->entry);
-		__queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));
-	}
-
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
 
+	return cct->cq_jobs;
 }
 
-static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
-					unsigned long action,
-					void *hcpu)
+static void comp_task(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
-	struct ehca_cpu_comp_task *cct;
+	struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks);
+	int cql_empty;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
-		if (!create_comp_task(pool, cpu)) {
-			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
-			return notifier_from_errno(-ENOMEM);
-		}
-		break;
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-		kthread_bind(cct->task, cpumask_any(cpu_online_mask));
-		destroy_comp_task(pool, cpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
-		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
-		kthread_bind(cct->task, cpu);
-		wake_up_process(cct->task);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
-		break;
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
-		destroy_comp_task(pool, cpu);
-		take_over_work(pool, cpu);
-		break;
+	spin_lock_irq(&cct->task_lock);
+	cql_empty = list_empty(&cct->cq_list);
+	if (!cql_empty) {
+		__set_current_state(TASK_RUNNING);
+		run_comp_task(cct);
 	}
-
-	return NOTIFY_OK;
+	spin_unlock_irq(&cct->task_lock);
 }
 
-static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
-	.notifier_call	= comp_pool_callback,
-	.priority	= 0,
+static struct smp_hotplug_thread comp_pool_threads = {
	.thread_should_run	= comp_task_should_run,
	.thread_fn		= comp_task,
	.thread_comm		= "ehca_comp/%u",
	.cleanup		= comp_task_stop,
	.park			= comp_task_park,
 };
 
 int ehca_create_comp_pool(void)
 {
-	int cpu;
-	struct task_struct *task;
+	int cpu, ret = -ENOMEM;
 
 	if (!ehca_scaling_code)
 		return 0;
@@ -905,38 +825,46 @@ int ehca_create_comp_pool(void)
 	pool->last_cpu = cpumask_any(cpu_online_mask);
 
 	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
-	if (pool->cpu_comp_tasks == NULL) {
-		kfree(pool);
-		return -EINVAL;
-	}
+	if (!pool->cpu_comp_tasks)
+		goto out_pool;
 
-	for_each_online_cpu(cpu) {
-		task = create_comp_task(pool, cpu);
-		if (task) {
-			kthread_bind(task, cpu);
-			wake_up_process(task);
-		}
+	pool->cpu_comp_threads = alloc_percpu(struct task_struct *);
+	if (!pool->cpu_comp_threads)
+		goto out_tasks;
+
+	for_each_present_cpu(cpu) {
+		struct ehca_cpu_comp_task *cct;
+
+		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
+		spin_lock_init(&cct->task_lock);
+		INIT_LIST_HEAD(&cct->cq_list);
 	}
 
-	register_hotcpu_notifier(&comp_pool_callback_nb);
+	comp_pool_threads.store = pool->cpu_comp_threads;
+	ret = smpboot_register_percpu_thread(&comp_pool_threads);
+	if (ret)
+		goto out_threads;
 
-	printk(KERN_INFO "eHCA scaling code enabled\n");
+	pr_info("eHCA scaling code enabled\n");
+	return ret;
 
-	return 0;
+out_threads:
+	free_percpu(pool->cpu_comp_threads);
+out_tasks:
+	free_percpu(pool->cpu_comp_tasks);
+out_pool:
+	kfree(pool);
+	return ret;
 }
 
 void ehca_destroy_comp_pool(void)
 {
-	int i;
-
 	if (!ehca_scaling_code)
 		return;
 
-	unregister_hotcpu_notifier(&comp_pool_callback_nb);
-
-	for_each_online_cpu(i)
-		destroy_comp_task(pool, i);
+	smpboot_unregister_percpu_thread(&comp_pool_threads);
 
+	free_percpu(pool->cpu_comp_threads);
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
 }
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 3346cb06cea6..5370199f08c7 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data);
 void ehca_process_eq(struct ehca_shca *shca, int is_irq);
 
 struct ehca_cpu_comp_task {
-	wait_queue_head_t wait_queue;
 	struct list_head cq_list;
-	struct task_struct *task;
 	spinlock_t task_lock;
 	int cq_jobs;
+	int active;
 };
 
 struct ehca_comp_pool {
-	struct ehca_cpu_comp_task *cpu_comp_tasks;
+	struct ehca_cpu_comp_task __percpu *cpu_comp_tasks;
+	struct task_struct * __percpu *cpu_comp_threads;
 	int last_cpu;
 	spinlock_t last_cpu_lock;
 };
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index 70f09c7826da..f4213b3a8fe1 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_MLX4_INFINIBAND)	+= mlx4_ib.o
 
-mlx4_ib-y :=	ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
+mlx4_ib-y :=	ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
new file mode 100644
index 000000000000..d2fb38d43571
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -0,0 +1,688 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 /***********************************************************/
33/*This file support the handling of the Alias GUID feature. */
34/***********************************************************/
35#include <rdma/ib_mad.h>
36#include <rdma/ib_smi.h>
37#include <rdma/ib_cache.h>
38#include <rdma/ib_sa.h>
39#include <rdma/ib_pack.h>
40#include <linux/mlx4/cmd.h>
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/errno.h>
44#include <rdma/ib_user_verbs.h>
45#include <linux/delay.h>
46#include "mlx4_ib.h"
47
48/*
49The driver keeps the current state of all guids, as they are in the HW.
50Whenever we receive an smp mad GUIDInfo record, the data will be cached.
51*/
52
53struct mlx4_alias_guid_work_context {
54 u8 port;
55 struct mlx4_ib_dev *dev ;
56 struct ib_sa_query *sa_query;
57 struct completion done;
58 int query_id;
59 struct list_head list;
60 int block_num;
61};
62
63struct mlx4_next_alias_guid_work {
64 u8 port;
65 u8 block_num;
66 struct mlx4_sriov_alias_guid_info_rec_det rec_det;
67};
68
69
70void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
71 u8 port_num, u8 *p_data)
72{
73 int i;
74 u64 guid_indexes;
75 int slave_id;
76 int port_index = port_num - 1;
77
78 if (!mlx4_is_master(dev->dev))
79 return;
80
81 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
82 ports_guid[port_num - 1].
83 all_rec_per_port[block_num].guid_indexes);
84 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
85
86 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
87 /* The location of the specific index starts from bit number 4
88 * until bit num 11 */
89 if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
90 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
91 if (slave_id >= dev->dev->num_slaves) {
92 pr_debug("The last slave: %d\n", slave_id);
93 return;
94 }
95
96 /* cache the guid: */
97 memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
98 &p_data[i * GUID_REC_SIZE],
99 GUID_REC_SIZE);
100 } else
101 pr_debug("Guid number: %d in block: %d"
102 " was not updated\n", i, block_num);
103 }
104}
105
106static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
107{
108 if (index >= NUM_ALIAS_GUID_PER_PORT) {
109 pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
110 return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL);
111 }
112 return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
113}
114
115
116ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
117{
118 return IB_SA_COMP_MASK(4 + index);
119}
120
121/*
122 * Whenever new GUID is set/unset (guid table change) create event and
123 * notify the relevant slave (master also should be notified).
124 * If the GUID value is not as we have in the cache the slave will not be
125 * updated; in this case it waits for the smp_snoop or the port management
126 * event to call the function and to update the slave.
127 * block_number - the index of the block (16 blocks available)
128 * port_number - 1 or 2
129 */
130void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
131 int block_num, u8 port_num,
132 u8 *p_data)
133{
134 int i;
135 u64 guid_indexes;
136 int slave_id;
137 enum slave_port_state new_state;
138 enum slave_port_state prev_state;
139 __be64 tmp_cur_ag, form_cache_ag;
140 enum slave_port_gen_event gen_event;
141
142 if (!mlx4_is_master(dev->dev))
143 return;
144
145 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
146 ports_guid[port_num - 1].
147 all_rec_per_port[block_num].guid_indexes);
148 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
149
150 /*calculate the slaves and notify them*/
151 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
152 /* the location of the specific index runs from bits 4..11 */
153 if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
154 continue;
155
156 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
157 if (slave_id >= dev->dev->num_slaves)
158 return;
159 tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
160 form_cache_ag = get_cached_alias_guid(dev, port_num,
161 (NUM_ALIAS_GUID_IN_REC * block_num) + i);
162 /*
163 * Check if guid is not the same as in the cache,
164 * If it is different, wait for the snoop_smp or the port mgmt
165 * change event to update the slave on its port state change
166 */
167 if (tmp_cur_ag != form_cache_ag)
168 continue;
169 mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
170
171 /*2 cases: Valid GUID, and Invalid Guid*/
172
173 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
174 prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
175 new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
176 MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
177 &gen_event);
178 pr_debug("slave: %d, port: %d prev_port_state: %d,"
179 " new_port_state: %d, gen_event: %d\n",
180 slave_id, port_num, prev_state, new_state, gen_event);
181 if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
182 pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
183 slave_id, port_num);
184 mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
185 port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
186 }
187 } else { /* request to invalidate GUID */
188 set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
189 MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
190 &gen_event);
191 pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
192 slave_id, port_num);
193 mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
194 MLX4_PORT_CHANGE_SUBTYPE_DOWN);
195 }
196 }
197}
198
199static void aliasguid_query_handler(int status,
200 struct ib_sa_guidinfo_rec *guid_rec,
201 void *context)
202{
203 struct mlx4_ib_dev *dev;
204 struct mlx4_alias_guid_work_context *cb_ctx = context;
205 u8 port_index ;
206 int i;
207 struct mlx4_sriov_alias_guid_info_rec_det *rec;
208 unsigned long flags, flags1;
209
210 if (!context)
211 return;
212
213 dev = cb_ctx->dev;
214 port_index = cb_ctx->port - 1;
215 rec = &dev->sriov.alias_guid.ports_guid[port_index].
216 all_rec_per_port[cb_ctx->block_num];
217
218 if (status) {
219 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
220 pr_debug("(port: %d) failed: status = %d\n",
221 cb_ctx->port, status);
222 goto out;
223 }
224
225 if (guid_rec->block_num != cb_ctx->block_num) {
226 pr_err("block num mismatch: %d != %d\n",
227 cb_ctx->block_num, guid_rec->block_num);
228 goto out;
229 }
230
231 pr_debug("lid/port: %d/%d, block_num: %d\n",
232 be16_to_cpu(guid_rec->lid), cb_ctx->port,
233 guid_rec->block_num);
234
235 rec = &dev->sriov.alias_guid.ports_guid[port_index].
236 all_rec_per_port[guid_rec->block_num];
237
238 rec->status = MLX4_GUID_INFO_STATUS_SET;
239 rec->method = MLX4_GUID_INFO_RECORD_SET;
240
241 for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
242 __be64 tmp_cur_ag;
243 tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
244 /* check if the SM didn't assign one of the records.
245 * if it didn't, if it was not sysadmin request:
246 * ask the SM to give a new GUID, (instead of the driver request).
247 */
248 if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
249 mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
250 "block_num: %d was declined by SM, "
251 "ownership by %d (0 = driver, 1=sysAdmin,"
252 " 2=None)\n", __func__, i,
253 guid_rec->block_num, rec->ownership);
254 if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
255 /* if it is driver assign, asks for new GUID from SM*/
256 *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
257 MLX4_NOT_SET_GUID;
258
259 /* Mark the record as not assigned, and let it
260 * be sent again in the next work sched.*/
261 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
262 rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
263 }
264 } else {
265 /* properly assigned record. */
266 /* We save the GUID we just got from the SM in the
267 * admin_guid in order to be persistent, and in the
268 * request from the sm the process will ask for the same GUID */
269 if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
270 tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
271 /* the sysadmin assignment failed.*/
272 mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
273 " admin guid after SysAdmin "
274 "configuration. "
275 "Record num %d in block_num:%d "
276 "was declined by SM, "
277 "new val(0x%llx) was kept\n",
278 __func__, i,
279 guid_rec->block_num,
280 be64_to_cpu(*(__be64 *) &
281 rec->all_recs[i * GUID_REC_SIZE]));
282 } else {
283 memcpy(&rec->all_recs[i * GUID_REC_SIZE],
284 &guid_rec->guid_info_list[i * GUID_REC_SIZE],
285 GUID_REC_SIZE);
286 }
287 }
288 }
289 /*
290 The func is call here to close the cases when the
291 sm doesn't send smp, so in the sa response the driver
292 notifies the slave.
293 */
294 mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
295 cb_ctx->port,
296 guid_rec->guid_info_list);
297out:
298 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
299 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
300 if (!dev->sriov.is_going_down)
301 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
302 &dev->sriov.alias_guid.ports_guid[port_index].
303 alias_guid_work, 0);
304 if (cb_ctx->sa_query) {
305 list_del(&cb_ctx->list);
306 kfree(cb_ctx);
307 } else
308 complete(&cb_ctx->done);
309 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
310 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
311}
312
313static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
314{
315 int i;
316 u64 cur_admin_val;
317 ib_sa_comp_mask comp_mask = 0;
318
319 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
320 = MLX4_GUID_INFO_STATUS_IDLE;
321 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
322 = MLX4_GUID_INFO_RECORD_SET;
323
324 /* calculate the comp_mask for that record.*/
325 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
326 cur_admin_val =
327 *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
328 all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
329 /*
330 check the admin value: if it's for delete (~00LL) or
331 it is the first guid of the first record (hw guid) or
332 the records is not in ownership of the sysadmin and the sm doesn't
333 need to assign GUIDs, then don't put it up for assignment.
334 */
335 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
336 (!index && !i) ||
337 MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
338 ports_guid[port - 1].all_rec_per_port[index].ownership)
339 continue;
340 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
341 }
342 dev->sriov.alias_guid.ports_guid[port - 1].
343 all_rec_per_port[index].guid_indexes = comp_mask;
344}
345
346static int set_guid_rec(struct ib_device *ibdev,
347 u8 port, int index,
348 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
349{
350 int err;
351 struct mlx4_ib_dev *dev = to_mdev(ibdev);
352 struct ib_sa_guidinfo_rec guid_info_rec;
353 ib_sa_comp_mask comp_mask;
354 struct ib_port_attr attr;
355 struct mlx4_alias_guid_work_context *callback_context;
356 unsigned long resched_delay, flags, flags1;
357 struct list_head *head =
358 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
359
360 err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
361 if (err) {
362 pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
363 err, port);
364 return err;
365 }
366 /*check the port was configured by the sm, otherwise no need to send */
367 if (attr.state != IB_PORT_ACTIVE) {
368 pr_debug("port %d not active...rescheduling\n", port);
369 resched_delay = 5 * HZ;
370 err = -EAGAIN;
371 goto new_schedule;
372 }
373
374 callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
375 if (!callback_context) {
376 err = -ENOMEM;
377 resched_delay = HZ * 5;
378 goto new_schedule;
379 }
380 callback_context->port = port;
381 callback_context->dev = dev;
382 callback_context->block_num = index;
383
384 memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
385
386 guid_info_rec.lid = cpu_to_be16(attr.lid);
387 guid_info_rec.block_num = index;
388
389 memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
390 GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
391 comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
392 rec_det->guid_indexes;
393
394 init_completion(&callback_context->done);
395 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
396 list_add_tail(&callback_context->list, head);
397 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
398
399 callback_context->query_id =
400 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
401 ibdev, port, &guid_info_rec,
402 comp_mask, rec_det->method, 1000,
403 GFP_KERNEL, aliasguid_query_handler,
404 callback_context,
405 &callback_context->sa_query);
406 if (callback_context->query_id < 0) {
407 pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
408 "%d. will reschedule to the next 1 sec.\n",
409 callback_context->query_id);
410 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
411 list_del(&callback_context->list);
412 kfree(callback_context);
413 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
414 resched_delay = 1 * HZ;
415 err = -EAGAIN;
416 goto new_schedule;
417 }
418 err = 0;
419 goto out;
420
421new_schedule:
422 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
423 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
424 invalidate_guid_record(dev, port, index);
425 if (!dev->sriov.is_going_down) {
426 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
427 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
428 resched_delay);
429 }
430 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
431 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
432
433out:
434 return err;
435}
436
437void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
438{
439 int i;
440 unsigned long flags, flags1;
441
442 pr_debug("port %d\n", port);
443
444 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
445 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
446 for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
447 invalidate_guid_record(dev, port, i);
448
449 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
450 /*
451 make sure no work waits in the queue, if the work is already
452 queued(not on the timer) the cancel will fail. That is not a problem
453 because we just want the work started.
454 */
455 cancel_delayed_work(&dev->sriov.alias_guid.
456 ports_guid[port - 1].alias_guid_work);
457 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
458 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
459 0);
460 }
461 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
462 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
463}
464
465/* The function returns the next record that was
466 * not configured (or failed to be configured) */
467static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
468 struct mlx4_next_alias_guid_work *rec)
469{
470 int j;
471 unsigned long flags;
472
473 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
474 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
475 if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
476 MLX4_GUID_INFO_STATUS_IDLE) {
477 memcpy(&rec->rec_det,
478 &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
479 sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
480 rec->port = port;
481 rec->block_num = j;
482 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
483 MLX4_GUID_INFO_STATUS_PENDING;
484 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
485 return 0;
486 }
487 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
488 }
489 return -ENOENT;
490}
491
492static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
493 int rec_index,
494 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
495{
496 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
497 rec_det->guid_indexes;
498 memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
499 rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
500 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
501 rec_det->status;
502}
503
504static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
505{
506 int j;
507 struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
508
509 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
510 memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
511 rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
512 IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
513 IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
514 IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
515 IB_SA_GUIDINFO_REC_GID7;
516 rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
517 set_administratively_guid_record(dev, port, j, &rec_det);
518 }
519}
520
521static void alias_guid_work(struct work_struct *work)
522{
523 struct delayed_work *delay = to_delayed_work(work);
524 int ret = 0;
525 struct mlx4_next_alias_guid_work *rec;
526 struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
527 container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
528 alias_guid_work);
529 struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
530 struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
531 struct mlx4_ib_sriov,
532 alias_guid);
533 struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
534
535 rec = kzalloc(sizeof *rec, GFP_KERNEL);
536 if (!rec) {
537 pr_err("alias_guid_work: No Memory\n");
538 return;
539 }
540
541 pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
542 ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
543 if (ret) {
544 pr_debug("No more records to update.\n");
545 goto out;
546 }
547
548 set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
549 &rec->rec_det);
550
551out:
552 kfree(rec);
553}
554
555
556void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
557{
558 unsigned long flags, flags1;
559
560 if (!mlx4_is_master(dev->dev))
561 return;
562 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
563 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
564 if (!dev->sriov.is_going_down) {
565 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
566 &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
567 }
568 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
569 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
570}
571
572void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
573{
574 int i;
575 struct mlx4_ib_sriov *sriov = &dev->sriov;
576 struct mlx4_alias_guid_work_context *cb_ctx;
577 struct mlx4_sriov_alias_guid_port_rec_det *det;
578 struct ib_sa_query *sa_query;
579 unsigned long flags;
580
581 for (i = 0 ; i < dev->num_ports; i++) {
582 cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
583 det = &sriov->alias_guid.ports_guid[i];
584 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
585 while (!list_empty(&det->cb_list)) {
586 cb_ctx = list_entry(det->cb_list.next,
587 struct mlx4_alias_guid_work_context,
588 list);
589 sa_query = cb_ctx->sa_query;
590 cb_ctx->sa_query = NULL;
591 list_del(&cb_ctx->list);
592 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
593 ib_sa_cancel_query(cb_ctx->query_id, sa_query);
594 wait_for_completion(&cb_ctx->done);
595 kfree(cb_ctx);
596 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
597 }
598 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
599 }
600 for (i = 0 ; i < dev->num_ports; i++) {
601 flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
602 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
603 }
604 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
605 kfree(dev->sriov.alias_guid.sa_client);
606}
607
608int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
609{
610 char alias_wq_name[15];
611 int ret = 0;
612 int i, j, k;
613 union ib_gid gid;
614
615 if (!mlx4_is_master(dev->dev))
616 return 0;
617 dev->sriov.alias_guid.sa_client =
618 kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
619 if (!dev->sriov.alias_guid.sa_client)
620 return -ENOMEM;
621
622 ib_sa_register_client(dev->sriov.alias_guid.sa_client);
623
624 spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
625
626 for (i = 1; i <= dev->num_ports; ++i) {
627 if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
628 ret = -EFAULT;
629 goto err_unregister;
630 }
631 }
632
633 for (i = 0 ; i < dev->num_ports; i++) {
634 memset(&dev->sriov.alias_guid.ports_guid[i], 0,
635 sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
636 /*Check if the SM doesn't need to assign the GUIDs*/
637 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
638 if (mlx4_ib_sm_guid_assign) {
639 dev->sriov.alias_guid.ports_guid[i].
640 all_rec_per_port[j].
641 ownership = MLX4_GUID_DRIVER_ASSIGN;
642 continue;
643 }
644 dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
645 ownership = MLX4_GUID_NONE_ASSIGN;
646 /*mark each val as it was deleted,
647 till the sysAdmin will give it valid val*/
648 for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
649 *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
650 all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
651 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
652 }
653 }
654 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
655 /*prepare the records, set them to be allocated by sm*/
656 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
657 invalidate_guid_record(dev, i + 1, j);
658
659 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
660 dev->sriov.alias_guid.ports_guid[i].port = i;
661 if (mlx4_ib_sm_guid_assign)
662 set_all_slaves_guids(dev, i);
663
664 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
665 dev->sriov.alias_guid.ports_guid[i].wq =
666 create_singlethread_workqueue(alias_wq_name);
667 if (!dev->sriov.alias_guid.ports_guid[i].wq) {
668 ret = -ENOMEM;
669 goto err_thread;
670 }
671 INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
672 alias_guid_work);
673 }
674 return 0;
675
676err_thread:
677 for (--i; i >= 0; i--) {
678 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
679 dev->sriov.alias_guid.ports_guid[i].wq = NULL;
680 }
681
682err_unregister:
683 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
684 kfree(dev->sriov.alias_guid.sa_client);
685 dev->sriov.alias_guid.sa_client = NULL;
686 pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
687 return ret;
688}
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
new file mode 100644
index 000000000000..80079e5a2e30
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -0,0 +1,437 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34
35#include <linux/mlx4/cmd.h>
36#include <linux/rbtree.h>
37#include <linux/idr.h>
38#include <rdma/ib_cm.h>
39
40#include "mlx4_ib.h"
41
42#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
43
44struct id_map_entry {
45 struct rb_node node;
46
47 u32 sl_cm_id;
48 u32 pv_cm_id;
49 int slave_id;
50 int scheduled_delete;
51 struct mlx4_ib_dev *dev;
52
53 struct list_head list;
54 struct delayed_work timeout;
55};
56
57struct cm_generic_msg {
58 struct ib_mad_hdr hdr;
59
60 __be32 local_comm_id;
61 __be32 remote_comm_id;
62};
63
64struct cm_req_msg {
65 unsigned char unused[0x60];
66 union ib_gid primary_path_sgid;
67};
68
69
70static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
71{
72 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
73 msg->local_comm_id = cpu_to_be32(cm_id);
74}
75
76static u32 get_local_comm_id(struct ib_mad *mad)
77{
78 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
79
80 return be32_to_cpu(msg->local_comm_id);
81}
82
83static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
84{
85 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
86 msg->remote_comm_id = cpu_to_be32(cm_id);
87}
88
89static u32 get_remote_comm_id(struct ib_mad *mad)
90{
91 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
92
93 return be32_to_cpu(msg->remote_comm_id);
94}
95
96static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
97{
98 struct cm_req_msg *msg = (struct cm_req_msg *)mad;
99
100 return msg->primary_path_sgid;
101}
102
103/* Lock should be taken before called */
104static struct id_map_entry *
105id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
106{
107 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
108 struct rb_node *node = sl_id_map->rb_node;
109
110 while (node) {
111 struct id_map_entry *id_map_entry =
112 rb_entry(node, struct id_map_entry, node);
113
114 if (id_map_entry->sl_cm_id > sl_cm_id)
115 node = node->rb_left;
116 else if (id_map_entry->sl_cm_id < sl_cm_id)
117 node = node->rb_right;
118 else if (id_map_entry->slave_id > slave_id)
119 node = node->rb_left;
120 else if (id_map_entry->slave_id < slave_id)
121 node = node->rb_right;
122 else
123 return id_map_entry;
124 }
125 return NULL;
126}
127
128static void id_map_ent_timeout(struct work_struct *work)
129{
130 struct delayed_work *delay = to_delayed_work(work);
131 struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
132 struct id_map_entry *db_ent, *found_ent;
133 struct mlx4_ib_dev *dev = ent->dev;
134 struct mlx4_ib_sriov *sriov = &dev->sriov;
135 struct rb_root *sl_id_map = &sriov->sl_id_map;
136 int pv_id = (int) ent->pv_cm_id;
137
138 spin_lock(&sriov->id_map_lock);
139 db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
140 if (!db_ent)
141 goto out;
142 found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
143 if (found_ent && found_ent == ent)
144 rb_erase(&found_ent->node, sl_id_map);
145 idr_remove(&sriov->pv_id_table, pv_id);
146
147out:
148 list_del(&ent->list);
149 spin_unlock(&sriov->id_map_lock);
150 kfree(ent);
151}
152
153static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
154{
155 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
156 struct rb_root *sl_id_map = &sriov->sl_id_map;
157 struct id_map_entry *ent, *found_ent;
158
159 spin_lock(&sriov->id_map_lock);
160 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
161 if (!ent)
162 goto out;
163 found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
164 if (found_ent && found_ent == ent)
165 rb_erase(&found_ent->node, sl_id_map);
166 idr_remove(&sriov->pv_id_table, pv_cm_id);
167out:
168 spin_unlock(&sriov->id_map_lock);
169}
170
171static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
172{
173 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
174 struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
175 struct id_map_entry *ent;
176 int slave_id = new->slave_id;
177 int sl_cm_id = new->sl_cm_id;
178
179 ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
180 if (ent) {
181 pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
182 sl_cm_id);
183
184 rb_replace_node(&ent->node, &new->node, sl_id_map);
185 return;
186 }
187
188 /* Go to the bottom of the tree */
189 while (*link) {
190 parent = *link;
191 ent = rb_entry(parent, struct id_map_entry, node);
192
193 if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
194 link = &(*link)->rb_left;
195 else
196 link = &(*link)->rb_right;
197 }
198
199 rb_link_node(&new->node, parent, link);
200 rb_insert_color(&new->node, sl_id_map);
201}
202
203static struct id_map_entry *
204id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
205{
206 int ret, id;
207 static int next_id;
208 struct id_map_entry *ent;
209 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
210
211 ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
212 if (!ent) {
213 mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
214 return ERR_PTR(-ENOMEM);
215 }
216
217 ent->sl_cm_id = sl_cm_id;
218 ent->slave_id = slave_id;
219 ent->scheduled_delete = 0;
220 ent->dev = to_mdev(ibdev);
221 INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
222
223 do {
224 spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
225 ret = idr_get_new_above(&sriov->pv_id_table, ent,
226 next_id, &id);
227 if (!ret) {
228 next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
229 ent->pv_cm_id = (u32)id;
230 sl_id_map_add(ibdev, ent);
231 }
232
233 spin_unlock(&sriov->id_map_lock);
234 } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
235 /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
236 if (!ret) {
237 spin_lock(&sriov->id_map_lock);
238 list_add_tail(&ent->list, &sriov->cm_list);
239 spin_unlock(&sriov->id_map_lock);
240 return ent;
241 }
242 /*error flow*/
243 kfree(ent);
244 mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
245 return ERR_PTR(-ENOMEM);
246}
247
248static struct id_map_entry *
249id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
250{
251 struct id_map_entry *ent;
252 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
253
254 spin_lock(&sriov->id_map_lock);
255 if (*pv_cm_id == -1) {
256 ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
257 if (ent)
258 *pv_cm_id = (int) ent->pv_cm_id;
259 } else
260 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
261 spin_unlock(&sriov->id_map_lock);
262
263 return ent;
264}
265
266static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
267{
268 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
269 unsigned long flags;
270
271 spin_lock_irqsave(&sriov->going_down_lock, flags);
272 spin_lock(&sriov->id_map_lock);
273 /*make sure that there is no schedule inside the scheduled work.*/
274 if (!sriov->is_going_down) {
275 id->scheduled_delete = 1;
276 schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
277 }
278 spin_unlock(&sriov->id_map_lock);
279 spin_unlock_irqrestore(&sriov->going_down_lock, flags);
280}
281
282int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
283 struct ib_mad *mad)
284{
285 struct id_map_entry *id;
286 u32 sl_cm_id;
287 int pv_cm_id = -1;
288
289 sl_cm_id = get_local_comm_id(mad);
290
291 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
292 mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
293 id = id_map_alloc(ibdev, slave_id, sl_cm_id);
294 if (IS_ERR(id)) {
295 mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
296 __func__, slave_id, sl_cm_id);
297 return PTR_ERR(id);
298 }
299 } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
300 return 0;
301 } else {
302 id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
303 }
304
305 if (!id) {
306 pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
307 slave_id, sl_cm_id);
308 return -EINVAL;
309 }
310
311 set_local_comm_id(mad, id->pv_cm_id);
312
313 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
314 schedule_delayed(ibdev, id);
315 else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
316 id_map_find_del(ibdev, pv_cm_id);
317
318 return 0;
319}
320
321int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
322 struct ib_mad *mad)
323{
324 u32 pv_cm_id;
325 struct id_map_entry *id;
326
327 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
328 union ib_gid gid;
329
330 gid = gid_from_req_msg(ibdev, mad);
331 *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
332 if (*slave < 0) {
333 mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
334 gid.global.interface_id);
335 return -ENOENT;
336 }
337 return 0;
338 }
339
340 pv_cm_id = get_remote_comm_id(mad);
341 id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
342
343 if (!id) {
344 pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
345 return -ENOENT;
346 }
347
348 *slave = id->slave_id;
349 set_remote_comm_id(mad, id->sl_cm_id);
350
351 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
352 schedule_delayed(ibdev, id);
353 else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
354 mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
355 id_map_find_del(ibdev, (int) pv_cm_id);
356 }
357
358 return 0;
359}
360
361void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
362{
363 spin_lock_init(&dev->sriov.id_map_lock);
364 INIT_LIST_HEAD(&dev->sriov.cm_list);
365 dev->sriov.sl_id_map = RB_ROOT;
366 idr_init(&dev->sriov.pv_id_table);
367 idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
368}
369
370/* slave = -1 ==> all slaves */
371/* TBD -- call paravirt clean for a single slave. Needed for the slave RESET event */
372void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
373{
374 struct mlx4_ib_sriov *sriov = &dev->sriov;
375 struct rb_root *sl_id_map = &sriov->sl_id_map;
376 struct list_head lh;
377 struct rb_node *nd;
378 int need_flush = 1;
379 struct id_map_entry *map, *tmp_map;
380 /* cancel all delayed work queue entries */
381 INIT_LIST_HEAD(&lh);
382 spin_lock(&sriov->id_map_lock);
383 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
384 if (slave < 0 || slave == map->slave_id) {
385 if (map->scheduled_delete)
386 need_flush &= !!cancel_delayed_work(&map->timeout);
387 }
388 }
389
390 spin_unlock(&sriov->id_map_lock);
391
392 if (!need_flush)
393 flush_scheduled_work(); /* make sure all timers were flushed */
394
395 /* now, remove all leftover entries from databases*/
396 spin_lock(&sriov->id_map_lock);
397 if (slave < 0) {
398 while (rb_first(sl_id_map)) {
399 struct id_map_entry *ent =
400 rb_entry(rb_first(sl_id_map),
401 struct id_map_entry, node);
402
403 rb_erase(&ent->node, sl_id_map);
404 idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
405 }
406 list_splice_init(&dev->sriov.cm_list, &lh);
407 } else {
408 /* first, move nodes belonging to slave to db remove list */
409 nd = rb_first(sl_id_map);
410 while (nd) {
411 struct id_map_entry *ent =
412 rb_entry(nd, struct id_map_entry, node);
413 nd = rb_next(nd);
414 if (ent->slave_id == slave)
415 list_move_tail(&ent->list, &lh);
416 }
417 /* remove those nodes from databases */
418 list_for_each_entry_safe(map, tmp_map, &lh, list) {
419 rb_erase(&map->node, sl_id_map);
420 idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
421 }
422
423 /* add remaining nodes from cm_list */
424 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
425 if (slave == map->slave_id)
426 list_move_tail(&map->list, &lh);
427 }
428 }
429
430 spin_unlock(&sriov->id_map_lock);
431
432 /* free any map entries left behind due to cancel_delayed_work above */
433 list_for_each_entry_safe(map, tmp_map, &lh, list) {
434 list_del(&map->list);
435 kfree(map);
436 }
437}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 6d4ef71cbcdf..c9eb6a6815ce 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -547,6 +547,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
547 checksum == cpu_to_be16(0xffff); 547 checksum == cpu_to_be16(0xffff);
548} 548}
549 549
550static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
551 unsigned tail, struct mlx4_cqe *cqe)
552{
553 struct mlx4_ib_proxy_sqp_hdr *hdr;
554
555 ib_dma_sync_single_for_cpu(qp->ibqp.device,
556 qp->sqp_proxy_rcv[tail].map,
557 sizeof (struct mlx4_ib_proxy_sqp_hdr),
558 DMA_FROM_DEVICE);
559 hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
560 wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
561 wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
562 wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
563 wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
564 wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
565 wc->dlid_path_bits = 0;
566
567 return 0;
568}
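use_tunnel_data() unpacks big-endian fields from the proxy header: the SL sits in the top 4 bits of sl_vid and the source QP in the low 24 bits of flags_src_qp. A standalone sketch of the same unpacking with made-up byte values:

#include <stdio.h>
#include <stdint.h>

static uint16_t be16(const uint8_t *p) { return (uint16_t)(p[0] << 8 | p[1]); }
static uint32_t be32(const uint8_t *p)
{
	return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
	       (uint32_t)p[2] << 8 | p[3];
}

int main(void)
{
	uint8_t sl_vid[2] = { 0x50, 0x00 };		/* SL 5, VID 0 */
	uint8_t flags_src_qp[4] = { 0x40, 0x00, 0x01, 0x23 };
	unsigned sl = be16(sl_vid) >> 12;		/* top 4 bits carry the SL */
	unsigned src_qp = be32(flags_src_qp) & 0xffffff; /* low 24 bits carry the QPN */

	printf("sl=%u src_qp=0x%x\n", sl, src_qp);
	return 0;
}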
569
550static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, 570static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
551 struct mlx4_ib_qp **cur_qp, 571 struct mlx4_ib_qp **cur_qp,
552 struct ib_wc *wc) 572 struct ib_wc *wc)
@@ -559,6 +579,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
559 int is_error; 579 int is_error;
560 u32 g_mlpath_rqpn; 580 u32 g_mlpath_rqpn;
561 u16 wqe_ctr; 581 u16 wqe_ctr;
582 unsigned tail = 0;
562 583
563repoll: 584repoll:
564 cqe = next_cqe_sw(cq); 585 cqe = next_cqe_sw(cq);
@@ -634,7 +655,8 @@ repoll:
634 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 655 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
635 } else { 656 } else {
636 wq = &(*cur_qp)->rq; 657 wq = &(*cur_qp)->rq;
637 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 658 tail = wq->tail & (wq->wqe_cnt - 1);
659 wc->wr_id = wq->wrid[tail];
638 ++wq->tail; 660 ++wq->tail;
639 } 661 }
640 662
@@ -717,6 +739,13 @@ repoll:
717 break; 739 break;
718 } 740 }
719 741
742 if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
743 if ((*cur_qp)->mlx4_ib_qp_type &
744 (MLX4_IB_QPT_PROXY_SMI_OWNER |
745 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
746 return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
747 }
748
720 wc->slid = be16_to_cpu(cqe->rlid); 749 wc->slid = be16_to_cpu(cqe->rlid);
721 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); 750 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
722 wc->src_qp = g_mlpath_rqpn & 0xffffff; 751 wc->src_qp = g_mlpath_rqpn & 0xffffff;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2ae7efd00f..21a794152d15 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,7 +32,10 @@
32 32
33#include <rdma/ib_mad.h> 33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h> 34#include <rdma/ib_smi.h>
35#include <rdma/ib_sa.h>
36#include <rdma/ib_cache.h>
35 37
38#include <linux/random.h>
36#include <linux/mlx4/cmd.h> 39#include <linux/mlx4/cmd.h>
37#include <linux/gfp.h> 40#include <linux/gfp.h>
38#include <rdma/ib_pma.h> 41#include <rdma/ib_pma.h>
@@ -44,7 +47,62 @@ enum {
44 MLX4_IB_VENDOR_CLASS2 = 0xa 47 MLX4_IB_VENDOR_CLASS2 = 0xa
45}; 48};
46 49
47int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, 50#define MLX4_TUN_SEND_WRID_SHIFT 34
51#define MLX4_TUN_QPN_SHIFT 32
52#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
53#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
54
55#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
56#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
57
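The macros above pack a receive flag (bit 34) and the tunnel QP number (bits 32-33) on top of a 32-bit ring index in the work-request id. A small userspace check of the round trip, using local copies of the shifts:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SEND_WRID_SHIFT 34
#define QPN_SHIFT       32
#define WRID_RECV       (((uint64_t)1) << SEND_WRID_SHIFT)
#define SET_WRID_QPN(a) (((uint64_t)((a) & 0x3)) << QPN_SHIFT)
#define IS_RECV(a)      (((a) >> SEND_WRID_SHIFT) & 0x1)
#define WRID_QPN(a)     (((a) >> QPN_SHIFT) & 0x3)

int main(void)
{
	/* ring index 17, QP 1, marked as a receive completion */
	uint64_t wr_id = (uint64_t)17 | WRID_RECV | SET_WRID_QPN(1);

	assert(IS_RECV(wr_id));
	assert(WRID_QPN(wr_id) == 1);
	assert((wr_id & 0xffffffffULL) == 17);
	printf("wr_id = 0x%llx\n", (unsigned long long)wr_id);
	return 0;
}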
58 /* Port mgmt change event handling */
59
60#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
61#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
62#define NUM_IDX_IN_PKEY_TBL_BLK 32
63#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
64#define GUID_TBL_BLK_NUM_ENTRIES 8
65#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
66
67struct mlx4_mad_rcv_buf {
68 struct ib_grh grh;
69 u8 payload[256];
70} __packed;
71
72struct mlx4_mad_snd_buf {
73 u8 payload[256];
74} __packed;
75
76struct mlx4_tunnel_mad {
77 struct ib_grh grh;
78 struct mlx4_ib_tunnel_header hdr;
79 struct ib_mad mad;
80} __packed;
81
82struct mlx4_rcv_tunnel_mad {
83 struct mlx4_rcv_tunnel_hdr hdr;
84 struct ib_grh grh;
85 struct ib_mad mad;
86} __packed;
87
88static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
89static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
90static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
91 int block, u32 change_bitmap);
92
93__be64 mlx4_ib_gen_node_guid(void)
94{
95#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
96 return cpu_to_be64(NODE_GUID_HI | random32());
97}
98
99__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
100{
101 return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
102 cpu_to_be64(0xff00000000000000LL);
103}
104
105int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
48 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 106 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
49 void *in_mad, void *response_mad) 107 void *in_mad, void *response_mad)
50{ 108{
@@ -71,10 +129,13 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
71 * Key check traps can't be generated unless we have in_wc to 129 * Key check traps can't be generated unless we have in_wc to
72 * tell us where to send the trap. 130 * tell us where to send the trap.
73 */ 131 */
74 if (ignore_mkey || !in_wc) 132 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
75 op_modifier |= 0x1; 133 op_modifier |= 0x1;
76 if (ignore_bkey || !in_wc) 134 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
77 op_modifier |= 0x2; 135 op_modifier |= 0x2;
136 if (mlx4_is_mfunc(dev->dev) &&
137 (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
138 op_modifier |= 0x8;
78 139
79 if (in_wc) { 140 if (in_wc) {
80 struct { 141 struct {
@@ -107,10 +168,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
107 in_modifier |= in_wc->slid << 16; 168 in_modifier |= in_wc->slid << 16;
108 } 169 }
109 170
110 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, 171 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
111 in_modifier, op_modifier, 172 mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
112 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, 173 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
113 MLX4_CMD_NATIVE); 174 (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
114 175
115 if (!err) 176 if (!err)
116 memcpy(response_mad, outmailbox->buf, 256); 177 memcpy(response_mad, outmailbox->buf, 256);
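The hunk above turns the two boolean arguments into a flags word and maps them onto MAD_IFC op_modifier bits, adding bit 0x8 to request the network view on multi-function devices. A sketch of that mapping with assumed flag values (the real MLX4_MAD_IFC_* constants are defined elsewhere in the driver):

#include <stdio.h>

#define IFC_IGNORE_MKEY 0x1	/* assumed values, for illustration only */
#define IFC_IGNORE_BKEY 0x2
#define IFC_NET_VIEW    0x4

static int op_modifier(int flags, int have_wc, int is_mfunc)
{
	int op = 0;

	if ((flags & IFC_IGNORE_MKEY) || !have_wc)
		op |= 0x1;
	if ((flags & IFC_IGNORE_BKEY) || !have_wc)
		op |= 0x2;
	if (is_mfunc && ((flags & IFC_NET_VIEW) || have_wc))
		op |= 0x8;	/* network view on multi-function devices */
	return op;
}

int main(void)
{
	printf("no wc, net view, mfunc -> op_modifier 0x%x\n",
	       op_modifier(IFC_NET_VIEW, 0, 1));
	return 0;
}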
@@ -156,6 +217,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
156{ 217{
157 struct ib_port_info *pinfo; 218 struct ib_port_info *pinfo;
158 u16 lid; 219 u16 lid;
220 __be16 *base;
221 u32 bn, pkey_change_bitmap;
222 int i;
223
159 224
160 struct mlx4_ib_dev *dev = to_mdev(ibdev); 225 struct mlx4_ib_dev *dev = to_mdev(ibdev);
161 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || 226 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
@@ -171,17 +236,46 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
171 pinfo->neighbormtu_mastersmsl & 0xf); 236 pinfo->neighbormtu_mastersmsl & 0xf);
172 237
173 if (pinfo->clientrereg_resv_subnetto & 0x80) 238 if (pinfo->clientrereg_resv_subnetto & 0x80)
174 mlx4_ib_dispatch_event(dev, port_num, 239 handle_client_rereg_event(dev, port_num);
175 IB_EVENT_CLIENT_REREGISTER);
176 240
177 if (prev_lid != lid) 241 if (prev_lid != lid)
178 mlx4_ib_dispatch_event(dev, port_num, 242 handle_lid_change_event(dev, port_num);
179 IB_EVENT_LID_CHANGE);
180 break; 243 break;
181 244
182 case IB_SMP_ATTR_PKEY_TABLE: 245 case IB_SMP_ATTR_PKEY_TABLE:
183 mlx4_ib_dispatch_event(dev, port_num, 246 if (!mlx4_is_mfunc(dev->dev)) {
184 IB_EVENT_PKEY_CHANGE); 247 mlx4_ib_dispatch_event(dev, port_num,
248 IB_EVENT_PKEY_CHANGE);
249 break;
250 }
251
252 /* at this point, we are running in the master.
253 * Slaves do not receive SMPs.
254 */
255 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
256 base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
257 pkey_change_bitmap = 0;
258 for (i = 0; i < 32; i++) {
259 pr_debug("PKEY[%d] = x%x\n",
260 i + bn*32, be16_to_cpu(base[i]));
261 if (be16_to_cpu(base[i]) !=
262 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
263 pkey_change_bitmap |= (1 << i);
264 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
265 be16_to_cpu(base[i]);
266 }
267 }
268 pr_debug("PKEY Change event: port=%d, "
269 "block=0x%x, change_bitmap=0x%x\n",
270 port_num, bn, pkey_change_bitmap);
271
272 if (pkey_change_bitmap) {
273 mlx4_ib_dispatch_event(dev, port_num,
274 IB_EVENT_PKEY_CHANGE);
275 if (!dev->sriov.is_going_down)
276 __propagate_pkey_ev(dev, port_num, bn,
277 pkey_change_bitmap);
278 }
185 break; 279 break;
186 280
187 case IB_SMP_ATTR_GUID_INFO: 281 case IB_SMP_ATTR_GUID_INFO:
@@ -189,12 +283,56 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
189 if (!mlx4_is_master(dev->dev)) 283 if (!mlx4_is_master(dev->dev))
190 mlx4_ib_dispatch_event(dev, port_num, 284 mlx4_ib_dispatch_event(dev, port_num,
191 IB_EVENT_GID_CHANGE); 285 IB_EVENT_GID_CHANGE);
286 /*if master, notify relevant slaves*/
287 if (mlx4_is_master(dev->dev) &&
288 !dev->sriov.is_going_down) {
289 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
290 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
291 (u8 *)(&((struct ib_smp *)mad)->data));
292 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
293 (u8 *)(&((struct ib_smp *)mad)->data));
294 }
192 break; 295 break;
296
193 default: 297 default:
194 break; 298 break;
195 } 299 }
196} 300}
197 301
302static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
303 int block, u32 change_bitmap)
304{
305 int i, ix, slave, err;
306 int have_event = 0;
307
308 for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
309 if (slave == mlx4_master_func_num(dev->dev))
310 continue;
311 if (!mlx4_is_slave_active(dev->dev, slave))
312 continue;
313
314 have_event = 0;
315 for (i = 0; i < 32; i++) {
316 if (!(change_bitmap & (1 << i)))
317 continue;
318 for (ix = 0;
319 ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
320 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
321 [ix] == i + 32 * block) {
322 err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
323 pr_debug("propagate_pkey_ev: slave %d,"
324 " port %d, ix %d (%d)\n",
325 slave, port_num, ix, err);
326 have_event = 1;
327 break;
328 }
329 }
330 if (have_event)
331 break;
332 }
333 }
334}
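smp_snoop() builds the change bitmap by diffing the 32-entry pkey block in the MAD against the cached copy, one bit per changed index; __propagate_pkey_ev() then raises an event for each slave whose virtual table maps onto a changed physical index. A sketch of the bitmap computation with made-up table contents:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint16_t cache[32] = { [0] = 0xffff, [1] = 0x8001 };
	uint16_t mad_block[32] = { [0] = 0xffff, [1] = 0x8002, [2] = 0x7fff };
	uint32_t change_bitmap = 0;
	int i;

	for (i = 0; i < 32; i++) {
		if (mad_block[i] != cache[i]) {
			change_bitmap |= 1u << i;	/* one bit per changed index */
			cache[i] = mad_block[i];	/* refresh the cached copy */
		}
	}
	printf("change_bitmap = 0x%x\n", change_bitmap);	/* expect 0x6 */
	return 0;
}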
335
198static void node_desc_override(struct ib_device *dev, 336static void node_desc_override(struct ib_device *dev,
199 struct ib_mad *mad) 337 struct ib_mad *mad)
200{ 338{
@@ -242,6 +380,268 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
242 } 380 }
243} 381}
244 382
383static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
384 struct ib_sa_mad *sa_mad)
385{
386 int ret = 0;
387
388 /* dispatch to different sa handlers */
389 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
390 case IB_SA_ATTR_MC_MEMBER_REC:
391 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
392 break;
393 default:
394 break;
395 }
396 return ret;
397}
398
399int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
400{
401 struct mlx4_ib_dev *dev = to_mdev(ibdev);
402 int i;
403
404 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
405 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
406 return i;
407 }
408 return -1;
409}
410
411
412static int get_pkey_phys_indices(struct mlx4_ib_dev *ibdev, u8 port, u8 ph_pkey_ix,
413 u8 *full_pk_ix, u8 *partial_pk_ix,
414 int *is_full_member)
415{
416 u16 search_pkey;
417 int fm;
418 int err = 0;
419 u16 pk;
420
421 err = ib_get_cached_pkey(&ibdev->ib_dev, port, ph_pkey_ix, &search_pkey);
422 if (err)
423 return err;
424
425 fm = (search_pkey & 0x8000) ? 1 : 0;
426 if (fm) {
427 *full_pk_ix = ph_pkey_ix;
428 search_pkey &= 0x7FFF;
429 } else {
430 *partial_pk_ix = ph_pkey_ix;
431 search_pkey |= 0x8000;
432 }
433
434 if (ib_find_exact_cached_pkey(&ibdev->ib_dev, port, search_pkey, &pk))
435 pk = 0xFFFF;
436
437 if (fm)
438 *partial_pk_ix = (pk & 0xFF);
439 else
440 *full_pk_ix = (pk & 0xFF);
441
442 *is_full_member = fm;
443 return err;
444}
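get_pkey_phys_indices() relies on bit 15 of a pkey distinguishing full from limited membership, with the counterpart entry sharing the low 15 bits. A standalone sketch of that pairing over a hypothetical table:

#include <stdio.h>
#include <stdint.h>

static int find_pkey(const uint16_t *tbl, int len, uint16_t pkey)
{
	int i;

	for (i = 0; i < len; i++)
		if (tbl[i] == pkey)
			return i;
	return -1;
}

int main(void)
{
	uint16_t tbl[] = { 0xffff, 0x7fff, 0x8001, 0x0001 };	/* illustrative table */
	uint16_t pkey = tbl[2];					/* 0x8001: full member */
	int full = (pkey & 0x8000) != 0;
	uint16_t partner = full ? (pkey & 0x7fff) : (pkey | 0x8000);

	printf("full=%d partner index=%d\n", full, find_pkey(tbl, 4, partner));
	return 0;
}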
445
446int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
447 enum ib_qp_type dest_qpt, struct ib_wc *wc,
448 struct ib_grh *grh, struct ib_mad *mad)
449{
450 struct ib_sge list;
451 struct ib_send_wr wr, *bad_wr;
452 struct mlx4_ib_demux_pv_ctx *tun_ctx;
453 struct mlx4_ib_demux_pv_qp *tun_qp;
454 struct mlx4_rcv_tunnel_mad *tun_mad;
455 struct ib_ah_attr attr;
456 struct ib_ah *ah;
457 struct ib_qp *src_qp = NULL;
458 unsigned tun_tx_ix = 0;
459 int dqpn;
460 int ret = 0;
461 int i;
462 int is_full_member = 0;
463 u16 tun_pkey_ix;
464 u8 ph_pkey_ix, full_pk_ix = 0, partial_pk_ix = 0;
465
466 if (dest_qpt > IB_QPT_GSI)
467 return -EINVAL;
468
469 tun_ctx = dev->sriov.demux[port-1].tun[slave];
470
471 /* check if proxy qp created */
472 if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
473 return -EAGAIN;
474
475 /* QP0 forwarding only for Dom0 */
476 if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
477 return -EINVAL;
478
479 if (!dest_qpt)
480 tun_qp = &tun_ctx->qp[0];
481 else
482 tun_qp = &tun_ctx->qp[1];
483
484 /* compute pkey index for slave */
485 /* get physical pkey -- virtualized Dom0 pkey to phys*/
486 if (dest_qpt) {
487 ph_pkey_ix =
488 dev->pkeys.virt2phys_pkey[mlx4_master_func_num(dev->dev)][port - 1][wc->pkey_index];
489
490 /* now, translate this to the slave pkey index */
491 ret = get_pkey_phys_indices(dev, port, ph_pkey_ix, &full_pk_ix,
492 &partial_pk_ix, &is_full_member);
493 if (ret)
494 return -EINVAL;
495
496 for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
497 if ((dev->pkeys.virt2phys_pkey[slave][port - 1][i] == full_pk_ix) ||
498 (is_full_member &&
499 (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == partial_pk_ix)))
500 break;
501 }
502 if (i == dev->dev->caps.pkey_table_len[port])
503 return -EINVAL;
504 tun_pkey_ix = i;
505 } else
506 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
507
508 dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
509
510 /* get tunnel tx data buf for slave */
511 src_qp = tun_qp->qp;
512
513 /* create ah. Just need an empty one with the port num for the post send.
514 * The driver will set the force loopback bit in post_send */
515 memset(&attr, 0, sizeof attr);
516 attr.port_num = port;
517 ah = ib_create_ah(tun_ctx->pd, &attr);
518 if (IS_ERR(ah))
519 return -ENOMEM;
520
521	/* allocate a tunnel tx buffer slot; fail with -EAGAIN if the ring is full */
522 spin_lock(&tun_qp->tx_lock);
523 if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
524 (MLX4_NUM_TUNNEL_BUFS - 1))
525 ret = -EAGAIN;
526 else
527 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
528 spin_unlock(&tun_qp->tx_lock);
529 if (ret)
530 goto out;
531
532 tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
533 if (tun_qp->tx_ring[tun_tx_ix].ah)
534 ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
535 tun_qp->tx_ring[tun_tx_ix].ah = ah;
536 ib_dma_sync_single_for_cpu(&dev->ib_dev,
537 tun_qp->tx_ring[tun_tx_ix].buf.map,
538 sizeof (struct mlx4_rcv_tunnel_mad),
539 DMA_TO_DEVICE);
540
541 /* copy over to tunnel buffer */
542 if (grh)
543 memcpy(&tun_mad->grh, grh, sizeof *grh);
544 memcpy(&tun_mad->mad, mad, sizeof *mad);
545
546 /* adjust tunnel data */
547 tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
548 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
549 tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
550 tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
551 tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
552
553 ib_dma_sync_single_for_device(&dev->ib_dev,
554 tun_qp->tx_ring[tun_tx_ix].buf.map,
555 sizeof (struct mlx4_rcv_tunnel_mad),
556 DMA_TO_DEVICE);
557
558 list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
559 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
560 list.lkey = tun_ctx->mr->lkey;
561
562 wr.wr.ud.ah = ah;
563 wr.wr.ud.port_num = port;
564 wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
565 wr.wr.ud.remote_qpn = dqpn;
566 wr.next = NULL;
567 wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
568 wr.sg_list = &list;
569 wr.num_sge = 1;
570 wr.opcode = IB_WR_SEND;
571 wr.send_flags = IB_SEND_SIGNALED;
572
573 ret = ib_post_send(src_qp, &wr, &bad_wr);
574out:
575 if (ret)
576 ib_destroy_ah(ah);
577 return ret;
578}
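The send path above hands out tx-ring slots with a head/tail pair and a power-of-two mask: the producer bumps the head, the completion handler bumps the tail, and the ring is treated as full when head - tail reaches size - 1. A sketch of that accounting (NUM_BUFS is an assumed stand-in for MLX4_NUM_TUNNEL_BUFS):

#include <stdio.h>
#include <errno.h>

#define NUM_BUFS 256	/* must be a power of two */

struct ring { unsigned head, tail; };

static int ring_get_tx_slot(struct ring *r, unsigned *ix)
{
	if (r->head - r->tail >= NUM_BUFS - 1)
		return -EAGAIN;			/* ring full, caller retries later */
	*ix = (++r->head) & (NUM_BUFS - 1);	/* slot index wraps with the mask */
	return 0;
}

int main(void)
{
	struct ring r = { 0, 0 };
	unsigned ix;

	while (!ring_get_tx_slot(&r, &ix))
		;				/* fill the ring until -EAGAIN */
	printf("ring filled at head=%u tail=%u\n", r.head, r.tail);
	return 0;
}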
579
580static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
581 struct ib_wc *wc, struct ib_grh *grh,
582 struct ib_mad *mad)
583{
584 struct mlx4_ib_dev *dev = to_mdev(ibdev);
585 int err;
586 int slave;
587 u8 *slave_id;
588
589 /* Initially assume that this mad is for us */
590 slave = mlx4_master_func_num(dev->dev);
591
592 /* See if the slave id is encoded in a response mad */
593 if (mad->mad_hdr.method & 0x80) {
594 slave_id = (u8 *) &mad->mad_hdr.tid;
595 slave = *slave_id;
596 if (slave != 255) /*255 indicates the dom0*/
597 *slave_id = 0; /* remap tid */
598 }
599
600 /* If a grh is present, we demux according to it */
601 if (wc->wc_flags & IB_WC_GRH) {
602 slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
603 if (slave < 0) {
604 mlx4_ib_warn(ibdev, "failed matching grh\n");
605 return -ENOENT;
606 }
607 }
608 /* Class-specific handling */
609 switch (mad->mad_hdr.mgmt_class) {
610 case IB_MGMT_CLASS_SUBN_ADM:
611 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
612 (struct ib_sa_mad *) mad))
613 return 0;
614 break;
615 case IB_MGMT_CLASS_CM:
616 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
617 return 0;
618 break;
619 case IB_MGMT_CLASS_DEVICE_MGMT:
620 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
621 return 0;
622 break;
623 default:
624 /* Drop unsupported classes for slaves in tunnel mode */
625 if (slave != mlx4_master_func_num(dev->dev)) {
626 pr_debug("dropping unsupported ingress mad from class:%d "
627 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
628 return 0;
629 }
630 }
631	/* make sure the slave id is in range; a leftover 255 would not have been handled above */
632 if (slave >= dev->dev->caps.sqp_demux) {
633 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
634 slave, dev->dev->caps.sqp_demux);
635 return -ENOENT;
636 }
637
638 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
639 if (err)
640 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
641 slave, err);
642 return 0;
643}
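mlx4_ib_demux_mad() recovers the destination slave from the most significant byte of the transaction ID when the MAD is a response (method bit 0x80 set), then zeroes that byte before delivery. A byte-array sketch of the tag/recover/restore cycle:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint8_t method = 0x81;		/* e.g. GetResp; bit 0x80 marks a response */
	uint8_t tid[8] = { 0 };		/* big-endian TID as it appears on the wire */
	int slave = -1;

	tid[7] = 0x42;			/* low byte of some transaction counter */
	tid[0] = 3;			/* multiplex path: tag the request with slave 3 */

	if (method & 0x80) {		/* demux path: responses carry the tag */
		slave = tid[0];
		if (slave != 255)	/* 255 is reserved for dom0 in the code above */
			tid[0] = 0;	/* remap the tid before delivery */
	}
	printf("slave=%d tid[0]=%d\n", slave, tid[0]);
	return 0;
}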
644
245static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 645static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
246 struct ib_wc *in_wc, struct ib_grh *in_grh, 646 struct ib_wc *in_wc, struct ib_grh *in_grh,
247 struct ib_mad *in_mad, struct ib_mad *out_mad) 647 struct ib_mad *in_mad, struct ib_mad *out_mad)
@@ -306,8 +706,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
306 prev_lid = pattr.lid; 706 prev_lid = pattr.lid;
307 707
308 err = mlx4_MAD_IFC(to_mdev(ibdev), 708 err = mlx4_MAD_IFC(to_mdev(ibdev),
309 mad_flags & IB_MAD_IGNORE_MKEY, 709 (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
310 mad_flags & IB_MAD_IGNORE_BKEY, 710 (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
711 MLX4_MAD_IFC_NET_VIEW,
311 port_num, in_wc, in_grh, in_mad, out_mad); 712 port_num, in_wc, in_grh, in_mad, out_mad);
312 if (err) 713 if (err)
313 return IB_MAD_RESULT_FAILURE; 714 return IB_MAD_RESULT_FAILURE;
@@ -315,7 +716,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
315 if (!out_mad->mad_hdr.status) { 716 if (!out_mad->mad_hdr.status) {
316 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) 717 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV))
317 smp_snoop(ibdev, port_num, in_mad, prev_lid); 718 smp_snoop(ibdev, port_num, in_mad, prev_lid);
318 node_desc_override(ibdev, out_mad); 719 /* slaves get node desc from FW */
720 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
721 node_desc_override(ibdev, out_mad);
319 } 722 }
320 723
321 /* set return bit in status of directed route responses */ 724 /* set return bit in status of directed route responses */
@@ -398,6 +801,8 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
398static void send_handler(struct ib_mad_agent *agent, 801static void send_handler(struct ib_mad_agent *agent,
399 struct ib_mad_send_wc *mad_send_wc) 802 struct ib_mad_send_wc *mad_send_wc)
400{ 803{
804 if (mad_send_wc->send_buf->context[0])
805 ib_destroy_ah(mad_send_wc->send_buf->context[0]);
401 ib_free_send_mad(mad_send_wc->send_buf); 806 ib_free_send_mad(mad_send_wc->send_buf);
402} 807}
403 808
@@ -456,6 +861,90 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
456 } 861 }
457} 862}
458 863
864static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
865{
866 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
867
868 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
869 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
870 MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
871}
872
873static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
874{
875 /* re-configure the alias-guid and mcg's */
876 if (mlx4_is_master(dev->dev)) {
877 mlx4_ib_invalidate_all_guid_record(dev, port_num);
878
879 if (!dev->sriov.is_going_down) {
880 mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
881 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
882 MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
883 }
884 }
885 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
886}
887
888static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
889 struct mlx4_eqe *eqe)
890{
891 __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
892 GET_MASK_FROM_EQE(eqe));
893}
894
895static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
896 u32 guid_tbl_blk_num, u32 change_bitmap)
897{
898 struct ib_smp *in_mad = NULL;
899 struct ib_smp *out_mad = NULL;
900 u16 i;
901
902 if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
903 return;
904
905 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
906 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
907 if (!in_mad || !out_mad) {
908 mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
909 goto out;
910 }
911
912 guid_tbl_blk_num *= 4;
913
914 for (i = 0; i < 4; i++) {
915 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
916 continue;
917 memset(in_mad, 0, sizeof *in_mad);
918 memset(out_mad, 0, sizeof *out_mad);
919
920 in_mad->base_version = 1;
921 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
922 in_mad->class_version = 1;
923 in_mad->method = IB_MGMT_METHOD_GET;
924 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
925 in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
926
927 if (mlx4_MAD_IFC(dev,
928 MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
929 port_num, NULL, NULL, in_mad, out_mad)) {
930 mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
931 goto out;
932 }
933
934 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
935 port_num,
936 (u8 *)(&((struct ib_smp *)out_mad)->data));
937 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
938 port_num,
939 (u8 *)(&((struct ib_smp *)out_mad)->data));
940 }
941
942out:
943 kfree(in_mad);
944 kfree(out_mad);
945 return;
946}
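The EQE reports GUID changes in 32-entry blocks with a 32-bit mask, while each GuidInfo MAD covers 8 entries, hence the *4 and the per-byte mask test above. A sketch of which attr_mod values get queried for a given mask (values invented for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t eqe_block = 2;			/* hypothetical block from the EQE */
	uint32_t change_bitmap = 0x00ff0100;	/* entries 8 and 16..23 changed */
	uint32_t mad_block = eqe_block * 4;
	int i;

	for (i = 0; i < 4; i++) {
		if (change_bitmap && !((change_bitmap >> (8 * i)) & 0xff))
			continue;		/* nothing changed in this MAD block */
		printf("query GuidInfo attr_mod=%u\n", mad_block + i);
	}
	return 0;
}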
947
459void handle_port_mgmt_change_event(struct work_struct *work) 948void handle_port_mgmt_change_event(struct work_struct *work)
460{ 949{
461 struct ib_event_work *ew = container_of(work, struct ib_event_work, work); 950 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
@@ -463,6 +952,8 @@ void handle_port_mgmt_change_event(struct work_struct *work)
463 struct mlx4_eqe *eqe = &(ew->ib_eqe); 952 struct mlx4_eqe *eqe = &(ew->ib_eqe);
464 u8 port = eqe->event.port_mgmt_change.port; 953 u8 port = eqe->event.port_mgmt_change.port;
465 u32 changed_attr; 954 u32 changed_attr;
955 u32 tbl_block;
956 u32 change_bitmap;
466 957
467 switch (eqe->subtype) { 958 switch (eqe->subtype) {
468 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO: 959 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
@@ -478,24 +969,36 @@ void handle_port_mgmt_change_event(struct work_struct *work)
478 969
479 /* Check if it is a lid change event */ 970 /* Check if it is a lid change event */
480 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK) 971 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
481 mlx4_ib_dispatch_event(dev, port, IB_EVENT_LID_CHANGE); 972 handle_lid_change_event(dev, port);
482 973
483 /* Generate GUID changed event */ 974 /* Generate GUID changed event */
484 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) 975 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
485 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); 976 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
977 /*if master, notify all slaves*/
978 if (mlx4_is_master(dev->dev))
979 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
980 MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
981 }
486 982
487 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK) 983 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
488 mlx4_ib_dispatch_event(dev, port, 984 handle_client_rereg_event(dev, port);
489 IB_EVENT_CLIENT_REREGISTER);
490 break; 985 break;
491 986
492 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE: 987 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
493 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE); 988 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
989 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
990 propagate_pkey_ev(dev, port, eqe);
494 break; 991 break;
495 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO: 992 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
496 /* paravirtualized master's guid is guid 0 -- does not change */ 993 /* paravirtualized master's guid is guid 0 -- does not change */
497 if (!mlx4_is_master(dev->dev)) 994 if (!mlx4_is_master(dev->dev))
498 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); 995 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
996 /*if master, notify relevant slaves*/
997 else if (!dev->sriov.is_going_down) {
998 tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
999 change_bitmap = GET_MASK_FROM_EQE(eqe);
1000 handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
1001 }
499 break; 1002 break;
500 default: 1003 default:
501 pr_warn("Unsupported subtype 0x%x for " 1004 pr_warn("Unsupported subtype 0x%x for "
@@ -516,3 +1019,1035 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
516 1019
517 ib_dispatch_event(&event); 1020 ib_dispatch_event(&event);
518} 1021}
1022
1023static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1024{
1025 unsigned long flags;
1026 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1027 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1028 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1029 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1030 queue_work(ctx->wq, &ctx->work);
1031 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1032}
1033
1034static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1035 struct mlx4_ib_demux_pv_qp *tun_qp,
1036 int index)
1037{
1038 struct ib_sge sg_list;
1039 struct ib_recv_wr recv_wr, *bad_recv_wr;
1040 int size;
1041
1042 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1043 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1044
1045 sg_list.addr = tun_qp->ring[index].map;
1046 sg_list.length = size;
1047 sg_list.lkey = ctx->mr->lkey;
1048
1049 recv_wr.next = NULL;
1050 recv_wr.sg_list = &sg_list;
1051 recv_wr.num_sge = 1;
1052 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1053 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1054 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1055 size, DMA_FROM_DEVICE);
1056 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1057}
1058
1059static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1060 int slave, struct ib_sa_mad *sa_mad)
1061{
1062 int ret = 0;
1063
1064 /* dispatch to different sa handlers */
1065 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1066 case IB_SA_ATTR_MC_MEMBER_REC:
1067 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1068 break;
1069 default:
1070 break;
1071 }
1072 return ret;
1073}
1074
1075static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1076{
1077 int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
1078
1079 return (qpn >= proxy_start && qpn <= proxy_start + 1);
1080}
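The proxy QP numbering that is_proxy_qp0() and the multiplex checks below depend on gives each slave a block of 8 QPNs starting at base_proxy_sqpn + 8*slave: bit 0 encodes port - 1, bit 1 separates GSI from QP0 proxies, and bit 2 stays clear for MAD proxies. A decode sketch with a made-up base value:

#include <stdio.h>

int main(void)
{
	int base = 0x400;			/* hypothetical base_proxy_sqpn */
	int qpn = base + 8 * 5 + 2 + 1;		/* slave 5, GSI proxy, port 2 */
	int slave = ((qpn & ~0x7) - base) / 8;
	int port = (qpn & 0x1) + 1;
	int is_gsi = (qpn & 0x2) != 0;

	printf("slave=%d port=%d gsi=%d reserved_bit=%d\n",
	       slave, port, is_gsi, (qpn & 0x4) != 0);
	return 0;
}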
1081
1082
1083int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1084 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
1085 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
1086{
1087 struct ib_sge list;
1088 struct ib_send_wr wr, *bad_wr;
1089 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1090 struct mlx4_ib_demux_pv_qp *sqp;
1091 struct mlx4_mad_snd_buf *sqp_mad;
1092 struct ib_ah *ah;
1093 struct ib_qp *send_qp = NULL;
1094 unsigned wire_tx_ix = 0;
1095 int ret = 0;
1096 u16 wire_pkey_ix;
1097 int src_qpnum;
1098 u8 sgid_index;
1099
1100
1101 sqp_ctx = dev->sriov.sqps[port-1];
1102
1103 /* check if proxy qp created */
1104 if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1105 return -EAGAIN;
1106
1107 /* QP0 forwarding only for Dom0 */
1108 if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
1109 return -EINVAL;
1110
1111 if (dest_qpt == IB_QPT_SMI) {
1112 src_qpnum = 0;
1113 sqp = &sqp_ctx->qp[0];
1114 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1115 } else {
1116 src_qpnum = 1;
1117 sqp = &sqp_ctx->qp[1];
1118 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1119 }
1120
1121 send_qp = sqp->qp;
1122
1123 /* create ah */
1124 sgid_index = attr->grh.sgid_index;
1125 attr->grh.sgid_index = 0;
1126 ah = ib_create_ah(sqp_ctx->pd, attr);
1127 if (IS_ERR(ah))
1128 return -ENOMEM;
1129 attr->grh.sgid_index = sgid_index;
1130 to_mah(ah)->av.ib.gid_index = sgid_index;
1131 /* get rid of force-loopback bit */
1132 to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1133 spin_lock(&sqp->tx_lock);
1134 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1135 (MLX4_NUM_TUNNEL_BUFS - 1))
1136 ret = -EAGAIN;
1137 else
1138 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
1139 spin_unlock(&sqp->tx_lock);
1140 if (ret)
1141 goto out;
1142
1143 sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1144 if (sqp->tx_ring[wire_tx_ix].ah)
1145 ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
1146 sqp->tx_ring[wire_tx_ix].ah = ah;
1147 ib_dma_sync_single_for_cpu(&dev->ib_dev,
1148 sqp->tx_ring[wire_tx_ix].buf.map,
1149 sizeof (struct mlx4_mad_snd_buf),
1150 DMA_TO_DEVICE);
1151
1152 memcpy(&sqp_mad->payload, mad, sizeof *mad);
1153
1154 ib_dma_sync_single_for_device(&dev->ib_dev,
1155 sqp->tx_ring[wire_tx_ix].buf.map,
1156 sizeof (struct mlx4_mad_snd_buf),
1157 DMA_TO_DEVICE);
1158
1159 list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1160 list.length = sizeof (struct mlx4_mad_snd_buf);
1161 list.lkey = sqp_ctx->mr->lkey;
1162
1163 wr.wr.ud.ah = ah;
1164 wr.wr.ud.port_num = port;
1165 wr.wr.ud.pkey_index = wire_pkey_ix;
1166 wr.wr.ud.remote_qkey = qkey;
1167 wr.wr.ud.remote_qpn = remote_qpn;
1168 wr.next = NULL;
1169 wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1170 wr.sg_list = &list;
1171 wr.num_sge = 1;
1172 wr.opcode = IB_WR_SEND;
1173 wr.send_flags = IB_SEND_SIGNALED;
1174
1175 ret = ib_post_send(send_qp, &wr, &bad_wr);
1176out:
1177 if (ret)
1178 ib_destroy_ah(ah);
1179 return ret;
1180}
1181
1182static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1183{
1184 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1185 struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1186 int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1187 struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1188 struct mlx4_ib_ah ah;
1189 struct ib_ah_attr ah_attr;
1190 u8 *slave_id;
1191 int slave;
1192
1193 /* Get slave that sent this packet */
1194 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1195 wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1196 (wc->src_qp & 0x1) != ctx->port - 1 ||
1197 wc->src_qp & 0x4) {
1198 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1199 return;
1200 }
1201 slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1202 if (slave != ctx->slave) {
1203 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1204 "belongs to another slave\n", wc->src_qp);
1205 return;
1206 }
1207 if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
1208 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1209 "non-master trying to send QP0 packets\n", wc->src_qp);
1210 return;
1211 }
1212
1213 /* Map transaction ID */
1214 ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1215 sizeof (struct mlx4_tunnel_mad),
1216 DMA_FROM_DEVICE);
1217 switch (tunnel->mad.mad_hdr.method) {
1218 case IB_MGMT_METHOD_SET:
1219 case IB_MGMT_METHOD_GET:
1220 case IB_MGMT_METHOD_REPORT:
1221 case IB_SA_METHOD_GET_TABLE:
1222 case IB_SA_METHOD_DELETE:
1223 case IB_SA_METHOD_GET_MULTI:
1224 case IB_SA_METHOD_GET_TRACE_TBL:
1225 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1226 if (*slave_id) {
1227 mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1228 "class:%d slave:%d\n", *slave_id,
1229 tunnel->mad.mad_hdr.mgmt_class, slave);
1230 return;
1231 } else
1232 *slave_id = slave;
1233 default:
1234 /* nothing */;
1235 }
1236
1237 /* Class-specific handling */
1238 switch (tunnel->mad.mad_hdr.mgmt_class) {
1239 case IB_MGMT_CLASS_SUBN_ADM:
1240 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1241 (struct ib_sa_mad *) &tunnel->mad))
1242 return;
1243 break;
1244 case IB_MGMT_CLASS_CM:
1245 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1246 (struct ib_mad *) &tunnel->mad))
1247 return;
1248 break;
1249 case IB_MGMT_CLASS_DEVICE_MGMT:
1250 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1251 tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1252 return;
1253 break;
1254 default:
1255 /* Drop unsupported classes for slaves in tunnel mode */
1256 if (slave != mlx4_master_func_num(dev->dev)) {
1257 mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1258 "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1259 return;
1260 }
1261 }
1262
1263 /* We are using standard ib_core services to send the mad, so generate a
1264	 * standard address handle by decoding the tunnelled mlx4_ah fields */
1265 memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1266 ah.ibah.device = ctx->ib_dev;
1267 mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1268 if ((ah_attr.ah_flags & IB_AH_GRH) &&
1269 (ah_attr.grh.sgid_index != slave)) {
1270 mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
1271 slave, ah_attr.grh.sgid_index);
1272 return;
1273 }
1274
1275 mlx4_ib_send_to_wire(dev, slave, ctx->port,
1276 is_proxy_qp0(dev, wc->src_qp, slave) ?
1277 IB_QPT_SMI : IB_QPT_GSI,
1278 be16_to_cpu(tunnel->hdr.pkey_index),
1279 be32_to_cpu(tunnel->hdr.remote_qpn),
1280 be32_to_cpu(tunnel->hdr.qkey),
1281 &ah_attr, &tunnel->mad);
1282}
1283
1284static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1285 enum ib_qp_type qp_type, int is_tun)
1286{
1287 int i;
1288 struct mlx4_ib_demux_pv_qp *tun_qp;
1289 int rx_buf_size, tx_buf_size;
1290
1291 if (qp_type > IB_QPT_GSI)
1292 return -EINVAL;
1293
1294 tun_qp = &ctx->qp[qp_type];
1295
1296 tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
1297 GFP_KERNEL);
1298 if (!tun_qp->ring)
1299 return -ENOMEM;
1300
1301 tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
1302 sizeof (struct mlx4_ib_tun_tx_buf),
1303 GFP_KERNEL);
1304 if (!tun_qp->tx_ring) {
1305 kfree(tun_qp->ring);
1306 tun_qp->ring = NULL;
1307 return -ENOMEM;
1308 }
1309
1310 if (is_tun) {
1311 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1312 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1313 } else {
1314 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1315 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1316 }
1317
1318 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1319 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1320 if (!tun_qp->ring[i].addr)
1321 goto err;
1322 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1323 tun_qp->ring[i].addr,
1324 rx_buf_size,
1325 DMA_FROM_DEVICE);
1326 }
1327
1328 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1329 tun_qp->tx_ring[i].buf.addr =
1330 kmalloc(tx_buf_size, GFP_KERNEL);
1331 if (!tun_qp->tx_ring[i].buf.addr)
1332 goto tx_err;
1333 tun_qp->tx_ring[i].buf.map =
1334 ib_dma_map_single(ctx->ib_dev,
1335 tun_qp->tx_ring[i].buf.addr,
1336 tx_buf_size,
1337 DMA_TO_DEVICE);
1338 tun_qp->tx_ring[i].ah = NULL;
1339 }
1340 spin_lock_init(&tun_qp->tx_lock);
1341 tun_qp->tx_ix_head = 0;
1342 tun_qp->tx_ix_tail = 0;
1343 tun_qp->proxy_qpt = qp_type;
1344
1345 return 0;
1346
1347tx_err:
1348 while (i > 0) {
1349 --i;
1350 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1351 tx_buf_size, DMA_TO_DEVICE);
1352 kfree(tun_qp->tx_ring[i].buf.addr);
1353 }
1354 kfree(tun_qp->tx_ring);
1355 tun_qp->tx_ring = NULL;
1356 i = MLX4_NUM_TUNNEL_BUFS;
1357err:
1358 while (i > 0) {
1359 --i;
1360 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1361 rx_buf_size, DMA_FROM_DEVICE);
1362 kfree(tun_qp->ring[i].addr);
1363 }
1364 kfree(tun_qp->ring);
1365 tun_qp->ring = NULL;
1366 return -ENOMEM;
1367}
1368
1369static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1370 enum ib_qp_type qp_type, int is_tun)
1371{
1372 int i;
1373 struct mlx4_ib_demux_pv_qp *tun_qp;
1374 int rx_buf_size, tx_buf_size;
1375
1376 if (qp_type > IB_QPT_GSI)
1377 return;
1378
1379 tun_qp = &ctx->qp[qp_type];
1380 if (is_tun) {
1381 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1382 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1383 } else {
1384 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1385 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1386 }
1387
1388
1389 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1390 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1391 rx_buf_size, DMA_FROM_DEVICE);
1392 kfree(tun_qp->ring[i].addr);
1393 }
1394
1395 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1396 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1397 tx_buf_size, DMA_TO_DEVICE);
1398 kfree(tun_qp->tx_ring[i].buf.addr);
1399 if (tun_qp->tx_ring[i].ah)
1400 ib_destroy_ah(tun_qp->tx_ring[i].ah);
1401 }
1402 kfree(tun_qp->tx_ring);
1403 kfree(tun_qp->ring);
1404}
1405
1406static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1407{
1408 struct mlx4_ib_demux_pv_ctx *ctx;
1409 struct mlx4_ib_demux_pv_qp *tun_qp;
1410 struct ib_wc wc;
1411 int ret;
1412 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1413 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1414
1415 while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1416 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1417 if (wc.status == IB_WC_SUCCESS) {
1418 switch (wc.opcode) {
1419 case IB_WC_RECV:
1420 mlx4_ib_multiplex_mad(ctx, &wc);
1421 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1422 wc.wr_id &
1423 (MLX4_NUM_TUNNEL_BUFS - 1));
1424 if (ret)
1425 pr_err("Failed reposting tunnel "
1426 "buf:%lld\n", wc.wr_id);
1427 break;
1428 case IB_WC_SEND:
1429 pr_debug("received tunnel send completion:"
1430 "wrid=0x%llx, status=0x%x\n",
1431 wc.wr_id, wc.status);
1432 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1433 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1434 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1435 = NULL;
1436 spin_lock(&tun_qp->tx_lock);
1437 tun_qp->tx_ix_tail++;
1438 spin_unlock(&tun_qp->tx_lock);
1439
1440 break;
1441 default:
1442 break;
1443 }
1444 } else {
1445 pr_debug("mlx4_ib: completion error in tunnel: %d."
1446 " status = %d, wrid = 0x%llx\n",
1447 ctx->slave, wc.status, wc.wr_id);
1448 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1449 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1450 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1451 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1452 = NULL;
1453 spin_lock(&tun_qp->tx_lock);
1454 tun_qp->tx_ix_tail++;
1455 spin_unlock(&tun_qp->tx_lock);
1456 }
1457 }
1458 }
1459}
1460
1461static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1462{
1463 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1464
1465 /* It's worse than that! He's dead, Jim! */
1466 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1467 event->event, sqp->port);
1468}
1469
1470static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
1471 enum ib_qp_type qp_type, int create_tun)
1472{
1473 int i, ret;
1474 struct mlx4_ib_demux_pv_qp *tun_qp;
1475 struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
1476 struct ib_qp_attr attr;
1477 int qp_attr_mask_INIT;
1478
1479 if (qp_type > IB_QPT_GSI)
1480 return -EINVAL;
1481
1482 tun_qp = &ctx->qp[qp_type];
1483
1484 memset(&qp_init_attr, 0, sizeof qp_init_attr);
1485 qp_init_attr.init_attr.send_cq = ctx->cq;
1486 qp_init_attr.init_attr.recv_cq = ctx->cq;
1487 qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
1488 qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
1489 qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
1490 qp_init_attr.init_attr.cap.max_send_sge = 1;
1491 qp_init_attr.init_attr.cap.max_recv_sge = 1;
1492 if (create_tun) {
1493 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
1494 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
1495 qp_init_attr.port = ctx->port;
1496 qp_init_attr.slave = ctx->slave;
1497 qp_init_attr.proxy_qp_type = qp_type;
1498 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
1499 IB_QP_QKEY | IB_QP_PORT;
1500 } else {
1501 qp_init_attr.init_attr.qp_type = qp_type;
1502 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
1503 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
1504 }
1505 qp_init_attr.init_attr.port_num = ctx->port;
1506 qp_init_attr.init_attr.qp_context = ctx;
1507 qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
1508 tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
1509 if (IS_ERR(tun_qp->qp)) {
1510 ret = PTR_ERR(tun_qp->qp);
1511 tun_qp->qp = NULL;
1512 pr_err("Couldn't create %s QP (%d)\n",
1513 create_tun ? "tunnel" : "special", ret);
1514 return ret;
1515 }
1516
1517 memset(&attr, 0, sizeof attr);
1518 attr.qp_state = IB_QPS_INIT;
1519 attr.pkey_index =
1520 to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
1521 attr.qkey = IB_QP1_QKEY;
1522 attr.port_num = ctx->port;
1523 ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
1524 if (ret) {
1525 pr_err("Couldn't change %s qp state to INIT (%d)\n",
1526 create_tun ? "tunnel" : "special", ret);
1527 goto err_qp;
1528 }
1529 attr.qp_state = IB_QPS_RTR;
1530 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
1531 if (ret) {
1532 pr_err("Couldn't change %s qp state to RTR (%d)\n",
1533 create_tun ? "tunnel" : "special", ret);
1534 goto err_qp;
1535 }
1536 attr.qp_state = IB_QPS_RTS;
1537 attr.sq_psn = 0;
1538 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
1539 if (ret) {
1540 pr_err("Couldn't change %s qp state to RTS (%d)\n",
1541 create_tun ? "tunnel" : "special", ret);
1542 goto err_qp;
1543 }
1544
1545 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1546 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
1547 if (ret) {
1548				pr_err("mlx4_ib_post_pv_qp_buf error"
1549				       " (err = %d, i = %d)\n", ret, i);
1550 goto err_qp;
1551 }
1552 }
1553 return 0;
1554
1555err_qp:
1556 ib_destroy_qp(tun_qp->qp);
1557 tun_qp->qp = NULL;
1558 return ret;
1559}
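create_pv_sqp() walks the standard verbs bring-up: create the QP, then modify it INIT -> RTR -> RTS before posting receive buffers. A toy state machine capturing just the ordering constraint (the real transitions also carry pkey index, qkey, port and PSN attributes):

#include <stdio.h>

enum state { RESET, INIT, RTR, RTS };

/* stub standing in for ib_modify_qp(): only forward, one step at a time */
static int modify_qp(enum state *cur, enum state next)
{
	if (next != *cur + 1)
		return -1;
	*cur = next;
	return 0;
}

int main(void)
{
	enum state s = RESET;

	if (modify_qp(&s, INIT) || modify_qp(&s, RTR) || modify_qp(&s, RTS)) {
		fprintf(stderr, "qp bring-up failed\n");
		return 1;
	}
	printf("qp is ready to send and receive\n");
	return 0;
}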
1560
1561/*
1562 * IB MAD completion callback for real SQPs
1563 */
1564static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1565{
1566 struct mlx4_ib_demux_pv_ctx *ctx;
1567 struct mlx4_ib_demux_pv_qp *sqp;
1568 struct ib_wc wc;
1569 struct ib_grh *grh;
1570 struct ib_mad *mad;
1571
1572 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1573 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1574
1575 while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1576 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1577 if (wc.status == IB_WC_SUCCESS) {
1578 switch (wc.opcode) {
1579 case IB_WC_SEND:
1580 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1581 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1582 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1583 = NULL;
1584 spin_lock(&sqp->tx_lock);
1585 sqp->tx_ix_tail++;
1586 spin_unlock(&sqp->tx_lock);
1587 break;
1588 case IB_WC_RECV:
1589 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1590 (sqp->ring[wc.wr_id &
1591 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
1592 grh = &(((struct mlx4_mad_rcv_buf *)
1593 (sqp->ring[wc.wr_id &
1594 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
1595 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1596 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1597 (MLX4_NUM_TUNNEL_BUFS - 1)))
1598 pr_err("Failed reposting SQP "
1599 "buf:%lld\n", wc.wr_id);
1600 break;
1601 default:
1602 BUG_ON(1);
1603 break;
1604 }
1605 } else {
1606 pr_debug("mlx4_ib: completion error in tunnel: %d."
1607 " status = %d, wrid = 0x%llx\n",
1608 ctx->slave, wc.status, wc.wr_id);
1609 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1610 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1611 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1612 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1613 = NULL;
1614 spin_lock(&sqp->tx_lock);
1615 sqp->tx_ix_tail++;
1616 spin_unlock(&sqp->tx_lock);
1617 }
1618 }
1619 }
1620}
1621
1622static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1623 struct mlx4_ib_demux_pv_ctx **ret_ctx)
1624{
1625 struct mlx4_ib_demux_pv_ctx *ctx;
1626
1627 *ret_ctx = NULL;
1628 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1629 if (!ctx) {
1630 pr_err("failed allocating pv resource context "
1631 "for port %d, slave %d\n", port, slave);
1632 return -ENOMEM;
1633 }
1634
1635 ctx->ib_dev = &dev->ib_dev;
1636 ctx->port = port;
1637 ctx->slave = slave;
1638 *ret_ctx = ctx;
1639 return 0;
1640}
1641
1642static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1643{
1644 if (dev->sriov.demux[port - 1].tun[slave]) {
1645 kfree(dev->sriov.demux[port - 1].tun[slave]);
1646 dev->sriov.demux[port - 1].tun[slave] = NULL;
1647 }
1648}
1649
1650static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
1651 int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
1652{
1653 int ret, cq_size;
1654
1655 if (ctx->state != DEMUX_PV_STATE_DOWN)
1656 return -EEXIST;
1657
1658 ctx->state = DEMUX_PV_STATE_STARTING;
1659 /* have QP0 only on port owner, and only if link layer is IB */
1660 if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
1661 rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
1662 ctx->has_smi = 1;
1663
1664 if (ctx->has_smi) {
1665 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
1666 if (ret) {
1667 pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
1668 goto err_out;
1669 }
1670 }
1671
1672 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
1673 if (ret) {
1674 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
1675 goto err_out_qp0;
1676 }
1677
1678 cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
1679 if (ctx->has_smi)
1680 cq_size *= 2;
1681
1682 ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
1683 NULL, ctx, cq_size, 0);
1684 if (IS_ERR(ctx->cq)) {
1685 ret = PTR_ERR(ctx->cq);
1686 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
1687 goto err_buf;
1688 }
1689
1690 ctx->pd = ib_alloc_pd(ctx->ib_dev);
1691 if (IS_ERR(ctx->pd)) {
1692 ret = PTR_ERR(ctx->pd);
1693 pr_err("Couldn't create tunnel PD (%d)\n", ret);
1694 goto err_cq;
1695 }
1696
1697 ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
1698 if (IS_ERR(ctx->mr)) {
1699 ret = PTR_ERR(ctx->mr);
1700 pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
1701 goto err_pd;
1702 }
1703
1704 if (ctx->has_smi) {
1705 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
1706 if (ret) {
1707 pr_err("Couldn't create %s QP0 (%d)\n",
1708 create_tun ? "tunnel for" : "", ret);
1709 goto err_mr;
1710 }
1711 }
1712
1713 ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
1714 if (ret) {
1715 pr_err("Couldn't create %s QP1 (%d)\n",
1716 create_tun ? "tunnel for" : "", ret);
1717 goto err_qp0;
1718 }
1719
1720 if (create_tun)
1721 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
1722 else
1723 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
1724
1725 ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
1726
1727 ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1728 if (ret) {
1729 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
1730 goto err_wq;
1731 }
1732 ctx->state = DEMUX_PV_STATE_ACTIVE;
1733 return 0;
1734
1735err_wq:
1736 ctx->wq = NULL;
1737 ib_destroy_qp(ctx->qp[1].qp);
1738 ctx->qp[1].qp = NULL;
1739
1740
1741err_qp0:
1742 if (ctx->has_smi)
1743 ib_destroy_qp(ctx->qp[0].qp);
1744 ctx->qp[0].qp = NULL;
1745
1746err_mr:
1747 ib_dereg_mr(ctx->mr);
1748 ctx->mr = NULL;
1749
1750err_pd:
1751 ib_dealloc_pd(ctx->pd);
1752 ctx->pd = NULL;
1753
1754err_cq:
1755 ib_destroy_cq(ctx->cq);
1756 ctx->cq = NULL;
1757
1758err_buf:
1759 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
1760
1761err_out_qp0:
1762 if (ctx->has_smi)
1763 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
1764err_out:
1765 ctx->state = DEMUX_PV_STATE_DOWN;
1766 return ret;
1767}
1768
1769static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
1770 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
1771{
1772 if (!ctx)
1773 return;
1774 if (ctx->state > DEMUX_PV_STATE_DOWN) {
1775 ctx->state = DEMUX_PV_STATE_DOWNING;
1776 if (flush)
1777 flush_workqueue(ctx->wq);
1778 if (ctx->has_smi) {
1779 ib_destroy_qp(ctx->qp[0].qp);
1780 ctx->qp[0].qp = NULL;
1781 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
1782 }
1783 ib_destroy_qp(ctx->qp[1].qp);
1784 ctx->qp[1].qp = NULL;
1785 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
1786 ib_dereg_mr(ctx->mr);
1787 ctx->mr = NULL;
1788 ib_dealloc_pd(ctx->pd);
1789 ctx->pd = NULL;
1790 ib_destroy_cq(ctx->cq);
1791 ctx->cq = NULL;
1792 ctx->state = DEMUX_PV_STATE_DOWN;
1793 }
1794}
1795
1796static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
1797 int port, int do_init)
1798{
1799 int ret = 0;
1800
1801 if (!do_init) {
1802 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
1803 /* for master, destroy real sqp resources */
1804 if (slave == mlx4_master_func_num(dev->dev))
1805 destroy_pv_resources(dev, slave, port,
1806 dev->sriov.sqps[port - 1], 1);
1807 /* destroy the tunnel qp resources */
1808 destroy_pv_resources(dev, slave, port,
1809 dev->sriov.demux[port - 1].tun[slave], 1);
1810 return 0;
1811 }
1812
1813 /* create the tunnel qp resources */
1814 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
1815 dev->sriov.demux[port - 1].tun[slave]);
1816
1817 /* for master, create the real sqp resources */
1818 if (!ret && slave == mlx4_master_func_num(dev->dev))
1819 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
1820 dev->sriov.sqps[port - 1]);
1821 return ret;
1822}
1823
1824void mlx4_ib_tunnels_update_work(struct work_struct *work)
1825{
1826 struct mlx4_ib_demux_work *dmxw;
1827
1828 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
1829 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
1830 dmxw->do_init);
1831 kfree(dmxw);
1832 return;
1833}
1834
1835static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
1836 struct mlx4_ib_demux_ctx *ctx,
1837 int port)
1838{
1839 char name[12];
1840 int ret = 0;
1841 int i;
1842
1843 ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
1844 sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
1845 if (!ctx->tun)
1846 return -ENOMEM;
1847
1848 ctx->dev = dev;
1849 ctx->port = port;
1850 ctx->ib_dev = &dev->ib_dev;
1851
1852 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1853 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
1854 if (ret) {
1855 ret = -ENOMEM;
1856 goto err_mcg;
1857 }
1858 }
1859
1860 ret = mlx4_ib_mcg_port_init(ctx);
1861 if (ret) {
1862 pr_err("Failed initializing mcg para-virt (%d)\n", ret);
1863 goto err_mcg;
1864 }
1865
1866 snprintf(name, sizeof name, "mlx4_ibt%d", port);
1867 ctx->wq = create_singlethread_workqueue(name);
1868 if (!ctx->wq) {
1869 pr_err("Failed to create tunnelling WQ for port %d\n", port);
1870 ret = -ENOMEM;
1871 goto err_wq;
1872 }
1873
1874 snprintf(name, sizeof name, "mlx4_ibud%d", port);
1875 ctx->ud_wq = create_singlethread_workqueue(name);
1876 if (!ctx->ud_wq) {
1877 pr_err("Failed to create up/down WQ for port %d\n", port);
1878 ret = -ENOMEM;
1879 goto err_udwq;
1880 }
1881
1882 return 0;
1883
1884err_udwq:
1885 destroy_workqueue(ctx->wq);
1886 ctx->wq = NULL;
1887
1888err_wq:
1889 mlx4_ib_mcg_port_cleanup(ctx, 1);
1890err_mcg:
1891 for (i = 0; i < dev->dev->caps.sqp_demux; i++)
1892 free_pv_object(dev, i, port);
1893 kfree(ctx->tun);
1894 ctx->tun = NULL;
1895 return ret;
1896}
1897
1898static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
1899{
1900 if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
1901 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
1902 flush_workqueue(sqp_ctx->wq);
1903 if (sqp_ctx->has_smi) {
1904 ib_destroy_qp(sqp_ctx->qp[0].qp);
1905 sqp_ctx->qp[0].qp = NULL;
1906 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
1907 }
1908 ib_destroy_qp(sqp_ctx->qp[1].qp);
1909 sqp_ctx->qp[1].qp = NULL;
1910 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
1911 ib_dereg_mr(sqp_ctx->mr);
1912 sqp_ctx->mr = NULL;
1913 ib_dealloc_pd(sqp_ctx->pd);
1914 sqp_ctx->pd = NULL;
1915 ib_destroy_cq(sqp_ctx->cq);
1916 sqp_ctx->cq = NULL;
1917 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
1918 }
1919}
1920
1921static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
1922{
1923 int i;
1924 if (ctx) {
1925 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1926 mlx4_ib_mcg_port_cleanup(ctx, 1);
1927 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1928 if (!ctx->tun[i])
1929 continue;
1930 if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
1931 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
1932 }
1933 flush_workqueue(ctx->wq);
1934 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1935 destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
1936 free_pv_object(dev, i, ctx->port);
1937 }
1938 kfree(ctx->tun);
1939 destroy_workqueue(ctx->ud_wq);
1940 destroy_workqueue(ctx->wq);
1941 }
1942}
1943
1944static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
1945{
1946 int i;
1947
1948 if (!mlx4_is_master(dev->dev))
1949 return;
1950 /* initialize or tear down tunnel QPs for the master */
1951 for (i = 0; i < dev->dev->caps.num_ports; i++)
1952 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
1953 return;
1954}
1955
1956int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
1957{
1958 int i = 0;
1959 int err;
1960
1961 if (!mlx4_is_mfunc(dev->dev))
1962 return 0;
1963
1964 dev->sriov.is_going_down = 0;
1965 spin_lock_init(&dev->sriov.going_down_lock);
1966 mlx4_ib_cm_paravirt_init(dev);
1967
1968 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
1969
1970 if (mlx4_is_slave(dev->dev)) {
1971 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
1972 return 0;
1973 }
1974
1975 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1976 if (i == mlx4_master_func_num(dev->dev))
1977 mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
1978 else
1979 mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
1980 }
1981
1982 err = mlx4_ib_init_alias_guid_service(dev);
1983 if (err) {
 1984		mlx4_ib_warn(&dev->ib_dev, "Failed to init alias guid process.\n");
1985 goto paravirt_err;
1986 }
1987 err = mlx4_ib_device_register_sysfs(dev);
1988 if (err) {
1989 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
1990 goto sysfs_err;
1991 }
1992
1993 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
1994 dev->dev->caps.sqp_demux);
1995 for (i = 0; i < dev->num_ports; i++) {
1996 union ib_gid gid;
1997 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
1998 if (err)
1999 goto demux_err;
2000 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
2001 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
2002 &dev->sriov.sqps[i]);
2003 if (err)
2004 goto demux_err;
2005 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2006 if (err)
2007 goto demux_err;
2008 }
2009 mlx4_ib_master_tunnels(dev, 1);
2010 return 0;
2011
2012demux_err:
2013 while (i > 0) {
2014 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2015 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2016 --i;
2017 }
2018 mlx4_ib_device_unregister_sysfs(dev);
2019
2020sysfs_err:
2021 mlx4_ib_destroy_alias_guid_service(dev);
2022
2023paravirt_err:
2024 mlx4_ib_cm_paravirt_clean(dev, -1);
2025
2026 return err;
2027}
2028
2029void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
2030{
2031 int i;
2032 unsigned long flags;
2033
2034 if (!mlx4_is_mfunc(dev->dev))
2035 return;
2036
2037 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
2038 dev->sriov.is_going_down = 1;
2039 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
2040 if (mlx4_is_master(dev->dev)) {
2041 for (i = 0; i < dev->num_ports; i++) {
2042 flush_workqueue(dev->sriov.demux[i].ud_wq);
2043 mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
2044 kfree(dev->sriov.sqps[i]);
2045 dev->sriov.sqps[i] = NULL;
2046 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2047 }
2048
2049 mlx4_ib_cm_paravirt_clean(dev, -1);
2050 mlx4_ib_destroy_alias_guid_service(dev);
2051 mlx4_ib_device_unregister_sysfs(dev);
2052 }
2053}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index cc05579ebce7..718ec6b2bad2 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -59,6 +59,10 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
59MODULE_LICENSE("Dual BSD/GPL"); 59MODULE_LICENSE("Dual BSD/GPL");
60MODULE_VERSION(DRV_VERSION); 60MODULE_VERSION(DRV_VERSION);
61 61
62int mlx4_ib_sm_guid_assign = 1;
63module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
64MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
65
62static const char mlx4_ib_version[] = 66static const char mlx4_ib_version[] =
63 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 67 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
64 DRV_VERSION " (" DRV_RELDATE ")\n"; 68 DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -70,6 +74,8 @@ struct update_gid_work {
70 int port; 74 int port;
71}; 75};
72 76
77static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
78
73static struct workqueue_struct *wq; 79static struct workqueue_struct *wq;
74 80
75static void init_query_mad(struct ib_smp *mad) 81static void init_query_mad(struct ib_smp *mad)
@@ -98,7 +104,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
98 init_query_mad(in_mad); 104 init_query_mad(in_mad);
99 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 105 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
100 106
101 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad); 107 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
108 1, NULL, NULL, in_mad, out_mad);
102 if (err) 109 if (err)
103 goto out; 110 goto out;
104 111
@@ -133,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
133 140
134 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 141 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
135 0xffffff; 142 0xffffff;
136 props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); 143 props->vendor_part_id = dev->dev->pdev->device;
137 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); 144 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
138 memcpy(&props->sys_image_guid, out_mad->data + 4, 8); 145 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
139 146
@@ -182,11 +189,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
182} 189}
183 190
184static int ib_link_query_port(struct ib_device *ibdev, u8 port, 191static int ib_link_query_port(struct ib_device *ibdev, u8 port,
185 struct ib_port_attr *props) 192 struct ib_port_attr *props, int netw_view)
186{ 193{
187 struct ib_smp *in_mad = NULL; 194 struct ib_smp *in_mad = NULL;
188 struct ib_smp *out_mad = NULL; 195 struct ib_smp *out_mad = NULL;
189 int ext_active_speed; 196 int ext_active_speed;
197 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
190 int err = -ENOMEM; 198 int err = -ENOMEM;
191 199
192 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 200 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -198,7 +206,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
198 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 206 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
199 in_mad->attr_mod = cpu_to_be32(port); 207 in_mad->attr_mod = cpu_to_be32(port);
200 208
201 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, 209 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
210 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
211
212 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
202 in_mad, out_mad); 213 in_mad, out_mad);
203 if (err) 214 if (err)
204 goto out; 215 goto out;
@@ -211,7 +222,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
211 props->state = out_mad->data[32] & 0xf; 222 props->state = out_mad->data[32] & 0xf;
212 props->phys_state = out_mad->data[33] >> 4; 223 props->phys_state = out_mad->data[33] >> 4;
213 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); 224 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
214 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; 225 if (netw_view)
226 props->gid_tbl_len = out_mad->data[50];
227 else
228 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
215 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; 229 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
216 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; 230 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
217 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); 231 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -244,7 +258,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
244 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; 258 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
245 in_mad->attr_mod = cpu_to_be32(port); 259 in_mad->attr_mod = cpu_to_be32(port);
246 260
247 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, 261 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
248 NULL, NULL, in_mad, out_mad); 262 NULL, NULL, in_mad, out_mad);
249 if (err) 263 if (err)
250 goto out; 264 goto out;
@@ -270,7 +284,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
270} 284}
271 285
272static int eth_link_query_port(struct ib_device *ibdev, u8 port, 286static int eth_link_query_port(struct ib_device *ibdev, u8 port,
273 struct ib_port_attr *props) 287 struct ib_port_attr *props, int netw_view)
274{ 288{
275 289
276 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 290 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -320,26 +334,36 @@ out:
320 return err; 334 return err;
321} 335}
322 336
323static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 337int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
324 struct ib_port_attr *props) 338 struct ib_port_attr *props, int netw_view)
325{ 339{
326 int err; 340 int err;
327 341
328 memset(props, 0, sizeof *props); 342 memset(props, 0, sizeof *props);
329 343
330 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 344 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
331 ib_link_query_port(ibdev, port, props) : 345 ib_link_query_port(ibdev, port, props, netw_view) :
332 eth_link_query_port(ibdev, port, props); 346 eth_link_query_port(ibdev, port, props, netw_view);
333 347
334 return err; 348 return err;
335} 349}
336 350
337static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 351static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
338 union ib_gid *gid) 352 struct ib_port_attr *props)
353{
354 /* returns host view */
355 return __mlx4_ib_query_port(ibdev, port, props, 0);
356}
357
358int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
359 union ib_gid *gid, int netw_view)
339{ 360{
340 struct ib_smp *in_mad = NULL; 361 struct ib_smp *in_mad = NULL;
341 struct ib_smp *out_mad = NULL; 362 struct ib_smp *out_mad = NULL;
342 int err = -ENOMEM; 363 int err = -ENOMEM;
364 struct mlx4_ib_dev *dev = to_mdev(ibdev);
365 int clear = 0;
366 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
343 367
344 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 368 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
345 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 369 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -350,23 +374,38 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
350 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 374 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
351 in_mad->attr_mod = cpu_to_be32(port); 375 in_mad->attr_mod = cpu_to_be32(port);
352 376
353 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 377 if (mlx4_is_mfunc(dev->dev) && netw_view)
378 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
379
380 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
354 if (err) 381 if (err)
355 goto out; 382 goto out;
356 383
357 memcpy(gid->raw, out_mad->data + 8, 8); 384 memcpy(gid->raw, out_mad->data + 8, 8);
358 385
386 if (mlx4_is_mfunc(dev->dev) && !netw_view) {
387 if (index) {
388 /* For any index > 0, return the null guid */
389 err = 0;
390 clear = 1;
391 goto out;
392 }
393 }
394
359 init_query_mad(in_mad); 395 init_query_mad(in_mad);
360 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; 396 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
361 in_mad->attr_mod = cpu_to_be32(index / 8); 397 in_mad->attr_mod = cpu_to_be32(index / 8);
362 398
363 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 399 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
400 NULL, NULL, in_mad, out_mad);
364 if (err) 401 if (err)
365 goto out; 402 goto out;
366 403
367 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); 404 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
368 405
369out: 406out:
407 if (clear)
408 memset(gid->raw + 8, 0, 8);
370 kfree(in_mad); 409 kfree(in_mad);
371 kfree(out_mad); 410 kfree(out_mad);
372 return err; 411 return err;
@@ -386,16 +425,17 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
386 union ib_gid *gid) 425 union ib_gid *gid)
387{ 426{
388 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) 427 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
389 return __mlx4_ib_query_gid(ibdev, port, index, gid); 428 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
390 else 429 else
391 return iboe_query_gid(ibdev, port, index, gid); 430 return iboe_query_gid(ibdev, port, index, gid);
392} 431}
393 432
394static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 433int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
395 u16 *pkey) 434 u16 *pkey, int netw_view)
396{ 435{
397 struct ib_smp *in_mad = NULL; 436 struct ib_smp *in_mad = NULL;
398 struct ib_smp *out_mad = NULL; 437 struct ib_smp *out_mad = NULL;
438 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
399 int err = -ENOMEM; 439 int err = -ENOMEM;
400 440
401 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 441 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -407,7 +447,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
407 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; 447 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
408 in_mad->attr_mod = cpu_to_be32(index / 32); 448 in_mad->attr_mod = cpu_to_be32(index / 32);
409 449
410 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); 450 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
451 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
452
453 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
454 in_mad, out_mad);
411 if (err) 455 if (err)
412 goto out; 456 goto out;
413 457
@@ -419,6 +463,11 @@ out:
419 return err; 463 return err;
420} 464}
421 465
466static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
467{
468 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
469}
470
422static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, 471static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
423 struct ib_device_modify *props) 472 struct ib_device_modify *props)
424{ 473{
@@ -431,6 +480,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
431 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 480 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
432 return 0; 481 return 0;
433 482
483 if (mlx4_is_slave(to_mdev(ibdev)->dev))
484 return -EOPNOTSUPP;
485
434 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); 486 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
435 memcpy(ibdev->node_desc, props->node_desc, 64); 487 memcpy(ibdev->node_desc, props->node_desc, 64);
436 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); 488 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
@@ -446,7 +498,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
446 memset(mailbox->buf, 0, 256); 498 memset(mailbox->buf, 0, 256);
447 memcpy(mailbox->buf, props->node_desc, 64); 499 memcpy(mailbox->buf, props->node_desc, 64);
448 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, 500 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
449 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); 501 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
450 502
451 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); 503 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
452 504
@@ -849,6 +901,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
849{ 901{
850 struct ib_smp *in_mad = NULL; 902 struct ib_smp *in_mad = NULL;
851 struct ib_smp *out_mad = NULL; 903 struct ib_smp *out_mad = NULL;
904 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
852 int err = -ENOMEM; 905 int err = -ENOMEM;
853 906
854 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 907 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -858,8 +911,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
858 911
859 init_query_mad(in_mad); 912 init_query_mad(in_mad);
860 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; 913 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
914 if (mlx4_is_master(dev->dev))
915 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
861 916
862 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); 917 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
863 if (err) 918 if (err)
864 goto out; 919 goto out;
865 920
@@ -867,10 +922,11 @@ static int init_node_data(struct mlx4_ib_dev *dev)
867 922
868 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 923 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
869 924
870 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad); 925 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
871 if (err) 926 if (err)
872 goto out; 927 goto out;
873 928
929 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
874 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); 930 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
875 931
876out: 932out:
@@ -959,7 +1015,7 @@ static void update_gids_task(struct work_struct *work)
959 1015
960 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 1016 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
961 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 1017 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
962 MLX4_CMD_NATIVE); 1018 MLX4_CMD_WRAPPED);
963 if (err) 1019 if (err)
964 pr_warn("set port command failed\n"); 1020 pr_warn("set port command failed\n");
965 else { 1021 else {
@@ -1121,6 +1177,38 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
1121 return NOTIFY_DONE; 1177 return NOTIFY_DONE;
1122} 1178}
1123 1179
1180static void init_pkeys(struct mlx4_ib_dev *ibdev)
1181{
1182 int port;
1183 int slave;
1184 int i;
1185
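	/*
	 * Non-master slaves get only virtual pkey index 0 mapped to
	 * physical index 0; every other virtual index is pointed at the
	 * last entry of the physical pkey table.
	 */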
1186 if (mlx4_is_master(ibdev->dev)) {
1187 for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1188 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1189 for (i = 0;
1190 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1191 ++i) {
1192 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1193 /* master has the identity virt2phys pkey mapping */
1194 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1195 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1196 mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1197 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1198 }
1199 }
1200 }
1201 /* initialize pkey cache */
1202 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1203 for (i = 0;
1204 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1205 ++i)
1206 ibdev->pkeys.phys_pkey_cache[port-1][i] =
1207 (i) ? 0 : 0xFFFF;
1208 }
1209 }
1210}
1211
1124static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) 1212static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1125{ 1213{
1126 char name[32]; 1214 char name[32];
@@ -1207,11 +1295,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1207 1295
1208 pr_info_once("%s", mlx4_ib_version); 1296 pr_info_once("%s", mlx4_ib_version);
1209 1297
1210 if (mlx4_is_mfunc(dev)) { 1298 mlx4_foreach_non_ib_transport_port(i, dev)
1211 pr_warn("IB not yet supported in SRIOV\n"); 1299 num_ports++;
1300
1301 if (mlx4_is_mfunc(dev) && num_ports) {
1302 dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
1212 return NULL; 1303 return NULL;
1213 } 1304 }
1214 1305
1306 num_ports = 0;
1215 mlx4_foreach_ib_transport_port(i, dev) 1307 mlx4_foreach_ib_transport_port(i, dev)
1216 num_ports++; 1308 num_ports++;
1217 1309
@@ -1318,10 +1410,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1318 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 1410 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
1319 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 1411 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
1320 1412
1321 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; 1413 if (!mlx4_is_slave(ibdev->dev)) {
1322 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; 1414 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
1323 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; 1415 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
1324 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; 1416 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
1417 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
1418 }
1325 1419
1326 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { 1420 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
1327 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; 1421 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
@@ -1357,11 +1451,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1357 if (mlx4_ib_mad_init(ibdev)) 1451 if (mlx4_ib_mad_init(ibdev))
1358 goto err_reg; 1452 goto err_reg;
1359 1453
1454 if (mlx4_ib_init_sriov(ibdev))
1455 goto err_mad;
1456
1360 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { 1457 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1361 iboe->nb.notifier_call = mlx4_ib_netdev_event; 1458 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1362 err = register_netdevice_notifier(&iboe->nb); 1459 err = register_netdevice_notifier(&iboe->nb);
1363 if (err) 1460 if (err)
1364 goto err_reg; 1461 goto err_sriov;
1365 } 1462 }
1366 1463
1367 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 1464 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1372,6 +1469,18 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1372 1469
1373 ibdev->ib_active = true; 1470 ibdev->ib_active = true;
1374 1471
1472 if (mlx4_is_mfunc(ibdev->dev))
1473 init_pkeys(ibdev);
1474
1475 /* create paravirt contexts for any VFs which are active */
1476 if (mlx4_is_master(ibdev->dev)) {
1477 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
1478 if (j == mlx4_master_func_num(ibdev->dev))
1479 continue;
1480 if (mlx4_is_slave_active(ibdev->dev, j))
1481 do_slave_init(ibdev, j, 1);
1482 }
1483 }
1375 return ibdev; 1484 return ibdev;
1376 1485
1377err_notif: 1486err_notif:
@@ -1379,6 +1488,12 @@ err_notif:
1379 pr_warn("failure unregistering notifier\n"); 1488 pr_warn("failure unregistering notifier\n");
1380 flush_workqueue(wq); 1489 flush_workqueue(wq);
1381 1490
1491err_sriov:
1492 mlx4_ib_close_sriov(ibdev);
1493
1494err_mad:
1495 mlx4_ib_mad_cleanup(ibdev);
1496
1382err_reg: 1497err_reg:
1383 ib_unregister_device(&ibdev->ib_dev); 1498 ib_unregister_device(&ibdev->ib_dev);
1384 1499
@@ -1407,6 +1522,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1407 struct mlx4_ib_dev *ibdev = ibdev_ptr; 1522 struct mlx4_ib_dev *ibdev = ibdev_ptr;
1408 int p; 1523 int p;
1409 1524
1525 mlx4_ib_close_sriov(ibdev);
1410 mlx4_ib_mad_cleanup(ibdev); 1526 mlx4_ib_mad_cleanup(ibdev);
1411 ib_unregister_device(&ibdev->ib_dev); 1527 ib_unregister_device(&ibdev->ib_dev);
1412 if (ibdev->iboe.nb.notifier_call) { 1528 if (ibdev->iboe.nb.notifier_call) {
@@ -1428,6 +1544,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1428 ib_dealloc_device(&ibdev->ib_dev); 1544 ib_dealloc_device(&ibdev->ib_dev);
1429} 1545}
1430 1546
1547static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
1548{
1549 struct mlx4_ib_demux_work **dm = NULL;
1550 struct mlx4_dev *dev = ibdev->dev;
1551 int i;
1552 unsigned long flags;
1553
1554 if (!mlx4_is_master(dev))
1555 return;
1556
1557 dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
1558 if (!dm) {
1559 pr_err("failed to allocate memory for tunneling qp update\n");
1560 goto out;
1561 }
1562
1563 for (i = 0; i < dev->caps.num_ports; i++) {
1564 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
1565 if (!dm[i]) {
1566 pr_err("failed to allocate memory for tunneling qp update work struct\n");
1567 for (i = 0; i < dev->caps.num_ports; i++) {
1568 if (dm[i])
1569 kfree(dm[i]);
1570 }
1571 goto out;
1572 }
1573 }
1574 /* initialize or tear down tunnel QPs for the slave */
1575 for (i = 0; i < dev->caps.num_ports; i++) {
1576 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
1577 dm[i]->port = i + 1;
1578 dm[i]->slave = slave;
1579 dm[i]->do_init = do_init;
1580 dm[i]->dev = ibdev;
1581 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
1582 if (!ibdev->sriov.is_going_down)
1583 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
1584 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
1585 }
1586out:
1587 if (dm)
1588 kfree(dm);
1589 return;
1590}
1591
1431static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, 1592static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1432 enum mlx4_dev_event event, unsigned long param) 1593 enum mlx4_dev_event event, unsigned long param)
1433{ 1594{
@@ -1435,22 +1596,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1435 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); 1596 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
1436 struct mlx4_eqe *eqe = NULL; 1597 struct mlx4_eqe *eqe = NULL;
1437 struct ib_event_work *ew; 1598 struct ib_event_work *ew;
1438 int port = 0; 1599 int p = 0;
1439 1600
1440 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) 1601 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
1441 eqe = (struct mlx4_eqe *)param; 1602 eqe = (struct mlx4_eqe *)param;
1442 else 1603 else
1443 port = (u8)param; 1604 p = (int) param;
1444
1445 if (port > ibdev->num_ports)
1446 return;
1447 1605
1448 switch (event) { 1606 switch (event) {
1449 case MLX4_DEV_EVENT_PORT_UP: 1607 case MLX4_DEV_EVENT_PORT_UP:
1608 if (p > ibdev->num_ports)
1609 return;
1610 if (mlx4_is_master(dev) &&
1611 rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
1612 IB_LINK_LAYER_INFINIBAND) {
1613 mlx4_ib_invalidate_all_guid_record(ibdev, p);
1614 }
1450 ibev.event = IB_EVENT_PORT_ACTIVE; 1615 ibev.event = IB_EVENT_PORT_ACTIVE;
1451 break; 1616 break;
1452 1617
1453 case MLX4_DEV_EVENT_PORT_DOWN: 1618 case MLX4_DEV_EVENT_PORT_DOWN:
1619 if (p > ibdev->num_ports)
1620 return;
1454 ibev.event = IB_EVENT_PORT_ERR; 1621 ibev.event = IB_EVENT_PORT_ERR;
1455 break; 1622 break;
1456 1623
@@ -1469,7 +1636,21 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1469 INIT_WORK(&ew->work, handle_port_mgmt_change_event); 1636 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
1470 memcpy(&ew->ib_eqe, eqe, sizeof *eqe); 1637 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
1471 ew->ib_dev = ibdev; 1638 ew->ib_dev = ibdev;
1472 handle_port_mgmt_change_event(&ew->work); 1639 /* need to queue only for port owner, which uses GEN_EQE */
1640 if (mlx4_is_master(dev))
1641 queue_work(wq, &ew->work);
1642 else
1643 handle_port_mgmt_change_event(&ew->work);
1644 return;
1645
1646 case MLX4_DEV_EVENT_SLAVE_INIT:
1647 /* here, p is the slave id */
1648 do_slave_init(ibdev, p, 1);
1649 return;
1650
1651 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
1652 /* here, p is the slave id */
1653 do_slave_init(ibdev, p, 0);
1473 return; 1654 return;
1474 1655
1475 default: 1656 default:
@@ -1477,7 +1658,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1477 } 1658 }
1478 1659
1479 ibev.device = ibdev_ptr; 1660 ibev.device = ibdev_ptr;
1480 ibev.element.port_num = port; 1661 ibev.element.port_num = (u8) p;
1481 1662
1482 ib_dispatch_event(&ibev); 1663 ib_dispatch_event(&ibev);
1483} 1664}
@@ -1497,18 +1678,28 @@ static int __init mlx4_ib_init(void)
1497 if (!wq) 1678 if (!wq)
1498 return -ENOMEM; 1679 return -ENOMEM;
1499 1680
1681 err = mlx4_ib_mcg_init();
1682 if (err)
1683 goto clean_wq;
1684
1500 err = mlx4_register_interface(&mlx4_ib_interface); 1685 err = mlx4_register_interface(&mlx4_ib_interface);
1501 if (err) { 1686 if (err)
1502 destroy_workqueue(wq); 1687 goto clean_mcg;
1503 return err;
1504 }
1505 1688
1506 return 0; 1689 return 0;
1690
1691clean_mcg:
1692 mlx4_ib_mcg_destroy();
1693
1694clean_wq:
1695 destroy_workqueue(wq);
1696 return err;
1507} 1697}
1508 1698
1509static void __exit mlx4_ib_cleanup(void) 1699static void __exit mlx4_ib_cleanup(void)
1510{ 1700{
1511 mlx4_unregister_interface(&mlx4_ib_interface); 1701 mlx4_unregister_interface(&mlx4_ib_interface);
1702 mlx4_ib_mcg_destroy();
1512 destroy_workqueue(wq); 1703 destroy_workqueue(wq);
1513} 1704}
1514 1705
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
new file mode 100644
index 000000000000..3c3b54c3fdd9
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -0,0 +1,1254 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h>
35#include <rdma/ib_cache.h>
36#include <rdma/ib_sa.h>
37
38#include <linux/mlx4/cmd.h>
39#include <linux/rbtree.h>
40#include <linux/delay.h>
41
42#include "mlx4_ib.h"
43
44#define MAX_VFS 80
45#define MAX_PEND_REQS_PER_FUNC 4
46#define MAD_TIMEOUT_MS 2000
47
48#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
49#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
50#define mcg_warn_group(group, format, arg...) \
51 pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
52 (group)->name, group->demux->port, ## arg)
53
54#define mcg_error_group(group, format, arg...) \
55 pr_err(" %16s: " format, (group)->name, ## arg)
56
57
58static union ib_gid mgid0;
59
60static struct workqueue_struct *clean_wq;
61
62enum mcast_state {
63 MCAST_NOT_MEMBER = 0,
64 MCAST_MEMBER,
65};
66
67enum mcast_group_state {
68 MCAST_IDLE,
69 MCAST_JOIN_SENT,
70 MCAST_LEAVE_SENT,
71 MCAST_RESP_READY
72};
73
74struct mcast_member {
75 enum mcast_state state;
76 uint8_t join_state;
77 int num_pend_reqs;
78 struct list_head pending;
79};
80
81struct ib_sa_mcmember_data {
82 union ib_gid mgid;
83 union ib_gid port_gid;
84 __be32 qkey;
85 __be16 mlid;
86 u8 mtusel_mtu;
87 u8 tclass;
88 __be16 pkey;
89 u8 ratesel_rate;
90 u8 lifetmsel_lifetm;
91 __be32 sl_flowlabel_hoplimit;
92 u8 scope_join_state;
93 u8 proxy_join;
94 u8 reserved[2];
95};
96
97struct mcast_group {
98 struct ib_sa_mcmember_data rec;
99 struct rb_node node;
100 struct list_head mgid0_list;
101 struct mlx4_ib_demux_ctx *demux;
102 struct mcast_member func[MAX_VFS];
103 struct mutex lock;
104 struct work_struct work;
105 struct list_head pending_list;
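	/* membership counters, one per join_state bit (bits 0-2) */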
106 int members[3];
107 enum mcast_group_state state;
108 enum mcast_group_state prev_state;
109 struct ib_sa_mad response_sa_mad;
110 __be64 last_req_tid;
111
112 char name[33]; /* MGID string */
113 struct device_attribute dentry;
114
115 /* refcount is the reference count for the following:
116 1. Each queued request
117 2. Each invocation of the worker thread
118 3. Membership of the port at the SA
119 */
120 atomic_t refcount;
121
122 /* delayed work to clean pending SM request */
123 struct delayed_work timeout_work;
124 struct list_head cleanup_list;
125};
126
127struct mcast_req {
128 int func;
129 struct ib_sa_mad sa_mad;
130 struct list_head group_list;
131 struct list_head func_list;
132 struct mcast_group *group;
133 int clean;
134};
135
136
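/*
 * Drop a reference that must never be the last one, e.g. the extra
 * reference taken for scheduling when queue_work() reports the work
 * item is already queued.
 */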
137#define safe_atomic_dec(ref) \
138 do {\
139 if (atomic_dec_and_test(ref)) \
140 mcg_warn_group(group, "did not expect to reach zero\n"); \
141 } while (0)
142
143static const char *get_state_string(enum mcast_group_state state)
144{
145 switch (state) {
146 case MCAST_IDLE:
147 return "MCAST_IDLE";
148 case MCAST_JOIN_SENT:
149 return "MCAST_JOIN_SENT";
150 case MCAST_LEAVE_SENT:
151 return "MCAST_LEAVE_SENT";
152 case MCAST_RESP_READY:
153 return "MCAST_RESP_READY";
154 }
155 return "Invalid State";
156}
157
158static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
159 union ib_gid *mgid)
160{
161 struct rb_node *node = ctx->mcg_table.rb_node;
162 struct mcast_group *group;
163 int ret;
164
165 while (node) {
166 group = rb_entry(node, struct mcast_group, node);
167 ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
168 if (!ret)
169 return group;
170
171 if (ret < 0)
172 node = node->rb_left;
173 else
174 node = node->rb_right;
175 }
176 return NULL;
177}
178
179static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
180 struct mcast_group *group)
181{
182 struct rb_node **link = &ctx->mcg_table.rb_node;
183 struct rb_node *parent = NULL;
184 struct mcast_group *cur_group;
185 int ret;
186
187 while (*link) {
188 parent = *link;
189 cur_group = rb_entry(parent, struct mcast_group, node);
190
191 ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
192 sizeof group->rec.mgid);
193 if (ret < 0)
194 link = &(*link)->rb_left;
195 else if (ret > 0)
196 link = &(*link)->rb_right;
197 else
198 return cur_group;
199 }
200 rb_link_node(&group->node, parent, link);
201 rb_insert_color(&group->node, &ctx->mcg_table);
202 return NULL;
203}
204
205static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
206{
207 struct mlx4_ib_dev *dev = ctx->dev;
208 struct ib_ah_attr ah_attr;
209
210 spin_lock(&dev->sm_lock);
211 if (!dev->sm_ah[ctx->port - 1]) {
212 /* port is not yet Active, sm_ah not ready */
213 spin_unlock(&dev->sm_lock);
214 return -EAGAIN;
215 }
216 mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
217 spin_unlock(&dev->sm_lock);
218 return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
219 IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
220}
221
222static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
223 struct ib_mad *mad)
224{
225 struct mlx4_ib_dev *dev = ctx->dev;
226 struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
227 struct ib_wc wc;
228 struct ib_ah_attr ah_attr;
229
230 /* Our agent might not yet be registered when mads start to arrive */
231 if (!agent)
232 return -EAGAIN;
233
234 ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
235
236 wc.pkey_index = 0;
237 wc.sl = 0;
238 wc.dlid_path_bits = 0;
239 wc.port_num = ctx->port;
240 wc.slid = ah_attr.dlid; /* opensm lid */
241 wc.src_qp = 1;
242 return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
243}
244
245static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
246{
247 struct ib_sa_mad mad;
248 struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
249 int ret;
250
 251	/* we rely on the MAD request as it arrived from the VF */
252 memcpy(&mad, sa_mad, sizeof mad);
253
254 /* fix port GID to be the real one (slave 0) */
255 sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
256
257 /* assign our own TID */
258 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
259 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
260
261 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
262 /* set timeout handler */
263 if (!ret) {
264 /* calls mlx4_ib_mcg_timeout_handler */
265 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
266 msecs_to_jiffies(MAD_TIMEOUT_MS));
267 }
268
269 return ret;
270}
271
272static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
273{
274 struct ib_sa_mad mad;
275 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
276 int ret;
277
278 memset(&mad, 0, sizeof mad);
279 mad.mad_hdr.base_version = 1;
280 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
281 mad.mad_hdr.class_version = 2;
282 mad.mad_hdr.method = IB_SA_METHOD_DELETE;
283 mad.mad_hdr.status = cpu_to_be16(0);
284 mad.mad_hdr.class_specific = cpu_to_be16(0);
285 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
286 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
287 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
288 mad.mad_hdr.attr_mod = cpu_to_be32(0);
289 mad.sa_hdr.sm_key = 0x0;
290 mad.sa_hdr.attr_offset = cpu_to_be16(7);
291 mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
292 IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
293
294 *sa_data = group->rec;
295 sa_data->scope_join_state = join_state;
296
297 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
298 if (ret)
299 group->state = MCAST_IDLE;
300
301 /* set timeout handler */
302 if (!ret) {
303 /* calls mlx4_ib_mcg_timeout_handler */
304 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
305 msecs_to_jiffies(MAD_TIMEOUT_MS));
306 }
307
308 return ret;
309}
310
311static int send_reply_to_slave(int slave, struct mcast_group *group,
312 struct ib_sa_mad *req_sa_mad, u16 status)
313{
314 struct ib_sa_mad mad;
315 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
316 struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
317 int ret;
318
319 memset(&mad, 0, sizeof mad);
320 mad.mad_hdr.base_version = 1;
321 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
322 mad.mad_hdr.class_version = 2;
323 mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
324 mad.mad_hdr.status = cpu_to_be16(status);
325 mad.mad_hdr.class_specific = cpu_to_be16(0);
326 mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
327 *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
328 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
329 mad.mad_hdr.attr_mod = cpu_to_be32(0);
330 mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
331 mad.sa_hdr.attr_offset = cpu_to_be16(7);
332 mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
333
334 *sa_data = group->rec;
335
336 /* reconstruct VF's requested join_state and port_gid */
337 sa_data->scope_join_state &= 0xf0;
338 sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
339 memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
340
341 ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
342 return ret;
343}
344
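/* return non-zero if the group value (src) violates the selector constraint requested in dst */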
345static int check_selector(ib_sa_comp_mask comp_mask,
346 ib_sa_comp_mask selector_mask,
347 ib_sa_comp_mask value_mask,
348 u8 src_value, u8 dst_value)
349{
350 int err;
351 u8 selector = dst_value >> 6;
352 dst_value &= 0x3f;
353 src_value &= 0x3f;
354
355 if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
356 return 0;
357
358 switch (selector) {
359 case IB_SA_GT:
360 err = (src_value <= dst_value);
361 break;
362 case IB_SA_LT:
363 err = (src_value >= dst_value);
364 break;
365 case IB_SA_EQ:
366 err = (src_value != dst_value);
367 break;
368 default:
369 err = 0;
370 break;
371 }
372
373 return err;
374}
375
376static u16 cmp_rec(struct ib_sa_mcmember_data *src,
377 struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
378{
379 /* src is group record, dst is request record */
380 /* MGID must already match */
 381	/* Port_GID is always replaced with our own Port_GID, so it is a match */
382
383#define MAD_STATUS_REQ_INVALID 0x0200
384 if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
385 return MAD_STATUS_REQ_INVALID;
386 if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
387 return MAD_STATUS_REQ_INVALID;
388 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
389 IB_SA_MCMEMBER_REC_MTU,
390 src->mtusel_mtu, dst->mtusel_mtu))
391 return MAD_STATUS_REQ_INVALID;
392 if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
393 src->tclass != dst->tclass)
394 return MAD_STATUS_REQ_INVALID;
395 if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
396 return MAD_STATUS_REQ_INVALID;
397 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
398 IB_SA_MCMEMBER_REC_RATE,
399 src->ratesel_rate, dst->ratesel_rate))
400 return MAD_STATUS_REQ_INVALID;
401 if (check_selector(comp_mask,
402 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
403 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
404 src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
405 return MAD_STATUS_REQ_INVALID;
406 if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
407 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
408 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
409 return MAD_STATUS_REQ_INVALID;
410 if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
411 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
412 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
413 return MAD_STATUS_REQ_INVALID;
414 if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
415 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
416 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
417 return MAD_STATUS_REQ_INVALID;
418 if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
419 (src->scope_join_state & 0xf0) !=
420 (dst->scope_join_state & 0xf0))
421 return MAD_STATUS_REQ_INVALID;
422
423 /* join_state checked separately, proxy_join ignored */
424
425 return 0;
426}
427
428/* release group, return 1 if this was last release and group is destroyed
 429 * timeout work is canceled sync */
430static int release_group(struct mcast_group *group, int from_timeout_handler)
431{
432 struct mlx4_ib_demux_ctx *ctx = group->demux;
433 int nzgroup;
434
435 mutex_lock(&ctx->mcg_table_lock);
436 mutex_lock(&group->lock);
437 if (atomic_dec_and_test(&group->refcount)) {
438 if (!from_timeout_handler) {
439 if (group->state != MCAST_IDLE &&
440 !cancel_delayed_work(&group->timeout_work)) {
441 atomic_inc(&group->refcount);
442 mutex_unlock(&group->lock);
443 mutex_unlock(&ctx->mcg_table_lock);
444 return 0;
445 }
446 }
447
448 nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
449 if (nzgroup)
450 del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
451 if (!list_empty(&group->pending_list))
 452			mcg_warn_group(group, "releasing a group with non-empty pending list\n");
453 if (nzgroup)
454 rb_erase(&group->node, &ctx->mcg_table);
455 list_del_init(&group->mgid0_list);
456 mutex_unlock(&group->lock);
457 mutex_unlock(&ctx->mcg_table_lock);
458 kfree(group);
459 return 1;
460 } else {
461 mutex_unlock(&group->lock);
462 mutex_unlock(&ctx->mcg_table_lock);
463 }
464 return 0;
465}
466
467static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
468{
469 int i;
470
471 for (i = 0; i < 3; i++, join_state >>= 1)
472 if (join_state & 0x1)
473 group->members[i] += inc;
474}
475
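/* join-state bits with no remaining members, limited to the bits currently joined at the SA */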
476static u8 get_leave_state(struct mcast_group *group)
477{
478 u8 leave_state = 0;
479 int i;
480
481 for (i = 0; i < 3; i++)
482 if (!group->members[i])
483 leave_state |= (1 << i);
484
485 return leave_state & (group->rec.scope_join_state & 7);
486}
487
488static int join_group(struct mcast_group *group, int slave, u8 join_mask)
489{
490 int ret = 0;
491 u8 join_state;
492
 493	/* remove bits that the slave is already a member of, and adjust */
494 join_state = join_mask & (~group->func[slave].join_state);
495 adjust_membership(group, join_state, 1);
496 group->func[slave].join_state |= join_state;
497 if (group->func[slave].state != MCAST_MEMBER && join_state) {
498 group->func[slave].state = MCAST_MEMBER;
499 ret = 1;
500 }
501 return ret;
502}
503
504static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
505{
506 int ret = 0;
507
508 adjust_membership(group, leave_state, -1);
509 group->func[slave].join_state &= ~leave_state;
510 if (!group->func[slave].join_state) {
511 group->func[slave].state = MCAST_NOT_MEMBER;
512 ret = 1;
513 }
514 return ret;
515}
516
517static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
518{
519 if (group->func[slave].state != MCAST_MEMBER)
520 return MAD_STATUS_REQ_INVALID;
521
522 /* make sure we're not deleting unset bits */
523 if (~group->func[slave].join_state & leave_mask)
524 return MAD_STATUS_REQ_INVALID;
525
526 if (!leave_mask)
527 return MAD_STATUS_REQ_INVALID;
528
529 return 0;
530}
531
532static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
533{
534 struct delayed_work *delay = to_delayed_work(work);
535 struct mcast_group *group;
536 struct mcast_req *req = NULL;
537
538 group = container_of(delay, typeof(*group), timeout_work);
539
540 mutex_lock(&group->lock);
541 if (group->state == MCAST_JOIN_SENT) {
542 if (!list_empty(&group->pending_list)) {
543 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
544 list_del(&req->group_list);
545 list_del(&req->func_list);
546 --group->func[req->func].num_pend_reqs;
547 mutex_unlock(&group->lock);
548 kfree(req);
549 if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
550 if (release_group(group, 1))
551 return;
552 } else {
553 kfree(group);
554 return;
555 }
556 mutex_lock(&group->lock);
557 } else
558 mcg_warn_group(group, "DRIVER BUG\n");
559 } else if (group->state == MCAST_LEAVE_SENT) {
560 if (group->rec.scope_join_state & 7)
561 group->rec.scope_join_state &= 0xf8;
562 group->state = MCAST_IDLE;
563 mutex_unlock(&group->lock);
564 if (release_group(group, 1))
565 return;
566 mutex_lock(&group->lock);
567 } else
568 mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
569 group->state = MCAST_IDLE;
570 atomic_inc(&group->refcount);
571 if (!queue_work(group->demux->mcg_wq, &group->work))
572 safe_atomic_dec(&group->refcount);
573
574 mutex_unlock(&group->lock);
575}
576
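/* a request marked 'clean' drops every join state the function holds and gets no reply */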
577static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
578 struct mcast_req *req)
579{
580 u16 status;
581
582 if (req->clean)
583 leave_mask = group->func[req->func].join_state;
584
585 status = check_leave(group, req->func, leave_mask);
586 if (!status)
587 leave_group(group, req->func, leave_mask);
588
589 if (!req->clean)
590 send_reply_to_slave(req->func, group, &req->sa_mad, status);
591 --group->func[req->func].num_pend_reqs;
592 list_del(&req->group_list);
593 list_del(&req->func_list);
594 kfree(req);
595 return 1;
596}
597
598static int handle_join_req(struct mcast_group *group, u8 join_mask,
599 struct mcast_req *req)
600{
601 u8 group_join_state = group->rec.scope_join_state & 7;
602 int ref = 0;
603 u16 status;
604 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
605
606 if (join_mask == (group_join_state & join_mask)) {
607 /* port's membership need not change */
608 status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
609 if (!status)
610 join_group(group, req->func, join_mask);
611
612 --group->func[req->func].num_pend_reqs;
613 send_reply_to_slave(req->func, group, &req->sa_mad, status);
614 list_del(&req->group_list);
615 list_del(&req->func_list);
616 kfree(req);
617 ++ref;
618 } else {
619 /* port's membership needs to be updated */
620 group->prev_state = group->state;
621 if (send_join_to_wire(group, &req->sa_mad)) {
622 --group->func[req->func].num_pend_reqs;
623 list_del(&req->group_list);
624 list_del(&req->func_list);
625 kfree(req);
626 ref = 1;
627 group->state = group->prev_state;
628 } else
629 group->state = MCAST_JOIN_SENT;
630 }
631
632 return ref;
633}
634
635static void mlx4_ib_mcg_work_handler(struct work_struct *work)
636{
637 struct mcast_group *group;
638 struct mcast_req *req = NULL;
639 struct ib_sa_mcmember_data *sa_data;
640 u8 req_join_state;
641 int rc = 1; /* release_count - this is for the scheduled work */
642 u16 status;
643 u8 method;
644
645 group = container_of(work, typeof(*group), work);
646
647 mutex_lock(&group->lock);
648
649 /* First, let's see if a response from SM is waiting regarding this group.
650 * If so, we need to update the group's REC. If this is a bad response, we
651 * may need to send a bad response to a VF waiting for it. If VF is waiting
652 * and this is a good response, the VF will be answered later in this func. */
653 if (group->state == MCAST_RESP_READY) {
654 /* cancels mlx4_ib_mcg_timeout_handler */
655 cancel_delayed_work(&group->timeout_work);
656 status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
657 method = group->response_sa_mad.mad_hdr.method;
658 if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
659 mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
660 be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
661 be64_to_cpu(group->last_req_tid));
662 group->state = group->prev_state;
663 goto process_requests;
664 }
665 if (status) {
666 if (!list_empty(&group->pending_list))
667 req = list_first_entry(&group->pending_list,
668 struct mcast_req, group_list);
669 if ((method == IB_MGMT_METHOD_GET_RESP)) {
670 if (req) {
671 send_reply_to_slave(req->func, group, &req->sa_mad, status);
672 --group->func[req->func].num_pend_reqs;
673 list_del(&req->group_list);
674 list_del(&req->func_list);
675 kfree(req);
676 ++rc;
677 } else
678 mcg_warn_group(group, "no request for failed join\n");
679 } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
680 ++rc;
681 } else {
682 u8 resp_join_state;
683 u8 cur_join_state;
684
685 resp_join_state = ((struct ib_sa_mcmember_data *)
686 group->response_sa_mad.data)->scope_join_state & 7;
687 cur_join_state = group->rec.scope_join_state & 7;
688
689 if (method == IB_MGMT_METHOD_GET_RESP) {
 690				/* successful join */
691 if (!cur_join_state && resp_join_state)
692 --rc;
693 } else if (!resp_join_state)
694 ++rc;
695 memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
696 }
697 group->state = MCAST_IDLE;
698 }
699
700process_requests:
701 /* We should now go over pending join/leave requests, as long as we are idle. */
702 while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
703 req = list_first_entry(&group->pending_list, struct mcast_req,
704 group_list);
705 sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
706 req_join_state = sa_data->scope_join_state & 0x7;
707
708 /* For a leave request, we will immediately answer the VF, and
709 * update our internal counters. The actual leave will be sent
710 * to SM later, if at all needed. We dequeue the request now. */
711 if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
712 rc += handle_leave_req(group, req_join_state, req);
713 else
714 rc += handle_join_req(group, req_join_state, req);
715 }
716
717 /* Handle leaves */
718 if (group->state == MCAST_IDLE) {
719 req_join_state = get_leave_state(group);
720 if (req_join_state) {
721 group->rec.scope_join_state &= ~req_join_state;
722 group->prev_state = group->state;
723 if (send_leave_to_wire(group, req_join_state)) {
724 group->state = group->prev_state;
725 ++rc;
726 } else
727 group->state = MCAST_LEAVE_SENT;
728 }
729 }
730
731 if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
732 goto process_requests;
733 mutex_unlock(&group->lock);
734
735 while (rc--)
736 release_group(group, 0);
737}
738
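/*
 * Joins sent with a zero MGID leave MGID assignment to the SM; such
 * groups wait on the mgid0 list.  When the SM response arrives, find
 * the group by TID, record the assigned MGID and move the group into
 * the MGID rb-tree, or drop it if no MGID was assigned.
 */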
739static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
740 __be64 tid,
741 union ib_gid *new_mgid)
742{
743 struct mcast_group *group = NULL, *cur_group;
744 struct mcast_req *req;
745 struct list_head *pos;
746 struct list_head *n;
747
748 mutex_lock(&ctx->mcg_table_lock);
749 list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
750 group = list_entry(pos, struct mcast_group, mgid0_list);
751 mutex_lock(&group->lock);
752 if (group->last_req_tid == tid) {
753 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
754 group->rec.mgid = *new_mgid;
755 sprintf(group->name, "%016llx%016llx",
756 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
757 be64_to_cpu(group->rec.mgid.global.interface_id));
758 list_del_init(&group->mgid0_list);
759 cur_group = mcast_insert(ctx, group);
760 if (cur_group) {
761 /* A race between our code and SM. Silently cleaning the new one */
762 req = list_first_entry(&group->pending_list,
763 struct mcast_req, group_list);
764 --group->func[req->func].num_pend_reqs;
765 list_del(&req->group_list);
766 list_del(&req->func_list);
767 kfree(req);
768 mutex_unlock(&group->lock);
769 mutex_unlock(&ctx->mcg_table_lock);
770 release_group(group, 0);
771 return NULL;
772 }
773
774 atomic_inc(&group->refcount);
775 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
776 mutex_unlock(&group->lock);
777 mutex_unlock(&ctx->mcg_table_lock);
778 return group;
779 } else {
780 struct mcast_req *tmp1, *tmp2;
781
782 list_del(&group->mgid0_list);
783 if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
784 cancel_delayed_work_sync(&group->timeout_work);
785
786 list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
787 list_del(&tmp1->group_list);
788 kfree(tmp1);
789 }
790 mutex_unlock(&group->lock);
791 mutex_unlock(&ctx->mcg_table_lock);
792 kfree(group);
793 return NULL;
794 }
795 }
796 mutex_unlock(&group->lock);
797 }
798 mutex_unlock(&ctx->mcg_table_lock);
799
800 return NULL;
801}
802
803static ssize_t sysfs_show_group(struct device *dev,
804 struct device_attribute *attr, char *buf);
805
806static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
807 union ib_gid *mgid, int create,
808 gfp_t gfp_mask)
809{
810 struct mcast_group *group, *cur_group;
811 int is_mgid0;
812 int i;
813
814 is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
815 if (!is_mgid0) {
816 group = mcast_find(ctx, mgid);
817 if (group)
818 goto found;
819 }
820
821 if (!create)
822 return ERR_PTR(-ENOENT);
823
824 group = kzalloc(sizeof *group, gfp_mask);
825 if (!group)
826 return ERR_PTR(-ENOMEM);
827
828 group->demux = ctx;
829 group->rec.mgid = *mgid;
830 INIT_LIST_HEAD(&group->pending_list);
831 INIT_LIST_HEAD(&group->mgid0_list);
832 for (i = 0; i < MAX_VFS; ++i)
833 INIT_LIST_HEAD(&group->func[i].pending);
834 INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
835 INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
836 mutex_init(&group->lock);
837 sprintf(group->name, "%016llx%016llx",
838 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
839 be64_to_cpu(group->rec.mgid.global.interface_id));
840 sysfs_attr_init(&group->dentry.attr);
841 group->dentry.show = sysfs_show_group;
842 group->dentry.store = NULL;
843 group->dentry.attr.name = group->name;
844 group->dentry.attr.mode = 0400;
845 group->state = MCAST_IDLE;
846
847 if (is_mgid0) {
848 list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
849 goto found;
850 }
851
852 cur_group = mcast_insert(ctx, group);
853 if (cur_group) {
854 mcg_warn("group just showed up %s - confused\n", cur_group->name);
855 kfree(group);
856 return ERR_PTR(-EINVAL);
857 }
858
859 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
860
861found:
862 atomic_inc(&group->refcount);
863 return group;
864}
865
866static void queue_req(struct mcast_req *req)
867{
868 struct mcast_group *group = req->group;
869
870 atomic_inc(&group->refcount); /* for the request */
871 atomic_inc(&group->refcount); /* for scheduling the work */
872 list_add_tail(&req->group_list, &group->pending_list);
873 list_add_tail(&req->func_list, &group->func[req->func].pending);
874 /* calls mlx4_ib_mcg_work_handler */
875 if (!queue_work(group->demux->mcg_wq, &group->work))
876 safe_atomic_dec(&group->refcount);
877}
878
879int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
880 struct ib_sa_mad *mad)
881{
882 struct mlx4_ib_dev *dev = to_mdev(ibdev);
883 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
884 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
885 struct mcast_group *group;
886
887 switch (mad->mad_hdr.method) {
888 case IB_MGMT_METHOD_GET_RESP:
889 case IB_SA_METHOD_DELETE_RESP:
890 mutex_lock(&ctx->mcg_table_lock);
891 group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
892 mutex_unlock(&ctx->mcg_table_lock);
893 if (IS_ERR(group)) {
894 if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
895 __be64 tid = mad->mad_hdr.tid;
896 *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
897 group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
898 } else
899 group = NULL;
900 }
901
902 if (!group)
903 return 1;
904
905 mutex_lock(&group->lock);
906 group->response_sa_mad = *mad;
907 group->prev_state = group->state;
908 group->state = MCAST_RESP_READY;
909 /* calls mlx4_ib_mcg_work_handler */
910 atomic_inc(&group->refcount);
911 if (!queue_work(ctx->mcg_wq, &group->work))
912 safe_atomic_dec(&group->refcount);
913 mutex_unlock(&group->lock);
914 release_group(group, 0);
915 return 1; /* consumed */
916 case IB_MGMT_METHOD_SET:
917 case IB_SA_METHOD_GET_TABLE:
918 case IB_SA_METHOD_GET_TABLE_RESP:
919 case IB_SA_METHOD_DELETE:
920 return 0; /* not consumed, pass-through to guest over tunnel */
921 default:
922 mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
923 port, mad->mad_hdr.method);
924 return 1; /* consumed */
925 }
926}
927
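A minimal user-space sketch of the TID trick used above (helper names are made up, not driver code): when a request is forwarded, the slave index is written into byte 0 of the network-order TID, so the GetResp handled here can be re-stamped with the slave and matched against the group's saved last_req_tid.

#include <stdint.h>
#include <stdio.h>

/* Sketch only: byte 0 of the big-endian TID carries the slave index. */
static void stamp_slave(uint8_t tid_be[8], uint8_t slave)
{
	tid_be[0] = slave;
}

static uint8_t slave_of(const uint8_t tid_be[8])
{
	return tid_be[0];
}

int main(void)
{
	uint8_t tid[8] = { 0x00, 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde };

	stamp_slave(tid, 5);
	printf("slave=%u\n", slave_of(tid));	/* prints slave=5 */
	return 0;
}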
928int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
929 int slave, struct ib_sa_mad *sa_mad)
930{
931 struct mlx4_ib_dev *dev = to_mdev(ibdev);
932 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
933 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
934 struct mcast_group *group;
935 struct mcast_req *req;
936 int may_create = 0;
937
938 if (ctx->flushing)
939 return -EAGAIN;
940
941 switch (sa_mad->mad_hdr.method) {
942 case IB_MGMT_METHOD_SET:
 943 may_create = 1; /* fall through */
944 case IB_SA_METHOD_DELETE:
945 req = kzalloc(sizeof *req, GFP_KERNEL);
946 if (!req)
947 return -ENOMEM;
948
949 req->func = slave;
950 req->sa_mad = *sa_mad;
951
952 mutex_lock(&ctx->mcg_table_lock);
953 group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
954 mutex_unlock(&ctx->mcg_table_lock);
955 if (IS_ERR(group)) {
956 kfree(req);
957 return PTR_ERR(group);
958 }
959 mutex_lock(&group->lock);
960 if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
961 mutex_unlock(&group->lock);
962 mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
963 port, slave, MAX_PEND_REQS_PER_FUNC);
964 release_group(group, 0);
965 kfree(req);
966 return -ENOMEM;
967 }
968 ++group->func[slave].num_pend_reqs;
969 req->group = group;
970 queue_req(req);
971 mutex_unlock(&group->lock);
972 release_group(group, 0);
973 return 1; /* consumed */
974 case IB_SA_METHOD_GET_TABLE:
975 case IB_MGMT_METHOD_GET_RESP:
976 case IB_SA_METHOD_GET_TABLE_RESP:
977 case IB_SA_METHOD_DELETE_RESP:
978 return 0; /* not consumed, pass-through */
979 default:
980 mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
981 port, slave, sa_mad->mad_hdr.method);
982 return 1; /* consumed */
983 }
984}
985
986static ssize_t sysfs_show_group(struct device *dev,
987 struct device_attribute *attr, char *buf)
988{
989 struct mcast_group *group =
990 container_of(attr, struct mcast_group, dentry);
991 struct mcast_req *req = NULL;
992 char pending_str[40];
993 char state_str[40];
994 ssize_t len = 0;
995 int f;
996
997 if (group->state == MCAST_IDLE)
998 sprintf(state_str, "%s", get_state_string(group->state));
999 else
1000 sprintf(state_str, "%s(TID=0x%llx)",
1001 get_state_string(group->state),
1002 be64_to_cpu(group->last_req_tid));
1003 if (list_empty(&group->pending_list)) {
1004 sprintf(pending_str, "No");
1005 } else {
1006 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1007 sprintf(pending_str, "Yes(TID=0x%llx)",
1008 be64_to_cpu(req->sa_mad.mad_hdr.tid));
1009 }
1010 len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
1011 group->rec.scope_join_state & 0xf,
1012 group->members[2], group->members[1], group->members[0],
1013 atomic_read(&group->refcount),
1014 pending_str,
1015 state_str);
1016 for (f = 0; f < MAX_VFS; ++f)
1017 if (group->func[f].state == MCAST_MEMBER)
1018 len += sprintf(buf + len, "%d[%1x] ",
1019 f, group->func[f].join_state);
1020
1021 len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
1022 "%4x %4x %2x %2x)\n",
1023 be16_to_cpu(group->rec.pkey),
1024 be32_to_cpu(group->rec.qkey),
1025 (group->rec.mtusel_mtu & 0xc0) >> 6,
1026 group->rec.mtusel_mtu & 0x3f,
1027 group->rec.tclass,
1028 (group->rec.ratesel_rate & 0xc0) >> 6,
1029 group->rec.ratesel_rate & 0x3f,
1030 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
1031 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
1032 be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
1033 group->rec.proxy_join);
1034
1035 return len;
1036}
1037
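As a side note on the packed fields printed above, a stand-alone sketch (the sample value is hypothetical) of how sl_flowlabel_hoplimit decomposes after be32_to_cpu: SL in bits 31:28, FlowLabel in bits 27:8, HopLimit in bits 7:0.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t v = 0x30001240;	/* hypothetical sl_flowlabel_hoplimit */

	printf("sl=%u flow=0x%x hop=%u\n",
	       (v & 0xf0000000u) >> 28,	/* service level: 3 */
	       (v & 0x0fffff00u) >> 8,	/* flow label: 0x12 */
	       v & 0x000000ffu);	/* hop limit: 0x40 */
	return 0;
}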
1038int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
1039{
1040 char name[20];
1041
1042 atomic_set(&ctx->tid, 0);
1043 sprintf(name, "mlx4_ib_mcg%d", ctx->port);
1044 ctx->mcg_wq = create_singlethread_workqueue(name);
1045 if (!ctx->mcg_wq)
1046 return -ENOMEM;
1047
1048 mutex_init(&ctx->mcg_table_lock);
1049 ctx->mcg_table = RB_ROOT;
1050 INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
1051 ctx->flushing = 0;
1052
1053 return 0;
1054}
1055
1056static void force_clean_group(struct mcast_group *group)
1057{
1058 struct mcast_req *req, *tmp;
1059
1060 list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
1061 list_del(&req->group_list);
1062 kfree(req);
1063 }
1064 del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
1065 rb_erase(&group->node, &group->demux->mcg_table);
1066 kfree(group);
1067}
1068
1069static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1070{
1071 int i;
1072 struct rb_node *p;
1073 struct mcast_group *group;
1074 unsigned long end;
1075 int count;
1076
1077 if (ctx->flushing)
1078 return;
1079
1080 ctx->flushing = 1;
1081 for (i = 0; i < MAX_VFS; ++i)
1082 clean_vf_mcast(ctx, i);
1083
1084 end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
1085 do {
1086 count = 0;
1087 mutex_lock(&ctx->mcg_table_lock);
1088 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
1089 ++count;
1090 mutex_unlock(&ctx->mcg_table_lock);
1091 if (!count)
1092 break;
1093
1094 msleep(1);
1095 } while (time_after(end, jiffies));
1096
1097 flush_workqueue(ctx->mcg_wq);
1098 if (destroy_wq)
1099 destroy_workqueue(ctx->mcg_wq);
1100
1101 mutex_lock(&ctx->mcg_table_lock);
1102 while ((p = rb_first(&ctx->mcg_table)) != NULL) {
1103 group = rb_entry(p, struct mcast_group, node);
1104 if (atomic_read(&group->refcount))
1105 mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
1106
1107 force_clean_group(group);
1108 }
1109 mutex_unlock(&ctx->mcg_table_lock);
1110
1111 if (!destroy_wq)
1112 ctx->flushing = 0;
1113}
1114
1115struct clean_work {
1116 struct work_struct work;
1117 struct mlx4_ib_demux_ctx *ctx;
1118 int destroy_wq;
1119};
1120
1121static void mcg_clean_task(struct work_struct *work)
1122{
1123 struct clean_work *cw = container_of(work, struct clean_work, work);
1124
1125 _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
1126 kfree(cw);
1127}
1128
1129void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1130{
1131 struct clean_work *work;
1132
1133 if (destroy_wq) {
1134 _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
1135 return;
1136 }
1137
1138 work = kmalloc(sizeof *work, GFP_KERNEL);
1139 if (!work) {
1140 mcg_warn("failed allocating work for cleanup\n");
1141 return;
1142 }
1143
1144 work->ctx = ctx;
1145 work->destroy_wq = destroy_wq;
1146 INIT_WORK(&work->work, mcg_clean_task);
1147 queue_work(clean_wq, &work->work);
1148}
1149
1150static void build_leave_mad(struct mcast_req *req)
1151{
1152 struct ib_sa_mad *mad = &req->sa_mad;
1153
1154 mad->mad_hdr.method = IB_SA_METHOD_DELETE;
1155}
1156
1157
1158static void clear_pending_reqs(struct mcast_group *group, int vf)
1159{
1160 struct mcast_req *req, *tmp, *group_first = NULL;
1161 int clear;
1162 int pend = 0;
1163
1164 if (!list_empty(&group->pending_list))
1165 group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1166
1167 list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
1168 clear = 1;
1169 if (group_first == req &&
1170 (group->state == MCAST_JOIN_SENT ||
1171 group->state == MCAST_LEAVE_SENT)) {
1172 clear = cancel_delayed_work(&group->timeout_work);
1173 pend = !clear;
1174 group->state = MCAST_IDLE;
1175 }
1176 if (clear) {
1177 --group->func[vf].num_pend_reqs;
1178 list_del(&req->group_list);
1179 list_del(&req->func_list);
1180 kfree(req);
1181 atomic_dec(&group->refcount);
1182 }
1183 }
1184
1185 if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
1186 mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
1187 list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
1188 }
1189}
1190
1191static int push_deleteing_req(struct mcast_group *group, int slave)
1192{
1193 struct mcast_req *req;
1194 struct mcast_req *pend_req;
1195
1196 if (!group->func[slave].join_state)
1197 return 0;
1198
1199 req = kzalloc(sizeof *req, GFP_KERNEL);
1200 if (!req) {
 1201 mcg_warn_group(group, "failed allocation - may leave stale groups\n");
1202 return -ENOMEM;
1203 }
1204
1205 if (!list_empty(&group->func[slave].pending)) {
1206 pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
1207 if (pend_req->clean) {
1208 kfree(req);
1209 return 0;
1210 }
1211 }
1212
1213 req->clean = 1;
1214 req->func = slave;
1215 req->group = group;
1216 ++group->func[slave].num_pend_reqs;
1217 build_leave_mad(req);
1218 queue_req(req);
1219 return 0;
1220}
1221
1222void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
1223{
1224 struct mcast_group *group;
1225 struct rb_node *p;
1226
1227 mutex_lock(&ctx->mcg_table_lock);
1228 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
1229 group = rb_entry(p, struct mcast_group, node);
1230 mutex_lock(&group->lock);
1231 if (atomic_read(&group->refcount)) {
1232 /* clear pending requests of this VF */
1233 clear_pending_reqs(group, slave);
1234 push_deleteing_req(group, slave);
1235 }
1236 mutex_unlock(&group->lock);
1237 }
1238 mutex_unlock(&ctx->mcg_table_lock);
1239}
1240
1241
1242int mlx4_ib_mcg_init(void)
1243{
1244 clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
1245 if (!clean_wq)
1246 return -ENOMEM;
1247
1248 return 0;
1249}
1250
1251void mlx4_ib_mcg_destroy(void)
1252{
1253 destroy_workqueue(clean_wq);
1254}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index c136bb618e29..e04cbc9a54a5 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -37,9 +37,12 @@
37#include <linux/compiler.h> 37#include <linux/compiler.h>
38#include <linux/list.h> 38#include <linux/list.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/idr.h>
40 41
41#include <rdma/ib_verbs.h> 42#include <rdma/ib_verbs.h>
42#include <rdma/ib_umem.h> 43#include <rdma/ib_umem.h>
44#include <rdma/ib_mad.h>
45#include <rdma/ib_sa.h>
43 46
44#include <linux/mlx4/device.h> 47#include <linux/mlx4/device.h>
45#include <linux/mlx4/doorbell.h> 48#include <linux/mlx4/doorbell.h>
@@ -62,6 +65,9 @@ enum {
62#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) 65#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
63#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) 66#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
64 67
68/*module param to indicate if SM assigns the alias_GUID*/
69extern int mlx4_ib_sm_guid_assign;
70
65struct mlx4_ib_ucontext { 71struct mlx4_ib_ucontext {
66 struct ib_ucontext ibucontext; 72 struct ib_ucontext ibucontext;
67 struct mlx4_uar uar; 73 struct mlx4_uar uar;
@@ -133,8 +139,10 @@ struct mlx4_ib_wq {
133}; 139};
134 140
135enum mlx4_ib_qp_flags { 141enum mlx4_ib_qp_flags {
136 MLX4_IB_QP_LSO = 1 << 0, 142 MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
137 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, 143 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
144 MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
145 MLX4_IB_SRIOV_SQP = 1 << 31,
138}; 146};
139 147
140struct mlx4_ib_gid_entry { 148struct mlx4_ib_gid_entry {
@@ -144,6 +152,80 @@ struct mlx4_ib_gid_entry {
144 u8 port; 152 u8 port;
145}; 153};
146 154
155enum mlx4_ib_qp_type {
156 /*
157 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
158 * here (and in that order) since the MAD layer uses them as
159 * indices into a 2-entry table.
160 */
161 MLX4_IB_QPT_SMI = IB_QPT_SMI,
162 MLX4_IB_QPT_GSI = IB_QPT_GSI,
163
164 MLX4_IB_QPT_RC = IB_QPT_RC,
165 MLX4_IB_QPT_UC = IB_QPT_UC,
166 MLX4_IB_QPT_UD = IB_QPT_UD,
167 MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
168 MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
169 MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
170 MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
171 MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
172
173 MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
174 MLX4_IB_QPT_PROXY_SMI = 1 << 17,
175 MLX4_IB_QPT_PROXY_GSI = 1 << 18,
176 MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
177 MLX4_IB_QPT_TUN_SMI = 1 << 20,
178 MLX4_IB_QPT_TUN_GSI = 1 << 21,
179};
180
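A tiny illustration of the ordering constraint described in the comment above (the table below is made up; in the driver it is the per-port send_agent[][2] array): SMI and GSI double as direct indices into a two-entry MAD-agent table, which only works because they are 0 and 1.

#include <stdio.h>

enum { QPT_SMI = 0, QPT_GSI = 1 };	/* must stay 0 and 1 */

static const char *agent[2] = { "smi-agent", "gsi-agent" };

int main(void)
{
	printf("%s\n", agent[QPT_GSI]);	/* index the table by QP type directly */
	return 0;
}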
181#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
182 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
183 MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
184
185enum mlx4_ib_mad_ifc_flags {
186 MLX4_MAD_IFC_IGNORE_MKEY = 1,
187 MLX4_MAD_IFC_IGNORE_BKEY = 2,
188 MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
189 MLX4_MAD_IFC_IGNORE_BKEY),
190 MLX4_MAD_IFC_NET_VIEW = 4,
191};
192
193enum {
194 MLX4_NUM_TUNNEL_BUFS = 256,
195};
196
197struct mlx4_ib_tunnel_header {
198 struct mlx4_av av;
199 __be32 remote_qpn;
200 __be32 qkey;
201 __be16 vlan;
202 u8 mac[6];
203 __be16 pkey_index;
204 u8 reserved[6];
205};
206
207struct mlx4_ib_buf {
208 void *addr;
209 dma_addr_t map;
210};
211
212struct mlx4_rcv_tunnel_hdr {
213 __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
214 * 0x0 - no vlan was in the packet
215 * 0x01 - C-VLAN was in the packet */
216 u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
217 u8 reserved;
218 __be16 pkey_index;
219 __be16 sl_vid;
220 __be16 slid_mac_47_32;
221 __be32 mac_31_0;
222};
223
224struct mlx4_ib_proxy_sqp_hdr {
225 struct ib_grh grh;
226 struct mlx4_rcv_tunnel_hdr tun;
227} __packed;
228
147struct mlx4_ib_qp { 229struct mlx4_ib_qp {
148 struct ib_qp ibqp; 230 struct ib_qp ibqp;
149 struct mlx4_qp mqp; 231 struct mlx4_qp mqp;
@@ -159,6 +241,7 @@ struct mlx4_ib_qp {
159 int sq_spare_wqes; 241 int sq_spare_wqes;
160 struct mlx4_ib_wq sq; 242 struct mlx4_ib_wq sq;
161 243
244 enum mlx4_ib_qp_type mlx4_ib_qp_type;
162 struct ib_umem *umem; 245 struct ib_umem *umem;
163 struct mlx4_mtt mtt; 246 struct mlx4_mtt mtt;
164 int buf_size; 247 int buf_size;
@@ -174,6 +257,8 @@ struct mlx4_ib_qp {
174 int mlx_type; 257 int mlx_type;
175 struct list_head gid_list; 258 struct list_head gid_list;
176 struct list_head steering_rules; 259 struct list_head steering_rules;
260 struct mlx4_ib_buf *sqp_proxy_rcv;
261
177}; 262};
178 263
179struct mlx4_ib_srq { 264struct mlx4_ib_srq {
@@ -196,6 +281,138 @@ struct mlx4_ib_ah {
196 union mlx4_ext_av av; 281 union mlx4_ext_av av;
197}; 282};
198 283
284/****************************************/
285/* alias guid support */
286/****************************************/
287#define NUM_PORT_ALIAS_GUID 2
288#define NUM_ALIAS_GUID_IN_REC 8
289#define NUM_ALIAS_GUID_REC_IN_PORT 16
290#define GUID_REC_SIZE 8
291#define NUM_ALIAS_GUID_PER_PORT 128
292#define MLX4_NOT_SET_GUID (0x00LL)
293#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
294
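The constants above imply the per-port layout: 16 SA GuidInfo records of 8 GUIDs, 8 bytes each, giving 128 alias GUIDs per port. A minimal sketch (helper names are hypothetical) of mapping a per-port GUID index to its record and byte offset:

/* guid_index is 0..NUM_ALIAS_GUID_PER_PORT - 1 */
static inline int rec_of_guid(int guid_index)
{
	return guid_index / 8;		/* / NUM_ALIAS_GUID_IN_REC */
}

static inline int offset_in_rec(int guid_index)
{
	return (guid_index % 8) * 8;	/* slot * GUID_REC_SIZE */
}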
295enum mlx4_guid_alias_rec_status {
296 MLX4_GUID_INFO_STATUS_IDLE,
297 MLX4_GUID_INFO_STATUS_SET,
298 MLX4_GUID_INFO_STATUS_PENDING,
299};
300
301enum mlx4_guid_alias_rec_ownership {
302 MLX4_GUID_DRIVER_ASSIGN,
303 MLX4_GUID_SYSADMIN_ASSIGN,
304 MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
305};
306
307enum mlx4_guid_alias_rec_method {
308 MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
309 MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
310};
311
312struct mlx4_sriov_alias_guid_info_rec_det {
313 u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
 314 ib_sa_comp_mask guid_indexes; /*indicates which of the 8 GUID entries are valid*/
 315 enum mlx4_guid_alias_rec_status status; /*administrative status of the record*/
 316 u8 method; /*set or delete*/
 317 enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assigned this alias_guid record*/
318};
319
320struct mlx4_sriov_alias_guid_port_rec_det {
321 struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
322 struct workqueue_struct *wq;
323 struct delayed_work alias_guid_work;
324 u8 port;
325 struct mlx4_sriov_alias_guid *parent;
326 struct list_head cb_list;
327};
328
329struct mlx4_sriov_alias_guid {
330 struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
331 spinlock_t ag_work_lock;
332 struct ib_sa_client *sa_client;
333};
334
335struct mlx4_ib_demux_work {
336 struct work_struct work;
337 struct mlx4_ib_dev *dev;
338 int slave;
339 int do_init;
340 u8 port;
341
342};
343
344struct mlx4_ib_tun_tx_buf {
345 struct mlx4_ib_buf buf;
346 struct ib_ah *ah;
347};
348
349struct mlx4_ib_demux_pv_qp {
350 struct ib_qp *qp;
351 enum ib_qp_type proxy_qpt;
352 struct mlx4_ib_buf *ring;
353 struct mlx4_ib_tun_tx_buf *tx_ring;
354 spinlock_t tx_lock;
355 unsigned tx_ix_head;
356 unsigned tx_ix_tail;
357};
358
359enum mlx4_ib_demux_pv_state {
360 DEMUX_PV_STATE_DOWN,
361 DEMUX_PV_STATE_STARTING,
362 DEMUX_PV_STATE_ACTIVE,
363 DEMUX_PV_STATE_DOWNING,
364};
365
366struct mlx4_ib_demux_pv_ctx {
367 int port;
368 int slave;
369 enum mlx4_ib_demux_pv_state state;
370 int has_smi;
371 struct ib_device *ib_dev;
372 struct ib_cq *cq;
373 struct ib_pd *pd;
374 struct ib_mr *mr;
375 struct work_struct work;
376 struct workqueue_struct *wq;
377 struct mlx4_ib_demux_pv_qp qp[2];
378};
379
380struct mlx4_ib_demux_ctx {
381 struct ib_device *ib_dev;
382 int port;
383 struct workqueue_struct *wq;
384 struct workqueue_struct *ud_wq;
385 spinlock_t ud_lock;
386 __be64 subnet_prefix;
387 __be64 guid_cache[128];
388 struct mlx4_ib_dev *dev;
389 /* the following lock protects both mcg_table and mcg_mgid0_list */
390 struct mutex mcg_table_lock;
391 struct rb_root mcg_table;
392 struct list_head mcg_mgid0_list;
393 struct workqueue_struct *mcg_wq;
394 struct mlx4_ib_demux_pv_ctx **tun;
395 atomic_t tid;
396 int flushing; /* flushing the work queue */
397};
398
399struct mlx4_ib_sriov {
400 struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
401 struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
 402 /* when using this spinlock you should use the "irq" variants because
 403 * it may be taken from interrupt context. */
404 spinlock_t going_down_lock;
405 int is_going_down;
406
407 struct mlx4_sriov_alias_guid alias_guid;
408
409 /* CM paravirtualization fields */
410 struct list_head cm_list;
411 spinlock_t id_map_lock;
412 struct rb_root sl_id_map;
413 struct idr pv_id_table;
414};
415
199struct mlx4_ib_iboe { 416struct mlx4_ib_iboe {
200 spinlock_t lock; 417 spinlock_t lock;
201 struct net_device *netdevs[MLX4_MAX_PORTS]; 418 struct net_device *netdevs[MLX4_MAX_PORTS];
@@ -203,6 +420,42 @@ struct mlx4_ib_iboe {
203 union ib_gid gid_table[MLX4_MAX_PORTS][128]; 420 union ib_gid gid_table[MLX4_MAX_PORTS][128];
204}; 421};
205 422
423struct pkey_mgt {
424 u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
425 u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
426 struct list_head pkey_port_list[MLX4_MFUNC_MAX];
427 struct kobject *device_parent[MLX4_MFUNC_MAX];
428};
429
430struct mlx4_ib_iov_sysfs_attr {
431 void *ctx;
432 struct kobject *kobj;
433 unsigned long data;
434 u32 entry_num;
435 char name[15];
436 struct device_attribute dentry;
437 struct device *dev;
438};
439
440struct mlx4_ib_iov_sysfs_attr_ar {
441 struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
442};
443
444struct mlx4_ib_iov_port {
445 char name[100];
446 u8 num;
447 struct mlx4_ib_dev *dev;
448 struct list_head list;
449 struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
450 struct ib_port_attr attr;
451 struct kobject *cur_port;
452 struct kobject *admin_alias_parent;
453 struct kobject *gids_parent;
454 struct kobject *pkeys_parent;
455 struct kobject *mcgs_parent;
456 struct mlx4_ib_iov_sysfs_attr mcg_dentry;
457};
458
206struct mlx4_ib_dev { 459struct mlx4_ib_dev {
207 struct ib_device ib_dev; 460 struct ib_device ib_dev;
208 struct mlx4_dev *dev; 461 struct mlx4_dev *dev;
@@ -216,6 +469,7 @@ struct mlx4_ib_dev {
216 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; 469 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
217 struct ib_ah *sm_ah[MLX4_MAX_PORTS]; 470 struct ib_ah *sm_ah[MLX4_MAX_PORTS];
218 spinlock_t sm_lock; 471 spinlock_t sm_lock;
472 struct mlx4_ib_sriov sriov;
219 473
220 struct mutex cap_mask_mutex; 474 struct mutex cap_mask_mutex;
221 bool ib_active; 475 bool ib_active;
@@ -223,6 +477,11 @@ struct mlx4_ib_dev {
223 int counters[MLX4_MAX_PORTS]; 477 int counters[MLX4_MAX_PORTS];
224 int *eq_table; 478 int *eq_table;
225 int eq_added; 479 int eq_added;
480 struct kobject *iov_parent;
481 struct kobject *ports_parent;
482 struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
483 struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
484 struct pkey_mgt pkeys;
226}; 485};
227 486
228struct ib_event_work { 487struct ib_event_work {
@@ -231,6 +490,13 @@ struct ib_event_work {
231 struct mlx4_eqe ib_eqe; 490 struct mlx4_eqe ib_eqe;
232}; 491};
233 492
493struct mlx4_ib_qp_tunnel_init_attr {
494 struct ib_qp_init_attr init_attr;
495 int slave;
496 enum ib_qp_type proxy_qp_type;
497 u8 port;
498};
499
234static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) 500static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
235{ 501{
236 return container_of(ibdev, struct mlx4_ib_dev, ib_dev); 502 return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -300,6 +566,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
300 return container_of(ibah, struct mlx4_ib_ah, ibah); 566 return container_of(ibah, struct mlx4_ib_ah, ibah);
301} 567}
302 568
569int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
570void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
571
303int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, 572int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
304 struct mlx4_db *db); 573 struct mlx4_db *db);
305void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); 574void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -356,7 +625,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
356int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 625int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
357 struct ib_recv_wr **bad_wr); 626 struct ib_recv_wr **bad_wr);
358 627
359int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, 628int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
360 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 629 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
361 void *in_mad, void *response_mad); 630 void *in_mad, void *response_mad);
362int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 631int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -371,6 +640,13 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
371 u64 iova); 640 u64 iova);
372int mlx4_ib_unmap_fmr(struct list_head *fmr_list); 641int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
373int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); 642int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
643int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
644 struct ib_port_attr *props, int netw_view);
645int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
646 u16 *pkey, int netw_view);
647
648int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
649 union ib_gid *gid, int netw_view);
374 650
375int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, 651int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
376 u8 *mac, int *is_mcast, u8 port); 652 u8 *mac, int *is_mcast, u8 port);
@@ -385,10 +661,69 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
385 return !!(ah->av.ib.g_slid & 0x80); 661 return !!(ah->av.ib.g_slid & 0x80);
386} 662}
387 663
664int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
665void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
666void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
667int mlx4_ib_mcg_init(void);
668void mlx4_ib_mcg_destroy(void);
669
670int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
671
672int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
673 struct ib_sa_mad *sa_mad);
674int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
675 struct ib_sa_mad *mad);
676
388int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 677int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
389 union ib_gid *gid); 678 union ib_gid *gid);
390 679
391void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, 680void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
392 enum ib_event_type type); 681 enum ib_event_type type);
393 682
683void mlx4_ib_tunnels_update_work(struct work_struct *work);
684
685int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
686 enum ib_qp_type qpt, struct ib_wc *wc,
687 struct ib_grh *grh, struct ib_mad *mad);
688int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
689 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
690 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
691__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
692
693int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
694 struct ib_mad *mad);
695
696int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
697 struct ib_mad *mad);
698
699void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
700void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
701
702/* alias guid support */
703void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
704int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
705void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
706void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
707
708void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
709 int block_num,
710 u8 port_num, u8 *p_data);
711
712void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
713 int block_num, u8 port_num,
714 u8 *p_data);
715
716int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
717 struct attribute *attr);
718void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
719 struct attribute *attr);
720ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
721
722int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device);
723
724void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
725
726__be64 mlx4_ib_gen_node_guid(void);
727
728
394#endif /* MLX4_IB_H */ 729#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index f585eddef4b7..19e0637220b9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -38,6 +38,7 @@
38#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
39#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
40#include <rdma/ib_addr.h> 40#include <rdma/ib_addr.h>
41#include <rdma/ib_mad.h>
41 42
42#include <linux/mlx4/qp.h> 43#include <linux/mlx4/qp.h>
43 44
@@ -110,16 +111,62 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
110 return container_of(mqp, struct mlx4_ib_sqp, qp); 111 return container_of(mqp, struct mlx4_ib_sqp, qp);
111} 112}
112 113
114static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
115{
116 if (!mlx4_is_master(dev->dev))
117 return 0;
118
119 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
120 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
121 8 * MLX4_MFUNC_MAX;
122}
123
113static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 124static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
114{ 125{
115 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 126 int proxy_sqp = 0;
116 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; 127 int real_sqp = 0;
128 int i;
129 /* PPF or Native -- real SQP */
130 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
131 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
132 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
133 if (real_sqp)
134 return 1;
135 /* VF or PF -- proxy SQP */
136 if (mlx4_is_mfunc(dev->dev)) {
137 for (i = 0; i < dev->dev->caps.num_ports; i++) {
138 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
139 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
140 proxy_sqp = 1;
141 break;
142 }
143 }
144 }
145 return proxy_sqp;
117} 146}
118 147
148/* used for INIT/CLOSE port logic */
119static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 149static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
120{ 150{
121 return qp->mqp.qpn >= dev->dev->caps.sqp_start && 151 int proxy_qp0 = 0;
122 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; 152 int real_qp0 = 0;
153 int i;
154 /* PPF or Native -- real QP0 */
155 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
156 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
157 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
158 if (real_qp0)
159 return 1;
160 /* VF or PF -- proxy QP0 */
161 if (mlx4_is_mfunc(dev->dev)) {
162 for (i = 0; i < dev->dev->caps.num_ports; i++) {
163 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
164 proxy_qp0 = 1;
165 break;
166 }
167 }
168 }
169 return proxy_qp0;
123} 170}
124 171
125static void *get_wqe(struct mlx4_ib_qp *qp, int offset) 172static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -270,7 +317,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
270 } 317 }
271} 318}
272 319
273static int send_wqe_overhead(enum ib_qp_type type, u32 flags) 320static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
274{ 321{
275 /* 322 /*
276 * UD WQEs must have a datagram segment. 323 * UD WQEs must have a datagram segment.
@@ -279,19 +326,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
279 * header and space for the ICRC). 326 * header and space for the ICRC).
280 */ 327 */
281 switch (type) { 328 switch (type) {
282 case IB_QPT_UD: 329 case MLX4_IB_QPT_UD:
283 return sizeof (struct mlx4_wqe_ctrl_seg) + 330 return sizeof (struct mlx4_wqe_ctrl_seg) +
284 sizeof (struct mlx4_wqe_datagram_seg) + 331 sizeof (struct mlx4_wqe_datagram_seg) +
285 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); 332 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
286 case IB_QPT_UC: 333 case MLX4_IB_QPT_PROXY_SMI_OWNER:
334 case MLX4_IB_QPT_PROXY_SMI:
335 case MLX4_IB_QPT_PROXY_GSI:
336 return sizeof (struct mlx4_wqe_ctrl_seg) +
337 sizeof (struct mlx4_wqe_datagram_seg) + 64;
338 case MLX4_IB_QPT_TUN_SMI_OWNER:
339 case MLX4_IB_QPT_TUN_GSI:
340 return sizeof (struct mlx4_wqe_ctrl_seg) +
341 sizeof (struct mlx4_wqe_datagram_seg);
342
343 case MLX4_IB_QPT_UC:
287 return sizeof (struct mlx4_wqe_ctrl_seg) + 344 return sizeof (struct mlx4_wqe_ctrl_seg) +
288 sizeof (struct mlx4_wqe_raddr_seg); 345 sizeof (struct mlx4_wqe_raddr_seg);
289 case IB_QPT_RC: 346 case MLX4_IB_QPT_RC:
290 return sizeof (struct mlx4_wqe_ctrl_seg) + 347 return sizeof (struct mlx4_wqe_ctrl_seg) +
291 sizeof (struct mlx4_wqe_atomic_seg) + 348 sizeof (struct mlx4_wqe_atomic_seg) +
292 sizeof (struct mlx4_wqe_raddr_seg); 349 sizeof (struct mlx4_wqe_raddr_seg);
293 case IB_QPT_SMI: 350 case MLX4_IB_QPT_SMI:
294 case IB_QPT_GSI: 351 case MLX4_IB_QPT_GSI:
295 return sizeof (struct mlx4_wqe_ctrl_seg) + 352 return sizeof (struct mlx4_wqe_ctrl_seg) +
296 ALIGN(MLX4_IB_UD_HEADER_SIZE + 353 ALIGN(MLX4_IB_UD_HEADER_SIZE +
297 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, 354 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -345,7 +402,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
345} 402}
346 403
347static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 404static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
348 enum ib_qp_type type, struct mlx4_ib_qp *qp) 405 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
349{ 406{
350 int s; 407 int s;
351 408
@@ -360,7 +417,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
360 * For MLX transport we need 2 extra S/G entries: 417 * For MLX transport we need 2 extra S/G entries:
361 * one for the header and one for the checksum at the end 418 * one for the header and one for the checksum at the end
362 */ 419 */
363 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && 420 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
421 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
364 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 422 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
365 return -EINVAL; 423 return -EINVAL;
366 424
@@ -404,7 +462,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
404 */ 462 */
405 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && 463 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
406 qp->sq_signal_bits && BITS_PER_LONG == 64 && 464 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
407 type != IB_QPT_SMI && type != IB_QPT_GSI) 465 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
466 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
467 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
408 qp->sq.wqe_shift = ilog2(64); 468 qp->sq.wqe_shift = ilog2(64);
409 else 469 else
410 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); 470 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -476,6 +536,54 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
476 return 0; 536 return 0;
477} 537}
478 538
539static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
540{
541 int i;
542
543 qp->sqp_proxy_rcv =
544 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
545 GFP_KERNEL);
546 if (!qp->sqp_proxy_rcv)
547 return -ENOMEM;
548 for (i = 0; i < qp->rq.wqe_cnt; i++) {
549 qp->sqp_proxy_rcv[i].addr =
550 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
551 GFP_KERNEL);
552 if (!qp->sqp_proxy_rcv[i].addr)
553 goto err;
554 qp->sqp_proxy_rcv[i].map =
555 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
556 sizeof (struct mlx4_ib_proxy_sqp_hdr),
557 DMA_FROM_DEVICE);
558 }
559 return 0;
560
561err:
562 while (i > 0) {
563 --i;
564 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
565 sizeof (struct mlx4_ib_proxy_sqp_hdr),
566 DMA_FROM_DEVICE);
567 kfree(qp->sqp_proxy_rcv[i].addr);
568 }
569 kfree(qp->sqp_proxy_rcv);
570 qp->sqp_proxy_rcv = NULL;
571 return -ENOMEM;
572}
573
574static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
575{
576 int i;
577
578 for (i = 0; i < qp->rq.wqe_cnt; i++) {
579 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
580 sizeof (struct mlx4_ib_proxy_sqp_hdr),
581 DMA_FROM_DEVICE);
582 kfree(qp->sqp_proxy_rcv[i].addr);
583 }
584 kfree(qp->sqp_proxy_rcv);
585}
586
479static int qp_has_rq(struct ib_qp_init_attr *attr) 587static int qp_has_rq(struct ib_qp_init_attr *attr)
480{ 588{
481 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) 589 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
@@ -486,10 +594,67 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
486 594
487static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 595static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
488 struct ib_qp_init_attr *init_attr, 596 struct ib_qp_init_attr *init_attr,
489 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) 597 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
490{ 598{
491 int qpn; 599 int qpn;
492 int err; 600 int err;
601 struct mlx4_ib_sqp *sqp;
602 struct mlx4_ib_qp *qp;
603 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
604
605 /* When tunneling special qps, we use a plain UD qp */
606 if (sqpn) {
607 if (mlx4_is_mfunc(dev->dev) &&
608 (!mlx4_is_master(dev->dev) ||
609 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
610 if (init_attr->qp_type == IB_QPT_GSI)
611 qp_type = MLX4_IB_QPT_PROXY_GSI;
612 else if (mlx4_is_master(dev->dev))
613 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
614 else
615 qp_type = MLX4_IB_QPT_PROXY_SMI;
616 }
617 qpn = sqpn;
618 /* add extra sg entry for tunneling */
619 init_attr->cap.max_recv_sge++;
620 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
621 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
622 container_of(init_attr,
623 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
624 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
625 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
626 !mlx4_is_master(dev->dev))
627 return -EINVAL;
628 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
629 qp_type = MLX4_IB_QPT_TUN_GSI;
630 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
631 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
632 else
633 qp_type = MLX4_IB_QPT_TUN_SMI;
634 /* we are definitely in the PPF here, since we are creating
635 * tunnel QPs. base_tunnel_sqpn is therefore valid. */
636 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
637 + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
638 sqpn = qpn;
639 }
640
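A worked sketch of the tunnel QPN formula just above (the base value in the example is illustrative, not a real firmware capability): each slave owns a block of 8 QPNs starting at base_tunnel_sqpn, selected by proxy type (SMI = 0, GSI = 1) and port.

static unsigned tunnel_qpn(unsigned base_tunnel_sqpn, int slave,
			   int proxy_is_gsi, int port)
{
	return base_tunnel_sqpn + 8 * slave + proxy_is_gsi * 2 + port - 1;
}

/* e.g. base 0x200, slave 3, GSI, port 2 -> 0x200 + 24 + 2 + 1 = 0x21b */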
641 if (!*caller_qp) {
642 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
643 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
644 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
645 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
646 if (!sqp)
647 return -ENOMEM;
648 qp = &sqp->qp;
649 } else {
650 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
651 if (!qp)
652 return -ENOMEM;
653 }
654 } else
655 qp = *caller_qp;
656
657 qp->mlx4_ib_qp_type = qp_type;
493 658
494 mutex_init(&qp->mutex); 659 mutex_init(&qp->mutex);
495 spin_lock_init(&qp->sq.lock); 660 spin_lock_init(&qp->sq.lock);
@@ -550,7 +715,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
550 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 715 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
551 qp->flags |= MLX4_IB_QP_LSO; 716 qp->flags |= MLX4_IB_QP_LSO;
552 717
553 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); 718 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
554 if (err) 719 if (err)
555 goto err; 720 goto err;
556 721
@@ -586,7 +751,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
586 } 751 }
587 752
588 if (sqpn) { 753 if (sqpn) {
589 qpn = sqpn; 754 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
755 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
756 if (alloc_proxy_bufs(pd->device, qp)) {
757 err = -ENOMEM;
758 goto err_wrid;
759 }
760 }
590 } else { 761 } else {
591 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE 762 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
592 * BlueFlame setup flow wrongly causes VLAN insertion. */ 763 * BlueFlame setup flow wrongly causes VLAN insertion. */
@@ -595,7 +766,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
595 else 766 else
596 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); 767 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
597 if (err) 768 if (err)
598 goto err_wrid; 769 goto err_proxy;
599 } 770 }
600 771
601 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); 772 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
@@ -613,13 +784,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
613 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); 784 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
614 785
615 qp->mqp.event = mlx4_ib_qp_event; 786 qp->mqp.event = mlx4_ib_qp_event;
616 787 if (!*caller_qp)
788 *caller_qp = qp;
617 return 0; 789 return 0;
618 790
619err_qpn: 791err_qpn:
620 if (!sqpn) 792 if (!sqpn)
621 mlx4_qp_release_range(dev->dev, qpn, 1); 793 mlx4_qp_release_range(dev->dev, qpn, 1);
622 794err_proxy:
795 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
796 free_proxy_bufs(pd->device, qp);
623err_wrid: 797err_wrid:
624 if (pd->uobject) { 798 if (pd->uobject) {
625 if (qp_has_rq(init_attr)) 799 if (qp_has_rq(init_attr))
@@ -643,6 +817,8 @@ err_db:
643 mlx4_db_free(dev->dev, &qp->db); 817 mlx4_db_free(dev->dev, &qp->db);
644 818
645err: 819err:
820 if (!*caller_qp)
821 kfree(qp);
646 return err; 822 return err;
647} 823}
648 824
@@ -755,7 +931,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
755 931
756 mlx4_qp_free(dev->dev, &qp->mqp); 932 mlx4_qp_free(dev->dev, &qp->mqp);
757 933
758 if (!is_sqp(dev, qp)) 934 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
759 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 935 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
760 936
761 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 937 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@@ -768,6 +944,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
768 } else { 944 } else {
769 kfree(qp->sq.wrid); 945 kfree(qp->sq.wrid);
770 kfree(qp->rq.wrid); 946 kfree(qp->rq.wrid);
947 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
948 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
949 free_proxy_bufs(&dev->ib_dev, qp);
771 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 950 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
772 if (qp->rq.wqe_cnt) 951 if (qp->rq.wqe_cnt)
773 mlx4_db_free(dev->dev, &qp->db); 952 mlx4_db_free(dev->dev, &qp->db);
@@ -776,25 +955,46 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
776 del_gid_entries(qp); 955 del_gid_entries(qp);
777} 956}
778 957
958static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
959{
960 /* Native or PPF */
961 if (!mlx4_is_mfunc(dev->dev) ||
962 (mlx4_is_master(dev->dev) &&
963 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
964 return dev->dev->phys_caps.base_sqpn +
965 (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
966 attr->port_num - 1;
967 }
968 /* PF or VF -- creating proxies */
969 if (attr->qp_type == IB_QPT_SMI)
970 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
971 else
972 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
973}
974
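For the native/PPF branch above, the four special QPNs sit in one block: SMI at base + port - 1, GSI at base + 2 + port - 1. A short sketch with an illustrative base:

static unsigned sqp_num(unsigned base_sqpn, int is_gsi, int port)
{
	return base_sqpn + (is_gsi ? 2 : 0) + port - 1;
}

/* base 0x100: QP0 port1/2 -> 0x100/0x101, QP1 port1/2 -> 0x102/0x103 */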
779struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 975struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
780 struct ib_qp_init_attr *init_attr, 976 struct ib_qp_init_attr *init_attr,
781 struct ib_udata *udata) 977 struct ib_udata *udata)
782{ 978{
783 struct mlx4_ib_sqp *sqp; 979 struct mlx4_ib_qp *qp = NULL;
784 struct mlx4_ib_qp *qp;
785 int err; 980 int err;
786 u16 xrcdn = 0; 981 u16 xrcdn = 0;
787 982
788 /* 983 /*
 789 * We only support LSO and multicast loopback blocking, and 984 * We only support LSO, multicast loopback blocking, and the SR-IOV
 790 * only for kernel UD QPs. 985 * QP creation flags, and only for kernel UD QPs.
791 */ 986 */
792 if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | 987 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
793 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) 988 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
989 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
794 return ERR_PTR(-EINVAL); 990 return ERR_PTR(-EINVAL);
795 991
796 if (init_attr->create_flags && 992 if (init_attr->create_flags &&
797 (udata || init_attr->qp_type != IB_QPT_UD)) 993 (udata ||
994 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
995 init_attr->qp_type != IB_QPT_UD) ||
996 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
997 init_attr->qp_type > IB_QPT_GSI)))
798 return ERR_PTR(-EINVAL); 998 return ERR_PTR(-EINVAL);
799 999
800 switch (init_attr->qp_type) { 1000 switch (init_attr->qp_type) {
@@ -810,18 +1010,17 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
810 /* fall through */ 1010 /* fall through */
811 case IB_QPT_RC: 1011 case IB_QPT_RC:
812 case IB_QPT_UC: 1012 case IB_QPT_UC:
813 case IB_QPT_UD:
814 case IB_QPT_RAW_PACKET: 1013 case IB_QPT_RAW_PACKET:
815 {
816 qp = kzalloc(sizeof *qp, GFP_KERNEL); 1014 qp = kzalloc(sizeof *qp, GFP_KERNEL);
817 if (!qp) 1015 if (!qp)
818 return ERR_PTR(-ENOMEM); 1016 return ERR_PTR(-ENOMEM);
819 1017 /* fall through */
820 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); 1018 case IB_QPT_UD:
821 if (err) { 1019 {
822 kfree(qp); 1020 err = create_qp_common(to_mdev(pd->device), pd, init_attr,
1021 udata, 0, &qp);
1022 if (err)
823 return ERR_PTR(err); 1023 return ERR_PTR(err);
824 }
825 1024
826 qp->ibqp.qp_num = qp->mqp.qpn; 1025 qp->ibqp.qp_num = qp->mqp.qpn;
827 qp->xrcdn = xrcdn; 1026 qp->xrcdn = xrcdn;
@@ -835,21 +1034,11 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
835 if (udata) 1034 if (udata)
836 return ERR_PTR(-EINVAL); 1035 return ERR_PTR(-EINVAL);
837 1036
838 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
839 if (!sqp)
840 return ERR_PTR(-ENOMEM);
841
842 qp = &sqp->qp;
843
844 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 1037 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
845 to_mdev(pd->device)->dev->caps.sqp_start + 1038 get_sqp_num(to_mdev(pd->device), init_attr),
846 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + 1039 &qp);
847 init_attr->port_num - 1, 1040 if (err)
848 qp);
849 if (err) {
850 kfree(sqp);
851 return ERR_PTR(err); 1041 return ERR_PTR(err);
852 }
853 1042
854 qp->port = init_attr->port_num; 1043 qp->port = init_attr->port_num;
855 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 1044 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -884,18 +1073,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
884 return 0; 1073 return 0;
885} 1074}
886 1075
887static int to_mlx4_st(enum ib_qp_type type) 1076static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
888{ 1077{
889 switch (type) { 1078 switch (type) {
890 case IB_QPT_RC: return MLX4_QP_ST_RC; 1079 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
891 case IB_QPT_UC: return MLX4_QP_ST_UC; 1080 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
892 case IB_QPT_UD: return MLX4_QP_ST_UD; 1081 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
893 case IB_QPT_XRC_INI: 1082 case MLX4_IB_QPT_XRC_INI:
894 case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; 1083 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
895 case IB_QPT_SMI: 1084 case MLX4_IB_QPT_SMI:
896 case IB_QPT_GSI: 1085 case MLX4_IB_QPT_GSI:
897 case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; 1086 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
898 default: return -1; 1087
1088 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1089 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1090 MLX4_QP_ST_MLX : -1);
1091 case MLX4_IB_QPT_PROXY_SMI:
1092 case MLX4_IB_QPT_TUN_SMI:
1093 case MLX4_IB_QPT_PROXY_GSI:
1094 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1095 MLX4_QP_ST_UD : -1);
1096 default: return -1;
899 } 1097 }
900} 1098}
901 1099
@@ -1043,7 +1241,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1043 return -ENOMEM; 1241 return -ENOMEM;
1044 1242
1045 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 1243 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1046 (to_mlx4_st(ibqp->qp_type) << 16)); 1244 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
1047 1245
1048 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) 1246 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1049 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); 1247 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1121,13 +1319,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1121 } 1319 }
1122 1320
1123 if (attr_mask & IB_QP_PKEY_INDEX) { 1321 if (attr_mask & IB_QP_PKEY_INDEX) {
1322 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1323 context->pri_path.disable_pkey_check = 0x40;
1124 context->pri_path.pkey_index = attr->pkey_index; 1324 context->pri_path.pkey_index = attr->pkey_index;
1125 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; 1325 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1126 } 1326 }
1127 1327
1128 if (attr_mask & IB_QP_AV) { 1328 if (attr_mask & IB_QP_AV) {
1129 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, 1329 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1130 attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) 1330 attr_mask & IB_QP_PORT ?
1331 attr->port_num : qp->port))
1131 goto out; 1332 goto out;
1132 1333
1133 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1334 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1210,8 +1411,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1210 if (attr_mask & IB_QP_RQ_PSN) 1411 if (attr_mask & IB_QP_RQ_PSN)
1211 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); 1412 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1212 1413
1414 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
1213 if (attr_mask & IB_QP_QKEY) { 1415 if (attr_mask & IB_QP_QKEY) {
1214 context->qkey = cpu_to_be32(attr->qkey); 1416 if (qp->mlx4_ib_qp_type &
1417 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1418 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1419 else {
1420 if (mlx4_is_mfunc(dev->dev) &&
1421 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1422 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1423 MLX4_RESERVED_QKEY_BASE) {
1424 pr_err("Cannot use reserved QKEY"
1425 " 0x%x (range 0xffff0000..0xffffffff"
1426 " is reserved)\n", attr->qkey);
1427 err = -EINVAL;
1428 goto out;
1429 }
1430 context->qkey = cpu_to_be32(attr->qkey);
1431 }
1215 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1432 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1216 } 1433 }
1217 1434
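The reserved-range test above reduces to a mask comparison; a stand-alone sketch (the mask mirrors the 0xffff0000..0xffffffff range named in the error message):

static int qkey_is_reserved(unsigned int qkey)
{
	return (qkey & 0xffff0000u) == 0xffff0000u;	/* top 16 bits all set */
}

/* qkey_is_reserved(0x80010000) == 0, qkey_is_reserved(0xffff0001) == 1 */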
@@ -1227,10 +1444,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1227 ibqp->qp_type == IB_QPT_UD || 1444 ibqp->qp_type == IB_QPT_UD ||
1228 ibqp->qp_type == IB_QPT_RAW_PACKET)) { 1445 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1229 context->pri_path.sched_queue = (qp->port - 1) << 6; 1446 context->pri_path.sched_queue = (qp->port - 1) << 6;
1230 if (is_qp0(dev, qp)) 1447 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1448 qp->mlx4_ib_qp_type &
1449 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1231 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1450 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1232 else 1451 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
1452 context->pri_path.fl = 0x80;
1453 } else {
1454 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1455 context->pri_path.fl = 0x80;
1233 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1456 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1457 }
1234 } 1458 }
1235 1459
1236 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1460 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1346,7 +1570,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1346 } 1570 }
1347 1571
1348 if ((attr_mask & IB_QP_PORT) && 1572 if ((attr_mask & IB_QP_PORT) &&
1349 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { 1573 (attr->port_num == 0 || attr->port_num > dev->num_ports)) {
1350 pr_debug("qpn 0x%x: invalid port number (%d) specified " 1574 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1351 "for transition %d to %d. qp_type %d\n", 1575 "for transition %d to %d. qp_type %d\n",
1352 ibqp->qp_num, attr->port_num, cur_state, 1576 ibqp->qp_num, attr->port_num, cur_state,
@@ -1400,6 +1624,114 @@ out:
1400 return err; 1624 return err;
1401} 1625}
1402 1626
1627static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1628 struct ib_send_wr *wr,
1629 void *wqe, unsigned *mlx_seg_len)
1630{
1631 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1632 struct ib_device *ib_dev = &mdev->ib_dev;
1633 struct mlx4_wqe_mlx_seg *mlx = wqe;
1634 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1635 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1636 u16 pkey;
1637 u32 qkey;
1638 int send_size;
1639 int header_size;
1640 int spc;
1641 int i;
1642
1643 if (wr->opcode != IB_WR_SEND)
1644 return -EINVAL;
1645
1646 send_size = 0;
1647
1648 for (i = 0; i < wr->num_sge; ++i)
1649 send_size += wr->sg_list[i].length;
1650
1651 /* for proxy-qp0 sends, need to add in size of tunnel header */
1652 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
1653 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1654 send_size += sizeof (struct mlx4_ib_tunnel_header);
1655
1656 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1657
1658 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1659 sqp->ud_header.lrh.service_level =
1660 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1661 sqp->ud_header.lrh.destination_lid =
1662 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1663 sqp->ud_header.lrh.source_lid =
1664 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1665 }
1666
1667 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1668
1669 /* force loopback */
1670 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1671 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1672
1673 sqp->ud_header.lrh.virtual_lane = 0;
1674 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1675 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1676 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1677 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1678 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1679 else
1680 sqp->ud_header.bth.destination_qpn =
1681 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
1682
1683 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1684 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1685 return -EINVAL;
1686 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1687 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1688
1689 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1690 sqp->ud_header.immediate_present = 0;
1691
1692 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1693
1694 /*
1695 * Inline data segments may not cross a 64 byte boundary. If
1696 * our UD header is bigger than the space available up to the
1697 * next 64 byte boundary in the WQE, use two inline data
1698 * segments to hold the UD header.
1699 */
1700 spc = MLX4_INLINE_ALIGN -
1701 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1702 if (header_size <= spc) {
1703 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1704 memcpy(inl + 1, sqp->header_buf, header_size);
1705 i = 1;
1706 } else {
1707 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1708 memcpy(inl + 1, sqp->header_buf, spc);
1709
1710 inl = (void *) (inl + 1) + spc;
1711 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1712 /*
1713 * Need a barrier here to make sure all the data is
1714 * visible before the byte_count field is set.
1715 * Otherwise the HCA prefetcher could grab the 64-byte
1716 * chunk with this inline segment and get a valid (!=
1717 * 0xffffffff) byte count but stale data, and end up
1718 * generating a packet with bad headers.
1719 *
1720 * The first inline segment's byte_count field doesn't
1721 * need a barrier, because it comes after a
1722 * control/MLX segment and therefore is at an offset
1723 * of 16 mod 64.
1724 */
1725 wmb();
1726 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1727 i = 2;
1728 }
1729
1730 *mlx_seg_len =
1731 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1732 return 0;
1733}
1734
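
The split performed above (and again in build_tunnel_header() below) follows one rule: an inline data segment may not cross a 64-byte boundary. The following standalone sketch restates that decision with stand-in names rather than the driver's real structures; it is illustrative only, not driver code.

#include <stdint.h>

#define EX_INLINE_ALIGN 64	/* stands in for MLX4_INLINE_ALIGN */

/* seg_addr: address of the inline segment's byte_count word in the WQE */
static int ex_inline_segs_needed(uintptr_t seg_addr, int header_size)
{
	/* space left up to the next 64-byte boundary after the 4-byte count */
	int spc = EX_INLINE_ALIGN -
		  (int)((seg_addr + sizeof(uint32_t)) & (EX_INLINE_ALIGN - 1));

	/*
	 * One segment if the header fits before the next 64-byte boundary;
	 * otherwise spc bytes go in the first segment and the remainder in a
	 * second one, whose byte_count is published only after a write
	 * barrier (the wmb() above) so the HCA never prefetches a valid
	 * count paired with stale payload bytes.
	 */
	return header_size <= spc ? 1 : 2;
}
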
1403static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1735static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1404 void *wqe, unsigned *mlx_seg_len) 1736 void *wqe, unsigned *mlx_seg_len)
1405{ 1737{
@@ -1418,6 +1750,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1418 int is_vlan = 0; 1750 int is_vlan = 0;
1419 int is_grh; 1751 int is_grh;
1420 u16 vlan; 1752 u16 vlan;
1753 int err = 0;
1421 1754
1422 send_size = 0; 1755 send_size = 0;
1423 for (i = 0; i < wr->num_sge; ++i) 1756 for (i = 0; i < wr->num_sge; ++i)
@@ -1426,8 +1759,24 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1426 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 1759 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1427 is_grh = mlx4_ib_ah_grh_present(ah); 1760 is_grh = mlx4_ib_ah_grh_present(ah);
1428 if (is_eth) { 1761 if (is_eth) {
1429 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1762 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1430 ah->av.ib.gid_index, &sgid); 1763 /* When multi-function is enabled, the ib_core gid
1764 * indexes don't necessarily match the hw ones, so
1765 * we must use our own cache */
1766 sgid.global.subnet_prefix =
1767 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1768 subnet_prefix;
1769 sgid.global.interface_id =
1770 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1771 guid_cache[ah->av.ib.gid_index];
1772 } else {
1773 err = ib_get_cached_gid(ib_dev,
1774 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1775 ah->av.ib.gid_index, &sgid);
1776 if (err)
1777 return err;
1778 }
1779
1431 vlan = rdma_get_vlan_id(&sgid); 1780 vlan = rdma_get_vlan_id(&sgid);
1432 is_vlan = vlan < 0x1000; 1781 is_vlan = vlan < 0x1000;
1433 } 1782 }
@@ -1446,8 +1795,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1446 sqp->ud_header.grh.flow_label = 1795 sqp->ud_header.grh.flow_label =
1447 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1796 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1448 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 1797 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1449 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, 1798 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
1450 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); 1799 /* When multi-function is enabled, the ib_core gid
1800 * indexes don't necessarily match the hw ones, so
1801 * we must use our own cache */
1802 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1803 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1804 subnet_prefix;
1805 sqp->ud_header.grh.source_gid.global.interface_id =
1806 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1807 guid_cache[ah->av.ib.gid_index];
1808 } else
1809 ib_get_cached_gid(ib_dev,
1810 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1811 ah->av.ib.gid_index,
1812 &sqp->ud_header.grh.source_gid);
1451 memcpy(sqp->ud_header.grh.destination_gid.raw, 1813 memcpy(sqp->ud_header.grh.destination_gid.raw,
1452 ah->av.ib.dgid, 16); 1814 ah->av.ib.dgid, 16);
1453 } 1815 }
@@ -1459,6 +1821,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1459 (sqp->ud_header.lrh.destination_lid == 1821 (sqp->ud_header.lrh.destination_lid ==
1460 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1822 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1461 (sqp->ud_header.lrh.service_level << 8)); 1823 (sqp->ud_header.lrh.service_level << 8));
1824 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1825 mlx->flags |= cpu_to_be32(0x1); /* force loopback */
1462 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1826 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1463 } 1827 }
1464 1828
@@ -1667,6 +2031,63 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
1667 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); 2031 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
1668} 2032}
1669 2033
2034static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2035 struct mlx4_wqe_datagram_seg *dseg,
2036 struct ib_send_wr *wr, enum ib_qp_type qpt)
2037{
2038 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2039 struct mlx4_av sqp_av = {0};
2040 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2041
2042 /* force loopback */
2043 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2044 sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
2045 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2046 cpu_to_be32(0xf0000000);
2047
2048 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2049 /* This function used only for sending on QP1 proxies */
2050 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
2051 /* Use QKEY from the QP context, which is set by master */
2052 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2053}
2054
2055static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2056{
2057 struct mlx4_wqe_inline_seg *inl = wqe;
2058 struct mlx4_ib_tunnel_header hdr;
2059 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2060 int spc;
2061 int i;
2062
2063 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2064 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2065 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2066 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2067
2068 spc = MLX4_INLINE_ALIGN -
2069 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2070 if (sizeof (hdr) <= spc) {
2071 memcpy(inl + 1, &hdr, sizeof (hdr));
2072 wmb();
2073 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2074 i = 1;
2075 } else {
2076 memcpy(inl + 1, &hdr, spc);
2077 wmb();
2078 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2079
2080 inl = (void *) (inl + 1) + spc;
2081 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2082 wmb();
2083 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2084 i = 2;
2085 }
2086
2087 *mlx_seg_len =
2088 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2089}
2090
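
In the MLX4_IB_QPT_PROXY_GSI branch of mlx4_ib_post_send() further down, these two helpers are chained: a forced-loopback datagram segment aimed at the per-port qp1_tunnel QP, then an inline tunnel header carrying the original address vector, remote QPN/QKEY and pkey index, followed by the caller's data segments. The sketch below only mirrors the 16-byte "size" bookkeeping of that path; the parameter names are illustrative, not the driver's.

/* Illustrative bookkeeping only: how the WQE size counter grows, in
 * 16-byte units, for a proxy-GSI send assembled from the helpers above.
 */
static unsigned int ex_proxy_gsi_wqe_size16(unsigned int ctrl_bytes,
					    unsigned int dgram_bytes,
					    unsigned int tunnel_seglen,
					    int num_sge)
{
	unsigned int size = ctrl_bytes / 16;	/* control segment           */

	size += dgram_bytes / 16;		/* set_tunnel_datagram_seg() */
	size += tunnel_seglen / 16;		/* build_tunnel_header();
						 * *mlx_seg_len is already
						 * 16-byte aligned           */
	size += (unsigned int)num_sge;		/* one 16-byte data segment
						 * per scatter/gather entry  */
	return size;
}
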
1670static void set_mlx_icrc_seg(void *dseg) 2091static void set_mlx_icrc_seg(void *dseg)
1671{ 2092{
1672 u32 *t = dseg; 2093 u32 *t = dseg;
@@ -1748,6 +2169,13 @@ static __be32 send_ieth(struct ib_send_wr *wr)
1748 } 2169 }
1749} 2170}
1750 2171
2172static void add_zero_len_inline(void *wqe)
2173{
2174 struct mlx4_wqe_inline_seg *inl = wqe;
2175 memset(wqe, 0, 16);
2176 inl->byte_count = cpu_to_be32(1 << 31);
2177}
2178
1751int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 2179int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1752 struct ib_send_wr **bad_wr) 2180 struct ib_send_wr **bad_wr)
1753{ 2181{
@@ -1806,9 +2234,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1806 wqe += sizeof *ctrl; 2234 wqe += sizeof *ctrl;
1807 size = sizeof *ctrl / 16; 2235 size = sizeof *ctrl / 16;
1808 2236
1809 switch (ibqp->qp_type) { 2237 switch (qp->mlx4_ib_qp_type) {
1810 case IB_QPT_RC: 2238 case MLX4_IB_QPT_RC:
1811 case IB_QPT_UC: 2239 case MLX4_IB_QPT_UC:
1812 switch (wr->opcode) { 2240 switch (wr->opcode) {
1813 case IB_WR_ATOMIC_CMP_AND_SWP: 2241 case IB_WR_ATOMIC_CMP_AND_SWP:
1814 case IB_WR_ATOMIC_FETCH_AND_ADD: 2242 case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -1869,7 +2297,25 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1869 } 2297 }
1870 break; 2298 break;
1871 2299
1872 case IB_QPT_UD: 2300 case MLX4_IB_QPT_TUN_SMI_OWNER:
2301 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2302 if (unlikely(err)) {
2303 *bad_wr = wr;
2304 goto out;
2305 }
2306 wqe += seglen;
2307 size += seglen / 16;
2308 break;
2309 case MLX4_IB_QPT_TUN_SMI:
2310 case MLX4_IB_QPT_TUN_GSI:
2311 /* this is a UD qp used in MAD responses to slaves. */
2312 set_datagram_seg(wqe, wr);
2313 /* set the forced-loopback bit in the data seg av */
2314 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2315 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2316 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2317 break;
2318 case MLX4_IB_QPT_UD:
1873 set_datagram_seg(wqe, wr); 2319 set_datagram_seg(wqe, wr);
1874 wqe += sizeof (struct mlx4_wqe_datagram_seg); 2320 wqe += sizeof (struct mlx4_wqe_datagram_seg);
1875 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 2321 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
@@ -1886,8 +2332,47 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1886 } 2332 }
1887 break; 2333 break;
1888 2334
1889 case IB_QPT_SMI: 2335 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1890 case IB_QPT_GSI: 2336 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
2337 err = -ENOSYS;
2338 *bad_wr = wr;
2339 goto out;
2340 }
2341 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2342 if (unlikely(err)) {
2343 *bad_wr = wr;
2344 goto out;
2345 }
2346 wqe += seglen;
2347 size += seglen / 16;
2348 /* to start tunnel header on a cache-line boundary */
2349 add_zero_len_inline(wqe);
2350 wqe += 16;
2351 size++;
2352 build_tunnel_header(wr, wqe, &seglen);
2353 wqe += seglen;
2354 size += seglen / 16;
2355 break;
2356 case MLX4_IB_QPT_PROXY_SMI:
2357 /* don't allow QP0 sends on guests */
2358 err = -ENOSYS;
2359 *bad_wr = wr;
2360 goto out;
2361 case MLX4_IB_QPT_PROXY_GSI:
2362 /* If we are tunneling special qps, this is a UD qp.
2363 * In this case we first add a UD segment targeting
2364 * the tunnel qp, and then add a header with address
2365 * information */
2366 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2367 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2368 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2369 build_tunnel_header(wr, wqe, &seglen);
2370 wqe += seglen;
2371 size += seglen / 16;
2372 break;
2373
2374 case MLX4_IB_QPT_SMI:
2375 case MLX4_IB_QPT_GSI:
1891 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); 2376 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
1892 if (unlikely(err)) { 2377 if (unlikely(err)) {
1893 *bad_wr = wr; 2378 *bad_wr = wr;
@@ -1913,8 +2398,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1913 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 2398 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
1914 2399
1915 /* Add one more inline data segment for ICRC for MLX sends */ 2400 /* Add one more inline data segment for ICRC for MLX sends */
1916 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || 2401 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
1917 qp->ibqp.qp_type == IB_QPT_GSI)) { 2402 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
2403 qp->mlx4_ib_qp_type &
2404 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
1918 set_mlx_icrc_seg(dseg + 1); 2405 set_mlx_icrc_seg(dseg + 1);
1919 size += sizeof (struct mlx4_wqe_data_seg) / 16; 2406 size += sizeof (struct mlx4_wqe_data_seg) / 16;
1920 } 2407 }
@@ -2006,8 +2493,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2006 int err = 0; 2493 int err = 0;
2007 int nreq; 2494 int nreq;
2008 int ind; 2495 int ind;
2496 int max_gs;
2009 int i; 2497 int i;
2010 2498
2499 max_gs = qp->rq.max_gs;
2011 spin_lock_irqsave(&qp->rq.lock, flags); 2500 spin_lock_irqsave(&qp->rq.lock, flags);
2012 2501
2013 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 2502 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -2027,10 +2516,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2027 2516
2028 scat = get_recv_wqe(qp, ind); 2517 scat = get_recv_wqe(qp, ind);
2029 2518
2519 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2520 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2521 ib_dma_sync_single_for_device(ibqp->device,
2522 qp->sqp_proxy_rcv[ind].map,
2523 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2524 DMA_FROM_DEVICE);
2525 scat->byte_count =
2526 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
2527 /* use dma lkey from upper layer entry */
2528 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2529 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2530 scat++;
2531 max_gs--;
2532 }
2533
2030 for (i = 0; i < wr->num_sge; ++i) 2534 for (i = 0; i < wr->num_sge; ++i)
2031 __set_data_seg(scat + i, wr->sg_list + i); 2535 __set_data_seg(scat + i, wr->sg_list + i);
2032 2536
2033 if (i < qp->rq.max_gs) { 2537 if (i < max_gs) {
2034 scat[i].byte_count = 0; 2538 scat[i].byte_count = 0;
2035 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); 2539 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2036 scat[i].addr = 0; 2540 scat[i].addr = 0;
@@ -2225,6 +2729,10 @@ done:
2225 if (qp->flags & MLX4_IB_QP_LSO) 2729 if (qp->flags & MLX4_IB_QP_LSO)
2226 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 2730 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
2227 2731
2732 qp_init_attr->sq_sig_type =
2733 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
2734 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2735
2228out: 2736out:
2229 mutex_unlock(&qp->mutex); 2737 mutex_unlock(&qp->mutex);
2230 return err; 2738 return err;
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
new file mode 100644
index 000000000000..5b2a01dfb907
--- /dev/null
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -0,0 +1,794 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*#include "core_priv.h"*/
34#include "mlx4_ib.h"
35#include <linux/slab.h>
36#include <linux/string.h>
37#include <linux/stat.h>
38
39#include <rdma/ib_mad.h>
40/*show_admin_alias_guid returns the administratively assigned value of that GUID.
41 * Values returned in buf parameter string:
42 * 0 - requests opensm to assign a value.
43 * ffffffffffffffff - delete this entry.
44 * other - value assigned by administrator.
45 */
46static ssize_t show_admin_alias_guid(struct device *dev,
47 struct device_attribute *attr, char *buf)
48{
49 int record_num;/*0-15*/
50 int guid_index_in_rec; /*0 - 7*/
51 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
52 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
53 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
54 struct mlx4_ib_dev *mdev = port->dev;
55
56 record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
57 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
58
59 return sprintf(buf, "%llx\n",
60 be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
61 ports_guid[port->num - 1].
62 all_rec_per_port[record_num].
63 all_recs[8 * guid_index_in_rec]));
64}
65
66/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
67 * Values in buf parameter string:
68 * 0 - requests opensm to assign a value.
69 * 0xffffffffffffffff - delete this entry.
70 * other - guid value assigned by the administrator.
71 */
72static ssize_t store_admin_alias_guid(struct device *dev,
73 struct device_attribute *attr,
74 const char *buf, size_t count)
75{
76 int record_num;/*0-15*/
77 int guid_index_in_rec; /*0 - 7*/
78 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
79 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
80 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
81 struct mlx4_ib_dev *mdev = port->dev;
82 u64 sysadmin_ag_val;
83
84 record_num = mlx4_ib_iov_dentry->entry_num / 8;
85 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
86 if (0 == record_num && 0 == guid_index_in_rec) {
87 pr_err("GUID 0 block 0 is RO\n");
88 return count;
89 }
 90	if (sscanf(buf, "%llx", &sysadmin_ag_val) != 1) return -EINVAL;
91 *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
92 all_rec_per_port[record_num].
93 all_recs[GUID_REC_SIZE * guid_index_in_rec] =
94 cpu_to_be64(sysadmin_ag_val);
95
96 /* Change the state to be pending for update */
97 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
98 = MLX4_GUID_INFO_STATUS_IDLE ;
99
100 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
101 = MLX4_GUID_INFO_RECORD_SET;
102
103 switch (sysadmin_ag_val) {
104 case MLX4_GUID_FOR_DELETE_VAL:
105 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
106 = MLX4_GUID_INFO_RECORD_DELETE;
107 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
108 = MLX4_GUID_SYSADMIN_ASSIGN;
109 break;
110 /* The sysadmin requests the SM to re-assign */
111 case MLX4_NOT_SET_GUID:
112 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
113 = MLX4_GUID_DRIVER_ASSIGN;
114 break;
115 /* The sysadmin requests a specific value.*/
116 default:
117 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
118 = MLX4_GUID_SYSADMIN_ASSIGN;
119 break;
120 }
121
122 /* set the record index */
123 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
124 = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
125
126 mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
127
128 return count;
129}
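
Taken together, the two attribute handlers above fold a sysfs entry number into a (record, slot) pair and turn the written value into an SA method plus an ownership marker. A compressed, standalone restatement of that mapping follows; plain ints stand in for the driver's MLX4_GUID_* enum values, so it is illustrative only.

/* Illustrative only: how store_admin_alias_guid() interprets its input. */
struct ex_guid_update {
	int record_num;		/* entry_num / 8: GUID info record           */
	int guid_index_in_rec;	/* entry_num % 8: slot within that record    */
	int delete_record;	/* value 0xffffffffffffffff => delete record */
	int sysadmin_owned;	/* 0 => hand assignment back to SM/driver    */
};

static struct ex_guid_update ex_classify_admin_guid(int entry_num,
						    unsigned long long val)
{
	struct ex_guid_update u = {
		.record_num        = entry_num / 8,
		.guid_index_in_rec = entry_num % 8,
		.delete_record     = (val == 0xffffffffffffffffULL),
		.sysadmin_owned    = (val != 0ULL),
	};
	return u;
}
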
130
131static ssize_t show_port_gid(struct device *dev,
132 struct device_attribute *attr,
133 char *buf)
134{
135 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
136 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
137 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
138 struct mlx4_ib_dev *mdev = port->dev;
139 union ib_gid gid;
140 ssize_t ret;
141
142 ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
143 mlx4_ib_iov_dentry->entry_num, &gid, 1);
144 if (ret)
145 return ret;
146 ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
147 be16_to_cpu(((__be16 *) gid.raw)[0]),
148 be16_to_cpu(((__be16 *) gid.raw)[1]),
149 be16_to_cpu(((__be16 *) gid.raw)[2]),
150 be16_to_cpu(((__be16 *) gid.raw)[3]),
151 be16_to_cpu(((__be16 *) gid.raw)[4]),
152 be16_to_cpu(((__be16 *) gid.raw)[5]),
153 be16_to_cpu(((__be16 *) gid.raw)[6]),
154 be16_to_cpu(((__be16 *) gid.raw)[7]));
155 return ret;
156}
157
158static ssize_t show_phys_port_pkey(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
163 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
164 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
165 struct mlx4_ib_dev *mdev = port->dev;
166 u16 pkey;
167 ssize_t ret;
168
169 ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
170 mlx4_ib_iov_dentry->entry_num, &pkey, 1);
171 if (ret)
172 return ret;
173
174 return sprintf(buf, "0x%04x\n", pkey);
175}
176
177#define DENTRY_REMOVE(_dentry) \
178do { \
179 sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
 180} while (0)
181
182static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
183 char *_name, struct kobject *_kobj,
184 ssize_t (*show)(struct device *dev,
185 struct device_attribute *attr,
186 char *buf),
187 ssize_t (*store)(struct device *dev,
188 struct device_attribute *attr,
189 const char *buf, size_t count)
190 )
191{
192 int ret = 0;
193 struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
194
195 vdentry->ctx = _ctx;
196 vdentry->dentry.show = show;
197 vdentry->dentry.store = store;
198 sysfs_attr_init(&vdentry->dentry.attr);
199 vdentry->dentry.attr.name = vdentry->name;
200 vdentry->dentry.attr.mode = 0;
201 vdentry->kobj = _kobj;
202 snprintf(vdentry->name, 15, "%s", _name);
203
204 if (vdentry->dentry.store)
205 vdentry->dentry.attr.mode |= S_IWUSR;
206
207 if (vdentry->dentry.show)
208 vdentry->dentry.attr.mode |= S_IRUGO;
209
210 ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
211 if (ret) {
212 pr_err("failed to create %s\n", vdentry->dentry.attr.name);
213 vdentry->ctx = NULL;
214 return ret;
215 }
216
217 return ret;
218}
219
220int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
221 struct attribute *attr)
222{
223 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
224 int ret;
225
226 ret = sysfs_create_file(port->mcgs_parent, attr);
227 if (ret)
228 pr_err("failed to create %s\n", attr->name);
229
230 return ret;
231}
232
233void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
234 struct attribute *attr)
235{
236 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
237
238 sysfs_remove_file(port->mcgs_parent, attr);
239}
240
241static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
242{
243 int i;
244 char buff[10];
245 struct mlx4_ib_iov_port *port = NULL;
246 int ret = 0 ;
247 struct ib_port_attr attr;
248
249 /* get the physical gid and pkey table sizes.*/
250 ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
251 if (ret)
252 goto err;
253
254 port = &device->iov_ports[port_num - 1];
255 port->dev = device;
256 port->num = port_num;
257 /* Directory structure:
258 * iov -
259 * port num -
260 * admin_guids
261 * gids (operational)
262 * mcg_table
263 */
264 port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
265 GFP_KERNEL);
266 if (!port->dentr_ar) {
267 ret = -ENOMEM;
268 goto err;
269 }
270 sprintf(buff, "%d", port_num);
271 port->cur_port = kobject_create_and_add(buff,
272 kobject_get(device->ports_parent));
273 if (!port->cur_port) {
274 ret = -ENOMEM;
275 goto kobj_create_err;
276 }
277 /* admin GUIDs */
278 port->admin_alias_parent = kobject_create_and_add("admin_guids",
279 kobject_get(port->cur_port));
280 if (!port->admin_alias_parent) {
281 ret = -ENOMEM;
282 goto err_admin_guids;
283 }
284 for (i = 0 ; i < attr.gid_tbl_len; i++) {
285 sprintf(buff, "%d", i);
286 port->dentr_ar->dentries[i].entry_num = i;
287 ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
288 buff, port->admin_alias_parent,
289 show_admin_alias_guid, store_admin_alias_guid);
290 if (ret)
291 goto err_admin_alias_parent;
292 }
293
294 /* gids subdirectory (operational gids) */
295 port->gids_parent = kobject_create_and_add("gids",
296 kobject_get(port->cur_port));
297 if (!port->gids_parent) {
298 ret = -ENOMEM;
299 goto err_gids;
300 }
301
302 for (i = 0 ; i < attr.gid_tbl_len; i++) {
303 sprintf(buff, "%d", i);
304 port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
305 ret = create_sysfs_entry(port,
306 &port->dentr_ar->dentries[attr.gid_tbl_len + i],
307 buff,
308 port->gids_parent, show_port_gid, NULL);
309 if (ret)
310 goto err_gids_parent;
311 }
312
313 /* physical port pkey table */
314 port->pkeys_parent =
315 kobject_create_and_add("pkeys", kobject_get(port->cur_port));
316 if (!port->pkeys_parent) {
317 ret = -ENOMEM;
318 goto err_pkeys;
319 }
320
321 for (i = 0 ; i < attr.pkey_tbl_len; i++) {
322 sprintf(buff, "%d", i);
323 port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
324 ret = create_sysfs_entry(port,
325 &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
326 buff, port->pkeys_parent,
327 show_phys_port_pkey, NULL);
328 if (ret)
329 goto err_pkeys_parent;
330 }
331
332 /* MCGs table */
333 port->mcgs_parent =
334 kobject_create_and_add("mcgs", kobject_get(port->cur_port));
335 if (!port->mcgs_parent) {
336 ret = -ENOMEM;
337 goto err_mcgs;
338 }
339 return 0;
340
341err_mcgs:
342 kobject_put(port->cur_port);
343
344err_pkeys_parent:
345 kobject_put(port->pkeys_parent);
346
347err_pkeys:
348 kobject_put(port->cur_port);
349
350err_gids_parent:
351 kobject_put(port->gids_parent);
352
353err_gids:
354 kobject_put(port->cur_port);
355
356err_admin_alias_parent:
357 kobject_put(port->admin_alias_parent);
358
359err_admin_guids:
360 kobject_put(port->cur_port);
361 kobject_put(port->cur_port); /* once more for create_and_add buff */
362
363kobj_create_err:
364 kobject_put(device->ports_parent);
365 kfree(port->dentr_ar);
366
367err:
368 pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
369 port_num, ret);
370 return ret;
371}
372
373static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
374{
375 char base_name[9];
376
377 /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
378 strlcpy(name, pci_name(dev->dev->pdev), max);
379 strncpy(base_name, name, 8); /*till xxxx:yy:*/
380 base_name[8] = '\0';
 381	/* with no ARI only the last 3 bits are used, so when the fn index is 8 or
 382	 * higher it spills over into the dev number; the last number in the name
 383	 * is therefore the index modulo 8 */
384 sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
385}
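
The numbering scheme in get_name() is easy to check with a worked example. The sketch below is standalone and only mimics the formatting; the base string and index are made-up values, and the real function derives the prefix from pci_name() as shown above.

/* Standalone sketch of get_name()'s non-ARI numbering: the PCI prefix keeps
 * only "domain:bus:" and the function index i supplies dev = i / 8 and
 * fn = i % 8.  For base "0000:04:" and i = 10 this yields "0000:04:01.2".
 */
#include <stdio.h>

static void ex_get_name(const char *base /* "xxxx:yy:" */, int i,
			char *name, size_t max)
{
	snprintf(name, max, "%s%.2d.%d", base, i / 8, i % 8);
}
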
386
387struct mlx4_port {
388 struct kobject kobj;
389 struct mlx4_ib_dev *dev;
390 struct attribute_group pkey_group;
391 struct attribute_group gid_group;
392 u8 port_num;
393 int slave;
394};
395
396
397static void mlx4_port_release(struct kobject *kobj)
398{
399 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
400 struct attribute *a;
401 int i;
402
403 for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
404 kfree(a);
405 kfree(p->pkey_group.attrs);
406 for (i = 0; (a = p->gid_group.attrs[i]); ++i)
407 kfree(a);
408 kfree(p->gid_group.attrs);
409 kfree(p);
410}
411
412struct port_attribute {
413 struct attribute attr;
414 ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
415 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
416 const char *buf, size_t count);
417};
418
419static ssize_t port_attr_show(struct kobject *kobj,
420 struct attribute *attr, char *buf)
421{
422 struct port_attribute *port_attr =
423 container_of(attr, struct port_attribute, attr);
424 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
425
426 if (!port_attr->show)
427 return -EIO;
428 return port_attr->show(p, port_attr, buf);
429}
430
431static ssize_t port_attr_store(struct kobject *kobj,
432 struct attribute *attr,
433 const char *buf, size_t size)
434{
435 struct port_attribute *port_attr =
436 container_of(attr, struct port_attribute, attr);
437 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
438
439 if (!port_attr->store)
440 return -EIO;
441 return port_attr->store(p, port_attr, buf, size);
442}
443
444static const struct sysfs_ops port_sysfs_ops = {
445 .show = port_attr_show,
446 .store = port_attr_store,
447};
448
449static struct kobj_type port_type = {
450 .release = mlx4_port_release,
451 .sysfs_ops = &port_sysfs_ops,
452};
453
454struct port_table_attribute {
455 struct port_attribute attr;
456 char name[8];
457 int index;
458};
459
460static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
461 char *buf)
462{
463 struct port_table_attribute *tab_attr =
464 container_of(attr, struct port_table_attribute, attr);
465 ssize_t ret = -ENODEV;
466
467 if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
468 (p->dev->dev->caps.pkey_table_len[p->port_num]))
469 ret = sprintf(buf, "none\n");
470 else
471 ret = sprintf(buf, "%d\n",
472 p->dev->pkeys.virt2phys_pkey[p->slave]
473 [p->port_num - 1][tab_attr->index]);
474 return ret;
475}
476
477static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
478 const char *buf, size_t count)
479{
480 struct port_table_attribute *tab_attr =
481 container_of(attr, struct port_table_attribute, attr);
482 int idx;
483 int err;
484
485 /* do not allow remapping Dom0 virtual pkey table */
486 if (p->slave == mlx4_master_func_num(p->dev->dev))
487 return -EINVAL;
488
489 if (!strncasecmp(buf, "no", 2))
490 idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
491 else if (sscanf(buf, "%i", &idx) != 1 ||
492 idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
493 idx < 0)
494 return -EINVAL;
495
496 p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
497 [tab_attr->index] = idx;
498 mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
499 tab_attr->index, idx);
500 err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
501 if (err) {
502 pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
503 " port %d, index %d\n", p->slave, p->port_num, idx);
504 return err;
505 }
506 return count;
507}
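
The accept/reject rules of store_port_pkey() can be isolated into a tiny standalone parser. The table lengths are passed in as parameters here because this sketch has no device context; it only mirrors the input handling above and is not driver code.

/* Illustrative parser: a leading "no" maps the slot to the last (invalid)
 * physical pkey index, otherwise a decimal/hex index in
 * [0, pkey_table_len) is required.  Returns -1 where the driver
 * returns -EINVAL.
 */
#include <stdio.h>
#include <strings.h>

static int ex_parse_pkey_idx(const char *buf, int pkey_table_len,
			     int phys_pkey_table_len)
{
	int idx;

	if (!strncasecmp(buf, "no", 2))
		return phys_pkey_table_len - 1;
	if (sscanf(buf, "%i", &idx) != 1 || idx < 0 || idx >= pkey_table_len)
		return -1;
	return idx;
}
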
508
509static ssize_t show_port_gid_idx(struct mlx4_port *p,
510 struct port_attribute *attr, char *buf)
511{
512 return sprintf(buf, "%d\n", p->slave);
513}
514
515static struct attribute **
516alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
517 struct port_attribute *, char *buf),
518 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
519 const char *buf, size_t count),
520 int len)
521{
522 struct attribute **tab_attr;
523 struct port_table_attribute *element;
524 int i;
525
526 tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
527 if (!tab_attr)
528 return NULL;
529
530 for (i = 0; i < len; i++) {
531 element = kzalloc(sizeof (struct port_table_attribute),
532 GFP_KERNEL);
533 if (!element)
534 goto err;
535 if (snprintf(element->name, sizeof (element->name),
536 "%d", i) >= sizeof (element->name)) {
537 kfree(element);
538 goto err;
539 }
540 sysfs_attr_init(&element->attr.attr);
541 element->attr.attr.name = element->name;
542 if (store) {
543 element->attr.attr.mode = S_IWUSR | S_IRUGO;
544 element->attr.store = store;
545 } else
546 element->attr.attr.mode = S_IRUGO;
547
548 element->attr.show = show;
549 element->index = i;
550 tab_attr[i] = &element->attr.attr;
551 }
552 return tab_attr;
553
554err:
555 while (--i >= 0)
556 kfree(tab_attr[i]);
557 kfree(tab_attr);
558 return NULL;
559}
560
561static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
562{
563 struct mlx4_port *p;
564 int i;
565 int ret;
566
567 p = kzalloc(sizeof *p, GFP_KERNEL);
568 if (!p)
569 return -ENOMEM;
570
571 p->dev = dev;
572 p->port_num = port_num;
573 p->slave = slave;
574
575 ret = kobject_init_and_add(&p->kobj, &port_type,
576 kobject_get(dev->dev_ports_parent[slave]),
577 "%d", port_num);
578 if (ret)
579 goto err_alloc;
580
581 p->pkey_group.name = "pkey_idx";
582 p->pkey_group.attrs =
583 alloc_group_attrs(show_port_pkey, store_port_pkey,
584 dev->dev->caps.pkey_table_len[port_num]);
585 if (!p->pkey_group.attrs)
586 goto err_alloc;
587
588 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
589 if (ret)
590 goto err_free_pkey;
591
592 p->gid_group.name = "gid_idx";
593 p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
594 if (!p->gid_group.attrs)
595 goto err_free_pkey;
596
597 ret = sysfs_create_group(&p->kobj, &p->gid_group);
598 if (ret)
599 goto err_free_gid;
600
601 list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
602 return 0;
603
604err_free_gid:
605 kfree(p->gid_group.attrs[0]);
606 kfree(p->gid_group.attrs);
607
608err_free_pkey:
609 for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
610 kfree(p->pkey_group.attrs[i]);
611 kfree(p->pkey_group.attrs);
612
613err_alloc:
614 kobject_put(dev->dev_ports_parent[slave]);
615 kfree(p);
616 return ret;
617}
618
619static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
620{
621 char name[32];
622 int err;
623 int port;
624 struct kobject *p, *t;
625 struct mlx4_port *mport;
626
627 get_name(dev, name, slave, sizeof name);
628
629 dev->pkeys.device_parent[slave] =
630 kobject_create_and_add(name, kobject_get(dev->iov_parent));
631
632 if (!dev->pkeys.device_parent[slave]) {
633 err = -ENOMEM;
634 goto fail_dev;
635 }
636
637 INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
638
639 dev->dev_ports_parent[slave] =
640 kobject_create_and_add("ports",
641 kobject_get(dev->pkeys.device_parent[slave]));
642
643 if (!dev->dev_ports_parent[slave]) {
644 err = -ENOMEM;
645 goto err_ports;
646 }
647
648 for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
649 err = add_port(dev, port, slave);
650 if (err)
651 goto err_add;
652 }
653 return 0;
654
655err_add:
656 list_for_each_entry_safe(p, t,
657 &dev->pkeys.pkey_port_list[slave],
658 entry) {
659 list_del(&p->entry);
660 mport = container_of(p, struct mlx4_port, kobj);
661 sysfs_remove_group(p, &mport->pkey_group);
662 sysfs_remove_group(p, &mport->gid_group);
663 kobject_put(p);
664 }
665 kobject_put(dev->dev_ports_parent[slave]);
666
667err_ports:
668 kobject_put(dev->pkeys.device_parent[slave]);
669 /* extra put for the device_parent create_and_add */
670 kobject_put(dev->pkeys.device_parent[slave]);
671
672fail_dev:
673 kobject_put(dev->iov_parent);
674 return err;
675}
676
677static int register_pkey_tree(struct mlx4_ib_dev *device)
678{
679 int i;
680
681 if (!mlx4_is_master(device->dev))
682 return 0;
683
684 for (i = 0; i <= device->dev->num_vfs; ++i)
685 register_one_pkey_tree(device, i);
686
687 return 0;
688}
689
690static void unregister_pkey_tree(struct mlx4_ib_dev *device)
691{
692 int slave;
693 struct kobject *p, *t;
694 struct mlx4_port *port;
695
696 if (!mlx4_is_master(device->dev))
697 return;
698
699 for (slave = device->dev->num_vfs; slave >= 0; --slave) {
700 list_for_each_entry_safe(p, t,
701 &device->pkeys.pkey_port_list[slave],
702 entry) {
703 list_del(&p->entry);
704 port = container_of(p, struct mlx4_port, kobj);
705 sysfs_remove_group(p, &port->pkey_group);
706 sysfs_remove_group(p, &port->gid_group);
707 kobject_put(p);
708 kobject_put(device->dev_ports_parent[slave]);
709 }
710 kobject_put(device->dev_ports_parent[slave]);
711 kobject_put(device->pkeys.device_parent[slave]);
712 kobject_put(device->pkeys.device_parent[slave]);
713 kobject_put(device->iov_parent);
714 }
715}
716
717int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
718{
719 int i;
720 int ret = 0;
721
722 if (!mlx4_is_master(dev->dev))
723 return 0;
724
725 dev->iov_parent =
726 kobject_create_and_add("iov",
727 kobject_get(dev->ib_dev.ports_parent->parent));
728 if (!dev->iov_parent) {
729 ret = -ENOMEM;
730 goto err;
731 }
732 dev->ports_parent =
733 kobject_create_and_add("ports",
734 kobject_get(dev->iov_parent));
 735	if (!dev->ports_parent) {
736 ret = -ENOMEM;
737 goto err_ports;
738 }
739
740 for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
741 ret = add_port_entries(dev, i);
742 if (ret)
743 goto err_add_entries;
744 }
745
746 ret = register_pkey_tree(dev);
747 if (ret)
748 goto err_add_entries;
749 return 0;
750
751err_add_entries:
752 kobject_put(dev->ports_parent);
753
754err_ports:
755 kobject_put(dev->iov_parent);
756err:
757 kobject_put(dev->ib_dev.ports_parent->parent);
758 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
759 return ret;
760}
761
762static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
763{
764 struct mlx4_ib_iov_port *p;
765 int i;
766
767 if (!mlx4_is_master(device->dev))
768 return;
769
770 for (i = 0; i < device->dev->caps.num_ports; i++) {
771 p = &device->iov_ports[i];
772 kobject_put(p->admin_alias_parent);
773 kobject_put(p->gids_parent);
774 kobject_put(p->pkeys_parent);
775 kobject_put(p->mcgs_parent);
776 kobject_put(p->cur_port);
777 kobject_put(p->cur_port);
778 kobject_put(p->cur_port);
779 kobject_put(p->cur_port);
780 kobject_put(p->cur_port);
781 kobject_put(p->dev->ports_parent);
782 kfree(p->dentr_ar);
783 }
784}
785
786void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
787{
788 unregister_alias_guid_tree(device);
789 unregister_pkey_tree(device);
790 kobject_put(device->ports_parent);
791 kobject_put(device->iov_parent);
792 kobject_put(device->iov_parent);
793 kobject_put(device->ib_dev.ports_parent->parent);
794}
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index c438e4691b3c..0da62b904d00 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -399,11 +399,20 @@ static inline void nes_write8(void __iomem *addr, u8 val)
399 writeb(val, addr); 399 writeb(val, addr);
400} 400}
401 401
402 402enum nes_resource {
403 NES_RESOURCE_MW = 1,
404 NES_RESOURCE_FAST_MR,
405 NES_RESOURCE_PHYS_MR,
406 NES_RESOURCE_USER_MR,
407 NES_RESOURCE_PD,
408 NES_RESOURCE_QP,
409 NES_RESOURCE_CQ,
410 NES_RESOURCE_ARP
411};
403 412
404static inline int nes_alloc_resource(struct nes_adapter *nesadapter, 413static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
405 unsigned long *resource_array, u32 max_resources, 414 unsigned long *resource_array, u32 max_resources,
406 u32 *req_resource_num, u32 *next) 415 u32 *req_resource_num, u32 *next, enum nes_resource resource_type)
407{ 416{
408 unsigned long flags; 417 unsigned long flags;
409 u32 resource_num; 418 u32 resource_num;
@@ -414,7 +423,7 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
414 if (resource_num >= max_resources) { 423 if (resource_num >= max_resources) {
415 resource_num = find_first_zero_bit(resource_array, max_resources); 424 resource_num = find_first_zero_bit(resource_array, max_resources);
416 if (resource_num >= max_resources) { 425 if (resource_num >= max_resources) {
417 printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__); 426 printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type);
418 spin_unlock_irqrestore(&nesadapter->resource_lock, flags); 427 spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
419 return -EMFILE; 428 return -EMFILE;
420 } 429 }
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 020e95c4c4b9..cfaacaf6bf5f 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -430,6 +430,8 @@ static void form_cm_frame(struct sk_buff *skb,
430 buf += sizeof(*tcph); 430 buf += sizeof(*tcph);
431 431
432 skb->ip_summed = CHECKSUM_PARTIAL; 432 skb->ip_summed = CHECKSUM_PARTIAL;
433 if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
434 skb->ip_summed = CHECKSUM_NONE;
433 skb->protocol = htons(0x800); 435 skb->protocol = htons(0x800);
434 skb->data_len = 0; 436 skb->data_len = 0;
435 skb->mac_len = ETH_HLEN; 437 skb->mac_len = ETH_HLEN;
@@ -1356,7 +1358,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
1356 else 1358 else
1357 netdev = nesvnic->netdev; 1359 netdev = nesvnic->netdev;
1358 1360
1359 neigh = dst_neigh_lookup(&rt->dst, &dst_ip); 1361 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev);
1360 1362
1361 rcu_read_lock(); 1363 rcu_read_lock();
1362 if (neigh) { 1364 if (neigh) {
@@ -1465,12 +1467,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1465 cm_node->loopbackpartner = NULL; 1467 cm_node->loopbackpartner = NULL;
1466 1468
1467 /* get the mac addr for the remote node */ 1469 /* get the mac addr for the remote node */
1468 if (ipv4_is_loopback(htonl(cm_node->rem_addr))) { 1470 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
1469 arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE); 1471 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
1470 } else {
1471 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
1472 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
1473 }
1474 if (arpindex < 0) { 1472 if (arpindex < 0) {
1475 kfree(cm_node); 1473 kfree(cm_node);
1476 return NULL; 1474 return NULL;
@@ -3153,11 +3151,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3153 nesqp->nesqp_context->tcpPorts[1] = 3151 nesqp->nesqp_context->tcpPorts[1] =
3154 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); 3152 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
3155 3153
3156 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) 3154 nesqp->nesqp_context->ip0 =
3157 nesqp->nesqp_context->ip0 =
3158 cpu_to_le32(ntohl(nesvnic->local_ipaddr));
3159 else
3160 nesqp->nesqp_context->ip0 =
3161 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); 3155 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
3162 3156
3163 nesqp->nesqp_context->misc2 |= cpu_to_le32( 3157 nesqp->nesqp_context->misc2 |= cpu_to_le32(
@@ -3182,10 +3176,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3182 memset(&nes_quad, 0, sizeof(nes_quad)); 3176 memset(&nes_quad, 0, sizeof(nes_quad));
3183 nes_quad.DstIpAdrIndex = 3177 nes_quad.DstIpAdrIndex =
3184 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 3178 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3185 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) 3179 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3186 nes_quad.SrcIpadr = nesvnic->local_ipaddr;
3187 else
3188 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3189 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; 3180 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
3190 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; 3181 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
3191 3182
@@ -3538,11 +3529,7 @@ static void cm_event_connected(struct nes_cm_event *event)
3538 cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); 3529 cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
3539 nesqp->nesqp_context->tcpPorts[1] = 3530 nesqp->nesqp_context->tcpPorts[1] =
3540 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); 3531 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
3541 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) 3532 nesqp->nesqp_context->ip0 =
3542 nesqp->nesqp_context->ip0 =
3543 cpu_to_le32(ntohl(nesvnic->local_ipaddr));
3544 else
3545 nesqp->nesqp_context->ip0 =
3546 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); 3533 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
3547 3534
3548 nesqp->nesqp_context->misc2 |= cpu_to_le32( 3535 nesqp->nesqp_context->misc2 |= cpu_to_le32(
@@ -3571,10 +3558,7 @@ static void cm_event_connected(struct nes_cm_event *event)
3571 3558
3572 nes_quad.DstIpAdrIndex = 3559 nes_quad.DstIpAdrIndex =
3573 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 3560 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3574 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr)) 3561 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3575 nes_quad.SrcIpadr = nesvnic->local_ipaddr;
3576 else
3577 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3578 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; 3562 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
3579 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; 3563 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
3580 3564
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index d42c9f435b1b..fe7965ee4096 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -2679,11 +2679,9 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2679 } 2679 }
2680 } 2680 }
2681 if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { 2681 if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
2682 if (nesdev->link_recheck)
2683 cancel_delayed_work(&nesdev->work);
2684 nesdev->link_recheck = 1; 2682 nesdev->link_recheck = 1;
2685 schedule_delayed_work(&nesdev->work, 2683 mod_delayed_work(system_wq, &nesdev->work,
2686 NES_LINK_RECHECK_DELAY); 2684 NES_LINK_RECHECK_DELAY);
2687 } 2685 }
2688 } 2686 }
2689 2687
@@ -3577,10 +3575,10 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3577 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; 3575 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3578 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; 3576 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
3579 nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p," 3577 nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
3580 " Tcp state = %s, iWARP state = %s\n", 3578 " Tcp state = %d, iWARP state = %d\n",
3581 async_event_id, 3579 async_event_id,
3582 le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, 3580 le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
3583 nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]); 3581 tcp_state, iwarp_state);
3584 3582
3585 aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); 3583 aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
3586 if (aeq_info & NES_AEQE_QP) { 3584 if (aeq_info & NES_AEQE_QP) {
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index f3a3ecf8d09e..0564be757d82 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -243,10 +243,9 @@ static int nes_netdev_open(struct net_device *netdev)
243 243
244 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); 244 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
245 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { 245 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
246 if (nesdev->link_recheck)
247 cancel_delayed_work(&nesdev->work);
248 nesdev->link_recheck = 1; 246 nesdev->link_recheck = 1;
249 schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY); 247 mod_delayed_work(system_wq, &nesdev->work,
248 NES_LINK_RECHECK_DELAY);
250 } 249 }
251 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); 250 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
252 251
@@ -385,24 +384,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
385 /* bump past the vlan tag */ 384 /* bump past the vlan tag */
386 wqe_fragment_length++; 385 wqe_fragment_length++;
387 /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ 386 /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
387 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
388 388
389 if (skb->ip_summed == CHECKSUM_PARTIAL) { 389 if (skb->ip_summed == CHECKSUM_PARTIAL) {
390 tcph = tcp_hdr(skb); 390 if (skb_is_gso(skb)) {
391 if (1) { 391 tcph = tcp_hdr(skb);
392 if (skb_is_gso(skb)) { 392 /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n",
393 /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n", 393 netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */
394 netdev->name, skb_is_gso(skb)); */ 394 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size;
395 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | 395 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
396 NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); 396 ((u32)tcph->doff) |
397 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, 397 (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
398 ((u32)tcph->doff) |
399 (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
400 } else {
401 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
402 }
403 } 398 }
404 } else { /* CHECKSUM_HW */ 399 } else { /* CHECKSUM_HW */
405 wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION; 400 wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM;
406 } 401 }
407 402
408 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, 403 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
@@ -597,10 +592,10 @@ tso_sq_no_longer_full:
597 nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", 592 nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
598 original_first_length, NES_FIRST_FRAG_SIZE); 593 original_first_length, NES_FIRST_FRAG_SIZE);
599 nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," 594 nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
600 " (%u frags), tso_size=%u\n", 595 " (%u frags), is_gso = %u tso_size=%u\n",
601 netdev->name, 596 netdev->name,
602 skb->len, skb_headlen(skb), 597 skb->len, skb_headlen(skb),
603 skb_shinfo(skb)->nr_frags, skb_is_gso(skb)); 598 skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size);
604 } 599 }
605 memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, 600 memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
606 skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), 601 skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
@@ -652,8 +647,8 @@ tso_sq_no_longer_full:
652 } else { 647 } else {
653 nesnic->tx_skb[nesnic->sq_head] = NULL; 648 nesnic->tx_skb[nesnic->sq_head] = NULL;
654 } 649 }
655 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb); 650 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size;
656 if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) { 651 if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) {
657 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; 652 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
658 } else { 653 } else {
659 iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); 654 iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
@@ -1679,12 +1674,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1679 netdev->hard_header_len = ETH_HLEN; 1674 netdev->hard_header_len = ETH_HLEN;
1680 netdev->addr_len = ETH_ALEN; 1675 netdev->addr_len = ETH_ALEN;
1681 netdev->type = ARPHRD_ETHER; 1676 netdev->type = ARPHRD_ETHER;
1682 netdev->features = NETIF_F_HIGHDMA;
1683 netdev->netdev_ops = &nes_netdev_ops; 1677 netdev->netdev_ops = &nes_netdev_ops;
1684 netdev->ethtool_ops = &nes_ethtool_ops; 1678 netdev->ethtool_ops = &nes_ethtool_ops;
1685 netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); 1679 netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
1686 nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); 1680 nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
1687 netdev->features |= NETIF_F_HW_VLAN_TX;
1688 1681
1689 /* Fill in the port structure */ 1682 /* Fill in the port structure */
1690 nesvnic->netdev = netdev; 1683 nesvnic->netdev = netdev;
@@ -1711,11 +1704,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1711 netdev->dev_addr[5] = (u8)u64temp; 1704 netdev->dev_addr[5] = (u8)u64temp;
1712 memcpy(netdev->perm_addr, netdev->dev_addr, 6); 1705 memcpy(netdev->perm_addr, netdev->dev_addr, 6);
1713 1706
1714 netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | 1707 netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX;
1715 NETIF_F_HW_VLAN_RX;
1716 if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) 1708 if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
1717 netdev->hw_features |= NETIF_F_TSO; 1709 netdev->hw_features |= NETIF_F_TSO;
1718 netdev->features |= netdev->hw_features; 1710
1711 netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX;
1719 netdev->hw_features |= NETIF_F_LRO; 1712 netdev->hw_features |= NETIF_F_LRO;
1720 1713
1721 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," 1714 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index e98f4fc0b768..2042c0f29759 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -699,7 +699,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
699 699
700 arp_index = 0; 700 arp_index = 0;
701 err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, 701 err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
702 nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index); 702 nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP);
703 if (err) { 703 if (err) {
704 nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); 704 nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
705 return err; 705 return err;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 8b8812de4b5c..1dadcf388c02 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -80,7 +80,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
80 next_stag_index %= nesadapter->max_mr; 80 next_stag_index %= nesadapter->max_mr;
81 81
82 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 82 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
83 nesadapter->max_mr, &stag_index, &next_stag_index); 83 nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW);
84 if (ret) { 84 if (ret) {
85 return ERR_PTR(ret); 85 return ERR_PTR(ret);
86 } 86 }
@@ -404,7 +404,7 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list
404 404
405 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 405 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
406 nesadapter->max_mr, &stag_index, 406 nesadapter->max_mr, &stag_index,
407 &next_stag_index); 407 &next_stag_index, NES_RESOURCE_FAST_MR);
408 if (err) 408 if (err)
409 return ERR_PTR(err); 409 return ERR_PTR(err);
410 410
@@ -780,7 +780,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
780 netdev_refcnt_read(nesvnic->netdev)); 780 netdev_refcnt_read(nesvnic->netdev));
781 781
782 err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, 782 err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
783 nesadapter->max_pd, &pd_num, &nesadapter->next_pd); 783 nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
784 if (err) { 784 if (err) {
785 return ERR_PTR(err); 785 return ERR_PTR(err);
786 } 786 }
@@ -1157,7 +1157,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1157 nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); 1157 nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
1158 1158
1159 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, 1159 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
1160 nesadapter->max_qp, &qp_num, &nesadapter->next_qp); 1160 nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP);
1161 if (ret) { 1161 if (ret) {
1162 return ERR_PTR(ret); 1162 return ERR_PTR(ret);
1163 } 1163 }
@@ -1546,7 +1546,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1546 return ERR_PTR(-EINVAL); 1546 return ERR_PTR(-EINVAL);
1547 1547
1548 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, 1548 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
1549 nesadapter->max_cq, &cq_num, &nesadapter->next_cq); 1549 nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ);
1550 if (err) { 1550 if (err) {
1551 return ERR_PTR(err); 1551 return ERR_PTR(err);
1552 } 1552 }
@@ -2129,7 +2129,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2129 return ERR_PTR(-EINVAL); 2129 return ERR_PTR(-EINVAL);
2130 2130
2131 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, 2131 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
2132 &stag_index, &next_stag_index); 2132 &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR);
2133 if (err) { 2133 if (err) {
2134 return ERR_PTR(err); 2134 return ERR_PTR(err);
2135 } 2135 }
@@ -2360,7 +2360,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2360 next_stag_index %= nesadapter->max_mr; 2360 next_stag_index %= nesadapter->max_mr;
2361 2361
2362 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 2362 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
2363 nesadapter->max_mr, &stag_index, &next_stag_index); 2363 nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR);
2364 if (err) { 2364 if (err) {
2365 ib_umem_release(region); 2365 ib_umem_release(region);
2366 return ERR_PTR(err); 2366 return ERR_PTR(err);
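
Every nes hunk above threads one new trailing argument through nes_alloc_resource(), naming the pool being carved from (NES_RESOURCE_ARP, NES_RESOURCE_MW, NES_RESOURCE_FAST_MR, NES_RESOURCE_PD, NES_RESOURCE_QP, NES_RESOURCE_CQ, NES_RESOURCE_PHYS_MR, NES_RESOURCE_USER_MR), presumably so the allocator's single failure path can account errors per resource type. A minimal declaration-only sketch of the assumed shape follows; the real enum and prototype live in nes.h/nes_utils.c (changed earlier in this diff) and may differ in ordering and detail.

/* Assumed resource tags -- names taken from the call sites above;
 * ordering and values are illustrative only. */
enum nes_resource {
	NES_RESOURCE_ARP,
	NES_RESOURCE_MW,
	NES_RESOURCE_FAST_MR,
	NES_RESOURCE_PHYS_MR,
	NES_RESOURCE_USER_MR,
	NES_RESOURCE_PD,
	NES_RESOURCE_QP,
	NES_RESOURCE_CQ,
};

struct nes_adapter;	/* u32 comes from <linux/types.h> in kernel context */

/* Assumed prototype: the five arguments visible on the left-hand side
 * of the hunks, plus the new trailing resource tag. */
int nes_alloc_resource(struct nes_adapter *nesadapter,
		       unsigned long *resource_array,
		       u32 max_resources, u32 *req_resource_num,
		       u32 *next_resource,
		       enum nes_resource resource_type);

Carrying the tag into the allocator means the callers no longer need per-site bookkeeping when a pool is exhausted.
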
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index cb5b7f7d4d38..b29a4246ef41 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -2219,7 +2219,6 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2219 u32 wqe_idx; 2219 u32 wqe_idx;
2220 2220
2221 if (!qp->wqe_wr_id_tbl[tail].signaled) { 2221 if (!qp->wqe_wr_id_tbl[tail].signaled) {
2222 expand = true; /* CQE cannot be consumed yet */
2223 *polled = false; /* WC cannot be consumed yet */ 2222 *polled = false; /* WC cannot be consumed yet */
2224 } else { 2223 } else {
2225 ibwc->status = IB_WC_SUCCESS; 2224 ibwc->status = IB_WC_SUCCESS;
@@ -2227,10 +2226,11 @@ static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2227 ibwc->qp = &qp->ibqp; 2226 ibwc->qp = &qp->ibqp;
2228 ocrdma_update_wc(qp, ibwc, tail); 2227 ocrdma_update_wc(qp, ibwc, tail);
2229 *polled = true; 2228 *polled = true;
2230 wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
2231 if (tail != wqe_idx)
2232 expand = true; /* Coalesced CQE can't be consumed yet */
2233 } 2229 }
2230 wqe_idx = le32_to_cpu(cqe->wq.wqeidx) & OCRDMA_CQE_WQEIDX_MASK;
2231 if (tail != wqe_idx)
2232 expand = true; /* Coalesced CQE can't be consumed yet */
2233
2234 ocrdma_hwq_inc_tail(&qp->sq); 2234 ocrdma_hwq_inc_tail(&qp->sq);
2235 return expand; 2235 return expand;
2236} 2236}
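
The ocrdma hunk stops flagging every unsignaled send completion as unconsumable; instead the coalesced-CQE test (tail != wqeidx) now runs after both branches, for signaled and unsignaled work requests alike. A self-contained model of the new ordering, using stand-in types rather than the driver's, is:

/* Model of the reworked ocrdma_poll_success_scqe() ordering; types and
 * names are stand-ins, only the control flow mirrors the hunk. */
#include <stdbool.h>
#include <stdint.h>

struct model_scqe {
	uint32_t wqe_idx;	/* index the hardware says this CQE covers */
};

bool poll_success_scqe_model(bool signaled, uint32_t sq_tail,
			     const struct model_scqe *cqe, bool *polled)
{
	bool expand = false;

	if (!signaled)
		*polled = false;	/* WC cannot be consumed yet */
	else
		*polled = true;		/* caller reports IB_WC_SUCCESS */

	/* After the patch this runs in both cases: a coalesced CQE that
	 * still covers outstanding WQEs cannot be retired. */
	if (sq_tail != cqe->wqe_idx)
		expand = true;

	return expand;
}
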
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 7b1b86690024..4d11575c2010 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -87,7 +87,7 @@ struct qlogic_ib_stats {
87}; 87};
88 88
89extern struct qlogic_ib_stats qib_stats; 89extern struct qlogic_ib_stats qib_stats;
90extern struct pci_error_handlers qib_pci_err_handler; 90extern const struct pci_error_handlers qib_pci_err_handler;
91extern struct pci_driver qib_driver; 91extern struct pci_driver qib_driver;
92 92
93#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ 93#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 145da4040883..d39e0183ff82 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -285,7 +285,6 @@ struct qib_base_info {
285 285
286#ifndef QIB_KERN_TYPE 286#ifndef QIB_KERN_TYPE
287#define QIB_KERN_TYPE 0 287#define QIB_KERN_TYPE 0
288#define QIB_IDSTR "QLogic kernel.org driver"
289#endif 288#endif
290 289
291/* 290/*
@@ -302,6 +301,19 @@ struct qib_base_info {
302#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) 301#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
303 302
304/* 303/*
304 * Define the driver version number. This is something that refers only
305 * to the driver itself, not the software interfaces it supports.
306 */
307#define QIB_DRIVER_VERSION_BASE "1.11"
308
309/* create the final driver version string */
310#ifdef QIB_IDSTR
311#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
312#else
313#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
314#endif
315
316/*
305 * If the unit is specified via open, HCA choice is fixed. If port is 317 * If the unit is specified via open, HCA choice is fixed. If port is
306 * specified, it's also fixed. Otherwise we try to spread contexts 318 * specified, it's also fixed. Otherwise we try to spread contexts
307 * across ports and HCAs, using different algorithms. WITHIN is 319 * across ports and HCAs, using different algorithms. WITHIN is
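
The new block builds the exported version string from a base plus an optional identity suffix: QIB_IDSTR is no longer defined here, but if a build supplies one it is appended. A minimal stand-alone demonstration (the commented-out QIB_IDSTR value is a made-up example, not anything the patch defines):

/* Demonstrates how QIB_DRIVER_VERSION composes; the macro logic is
 * copied from the hunk above, the example value is invented. */
#include <stdio.h>

#define QIB_DRIVER_VERSION_BASE "1.11"

/* #define QIB_IDSTR "example vendor build" */	/* e.g. via -DQIB_IDSTR=... */

#ifdef QIB_IDSTR
#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
#else
#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
#endif

int main(void)
{
	/* prints "1.11" as-is, "1.11 example vendor build" with QIB_IDSTR set */
	printf("%s\n", QIB_DRIVER_VERSION);
	return 0;
}

qib_driver.c below feeds the result to MODULE_VERSION() and ib_qib_version, while qib_verbs.c switches the node description to a fixed string instead.
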
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index e41e7f7fc763..5423edcab51f 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -46,7 +46,7 @@
46 * The size has to be longer than this string, so we can append 46 * The size has to be longer than this string, so we can append
47 * board/chip information to it in the init code. 47 * board/chip information to it in the init code.
48 */ 48 */
49const char ib_qib_version[] = QIB_IDSTR "\n"; 49const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
50 50
51DEFINE_SPINLOCK(qib_devs_lock); 51DEFINE_SPINLOCK(qib_devs_lock);
52LIST_HEAD(qib_dev_list); 52LIST_HEAD(qib_dev_list);
@@ -65,6 +65,7 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
65MODULE_LICENSE("Dual BSD/GPL"); 65MODULE_LICENSE("Dual BSD/GPL");
66MODULE_AUTHOR("QLogic <support@qlogic.com>"); 66MODULE_AUTHOR("QLogic <support@qlogic.com>");
67MODULE_DESCRIPTION("QLogic IB driver"); 67MODULE_DESCRIPTION("QLogic IB driver");
68MODULE_VERSION(QIB_DRIVER_VERSION);
68 69
69/* 70/*
70 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our 71 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index cff8a6c32161..65a2a23f6f8a 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -61,8 +61,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
61 61
62 inode->i_ino = get_next_ino(); 62 inode->i_ino = get_next_ino();
63 inode->i_mode = mode; 63 inode->i_mode = mode;
64 inode->i_uid = 0; 64 inode->i_uid = GLOBAL_ROOT_UID;
65 inode->i_gid = 0; 65 inode->i_gid = GLOBAL_ROOT_GID;
66 inode->i_blocks = 0; 66 inode->i_blocks = 0;
67 inode->i_atime = CURRENT_TIME; 67 inode->i_atime = CURRENT_TIME;
68 inode->i_mtime = inode->i_atime; 68 inode->i_mtime = inode->i_atime;
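
The qib_fs.c hunk replaces the literal root ids with the typed constants from <linux/uidgid.h>, presumably so the code still builds when strict kuid_t/kgid_t type checking is enabled. A minimal kernel-context sketch; nothing here beyond the two constants is implied by the patch, and the helper name is hypothetical:

/* Sketch: i_uid/i_gid take kuid_t/kgid_t values, so root is spelled with
 * the global constants rather than a bare 0. */
#include <linux/fs.h>
#include <linux/uidgid.h>

static void qibfs_set_root_ids(struct inode *inode)
{
	inode->i_uid = GLOBAL_ROOT_UID;	/* uid 0 in the initial user namespace */
	inode->i_gid = GLOBAL_ROOT_GID;	/* gid 0 in the initial user namespace */
}
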
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index e9486c74c226..81c7b73695d2 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -186,8 +186,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
186 goto bail; 186 goto bail;
187 187
188 off = sge->addr - mr->user_base; 188 off = sge->addr - mr->user_base;
189 if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length || 189 if (unlikely(sge->addr < mr->user_base ||
190 (mr->access_flags & acc) == 0)) 190 off + sge->length > mr->length ||
191 (mr->access_flags & acc) != acc))
191 goto bail; 192 goto bail;
192 if (unlikely(!atomic_inc_not_zero(&mr->refcount))) 193 if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
193 goto bail; 194 goto bail;
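
The qib_keys.c fix tightens the memory-region check in two ways: the lower bound is compared against mr->user_base, the same base the offset is computed from, rather than mr->iova; and the requester must hold every access bit it asked for, where the old test only failed when none of the requested bits were granted. A self-contained model of the corrected predicate, with stand-in types rather than the driver's:

/* Model of the corrected range/permission test; the struct and flag
 * values are stand-ins, only the predicate mirrors the hunk. */
#include <stdbool.h>
#include <stdint.h>

struct model_mr {
	uint64_t user_base;	/* start of the registered range */
	uint64_t length;	/* bytes registered */
	unsigned access_flags;	/* access bits granted at registration */
};

bool lkey_check_model(const struct model_mr *mr, uint64_t addr,
		      uint64_t len, unsigned acc)
{
	uint64_t off = addr - mr->user_base;

	if (addr < mr->user_base)		/* starts below the region */
		return false;
	if (off + len > mr->length)		/* runs past the end */
		return false;
	if ((mr->access_flags & acc) != acc)	/* must hold ALL requested bits */
		return false;
	return true;
}

With the old "(access_flags & acc) == 0" form, a request for two access types passed as long as either one had been granted.
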
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 19f1e6c45fb6..ccb119143d20 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -471,9 +471,10 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
471 if (port_num != port) { 471 if (port_num != port) {
472 ibp = to_iport(ibdev, port_num); 472 ibp = to_iport(ibdev, port_num);
473 ret = check_mkey(ibp, smp, 0); 473 ret = check_mkey(ibp, smp, 0);
474 if (ret) 474 if (ret) {
475 ret = IB_MAD_RESULT_FAILURE; 475 ret = IB_MAD_RESULT_FAILURE;
476 goto bail; 476 goto bail;
477 }
477 } 478 }
478 } 479 }
479 480
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 900137173210..c574ec7c85e6 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -745,7 +745,7 @@ qib_pci_resume(struct pci_dev *pdev)
745 qib_init(dd, 1); /* same as re-init after reset */ 745 qib_init(dd, 1); /* same as re-init after reset */
746} 746}
747 747
748struct pci_error_handlers qib_pci_err_handler = { 748const struct pci_error_handlers qib_pci_err_handler = {
749 .error_detected = qib_pci_error_detected, 749 .error_detected = qib_pci_error_detected,
750 .mmio_enabled = qib_pci_mmio_enabled, 750 .mmio_enabled = qib_pci_mmio_enabled,
751 .link_reset = qib_pci_link_reset, 751 .link_reset = qib_pci_link_reset,
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fc9b205c2412..ba51a4715a1d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -2224,7 +2224,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
2224 ibdev->dma_ops = &qib_dma_mapping_ops; 2224 ibdev->dma_ops = &qib_dma_mapping_ops;
2225 2225
2226 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), 2226 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
2227 QIB_IDSTR " %s", init_utsname()->nodename); 2227 "QLogic Infiniband HCA %s", init_utsname()->nodename);
2228 2228
2229 ret = ib_register_device(ibdev, qib_create_port_files); 2229 ret = ib_register_device(ibdev, qib_create_port_files);
2230 if (ret) 2230 if (ret)