aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ralph Campbell <ralph.campbell@qlogic.com> 2008-12-01 23:59:08 -0500
committer Roland Dreier <rolandd@cisco.com> 2008-12-01 23:59:08 -0500
commit 7c37d74474c8ee8ddcd5a2d2a9571d4a1290c844 (patch)
tree d44fb97adfa0b036d0a0db193b3273eff5616246
parent 64f22fa17c1a531e682ebc882566856ea5718495 (diff)
IB/ipath: Improve UD loopback performance by allocating temp array only once
Receive work queue entries are checked for L_Key validity, and pointers to the memory region structure are saved in an allocated structure. For UD loopback packets, this structure is allocated and freed for each packet. This patch changes that so the structure is allocated once at QP creation and freed at QP destruction.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_qp.c 32
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_ud.c 19
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.h 1
3 files changed, 26 insertions, 26 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 4715911101e4..3a5a89b609c4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -745,6 +745,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
745 struct ipath_swqe *swq = NULL; 745 struct ipath_swqe *swq = NULL;
746 struct ipath_ibdev *dev; 746 struct ipath_ibdev *dev;
747 size_t sz; 747 size_t sz;
748 size_t sg_list_sz;
748 struct ib_qp *ret; 749 struct ib_qp *ret;
749 750
750 if (init_attr->create_flags) { 751 if (init_attr->create_flags) {
@@ -789,19 +790,31 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
789 goto bail; 790 goto bail;
790 } 791 }
791 sz = sizeof(*qp); 792 sz = sizeof(*qp);
793 sg_list_sz = 0;
792 if (init_attr->srq) { 794 if (init_attr->srq) {
793 struct ipath_srq *srq = to_isrq(init_attr->srq); 795 struct ipath_srq *srq = to_isrq(init_attr->srq);
794 796
795 sz += sizeof(*qp->r_sg_list) * 797 if (srq->rq.max_sge > 1)
796 srq->rq.max_sge; 798 sg_list_sz = sizeof(*qp->r_sg_list) *
797 } else 799 (srq->rq.max_sge - 1);
798 sz += sizeof(*qp->r_sg_list) * 800 } else if (init_attr->cap.max_recv_sge > 1)
799 init_attr->cap.max_recv_sge; 801 sg_list_sz = sizeof(*qp->r_sg_list) *
800 qp = kmalloc(sz, GFP_KERNEL); 802 (init_attr->cap.max_recv_sge - 1);
803 qp = kmalloc(sz + sg_list_sz, GFP_KERNEL);
801 if (!qp) { 804 if (!qp) {
802 ret = ERR_PTR(-ENOMEM); 805 ret = ERR_PTR(-ENOMEM);
803 goto bail_swq; 806 goto bail_swq;
804 } 807 }
808 if (sg_list_sz && (init_attr->qp_type == IB_QPT_UD ||
809 init_attr->qp_type == IB_QPT_SMI ||
810 init_attr->qp_type == IB_QPT_GSI)) {
811 qp->r_ud_sg_list = kmalloc(sg_list_sz, GFP_KERNEL);
812 if (!qp->r_ud_sg_list) {
813 ret = ERR_PTR(-ENOMEM);
814 goto bail_qp;
815 }
816 } else
817 qp->r_ud_sg_list = NULL;
805 if (init_attr->srq) { 818 if (init_attr->srq) {
806 sz = 0; 819 sz = 0;
807 qp->r_rq.size = 0; 820 qp->r_rq.size = 0;
@@ -818,7 +831,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
818 qp->r_rq.size * sz); 831 qp->r_rq.size * sz);
819 if (!qp->r_rq.wq) { 832 if (!qp->r_rq.wq) {
820 ret = ERR_PTR(-ENOMEM); 833 ret = ERR_PTR(-ENOMEM);
821 goto bail_qp; 834 goto bail_sg_list;
822 } 835 }
823 } 836 }
824 837
@@ -848,7 +861,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
848 if (err) { 861 if (err) {
849 ret = ERR_PTR(err); 862 ret = ERR_PTR(err);
850 vfree(qp->r_rq.wq); 863 vfree(qp->r_rq.wq);
851 goto bail_qp; 864 goto bail_sg_list;
852 } 865 }
853 qp->ip = NULL; 866 qp->ip = NULL;
854 qp->s_tx = NULL; 867 qp->s_tx = NULL;
@@ -925,6 +938,8 @@ bail_ip:
925 vfree(qp->r_rq.wq); 938 vfree(qp->r_rq.wq);
926 ipath_free_qp(&dev->qp_table, qp); 939 ipath_free_qp(&dev->qp_table, qp);
927 free_qpn(&dev->qp_table, qp->ibqp.qp_num); 940 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
941bail_sg_list:
942 kfree(qp->r_ud_sg_list);
928bail_qp: 943bail_qp:
929 kfree(qp); 944 kfree(qp);
930bail_swq: 945bail_swq:
@@ -989,6 +1004,7 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
989 kref_put(&qp->ip->ref, ipath_release_mmap_info); 1004 kref_put(&qp->ip->ref, ipath_release_mmap_info);
990 else 1005 else
991 vfree(qp->r_rq.wq); 1006 vfree(qp->r_rq.wq);
1007 kfree(qp->r_ud_sg_list);
992 vfree(qp->s_wq); 1008 vfree(qp->s_wq);
993 kfree(qp); 1009 kfree(qp);
994 return 0; 1010 return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 729446f56aab..91c74cc797ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -70,8 +70,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
70 goto done; 70 goto done;
71 } 71 }
72 72
73 rsge.sg_list = NULL;
74
75 /* 73 /*
76 * Check that the qkey matches (except for QP0, see 9.6.1.4.1). 74 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
77 * Qkeys with the high order bit set mean use the 75 * Qkeys with the high order bit set mean use the
@@ -115,21 +113,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
115 rq = &qp->r_rq; 113 rq = &qp->r_rq;
116 } 114 }
117 115
118 if (rq->max_sge > 1) {
119 /*
120 * XXX We could use GFP_KERNEL if ipath_do_send()
121 * was always called from the tasklet instead of
122 * from ipath_post_send().
123 */
124 rsge.sg_list = kmalloc((rq->max_sge - 1) *
125 sizeof(struct ipath_sge),
126 GFP_ATOMIC);
127 if (!rsge.sg_list) {
128 dev->n_pkt_drops++;
129 goto drop;
130 }
131 }
132
133 /* 116 /*
134 * Get the next work request entry to find where to put the data. 117 * Get the next work request entry to find where to put the data.
135 * Note that it is safe to drop the lock after changing rq->tail 118 * Note that it is safe to drop the lock after changing rq->tail
@@ -147,6 +130,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
147 goto drop; 130 goto drop;
148 } 131 }
149 wqe = get_rwqe_ptr(rq, tail); 132 wqe = get_rwqe_ptr(rq, tail);
133 rsge.sg_list = qp->r_ud_sg_list;
150 if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) { 134 if (!ipath_init_sge(qp, wqe, &rlen, &rsge)) {
151 spin_unlock_irqrestore(&rq->lock, flags); 135 spin_unlock_irqrestore(&rq->lock, flags);
152 dev->n_pkt_drops++; 136 dev->n_pkt_drops++;
@@ -242,7 +226,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
242 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 226 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
243 swqe->wr.send_flags & IB_SEND_SOLICITED); 227 swqe->wr.send_flags & IB_SEND_SOLICITED);
244drop: 228drop:
245 kfree(rsge.sg_list);
246 if (atomic_dec_and_test(&qp->refcount)) 229 if (atomic_dec_and_test(&qp->refcount))
247 wake_up(&qp->wait); 230 wake_up(&qp->wait);
248done:; 231done:;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 9d12ae8a778e..11e3f613df93 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -431,6 +431,7 @@ struct ipath_qp {
431 u32 s_lsn; /* limit sequence number (credit) */ 431 u32 s_lsn; /* limit sequence number (credit) */
432 struct ipath_swqe *s_wq; /* send work queue */ 432 struct ipath_swqe *s_wq; /* send work queue */
433 struct ipath_swqe *s_wqe; 433 struct ipath_swqe *s_wqe;
434 struct ipath_sge *r_ud_sg_list;
434 struct ipath_rq r_rq; /* receive work queue */ 435 struct ipath_rq r_rq; /* receive work queue */
435 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 436 struct ipath_sge r_sg_list[0]; /* verified SGEs */
436}; 437};