aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/sw/rdmavt
diff options
context:
space:
mode:
authorMike Marciniszyn <mike.marciniszyn@intel.com>2016-10-10 09:14:39 -0400
committerDoug Ledford <dledford@redhat.com>2016-11-15 16:25:59 -0500
commit99f80d2f5fb6d4165186390ecba83952803b667b (patch)
treebcd019917becfed26d1aefad8945a122adead81a /drivers/infiniband/sw/rdmavt
parent63df8e09e185796ba532cff7c31f88811dd6cae6 (diff)
IB/hfi1: Optimize lkey validation structures
Profiling shows that the key validation is susceptible to cache line trading when accessing the lkey table. Fix by separating out the read mostly fields from the write fields. In addition the shift amount, which is function of the lkey table size, is precomputed and stored with the table pointer. Since both the shift and table pointer are in the same read mostly cacheline, this saves a cache line in this hot path. Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
Diffstat (limited to 'drivers/infiniband/sw/rdmavt')
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c10
1 file changed, 5 insertions, 5 deletions
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 46b64970058e..4acf179adf3b 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
84 lkey_table_size = rdi->dparms.lkey_table_size; 84 lkey_table_size = rdi->dparms.lkey_table_size;
85 } 85 }
86 rdi->lkey_table.max = 1 << lkey_table_size; 86 rdi->lkey_table.max = 1 << lkey_table_size;
87 rdi->lkey_table.shift = 32 - lkey_table_size;
87 lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); 88 lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
88 rdi->lkey_table.table = (struct rvt_mregion __rcu **) 89 rdi->lkey_table.table = (struct rvt_mregion __rcu **)
89 vmalloc_node(lk_tab_size, rdi->dparms.node); 90 vmalloc_node(lk_tab_size, rdi->dparms.node);
@@ -774,7 +775,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
774 struct rvt_mregion *mr; 775 struct rvt_mregion *mr;
775 unsigned n, m; 776 unsigned n, m;
776 size_t off; 777 size_t off;
777 struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
778 778
779 /* 779 /*
780 * We use LKEY == zero for kernel virtual addresses 780 * We use LKEY == zero for kernel virtual addresses
@@ -782,6 +782,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
782 */ 782 */
783 rcu_read_lock(); 783 rcu_read_lock();
784 if (sge->lkey == 0) { 784 if (sge->lkey == 0) {
785 struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
786
785 if (pd->user) 787 if (pd->user)
786 goto bail; 788 goto bail;
787 mr = rcu_dereference(dev->dma_mr); 789 mr = rcu_dereference(dev->dma_mr);
@@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
798 isge->n = 0; 800 isge->n = 0;
799 goto ok; 801 goto ok;
800 } 802 }
801 mr = rcu_dereference( 803 mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
802 rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
803 if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || 804 if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
804 mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) 805 mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
805 goto bail; 806 goto bail;
@@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
899 goto ok; 900 goto ok;
900 } 901 }
901 902
902 mr = rcu_dereference( 903 mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
903 rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
904 if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || 904 if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
905 mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 905 mr->lkey != rkey || qp->ibqp.pd != mr->pd))
906 goto bail; 906 goto bail;