aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEli Cohen <eli@mellanox.co.il>2006-03-02 15:40:46 -0500
committerRoland Dreier <rolandd@cisco.com>2006-03-20 13:08:22 -0500
commit651eaac92894f8b6761c51b6637ea9cacea7fba2 (patch)
tree8be06461ef870b7d130ebce39f55fe93af6ac542
parent27d56300647f6e76847bc2407d7abc782fe87495 (diff)
IB/mthca: Optimize large messages on Sinai HCAs
Sinai (one-port PCI Express) HCAs get improved throughput for messages bigger than 80 KB in DDR mode if memory keys are formatted in a specific way. The enhancement only works if the memory key table is smaller than 2^24 entries. For larger tables, the enhancement is off and a warning is printed (to avoid silent performance loss). Signed-off-by: Eli Cohen <eli@mellanox.co.il> Signed-off-by: Michael Tsirkin <mst@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c23
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c20
-rw-r--r--drivers/infiniband/hw/mthca/mthca_profile.c10
5 files changed, 51 insertions, 17 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 948a2861cae3..343eca507870 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1277,7 +1277,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1277 int err; 1277 int err;
1278 1278
1279#define INIT_HCA_IN_SIZE 0x200 1279#define INIT_HCA_IN_SIZE 0x200
1280#define INIT_HCA_FLAGS_OFFSET 0x014 1280#define INIT_HCA_FLAGS1_OFFSET 0x00c
1281#define INIT_HCA_FLAGS2_OFFSET 0x014
1281#define INIT_HCA_QPC_OFFSET 0x020 1282#define INIT_HCA_QPC_OFFSET 0x020
1282#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) 1283#define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10)
1283#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) 1284#define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17)
@@ -1320,15 +1321,18 @@ int mthca_INIT_HCA(struct mthca_dev *dev,
1320 1321
1321 memset(inbox, 0, INIT_HCA_IN_SIZE); 1322 memset(inbox, 0, INIT_HCA_IN_SIZE);
1322 1323
1324 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
1325 MTHCA_PUT(inbox, 0x1, INIT_HCA_FLAGS1_OFFSET);
1326
1323#if defined(__LITTLE_ENDIAN) 1327#if defined(__LITTLE_ENDIAN)
1324 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) &= ~cpu_to_be32(1 << 1); 1328 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) &= ~cpu_to_be32(1 << 1);
1325#elif defined(__BIG_ENDIAN) 1329#elif defined(__BIG_ENDIAN)
1326 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 1); 1330 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1 << 1);
1327#else 1331#else
1328#error Host endianness not defined 1332#error Host endianness not defined
1329#endif 1333#endif
1330 /* Check port for UD address vector: */ 1334 /* Check port for UD address vector: */
1331 *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1); 1335 *(inbox + INIT_HCA_FLAGS2_OFFSET / 4) |= cpu_to_be32(1);
1332 1336
1333 /* We leave wqe_quota, responder_exu, etc as 0 (default) */ 1337 /* We leave wqe_quota, responder_exu, etc as 0 (default) */
1334 1338
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index ea48c897c7ce..ad52edbefe98 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -64,7 +64,8 @@ enum {
64 MTHCA_FLAG_NO_LAM = 1 << 5, 64 MTHCA_FLAG_NO_LAM = 1 << 5,
65 MTHCA_FLAG_FMR = 1 << 6, 65 MTHCA_FLAG_FMR = 1 << 6,
66 MTHCA_FLAG_MEMFREE = 1 << 7, 66 MTHCA_FLAG_MEMFREE = 1 << 7,
67 MTHCA_FLAG_PCIE = 1 << 8 67 MTHCA_FLAG_PCIE = 1 << 8,
68 MTHCA_FLAG_SINAI_OPT = 1 << 9
68}; 69};
69 70
70enum { 71enum {
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 9c849d27b06e..d94837fbf162 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -935,13 +935,19 @@ enum {
935 935
936static struct { 936static struct {
937 u64 latest_fw; 937 u64 latest_fw;
938 int is_memfree; 938 u32 flags;
939 int is_pcie;
940} mthca_hca_table[] = { 939} mthca_hca_table[] = {
941 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, 940 [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3),
942 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, 941 .flags = 0 },
943 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, 942 [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0),
944 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } 943 .flags = MTHCA_FLAG_PCIE },
944 [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0),
945 .flags = MTHCA_FLAG_MEMFREE |
946 MTHCA_FLAG_PCIE },
947 [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1),
948 .flags = MTHCA_FLAG_MEMFREE |
949 MTHCA_FLAG_PCIE |
950 MTHCA_FLAG_SINAI_OPT }
945}; 951};
946 952
947static int __devinit mthca_init_one(struct pci_dev *pdev, 953static int __devinit mthca_init_one(struct pci_dev *pdev,
@@ -1031,12 +1037,9 @@ static int __devinit mthca_init_one(struct pci_dev *pdev,
1031 1037
1032 mdev->pdev = pdev; 1038 mdev->pdev = pdev;
1033 1039
1040 mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
1034 if (ddr_hidden) 1041 if (ddr_hidden)
1035 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN; 1042 mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
1036 if (mthca_hca_table[id->driver_data].is_memfree)
1037 mdev->mthca_flags |= MTHCA_FLAG_MEMFREE;
1038 if (mthca_hca_table[id->driver_data].is_pcie)
1039 mdev->mthca_flags |= MTHCA_FLAG_PCIE;
1040 1043
1041 /* 1044 /*
1042 * Now reset the HCA before we touch the PCI capabilities or 1045 * Now reset the HCA before we touch the PCI capabilities or
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 0b48048ad0f8..698b62125765 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -76,6 +76,8 @@ struct mthca_mpt_entry {
76#define MTHCA_MPT_STATUS_SW 0xF0 76#define MTHCA_MPT_STATUS_SW 0xF0
77#define MTHCA_MPT_STATUS_HW 0x00 77#define MTHCA_MPT_STATUS_HW 0x00
78 78
79#define SINAI_FMR_KEY_INC 0x1000000
80
79/* 81/*
80 * Buddy allocator for MTT segments (currently not very efficient 82 * Buddy allocator for MTT segments (currently not very efficient
81 * since it doesn't keep a free list and just searches linearly 83 * since it doesn't keep a free list and just searches linearly
@@ -330,6 +332,14 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
330 return tavor_key_to_hw_index(key); 332 return tavor_key_to_hw_index(key);
331} 333}
332 334
335static inline u32 adjust_key(struct mthca_dev *dev, u32 key)
336{
337 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
338 return ((key << 20) & 0x800000) | (key & 0x7fffff);
339 else
340 return key;
341}
342
333int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, 343int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
334 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) 344 u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
335{ 345{
@@ -345,6 +355,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
345 key = mthca_alloc(&dev->mr_table.mpt_alloc); 355 key = mthca_alloc(&dev->mr_table.mpt_alloc);
346 if (key == -1) 356 if (key == -1)
347 return -ENOMEM; 357 return -ENOMEM;
358 key = adjust_key(dev, key);
348 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 359 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
349 360
350 if (mthca_is_memfree(dev)) { 361 if (mthca_is_memfree(dev)) {
@@ -504,6 +515,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
504 key = mthca_alloc(&dev->mr_table.mpt_alloc); 515 key = mthca_alloc(&dev->mr_table.mpt_alloc);
505 if (key == -1) 516 if (key == -1)
506 return -ENOMEM; 517 return -ENOMEM;
518 key = adjust_key(dev, key);
507 519
508 idx = key & (dev->limits.num_mpts - 1); 520 idx = key & (dev->limits.num_mpts - 1);
509 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); 521 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
@@ -687,7 +699,10 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
687 ++fmr->maps; 699 ++fmr->maps;
688 700
689 key = arbel_key_to_hw_index(fmr->ibmr.lkey); 701 key = arbel_key_to_hw_index(fmr->ibmr.lkey);
690 key += dev->limits.num_mpts; 702 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
703 key += SINAI_FMR_KEY_INC;
704 else
705 key += dev->limits.num_mpts;
691 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key); 706 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
692 707
693 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW; 708 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
@@ -760,6 +775,9 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
760 else 775 else
761 dev->mthca_flags |= MTHCA_FLAG_FMR; 776 dev->mthca_flags |= MTHCA_FLAG_FMR;
762 777
778 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT)
779 mthca_dbg(dev, "Memory key throughput optimization activated.\n");
780
763 err = mthca_buddy_init(&dev->mr_table.mtt_buddy, 781 err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
764 fls(dev->limits.num_mtt_segs - 1)); 782 fls(dev->limits.num_mtt_segs - 1));
765 783
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index 08a909371b0a..58d44aa3c302 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -152,7 +152,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
152 } 152 }
153 if (total_size > mem_avail) { 153 if (total_size > mem_avail) {
154 mthca_err(dev, "Profile requires 0x%llx bytes; " 154 mthca_err(dev, "Profile requires 0x%llx bytes; "
155 "won't in 0x%llx bytes of context memory.\n", 155 "won't fit in 0x%llx bytes of context memory.\n",
156 (unsigned long long) total_size, 156 (unsigned long long) total_size,
157 (unsigned long long) mem_avail); 157 (unsigned long long) mem_avail);
158 kfree(profile); 158 kfree(profile);
@@ -262,6 +262,14 @@ u64 mthca_make_profile(struct mthca_dev *dev,
262 */ 262 */
263 dev->limits.num_pds = MTHCA_NUM_PDS; 263 dev->limits.num_pds = MTHCA_NUM_PDS;
264 264
265 if (dev->mthca_flags & MTHCA_FLAG_SINAI_OPT &&
266 init_hca->log_mpt_sz > 23) {
267 mthca_warn(dev, "MPT table too large (requested size 2^%d >= 2^24)\n",
268 init_hca->log_mpt_sz);
269 mthca_warn(dev, "Disabling memory key throughput optimization.\n");
270 dev->mthca_flags &= ~MTHCA_FLAG_SINAI_OPT;
271 }
272
265 /* 273 /*
266 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit 274 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit
267 * systems it may use too much vmalloc space to map all MTT 275 * systems it may use too much vmalloc space to map all MTT