aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jack Morgenstein <jackm@dev.mellanox.co.il> 2007-08-01 05:28:53 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-10-09 22:59:16 -0400
commit d7bb58fb1c0e7264a7261c7d0304121ef9402e94 (patch)
tree aefbae7ee4b4b84022a7646372ddfcdb0a99e72f
parent 121964ec38d3e17b5ea6183b3c0571df2f4b0eb6 (diff)
mlx4_core: Write MTTs from CPU instead of with WRITE_MTT FW command
Write MTT entries directly to ICM from the driver (eliminating use of the WRITE_MTT command). This reduces the number of FW commands needed to register an MR by at least a factor of 2 and speeds up memory registration significantly. This code will also be used to implement FMRs.
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/mlx4/mr.c 7
-rw-r--r-- drivers/net/mlx4/icm.c 21
-rw-r--r-- drivers/net/mlx4/icm.h 2
-rw-r--r-- drivers/net/mlx4/main.c 11
-rw-r--r-- drivers/net/mlx4/mr.c 73
5 files changed, 67 insertions, 47 deletions
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 85ae906f1d12..734ec2bd15cd 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -96,11 +96,10 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
96 pages[i++] = sg_dma_address(&chunk->page_list[j]) + 96 pages[i++] = sg_dma_address(&chunk->page_list[j]) +
97 umem->page_size * k; 97 umem->page_size * k;
98 /* 98 /*
99 * Be friendly to WRITE_MTT firmware 99 * Be friendly to mlx4_write_mtt() and
100 * command, and pass it chunks of 100 * pass it chunks of appropriate size.
101 * appropriate size.
102 */ 101 */
103 if (i == PAGE_SIZE / sizeof (u64) - 2) { 102 if (i == PAGE_SIZE / sizeof (u64)) {
104 err = mlx4_write_mtt(dev->dev, mtt, n, 103 err = mlx4_write_mtt(dev->dev, mtt, n,
105 i, pages); 104 i, pages);
106 if (err) 105 if (err)
diff --git a/drivers/net/mlx4/icm.c b/drivers/net/mlx4/icm.c
index 250e24887578..4b3c109d5eae 100644
--- a/drivers/net/mlx4/icm.c
+++ b/drivers/net/mlx4/icm.c
@@ -301,9 +301,9 @@ void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
301 mutex_unlock(&table->mutex); 301 mutex_unlock(&table->mutex);
302} 302}
303 303
304void *mlx4_table_find(struct mlx4_icm_table *table, int obj) 304void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle)
305{ 305{
306 int idx, offset, i; 306 int idx, offset, dma_offset, i;
307 struct mlx4_icm_chunk *chunk; 307 struct mlx4_icm_chunk *chunk;
308 struct mlx4_icm *icm; 308 struct mlx4_icm *icm;
309 struct page *page = NULL; 309 struct page *page = NULL;
@@ -313,15 +313,26 @@ void *mlx4_table_find(struct mlx4_icm_table *table, int obj)
313 313
314 mutex_lock(&table->mutex); 314 mutex_lock(&table->mutex);
315 315
316 idx = obj & (table->num_obj - 1); 316 idx = (obj & (table->num_obj - 1)) * table->obj_size;
317 icm = table->icm[idx / (MLX4_TABLE_CHUNK_SIZE / table->obj_size)]; 317 icm = table->icm[idx / MLX4_TABLE_CHUNK_SIZE];
318 offset = idx % (MLX4_TABLE_CHUNK_SIZE / table->obj_size); 318 dma_offset = offset = idx % MLX4_TABLE_CHUNK_SIZE;
319 319
320 if (!icm) 320 if (!icm)
321 goto out; 321 goto out;
322 322
323 list_for_each_entry(chunk, &icm->chunk_list, list) { 323 list_for_each_entry(chunk, &icm->chunk_list, list) {
324 for (i = 0; i < chunk->npages; ++i) { 324 for (i = 0; i < chunk->npages; ++i) {
325 if (dma_handle && dma_offset >= 0) {
326 if (sg_dma_len(&chunk->mem[i]) > dma_offset)
327 *dma_handle = sg_dma_address(&chunk->mem[i]) +
328 dma_offset;
329 dma_offset -= sg_dma_len(&chunk->mem[i]);
330 }
331 /*
332 * DMA mapping can merge pages but not split them,
333 * so if we found the page, dma_handle has already
334 * been assigned to.
335 */
325 if (chunk->mem[i].length > offset) { 336 if (chunk->mem[i].length > offset) {
326 page = chunk->mem[i].page; 337 page = chunk->mem[i].page;
327 goto out; 338 goto out;
diff --git a/drivers/net/mlx4/icm.h b/drivers/net/mlx4/icm.h
index a77db6de8597..6c44edf35847 100644
--- a/drivers/net/mlx4/icm.h
+++ b/drivers/net/mlx4/icm.h
@@ -83,7 +83,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
83void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table); 83void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table);
84int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 84int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
85void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj); 85void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj);
86void *mlx4_table_find(struct mlx4_icm_table *table, int obj); 86void *mlx4_table_find(struct mlx4_icm_table *table, int obj, dma_addr_t *dma_handle);
87int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, 87int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
88 int start, int end); 88 int start, int end);
89void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table, 89void mlx4_table_put_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index ed7e8d76cde0..478b3ba74edc 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -300,6 +300,17 @@ static int __devinit mlx4_init_icm(struct mlx4_dev *dev,
300 goto err_unmap_cmpt; 300 goto err_unmap_cmpt;
301 } 301 }
302 302
303 /*
304 * Reserved MTT entries must be aligned up to a cacheline
305 * boundary, since the FW will write to them, while the driver
306 * writes to all other MTT entries. (The variable
307 * dev->caps.mtt_entry_sz below is really the MTT segment
308 * size, not the raw entry size)
309 */
310 dev->caps.reserved_mtts =
311 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
312 dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
313
303 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table, 314 err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
304 init_hca->mtt_base, 315 init_hca->mtt_base,
305 dev->caps.mtt_entry_sz, 316 dev->caps.mtt_entry_sz,
diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
index 60a6ee27cfd4..3cc98c699aaf 100644
--- a/drivers/net/mlx4/mr.c
+++ b/drivers/net/mlx4/mr.c
@@ -349,58 +349,57 @@ err_table:
349} 349}
350EXPORT_SYMBOL_GPL(mlx4_mr_enable); 350EXPORT_SYMBOL_GPL(mlx4_mr_enable);
351 351
352static int mlx4_WRITE_MTT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, 352static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
353 int num_mtt) 353 int start_index, int npages, u64 *page_list)
354{ 354{
355 return mlx4_cmd(dev, mailbox->dma, num_mtt, 0, MLX4_CMD_WRITE_MTT, 355 struct mlx4_priv *priv = mlx4_priv(dev);
356 MLX4_CMD_TIME_CLASS_B); 356 __be64 *mtts;
357 dma_addr_t dma_handle;
358 int i;
359 int s = start_index * sizeof (u64);
360
361 /* All MTTs must fit in the same page */
362 if (start_index / (PAGE_SIZE / sizeof (u64)) !=
363 (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
364 return -EINVAL;
365
366 if (start_index & (MLX4_MTT_ENTRY_PER_SEG - 1))
367 return -EINVAL;
368
369 mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
370 s / dev->caps.mtt_entry_sz, &dma_handle);
371 if (!mtts)
372 return -ENOMEM;
373
374 for (i = 0; i < npages; ++i)
375 mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
376
377 dma_sync_single(&dev->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE);
378
379 return 0;
357} 380}
358 381
359int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, 382int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
360 int start_index, int npages, u64 *page_list) 383 int start_index, int npages, u64 *page_list)
361{ 384{
362 struct mlx4_cmd_mailbox *mailbox; 385 int chunk;
363 __be64 *mtt_entry; 386 int err;
364 int i;
365 int err = 0;
366 387
367 if (mtt->order < 0) 388 if (mtt->order < 0)
368 return -EINVAL; 389 return -EINVAL;
369 390
370 mailbox = mlx4_alloc_cmd_mailbox(dev);
371 if (IS_ERR(mailbox))
372 return PTR_ERR(mailbox);
373
374 mtt_entry = mailbox->buf;
375
376 while (npages > 0) { 391 while (npages > 0) {
377 mtt_entry[0] = cpu_to_be64(mlx4_mtt_addr(dev, mtt) + start_index * 8); 392 chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
378 mtt_entry[1] = 0; 393 err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
379
380 for (i = 0; i < npages && i < MLX4_MAILBOX_SIZE / 8 - 2; ++i)
381 mtt_entry[i + 2] = cpu_to_be64(page_list[i] |
382 MLX4_MTT_FLAG_PRESENT);
383
384 /*
385 * If we have an odd number of entries to write, add
386 * one more dummy entry for firmware efficiency.
387 */
388 if (i & 1)
389 mtt_entry[i + 2] = 0;
390
391 err = mlx4_WRITE_MTT(dev, mailbox, (i + 1) & ~1);
392 if (err) 394 if (err)
393 goto out; 395 return err;
394 396
395 npages -= i; 397 npages -= chunk;
396 start_index += i; 398 start_index += chunk;
397 page_list += i; 399 page_list += chunk;
398 } 400 }
399 401
400out: 402 return 0;
401 mlx4_free_cmd_mailbox(dev, mailbox);
402
403 return err;
404} 403}
405EXPORT_SYMBOL_GPL(mlx4_write_mtt); 404EXPORT_SYMBOL_GPL(mlx4_write_mtt);
406 405