about summary refs log tree commit diff stats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
author Michael S. Tsirkin <mst@mellanox.co.il> 2005-04-16 18:26:30 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:26:30 -0400
commit e0f5fdca1ca9d1659b920827e5cf6dbad20e5391 (patch)
tree 71e6410e14c4c285ce128e99070a38ded75a14f7 /drivers/infiniband/hw
parent d0a9d25cdcd511dd523357bc902979220dc72a2e (diff)
[PATCH] IB/mthca: add fast memory region implementation
Implement fast memory regions (FMRs), where the driver writes directly into the HCA's translation tables rather than requiring a firmware command. For Tavor, MTTs for FMR are separate from regular MTTs, and are reserved at driver initialization. This is done to limit the amount of virtual memory needed to map the MTTs.

For Arbel, there's no such limitation, and all MTTs and MPTs may be used for FMR or for regular MR.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <roland@topspin.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_dev.h 25
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_main.c 17
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_mr.c 386
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_profile.c 19
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_profile.h 1
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.c 79
-rw-r--r-- drivers/infiniband/hw/mthca/mthca_provider.h 23
7 files changed, 526 insertions, 24 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 1faaf542a4e1..cca3ca7196a3 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -61,7 +61,8 @@ enum {
61 MTHCA_FLAG_SRQ = 1 << 2, 61 MTHCA_FLAG_SRQ = 1 << 2,
62 MTHCA_FLAG_MSI = 1 << 3, 62 MTHCA_FLAG_MSI = 1 << 3,
63 MTHCA_FLAG_MSI_X = 1 << 4, 63 MTHCA_FLAG_MSI_X = 1 << 4,
64 MTHCA_FLAG_NO_LAM = 1 << 5 64 MTHCA_FLAG_NO_LAM = 1 << 5,
65 MTHCA_FLAG_FMR = 1 << 6
65}; 66};
66 67
67enum { 68enum {
@@ -134,6 +135,7 @@ struct mthca_limits {
134 int reserved_eqs; 135 int reserved_eqs;
135 int num_mpts; 136 int num_mpts;
136 int num_mtt_segs; 137 int num_mtt_segs;
138 int fmr_reserved_mtts;
137 int reserved_mtts; 139 int reserved_mtts;
138 int reserved_mrws; 140 int reserved_mrws;
139 int reserved_uars; 141 int reserved_uars;
@@ -178,10 +180,17 @@ struct mthca_buddy {
178 180
179struct mthca_mr_table { 181struct mthca_mr_table {
180 struct mthca_alloc mpt_alloc; 182 struct mthca_alloc mpt_alloc;
181 struct mthca_buddy mtt_buddy; 183 struct mthca_buddy mtt_buddy;
184 struct mthca_buddy *fmr_mtt_buddy;
182 u64 mtt_base; 185 u64 mtt_base;
186 u64 mpt_base;
183 struct mthca_icm_table *mtt_table; 187 struct mthca_icm_table *mtt_table;
184 struct mthca_icm_table *mpt_table; 188 struct mthca_icm_table *mpt_table;
189 struct {
190 void __iomem *mpt_base;
191 void __iomem *mtt_base;
192 struct mthca_buddy mtt_buddy;
193 } tavor_fmr;
185}; 194};
186 195
187struct mthca_eq_table { 196struct mthca_eq_table {
@@ -380,7 +389,17 @@ int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
380 u64 *buffer_list, int buffer_size_shift, 389 u64 *buffer_list, int buffer_size_shift,
381 int list_len, u64 iova, u64 total_size, 390 int list_len, u64 iova, u64 total_size,
382 u32 access, struct mthca_mr *mr); 391 u32 access, struct mthca_mr *mr);
383void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr); 392void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
393
394int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
395 u32 access, struct mthca_fmr *fmr);
396int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
397 int list_len, u64 iova);
398void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
399int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
400 int list_len, u64 iova);
401void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr);
402int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr);
384 403
385int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt); 404int mthca_map_eq_icm(struct mthca_dev *dev, u64 icm_virt);
386void mthca_unmap_eq_icm(struct mthca_dev *dev); 405void mthca_unmap_eq_icm(struct mthca_dev *dev);
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 7912b262a4cf..fdfc2b788e64 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -73,14 +73,15 @@ static const char mthca_version[] __devinitdata =
73 DRV_VERSION " (" DRV_RELDATE ")\n"; 73 DRV_VERSION " (" DRV_RELDATE ")\n";
74 74
75static struct mthca_profile default_profile = { 75static struct mthca_profile default_profile = {
76 .num_qp = 1 << 16, 76 .num_qp = 1 << 16,
77 .rdb_per_qp = 4, 77 .rdb_per_qp = 4,
78 .num_cq = 1 << 16, 78 .num_cq = 1 << 16,
79 .num_mcg = 1 << 13, 79 .num_mcg = 1 << 13,
80 .num_mpt = 1 << 17, 80 .num_mpt = 1 << 17,
81 .num_mtt = 1 << 20, 81 .num_mtt = 1 << 20,
82 .num_udav = 1 << 15, /* Tavor only */ 82 .num_udav = 1 << 15, /* Tavor only */
83 .uarc_size = 1 << 18, /* Arbel only */ 83 .fmr_reserved_mtts = 1 << 18, /* Tavor only */
84 .uarc_size = 1 << 18, /* Arbel only */
84}; 85};
85 86
86static int __devinit mthca_tune_pci(struct mthca_dev *mdev) 87static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ac3265d0bf79..a85b503b8522 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -66,6 +66,9 @@ struct mthca_mpt_entry {
66 66
67#define MTHCA_MTT_FLAG_PRESENT 1 67#define MTHCA_MTT_FLAG_PRESENT 1
68 68
69#define MTHCA_MPT_STATUS_SW 0xF0
70#define MTHCA_MPT_STATUS_HW 0x00
71
69/* 72/*
70 * Buddy allocator for MTT segments (currently not very efficient 73 * Buddy allocator for MTT segments (currently not very efficient
71 * since it doesn't keep a free list and just searches linearly 74 * since it doesn't keep a free list and just searches linearly
@@ -442,6 +445,20 @@ err_out_mpt_free:
442 return err; 445 return err;
443} 446}
444 447
448/* Free mr or fmr */
449static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
450 u32 first_seg, struct mthca_buddy *buddy)
451{
452 if (order >= 0)
453 mthca_free_mtt(dev, first_seg, order, buddy);
454
455 if (dev->hca_type == ARBEL_NATIVE)
456 mthca_table_put(dev, dev->mr_table.mpt_table,
457 arbel_key_to_hw_index(lkey));
458
459 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
460}
461
445void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) 462void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
446{ 463{
447 int err; 464 int err;
@@ -459,18 +476,288 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
459 mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", 476 mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
460 status); 477 status);
461 478
462 if (mr->order >= 0) 479 mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
463 mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy); 480 &dev->mr_table.mtt_buddy);
481}
482
483int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
484 u32 access, struct mthca_fmr *mr)
485{
486 struct mthca_mpt_entry *mpt_entry;
487 void *mailbox;
488 u64 mtt_seg;
489 u32 key, idx;
490 u8 status;
491 int list_len = mr->attr.max_pages;
492 int err = -ENOMEM;
493 int i;
494
495 might_sleep();
496
497 if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
498 return -EINVAL;
499
500 /* For Arbel, all MTTs must fit in the same page. */
501 if (dev->hca_type == ARBEL_NATIVE &&
502 mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
503 return -EINVAL;
504
505 mr->maps = 0;
506
507 key = mthca_alloc(&dev->mr_table.mpt_alloc);
508 if (key == -1)
509 return -ENOMEM;
510
511 idx = key & (dev->limits.num_mpts - 1);
512 mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
513
514 if (dev->hca_type == ARBEL_NATIVE) {
515 err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
516 if (err)
517 goto err_out_mpt_free;
518
519 mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
520 BUG_ON(!mr->mem.arbel.mpt);
521 } else
522 mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
523 sizeof *(mr->mem.tavor.mpt) * idx;
524
525 for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
526 i < list_len;
527 i <<= 1, ++mr->order)
528 ; /* nothing */
529
530 mr->first_seg = mthca_alloc_mtt(dev, mr->order,
531 dev->mr_table.fmr_mtt_buddy);
532 if (mr->first_seg == -1)
533 goto err_out_table;
534
535 mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
536
537 if (dev->hca_type == ARBEL_NATIVE) {
538 mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
539 mr->first_seg);
540 BUG_ON(!mr->mem.arbel.mtts);
541 } else
542 mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
543
544 mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
545 GFP_KERNEL);
546 if (!mailbox)
547 goto err_out_free_mtt;
548
549 mpt_entry = MAILBOX_ALIGN(mailbox);
550
551 mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
552 MTHCA_MPT_FLAG_MIO |
553 MTHCA_MPT_FLAG_REGION |
554 access);
555
556 mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
557 mpt_entry->key = cpu_to_be32(key);
558 mpt_entry->pd = cpu_to_be32(pd);
559 memset(&mpt_entry->start, 0,
560 sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
561 mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
562
563 if (0) {
564 mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
565 for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
566 if (i % 4 == 0)
567 printk("[%02x] ", i * 4);
568 printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
569 if ((i + 1) % 4 == 0)
570 printk("\n");
571 }
572 }
573
574 err = mthca_SW2HW_MPT(dev, mpt_entry,
575 key & (dev->limits.num_mpts - 1),
576 &status);
577 if (err) {
578 mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
579 goto err_out_mailbox_free;
580 }
581 if (status) {
582 mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
583 status);
584 err = -EINVAL;
585 goto err_out_mailbox_free;
586 }
587
588 kfree(mailbox);
589 return 0;
590
591err_out_mailbox_free:
592 kfree(mailbox);
593
594err_out_free_mtt:
595 mthca_free_mtt(dev, mr->first_seg, mr->order,
596 dev->mr_table.fmr_mtt_buddy);
464 597
598err_out_table:
465 if (dev->hca_type == ARBEL_NATIVE) 599 if (dev->hca_type == ARBEL_NATIVE)
466 mthca_table_put(dev, dev->mr_table.mpt_table, 600 mthca_table_put(dev, dev->mr_table.mpt_table, key);
467 key_to_hw_index(dev, mr->ibmr.lkey)); 601
468 mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey)); 602err_out_mpt_free:
603 mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
604 return err;
605}
606
607int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
608{
609 if (fmr->maps)
610 return -EBUSY;
611
612 mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
613 dev->mr_table.fmr_mtt_buddy);
614 return 0;
615}
616
617static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
618 int list_len, u64 iova)
619{
620 int i, page_mask;
621
622 if (list_len > fmr->attr.max_pages)
623 return -EINVAL;
624
625 page_mask = (1 << fmr->attr.page_size) - 1;
626
627 /* We are getting page lists, so va must be page aligned. */
628 if (iova & page_mask)
629 return -EINVAL;
630
631 /* Trust the user not to pass misaligned data in page_list */
632 if (0)
633 for (i = 0; i < list_len; ++i) {
634 if (page_list[i] & ~page_mask)
635 return -EINVAL;
636 }
637
638 if (fmr->maps >= fmr->attr.max_maps)
639 return -EINVAL;
640
641 return 0;
642}
643
644
645int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
646 int list_len, u64 iova)
647{
648 struct mthca_fmr *fmr = to_mfmr(ibfmr);
649 struct mthca_dev *dev = to_mdev(ibfmr->device);
650 struct mthca_mpt_entry mpt_entry;
651 u32 key;
652 int i, err;
653
654 err = mthca_check_fmr(fmr, page_list, list_len, iova);
655 if (err)
656 return err;
657
658 ++fmr->maps;
659
660 key = tavor_key_to_hw_index(fmr->ibmr.lkey);
661 key += dev->limits.num_mpts;
662 fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
663
664 writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
665
666 for (i = 0; i < list_len; ++i) {
667 __be64 mtt_entry = cpu_to_be64(page_list[i] |
668 MTHCA_MTT_FLAG_PRESENT);
669 mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
670 }
671
672 mpt_entry.lkey = cpu_to_be32(key);
673 mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
674 mpt_entry.start = cpu_to_be64(iova);
675
676 writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
677 memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
678 offsetof(struct mthca_mpt_entry, window_count) -
679 offsetof(struct mthca_mpt_entry, start));
680
681 writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
682
683 return 0;
684}
685
686int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
687 int list_len, u64 iova)
688{
689 struct mthca_fmr *fmr = to_mfmr(ibfmr);
690 struct mthca_dev *dev = to_mdev(ibfmr->device);
691 u32 key;
692 int i, err;
693
694 err = mthca_check_fmr(fmr, page_list, list_len, iova);
695 if (err)
696 return err;
697
698 ++fmr->maps;
699
700 key = arbel_key_to_hw_index(fmr->ibmr.lkey);
701 key += dev->limits.num_mpts;
702 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
703
704 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
705
706 wmb();
707
708 for (i = 0; i < list_len; ++i)
709 fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
710 MTHCA_MTT_FLAG_PRESENT);
711
712 fmr->mem.arbel.mpt->key = cpu_to_be32(key);
713 fmr->mem.arbel.mpt->lkey = cpu_to_be32(key);
714 fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
715 fmr->mem.arbel.mpt->start = cpu_to_be64(iova);
716
717 wmb();
718
719 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
720
721 wmb();
722
723 return 0;
724}
725
726void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
727{
728 u32 key;
729
730 if (!fmr->maps)
731 return;
732
733 key = tavor_key_to_hw_index(fmr->ibmr.lkey);
734 key &= dev->limits.num_mpts - 1;
735 fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
736
737 fmr->maps = 0;
738
739 writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
740}
741
742void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
743{
744 u32 key;
745
746 if (!fmr->maps)
747 return;
748
749 key = arbel_key_to_hw_index(fmr->ibmr.lkey);
750 key &= dev->limits.num_mpts - 1;
751 fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
752
753 fmr->maps = 0;
754
755 *(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
469} 756}
470 757
471int __devinit mthca_init_mr_table(struct mthca_dev *dev) 758int __devinit mthca_init_mr_table(struct mthca_dev *dev)
472{ 759{
473 int err; 760 int err, i;
474 761
475 err = mthca_alloc_init(&dev->mr_table.mpt_alloc, 762 err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
476 dev->limits.num_mpts, 763 dev->limits.num_mpts,
@@ -478,23 +765,93 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
478 if (err) 765 if (err)
479 return err; 766 return err;
480 767
768 if (dev->hca_type != ARBEL_NATIVE &&
769 (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
770 dev->limits.fmr_reserved_mtts = 0;
771 else
772 dev->mthca_flags |= MTHCA_FLAG_FMR;
773
481 err = mthca_buddy_init(&dev->mr_table.mtt_buddy, 774 err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
482 fls(dev->limits.num_mtt_segs - 1)); 775 fls(dev->limits.num_mtt_segs - 1));
776
483 if (err) 777 if (err)
484 goto err_mtt_buddy; 778 goto err_mtt_buddy;
485 779
780 dev->mr_table.tavor_fmr.mpt_base = NULL;
781 dev->mr_table.tavor_fmr.mtt_base = NULL;
782
783 if (dev->limits.fmr_reserved_mtts) {
784 i = fls(dev->limits.fmr_reserved_mtts - 1);
785
786 if (i >= 31) {
787 mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
788 err = -EINVAL;
789 goto err_fmr_mpt;
790 }
791
792 dev->mr_table.tavor_fmr.mpt_base =
793 ioremap(dev->mr_table.mpt_base,
794 (1 << i) * sizeof (struct mthca_mpt_entry));
795
796 if (!dev->mr_table.tavor_fmr.mpt_base) {
797 mthca_warn(dev, "MPT ioremap for FMR failed.\n");
798 err = -ENOMEM;
799 goto err_fmr_mpt;
800 }
801
802 dev->mr_table.tavor_fmr.mtt_base =
803 ioremap(dev->mr_table.mtt_base,
804 (1 << i) * MTHCA_MTT_SEG_SIZE);
805 if (!dev->mr_table.tavor_fmr.mtt_base) {
806 mthca_warn(dev, "MTT ioremap for FMR failed.\n");
807 err = -ENOMEM;
808 goto err_fmr_mtt;
809 }
810
811 err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
812 if (err)
813 goto err_fmr_mtt_buddy;
814
815 /* Prevent regular MRs from using FMR keys */
816 err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
817 if (err)
818 goto err_reserve_fmr;
819
820 dev->mr_table.fmr_mtt_buddy =
821 &dev->mr_table.tavor_fmr.mtt_buddy;
822 } else
823 dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
824
825 /* FMR table is always the first, take reserved MTTs out of there */
486 if (dev->limits.reserved_mtts) { 826 if (dev->limits.reserved_mtts) {
487 if (mthca_alloc_mtt(dev, fls(dev->limits.reserved_mtts - 1), 827 i = fls(dev->limits.reserved_mtts - 1);
488 &dev->mr_table.mtt_buddy) == -1) { 828
829 if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
489 mthca_warn(dev, "MTT table of order %d is too small.\n", 830 mthca_warn(dev, "MTT table of order %d is too small.\n",
490 dev->mr_table.mtt_buddy.max_order); 831 dev->mr_table.fmr_mtt_buddy->max_order);
491 err = -ENOMEM; 832 err = -ENOMEM;
492 goto err_mtt_buddy; 833 goto err_reserve_mtts;
493 } 834 }
494 } 835 }
495 836
496 return 0; 837 return 0;
497 838
839err_reserve_mtts:
840err_reserve_fmr:
841 if (dev->limits.fmr_reserved_mtts)
842 mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
843
844err_fmr_mtt_buddy:
845 if (dev->mr_table.tavor_fmr.mtt_base)
846 iounmap(dev->mr_table.tavor_fmr.mtt_base);
847
848err_fmr_mtt:
849 if (dev->mr_table.tavor_fmr.mpt_base)
850 iounmap(dev->mr_table.tavor_fmr.mpt_base);
851
852err_fmr_mpt:
853 mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
854
498err_mtt_buddy: 855err_mtt_buddy:
499 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); 856 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
500 857
@@ -504,6 +861,15 @@ err_mtt_buddy:
504void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev) 861void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
505{ 862{
506 /* XXX check if any MRs are still allocated? */ 863 /* XXX check if any MRs are still allocated? */
864 if (dev->limits.fmr_reserved_mtts)
865 mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
866
507 mthca_buddy_cleanup(&dev->mr_table.mtt_buddy); 867 mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
868
869 if (dev->mr_table.tavor_fmr.mtt_base)
870 iounmap(dev->mr_table.tavor_fmr.mtt_base);
871 if (dev->mr_table.tavor_fmr.mpt_base)
872 iounmap(dev->mr_table.tavor_fmr.mpt_base);
873
508 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc); 874 mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
509} 875}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c
index cfd6f70c8df3..fd3f167e6460 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -223,9 +223,10 @@ u64 mthca_make_profile(struct mthca_dev *dev,
223 init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1); 223 init_hca->mc_hash_sz = 1 << (profile[i].log_num - 1);
224 break; 224 break;
225 case MTHCA_RES_MPT: 225 case MTHCA_RES_MPT:
226 dev->limits.num_mpts = profile[i].num; 226 dev->limits.num_mpts = profile[i].num;
227 init_hca->mpt_base = profile[i].start; 227 dev->mr_table.mpt_base = profile[i].start;
228 init_hca->log_mpt_sz = profile[i].log_num; 228 init_hca->mpt_base = profile[i].start;
229 init_hca->log_mpt_sz = profile[i].log_num;
229 break; 230 break;
230 case MTHCA_RES_MTT: 231 case MTHCA_RES_MTT:
231 dev->limits.num_mtt_segs = profile[i].num; 232 dev->limits.num_mtt_segs = profile[i].num;
@@ -259,6 +260,18 @@ u64 mthca_make_profile(struct mthca_dev *dev,
259 */ 260 */
260 dev->limits.num_pds = MTHCA_NUM_PDS; 261 dev->limits.num_pds = MTHCA_NUM_PDS;
261 262
263 /*
264 * For Tavor, FMRs use ioremapped PCI memory. For 32 bit
265 * systems it may use too much vmalloc space to map all MTT
266 * memory, so we reserve some MTTs for FMR access, taking them
267 * out of the MR pool. They don't use additional memory, but
268 * we assign them as part of the HCA profile anyway.
269 */
270 if (dev->hca_type == ARBEL_NATIVE)
271 dev->limits.fmr_reserved_mtts = 0;
272 else
273 dev->limits.fmr_reserved_mtts = request->fmr_reserved_mtts;
274
262 kfree(profile); 275 kfree(profile);
263 return total_size; 276 return total_size;
264} 277}
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h
index daaf7999486c..17aef3357661 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.h
+++ b/drivers/infiniband/hw/mthca/mthca_profile.h
@@ -48,6 +48,7 @@ struct mthca_profile {
48 int num_udav; 48 int num_udav;
49 int num_uar; 49 int num_uar;
50 int uarc_size; 50 int uarc_size;
51 int fmr_reserved_mtts;
51}; 52};
52 53
53u64 mthca_make_profile(struct mthca_dev *mdev, 54u64 mthca_make_profile(struct mthca_dev *mdev,
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index daa54db00aa9..28199e42b36f 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -574,6 +574,74 @@ static int mthca_dereg_mr(struct ib_mr *mr)
574 return 0; 574 return 0;
575} 575}
576 576
577static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
578 struct ib_fmr_attr *fmr_attr)
579{
580 struct mthca_fmr *fmr;
581 int err;
582
583 fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
584 if (!fmr)
585 return ERR_PTR(-ENOMEM);
586
587 memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
588 err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
589 convert_access(mr_access_flags), fmr);
590
591 if (err) {
592 kfree(fmr);
593 return ERR_PTR(err);
594 }
595
596 return &fmr->ibmr;
597}
598
599static int mthca_dealloc_fmr(struct ib_fmr *fmr)
600{
601 struct mthca_fmr *mfmr = to_mfmr(fmr);
602 int err;
603
604 err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
605 if (err)
606 return err;
607
608 kfree(mfmr);
609 return 0;
610}
611
612static int mthca_unmap_fmr(struct list_head *fmr_list)
613{
614 struct ib_fmr *fmr;
615 int err;
616 u8 status;
617 struct mthca_dev *mdev = NULL;
618
619 list_for_each_entry(fmr, fmr_list, list) {
620 if (mdev && to_mdev(fmr->device) != mdev)
621 return -EINVAL;
622 mdev = to_mdev(fmr->device);
623 }
624
625 if (!mdev)
626 return 0;
627
628 if (mdev->hca_type == ARBEL_NATIVE) {
629 list_for_each_entry(fmr, fmr_list, list)
630 mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
631
632 wmb();
633 } else
634 list_for_each_entry(fmr, fmr_list, list)
635 mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
636
637 err = mthca_SYNC_TPT(mdev, &status);
638 if (err)
639 return err;
640 if (status)
641 return -EINVAL;
642 return 0;
643}
644
577static ssize_t show_rev(struct class_device *cdev, char *buf) 645static ssize_t show_rev(struct class_device *cdev, char *buf)
578{ 646{
579 struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); 647 struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
@@ -637,6 +705,17 @@ int mthca_register_device(struct mthca_dev *dev)
637 dev->ib_dev.get_dma_mr = mthca_get_dma_mr; 705 dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
638 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; 706 dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
639 dev->ib_dev.dereg_mr = mthca_dereg_mr; 707 dev->ib_dev.dereg_mr = mthca_dereg_mr;
708
709 if (dev->mthca_flags & MTHCA_FLAG_FMR) {
710 dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
711 dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
712 dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
713 if (dev->hca_type == ARBEL_NATIVE)
714 dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
715 else
716 dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
717 }
718
640 dev->ib_dev.attach_mcast = mthca_multicast_attach; 719 dev->ib_dev.attach_mcast = mthca_multicast_attach;
641 dev->ib_dev.detach_mcast = mthca_multicast_detach; 720 dev->ib_dev.detach_mcast = mthca_multicast_detach;
642 dev->ib_dev.process_mad = mthca_process_mad; 721 dev->ib_dev.process_mad = mthca_process_mad;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 0598f3905d9a..619710f95a87 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -60,6 +60,24 @@ struct mthca_mr {
60 u32 first_seg; 60 u32 first_seg;
61}; 61};
62 62
63struct mthca_fmr {
64 struct ib_fmr ibmr;
65 struct ib_fmr_attr attr;
66 int order;
67 u32 first_seg;
68 int maps;
69 union {
70 struct {
71 struct mthca_mpt_entry __iomem *mpt;
72 u64 __iomem *mtts;
73 } tavor;
74 struct {
75 struct mthca_mpt_entry *mpt;
76 __be64 *mtts;
77 } arbel;
78 } mem;
79};
80
63struct mthca_pd { 81struct mthca_pd {
64 struct ib_pd ibpd; 82 struct ib_pd ibpd;
65 u32 pd_num; 83 u32 pd_num;
@@ -218,6 +236,11 @@ struct mthca_sqp {
218 dma_addr_t header_dma; 236 dma_addr_t header_dma;
219}; 237};
220 238
239static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
240{
241 return container_of(ibmr, struct mthca_fmr, ibmr);
242}
243
221static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr) 244static inline struct mthca_mr *to_mmr(struct ib_mr *ibmr)
222{ 245{
223 return container_of(ibmr, struct mthca_mr, ibmr); 246 return container_of(ibmr, struct mthca_mr, ibmr);