author    Michael S. Tsirkin <mst@mellanox.co.il>    2005-04-16 18:26:30 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:26:30 -0400
commit    e0f5fdca1ca9d1659b920827e5cf6dbad20e5391
tree      71e6410e14c4c285ce128e99070a38ded75a14f7
parent    d0a9d25cdcd511dd523357bc902979220dc72a2e
[PATCH] IB/mthca: add fast memory region implementation

Implement fast memory regions (FMRs), where the driver writes directly
into the HCA's translation tables rather than requiring a firmware
command. For Tavor, MTTs for FMR are separate from regular MTTs, and
are reserved at driver initialization. This is done to limit the amount
of virtual memory needed to map the MTTs. For Arbel, there's no such
limitation, and all MTTs and MPTs may be used for FMR or for regular MR.

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <roland@topspin.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
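[Editor's note] For context, the consumer-facing side of this feature is the FMR verbs API, which this patch implements at the driver layer. The sketch below shows roughly how an upper-layer protocol would drive it; it is not part of the patch, and the signatures and field names follow the 2.6.12-era ib_verbs.h as best recalled, so treat them as assumptions to check against the tree.

	/*
	 * Illustrative FMR consumer (not part of this patch). Header path
	 * and exact signatures are assumptions from the 2.6.12-era verbs.
	 */
	#include <linux/list.h>
	#include <ib_verbs.h>

	static int example_fmr_cycle(struct ib_pd *pd, u64 *page_list,
				     int npages, u64 iova)
	{
		struct ib_fmr_attr attr = {
			.max_pages = 64,	/* most pages per mapping */
			.max_maps  = 32,	/* remaps allowed before unmap */
			.page_size = 12,	/* log2 page size: 4KB pages */
		};
		struct ib_fmr *fmr;
		LIST_HEAD(fmr_list);
		int err;

		fmr = ib_alloc_fmr(pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE, &attr);
		if (IS_ERR(fmr))
			return PTR_ERR(fmr);

		/* Fast path: remap without a firmware command. */
		err = ib_map_phys_fmr(fmr, page_list, npages, iova);
		if (err)
			goto out;

		/* ... post work requests using fmr->lkey / fmr->rkey ... */

		/* Unmapping is batched; it is the expensive operation. */
		list_add_tail(&fmr->list, &fmr_list);
		err = ib_unmap_fmr(&fmr_list);
	out:
		ib_dealloc_fmr(fmr);
		return err;
	}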
Diffstat (limited to 'drivers/infiniband/hw/mthca/mthca_mr.c')

 drivers/infiniband/hw/mthca/mthca_mr.c | 386 +++++++++++++++++++++++++++++--
 1 file changed, 376 insertions(+), 10 deletions(-)
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ac3265d0bf79..a85b503b8522 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -66,6 +66,9 @@ struct mthca_mpt_entry {
 
 #define MTHCA_MTT_FLAG_PRESENT 1
 
+#define MTHCA_MPT_STATUS_SW 0xF0
+#define MTHCA_MPT_STATUS_HW 0x00
+
 /*
  * Buddy allocator for MTT segments (currently not very efficient
  * since it doesn't keep a free list and just searches linearly
@@ -442,6 +445,20 @@ err_out_mpt_free:
 	return err;
 }
 
+/* Free mr or fmr */
+static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
+			      u32 first_seg, struct mthca_buddy *buddy)
+{
+	if (order >= 0)
+		mthca_free_mtt(dev, first_seg, order, buddy);
+
+	if (dev->hca_type == ARBEL_NATIVE)
+		mthca_table_put(dev, dev->mr_table.mpt_table,
+				arbel_key_to_hw_index(lkey));
+
+	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
+}
+
 void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 {
 	int err;
@@ -459,18 +476,288 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
 			   status);
 
-	if (mr->order >= 0)
-		mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);
+	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
+			  &dev->mr_table.mtt_buddy);
+}
+
+int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
+		    u32 access, struct mthca_fmr *mr)
+{
+	struct mthca_mpt_entry *mpt_entry;
+	void *mailbox;
+	u64 mtt_seg;
+	u32 key, idx;
+	u8 status;
+	int list_len = mr->attr.max_pages;
+	int err = -ENOMEM;
+	int i;
+
+	might_sleep();
+
+	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
+		return -EINVAL;
+
+	/* For Arbel, all MTTs must fit in the same page. */
+	if (dev->hca_type == ARBEL_NATIVE &&
+	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
+		return -EINVAL;
+
+	mr->maps = 0;
+
+	key = mthca_alloc(&dev->mr_table.mpt_alloc);
+	if (key == -1)
+		return -ENOMEM;
+
+	idx = key & (dev->limits.num_mpts - 1);
+	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
+		if (err)
+			goto err_out_mpt_free;
+
+		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
+		BUG_ON(!mr->mem.arbel.mpt);
+	} else
+		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
+			sizeof *(mr->mem.tavor.mpt) * idx;
+
+	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
+	     i < list_len;
+	     i <<= 1, ++mr->order)
+		; /* nothing */
+
+	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
+					dev->mr_table.fmr_mtt_buddy);
+	if (mr->first_seg == -1)
+		goto err_out_table;
+
+	mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;
+
+	if (dev->hca_type == ARBEL_NATIVE) {
+		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
+						      mr->first_seg);
+		BUG_ON(!mr->mem.arbel.mtts);
+	} else
+		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
+
+	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
+			  GFP_KERNEL);
+	if (!mailbox)
+		goto err_out_free_mtt;
+
+	mpt_entry = MAILBOX_ALIGN(mailbox);
+
+	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS |
+				       MTHCA_MPT_FLAG_MIO     |
+				       MTHCA_MPT_FLAG_REGION  |
+				       access);
+
+	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
+	mpt_entry->key       = cpu_to_be32(key);
+	mpt_entry->pd        = cpu_to_be32(pd);
+	memset(&mpt_entry->start, 0,
+	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
+	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
+
+	if (0) {
+		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
+		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
+			if (i % 4 == 0)
+				printk("[%02x] ", i * 4);
+			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
+			if ((i + 1) % 4 == 0)
+				printk("\n");
+		}
+	}
+
+	err = mthca_SW2HW_MPT(dev, mpt_entry,
+			      key & (dev->limits.num_mpts - 1),
+			      &status);
+	if (err) {
+		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
+		goto err_out_mailbox_free;
+	}
+	if (status) {
+		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
+			   status);
+		err = -EINVAL;
+		goto err_out_mailbox_free;
+	}
+
+	kfree(mailbox);
+	return 0;
+
+err_out_mailbox_free:
+	kfree(mailbox);
+
+err_out_free_mtt:
+	mthca_free_mtt(dev, mr->first_seg, mr->order,
+		       dev->mr_table.fmr_mtt_buddy);
 
+err_out_table:
 	if (dev->hca_type == ARBEL_NATIVE)
-		mthca_table_put(dev, dev->mr_table.mpt_table,
-				key_to_hw_index(dev, mr->ibmr.lkey));
-	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, mr->ibmr.lkey));
+		mthca_table_put(dev, dev->mr_table.mpt_table, key);
+
+err_out_mpt_free:
+	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
+	return err;
+}
+
+int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+	if (fmr->maps)
+		return -EBUSY;
+
+	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
+			  dev->mr_table.fmr_mtt_buddy);
+	return 0;
+}
+
+static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
+				  int list_len, u64 iova)
+{
+	int i, page_mask;
+
+	if (list_len > fmr->attr.max_pages)
+		return -EINVAL;
+
+	page_mask = (1 << fmr->attr.page_size) - 1;
+
+	/* We are getting page lists, so va must be page aligned. */
+	if (iova & page_mask)
+		return -EINVAL;
+
+	/* Trust the user not to pass misaligned data in page_list */
+	if (0)
+		for (i = 0; i < list_len; ++i) {
+			if (page_list[i] & ~page_mask)
+				return -EINVAL;
+		}
+
+	if (fmr->maps >= fmr->attr.max_maps)
+		return -EINVAL;
+
+	return 0;
+}
+
+
+int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+			     int list_len, u64 iova)
+{
+	struct mthca_fmr *fmr = to_mfmr(ibfmr);
+	struct mthca_dev *dev = to_mdev(ibfmr->device);
+	struct mthca_mpt_entry mpt_entry;
+	u32 key;
+	int i, err;
+
+	err = mthca_check_fmr(fmr, page_list, list_len, iova);
+	if (err)
+		return err;
+
+	++fmr->maps;
+
+	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
+	key += dev->limits.num_mpts;
+	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
+
+	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+
+	for (i = 0; i < list_len; ++i) {
+		__be64 mtt_entry = cpu_to_be64(page_list[i] |
+					       MTHCA_MTT_FLAG_PRESENT);
+		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
+	}
+
+	mpt_entry.lkey   = cpu_to_be32(key);
+	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+	mpt_entry.start  = cpu_to_be64(iova);
+
+	writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
+	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
+		    offsetof(struct mthca_mpt_entry, window_count) -
+		    offsetof(struct mthca_mpt_entry, start));
+
+	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
+
+	return 0;
+}
+
+int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+			     int list_len, u64 iova)
+{
+	struct mthca_fmr *fmr = to_mfmr(ibfmr);
+	struct mthca_dev *dev = to_mdev(ibfmr->device);
+	u32 key;
+	int i, err;
+
+	err = mthca_check_fmr(fmr, page_list, list_len, iova);
+	if (err)
+		return err;
+
+	++fmr->maps;
+
+	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+	key += dev->limits.num_mpts;
+	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
+
+	wmb();
+
+	for (i = 0; i < list_len; ++i)
+		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
+						     MTHCA_MTT_FLAG_PRESENT);
+
+	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
+	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
+	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
+	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);
+
+	wmb();
+
+	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
+
+	wmb();
+
+	return 0;
+}
+
+void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+	u32 key;
+
+	if (!fmr->maps)
+		return;
+
+	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
+	key &= dev->limits.num_mpts - 1;
+	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
+
+	fmr->maps = 0;
+
+	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
+}
+
+void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
+{
+	u32 key;
+
+	if (!fmr->maps)
+		return;
+
+	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
+	key &= dev->limits.num_mpts - 1;
+	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
+
+	fmr->maps = 0;
+
+	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
 }
 
 int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 {
-	int err;
+	int err, i;
 
 	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
 			       dev->limits.num_mpts,
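[Editor's note] The sizing loop in mthca_fmr_alloc() above rounds max_pages up to a power-of-two number of MTT entries and counts the doublings to get a buddy-allocator order. A standalone sketch of that arithmetic, assuming MTHCA_MTT_SEG_SIZE is 64 bytes (8 entries of 8 bytes each, as in the mthca driver):

	/* Standalone model of the buddy-order computation; the constant
	 * value 64 is an assumption to verify against mthca headers. */
	#include <stdio.h>

	#define MTHCA_MTT_SEG_SIZE 64

	static int fmr_order(int max_pages)
	{
		int i, order;

		/* Double entry capacity until it covers max_pages;
		 * each doubling is one buddy order. */
		for (i = MTHCA_MTT_SEG_SIZE / 8, order = 0;
		     i < max_pages;
		     i <<= 1, ++order)
			; /* nothing */

		return order;
	}

	int main(void)
	{
		/* 21 pages: 8 -> 16 -> 32 entries, so order 2 (4 segments). */
		printf("max_pages=21 -> order %d\n", fmr_order(21));
		/* 8 pages fit in one segment: order 0. */
		printf("max_pages=8  -> order %d\n", fmr_order(8));
		return 0;
	}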
@@ -478,23 +765,93 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev)
 	if (err)
 		return err;
 
+	if (dev->hca_type != ARBEL_NATIVE &&
+	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
+		dev->limits.fmr_reserved_mtts = 0;
+	else
+		dev->mthca_flags |= MTHCA_FLAG_FMR;
+
 	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
 			       fls(dev->limits.num_mtt_segs - 1));
+
 	if (err)
 		goto err_mtt_buddy;
 
+	dev->mr_table.tavor_fmr.mpt_base = NULL;
+	dev->mr_table.tavor_fmr.mtt_base = NULL;
+
+	if (dev->limits.fmr_reserved_mtts) {
+		i = fls(dev->limits.fmr_reserved_mtts - 1);
+
+		if (i >= 31) {
+			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
+			err = -EINVAL;
+			goto err_fmr_mpt;
+		}
+
+		dev->mr_table.tavor_fmr.mpt_base =
+			ioremap(dev->mr_table.mpt_base,
+				(1 << i) * sizeof (struct mthca_mpt_entry));
+
+		if (!dev->mr_table.tavor_fmr.mpt_base) {
+			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
+			err = -ENOMEM;
+			goto err_fmr_mpt;
+		}
+
+		dev->mr_table.tavor_fmr.mtt_base =
+			ioremap(dev->mr_table.mtt_base,
+				(1 << i) * MTHCA_MTT_SEG_SIZE);
+		if (!dev->mr_table.tavor_fmr.mtt_base) {
+			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
+			err = -ENOMEM;
+			goto err_fmr_mtt;
+		}
+
+		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
+		if (err)
+			goto err_fmr_mtt_buddy;
+
+		/* Prevent regular MRs from using FMR keys */
+		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
+		if (err)
+			goto err_reserve_fmr;
+
+		dev->mr_table.fmr_mtt_buddy =
+			&dev->mr_table.tavor_fmr.mtt_buddy;
+	} else
+		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
+
+	/* FMR table is always the first, take reserved MTTs out of there */
 	if (dev->limits.reserved_mtts) {
-		if (mthca_alloc_mtt(dev, fls(dev->limits.reserved_mtts - 1),
-				    &dev->mr_table.mtt_buddy) == -1) {
+		i = fls(dev->limits.reserved_mtts - 1);
+
+		if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
 			mthca_warn(dev, "MTT table of order %d is too small.\n",
-				   dev->mr_table.mtt_buddy.max_order);
+				   dev->mr_table.fmr_mtt_buddy->max_order);
 			err = -ENOMEM;
-			goto err_mtt_buddy;
+			goto err_reserve_mtts;
 		}
 	}
 
 	return 0;
 
+err_reserve_mtts:
+err_reserve_fmr:
+	if (dev->limits.fmr_reserved_mtts)
+		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
+err_fmr_mtt_buddy:
+	if (dev->mr_table.tavor_fmr.mtt_base)
+		iounmap(dev->mr_table.tavor_fmr.mtt_base);
+
+err_fmr_mtt:
+	if (dev->mr_table.tavor_fmr.mpt_base)
+		iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
+err_fmr_mpt:
+	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
 err_mtt_buddy:
 	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
 
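[Editor's note] The map paths above advance the key by dev->limits.num_mpts on every remap: the low bits keep indexing the same MPT entry, while the full key value changes, so rkeys handed out for an earlier mapping stop matching. The index/key transforms are helpers defined earlier in mthca_mr.c (identity for Tavor, a byte rotation for Arbel); the sketch below reproduces them from memory, so treat the exact rotation as an assumption.

	/* Sketch of the FMR key-rotation trick; helpers reproduced from
	 * memory of mthca_mr.c, verify against the file. */
	#include <stdio.h>
	#include <stdint.h>

	static uint32_t arbel_hw_index_to_key(uint32_t ind)
	{
		return (ind >> 24) | (ind << 8);
	}

	static uint32_t arbel_key_to_hw_index(uint32_t key)
	{
		return (key << 24) | (key >> 8);
	}

	int main(void)
	{
		uint32_t num_mpts = 1 << 17;	/* hypothetical MPT count */
		uint32_t key = arbel_hw_index_to_key(5);

		/* Remap: bump the key, as in mthca_arbel_map_phys_fmr(). */
		uint32_t hw = arbel_key_to_hw_index(key) + num_mpts;
		uint32_t new_key = arbel_hw_index_to_key(hw);

		/* Same MPT entry (low bits), different key value. */
		printf("entry index: %u -> %u\n",
		       arbel_key_to_hw_index(key) & (num_mpts - 1),
		       hw & (num_mpts - 1));
		printf("key: 0x%08x -> 0x%08x\n", key, new_key);
		return 0;
	}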
@@ -504,6 +861,15 @@ err_mtt_buddy:
 void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
 {
 	/* XXX check if any MRs are still allocated? */
+	if (dev->limits.fmr_reserved_mtts)
+		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
+
 	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
+
+	if (dev->mr_table.tavor_fmr.mtt_base)
+		iounmap(dev->mr_table.tavor_fmr.mtt_base);
+	if (dev->mr_table.tavor_fmr.mpt_base)
+		iounmap(dev->mr_table.tavor_fmr.mpt_base);
+
 	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
 }
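[Editor's note] Taken together, the patch enforces a simple FMR lifecycle: up to max_maps cheap remaps, then an unmap that resets the key to its base index and returns the MPT to software ownership; freeing a still-mapped FMR fails with -EBUSY. A minimal userspace model of that state machine, with all names hypothetical, just to make the invariants explicit:

	/* Userspace model of the map/unmap rules in mthca_check_fmr(),
	 * the map paths, and mthca_free_fmr(). Names are hypothetical. */
	#include <stdio.h>
	#include <errno.h>
	#include <stdint.h>

	struct model_fmr {
		uint32_t key;
		int	 maps;
		int	 max_maps;
		uint32_t num_mpts;	/* key bump per remap */
	};

	static int model_map(struct model_fmr *f)
	{
		if (f->maps >= f->attr_max_maps_check())
			;		/* (see simplified check below) */
		if (f->maps >= f->max_maps)	/* mthca_check_fmr() rule */
			return -EINVAL;
		f->key += f->num_mpts;		/* invalidate stale keys */
		++f->maps;
		return 0;
	}

	static void model_unmap(struct model_fmr *f)
	{
		if (!f->maps)
			return;
		f->key &= f->num_mpts - 1;	/* reset to base index */
		f->maps = 0;
	}

	static int model_free(struct model_fmr *f)
	{
		return f->maps ? -EBUSY : 0;	/* mthca_free_fmr() rule */
	}

	int main(void)
	{
		struct model_fmr f = { .key = 5, .max_maps = 2,
				       .num_mpts = 1 << 17 };

		model_map(&f);
		printf("free while mapped: %d (expect %d)\n",
		       model_free(&f), -EBUSY);
		model_unmap(&f);
		printf("free after unmap: %d\n", model_free(&f));
		return 0;
	}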