Diffstat (limited to 'drivers/infiniband/hw/mlx5/mr.c')
-rw-r--r--   drivers/infiniband/hw/mlx5/mr.c   | 323
1 file changed, 274 insertions, 49 deletions
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5a80dd993761..32a28bd50b20 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -37,21 +37,34 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/ib_verbs.h>
 #include "mlx5_ib.h"
 
 enum {
         MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-        MLX5_UMR_ALIGN = 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+                MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+        __aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
+
+static int clean_mr(struct mlx5_ib_mr *mr);
 
-static __be64 *mr_align(__be64 *ptr, int align)
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-        unsigned long mask = align - 1;
+        int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 
-        return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+        /* Wait until all page fault handlers using the mr complete. */
+        synchronize_srcu(&dev->mr_srcu);
+#endif
+
+        return err;
 }
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -146,7 +159,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
                 mr->order = ent->order;
                 mr->umred = 1;
                 mr->dev = dev;
-                in->seg.status = 1 << 6;
+                in->seg.status = MLX5_MKEY_STATUS_FREE;
                 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
@@ -191,7 +204,7 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
                 ent->cur--;
                 ent->size--;
                 spin_unlock_irq(&ent->lock);
-                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+                err = destroy_mkey(dev, mr);
                 if (err)
                         mlx5_ib_warn(dev, "failed destroy mkey\n");
                 else
@@ -482,7 +495,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
                 ent->cur--;
                 ent->size--;
                 spin_unlock_irq(&ent->lock);
-                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+                err = destroy_mkey(dev, mr);
                 if (err)
                         mlx5_ib_warn(dev, "failed destroy mkey\n");
                 else
@@ -668,7 +681,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
 
 static int use_umr(int order)
 {
-        return order <= 17;
+        return order <= MLX5_MAX_UMR_SHIFT;
 }
 
 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -678,6 +691,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 {
         struct mlx5_ib_dev *dev = to_mdev(pd->device);
         struct ib_mr *mr = dev->umrc.mr;
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 
         sg->addr = dma;
         sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -692,21 +706,24 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
         wr->num_sge = 0;
 
         wr->opcode = MLX5_IB_WR_UMR;
-        wr->wr.fast_reg.page_list_len = n;
-        wr->wr.fast_reg.page_shift = page_shift;
-        wr->wr.fast_reg.rkey = key;
-        wr->wr.fast_reg.iova_start = virt_addr;
-        wr->wr.fast_reg.length = len;
-        wr->wr.fast_reg.access_flags = access_flags;
-        wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+
+        umrwr->npages = n;
+        umrwr->page_shift = page_shift;
+        umrwr->mkey = key;
+        umrwr->target.virt_addr = virt_addr;
+        umrwr->length = len;
+        umrwr->access_flags = access_flags;
+        umrwr->pd = pd;
 }
 
 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                                struct ib_send_wr *wr, u32 key)
 {
-        wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
+        wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
         wr->opcode = MLX5_IB_WR_UMR;
-        wr->wr.fast_reg.rkey = key;
+        umrwr->mkey = key;
 }
 
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
@@ -742,7 +759,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         struct ib_send_wr wr, *bad;
         struct mlx5_ib_mr *mr;
         struct ib_sge sg;
-        int size = sizeof(u64) * npages;
+        int size;
+        __be64 *mr_pas;
+        __be64 *pas;
+        dma_addr_t dma;
         int err = 0;
         int i;
 
@@ -761,25 +781,31 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         if (!mr)
                 return ERR_PTR(-EAGAIN);
 
-        mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
-        if (!mr->pas) {
+        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+         * To avoid copying garbage after the pas array, we allocate
+         * a little more. */
+        size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+        mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+        if (!mr_pas) {
                 err = -ENOMEM;
                 goto free_mr;
         }
 
-        mlx5_ib_populate_pas(dev, umem, page_shift,
-                             mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+        pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+        mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+        /* Clear padding after the actual pages. */
+        memset(pas + npages, 0, size - npages * sizeof(u64));
 
-        mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
-                                 DMA_TO_DEVICE);
-        if (dma_mapping_error(ddev, mr->dma)) {
+        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+        if (dma_mapping_error(ddev, dma)) {
                 err = -ENOMEM;
                 goto free_pas;
         }
 
         memset(&wr, 0, sizeof(wr));
         wr.wr_id = (u64)(unsigned long)&umr_context;
-        prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+        prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
+                         virt_addr, len, access_flags);
 
         mlx5_ib_init_umr_context(&umr_context);
         down(&umrc->sem);
@@ -799,12 +825,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
         mr->mmr.size = len;
         mr->mmr.pd = to_mpd(pd)->pdn;
 
+        mr->live = 1;
+
 unmap_dma:
         up(&umrc->sem);
-        dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
 free_pas:
-        kfree(mr->pas);
+        kfree(mr_pas);
 
 free_mr:
         if (err) {
@@ -815,6 +843,128 @@ free_mr:
         return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+                       int zap)
+{
+        struct mlx5_ib_dev *dev = mr->dev;
+        struct device *ddev = dev->ib_dev.dma_device;
+        struct umr_common *umrc = &dev->umrc;
+        struct mlx5_ib_umr_context umr_context;
+        struct ib_umem *umem = mr->umem;
+        int size;
+        __be64 *pas;
+        dma_addr_t dma;
+        struct ib_send_wr wr, *bad;
+        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+        struct ib_sge sg;
+        int err = 0;
+        const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+        const int page_index_mask = page_index_alignment - 1;
+        size_t pages_mapped = 0;
+        size_t pages_to_map = 0;
+        size_t pages_iter = 0;
+        int use_emergency_buf = 0;
+
+        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+         * so we need to align the offset and length accordingly */
+        if (start_page_index & page_index_mask) {
+                npages += start_page_index & page_index_mask;
+                start_page_index &= ~page_index_mask;
+        }
+
+        pages_to_map = ALIGN(npages, page_index_alignment);
+
+        if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+                return -EINVAL;
+
+        size = sizeof(u64) * pages_to_map;
+        size = min_t(int, PAGE_SIZE, size);
+        /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+         * code, when we are called from an invalidation. The pas buffer must
+         * be 2k-aligned for Connect-IB. */
+        pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+        if (!pas) {
+                mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+                pas = mlx5_ib_update_mtt_emergency_buffer;
+                size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+                use_emergency_buf = 1;
+                mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+                memset(pas, 0, size);
+        }
+        pages_iter = size / sizeof(u64);
+        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+        if (dma_mapping_error(ddev, dma)) {
+                mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+                err = -ENOMEM;
+                goto free_pas;
+        }
+
+        for (pages_mapped = 0;
+             pages_mapped < pages_to_map && !err;
+             pages_mapped += pages_iter, start_page_index += pages_iter) {
+                dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+                npages = min_t(size_t,
+                               pages_iter,
+                               ib_umem_num_pages(umem) - start_page_index);
+
+                if (!zap) {
+                        __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+                                               start_page_index, npages, pas,
+                                               MLX5_IB_MTT_PRESENT);
+                        /* Clear padding after the pages brought from the
+                         * umem. */
+                        memset(pas + npages, 0, size - npages * sizeof(u64));
+                }
+
+                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+                memset(&wr, 0, sizeof(wr));
+                wr.wr_id = (u64)(unsigned long)&umr_context;
+
+                sg.addr = dma;
+                sg.length = ALIGN(npages * sizeof(u64),
+                                  MLX5_UMR_MTT_ALIGNMENT);
+                sg.lkey = dev->umrc.mr->lkey;
+
+                wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+                                MLX5_IB_SEND_UMR_UPDATE_MTT;
+                wr.sg_list = &sg;
+                wr.num_sge = 1;
+                wr.opcode = MLX5_IB_WR_UMR;
+                umrwr->npages = sg.length / sizeof(u64);
+                umrwr->page_shift = PAGE_SHIFT;
+                umrwr->mkey = mr->mmr.key;
+                umrwr->target.offset = start_page_index;
+
+                mlx5_ib_init_umr_context(&umr_context);
+                down(&umrc->sem);
+                err = ib_post_send(umrc->qp, &wr, &bad);
+                if (err) {
+                        mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+                } else {
+                        wait_for_completion(&umr_context.done);
+                        if (umr_context.status != IB_WC_SUCCESS) {
+                                mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+                                            umr_context.status);
+                                err = -EFAULT;
+                        }
+                }
+                up(&umrc->sem);
+        }
+        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+        if (!use_emergency_buf)
+                free_page((unsigned long)pas);
+        else
+                mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+        return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                      u64 length, struct ib_umem *umem,
                                      int npages, int page_shift,
@@ -825,6 +975,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
         struct mlx5_ib_mr *mr;
         int inlen;
         int err;
+        bool pg_cap = !!(dev->mdev->caps.gen.flags &
+                         MLX5_DEV_CAP_FLAG_ON_DMND_PG);
 
         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
         if (!mr)
@@ -836,8 +988,12 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                 err = -ENOMEM;
                 goto err_1;
         }
-        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+                             pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
+        /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+         * in the page list submitted with the command. */
+        in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
         in->seg.flags = convert_access(access_flags) |
                 MLX5_ACCESS_MODE_MTT;
         in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -856,6 +1012,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                 goto err_2;
         }
         mr->umem = umem;
+        mr->live = 1;
         kvfree(in);
 
         mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
@@ -910,6 +1067,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                         mlx5_ib_dbg(dev, "cache empty for order %d", order);
                         mr = NULL;
                 }
+        } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+                err = -EINVAL;
+                pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+                goto error;
         }
 
         if (!mr)
@@ -925,16 +1086,51 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
         mr->umem = umem;
         mr->npages = npages;
-        spin_lock(&dev->mr_lock);
-        dev->mdev->priv.reg_pages += npages;
-        spin_unlock(&dev->mr_lock);
+        atomic_add(npages, &dev->mdev->priv.reg_pages);
         mr->ibmr.lkey = mr->mmr.key;
         mr->ibmr.rkey = mr->mmr.key;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+        if (umem->odp_data) {
+                /*
+                 * This barrier prevents the compiler from moving the
+                 * setting of umem->odp_data->private to point to our
+                 * MR, before reg_umr finished, to ensure that the MR
+                 * initialization have finished before starting to
+                 * handle invalidations.
+                 */
+                smp_wmb();
+                mr->umem->odp_data->private = mr;
+                /*
+                 * Make sure we will see the new
+                 * umem->odp_data->private value in the invalidation
+                 * routines, before we can get page faults on the
+                 * MR. Page faults can happen once we put the MR in
+                 * the tree, below this line. Without the barrier,
+                 * there can be a fault handling and an invalidation
+                 * before umem->odp_data->private == mr is visible to
+                 * the invalidation handler.
+                 */
+                smp_wmb();
+        }
+#endif
+
         return &mr->ibmr;
 
 error:
+        /*
+         * Destroy the umem *before* destroying the MR, to ensure we
+         * will not have any in-flight notifiers when destroying the
+         * MR.
+         *
+         * As the MR is completely invalid to begin with, and this
+         * error path is only taken if we can't push the mr entry into
+         * the pagefault tree, this is safe.
+         */
+
         ib_umem_release(umem);
+        /* Kill the MR, and return an error code. */
+        clean_mr(mr);
         return ERR_PTR(err);
 }
 
@@ -971,17 +1167,14 @@ error:
         return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-        struct mlx5_ib_mr *mr = to_mmr(ibmr);
-        struct ib_umem *umem = mr->umem;
-        int npages = mr->npages;
+        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
         int umred = mr->umred;
         int err;
 
         if (!umred) {
-                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+                err = destroy_mkey(dev, mr);
                 if (err) {
                         mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                      mr->mmr.key, err);
@@ -996,15 +1189,47 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
                 free_cached_mr(dev, mr);
         }
 
-        if (umem) {
+        if (!umred)
+                kfree(mr);
+
+        return 0;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+        struct mlx5_ib_mr *mr = to_mmr(ibmr);
+        int npages = mr->npages;
+        struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+        if (umem && umem->odp_data) {
+                /* Prevent new page faults from succeeding */
+                mr->live = 0;
+                /* Wait for all running page-fault handlers to finish. */
+                synchronize_srcu(&dev->mr_srcu);
+                /* Destroy all page mappings */
+                mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+                                         ib_umem_end(umem));
+                /*
+                 * We kill the umem before the MR for ODP,
+                 * so that there will not be any invalidations in
+                 * flight, looking at the *mr struct.
+                 */
                 ib_umem_release(umem);
-                spin_lock(&dev->mr_lock);
-                dev->mdev->priv.reg_pages -= npages;
-                spin_unlock(&dev->mr_lock);
+                atomic_sub(npages, &dev->mdev->priv.reg_pages);
+
+                /* Avoid double-freeing the umem. */
+                umem = NULL;
         }
+#endif
 
-        if (!umred)
-                kfree(mr);
+        clean_mr(mr);
+
+        if (umem) {
+                ib_umem_release(umem);
+                atomic_sub(npages, &dev->mdev->priv.reg_pages);
+        }
 
         return 0;
 }
@@ -1028,7 +1253,7 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
                 goto err_free;
         }
 
-        in->seg.status = 1 << 6; /* free */
+        in->seg.status = MLX5_MKEY_STATUS_FREE;
         in->seg.xlt_oct_size = cpu_to_be32(ndescs);
         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
         in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -1113,7 +1338,7 @@ int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
                 kfree(mr->sig);
         }
 
-        err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+        err = destroy_mkey(dev, mr);
         if (err) {
                 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                              mr->mmr.key, err);
@@ -1143,7 +1368,7 @@ struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                 goto err_free;
         }
 
-        in->seg.status = 1 << 6; /* free */
+        in->seg.status = MLX5_MKEY_STATUS_FREE;
         in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
         in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
         in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;