author		Jason Gunthorpe <jgg@mellanox.com>	2019-05-20 02:05:25 -0400
committer	Jason Gunthorpe <jgg@mellanox.com>	2019-05-21 14:23:24 -0400
commit		d2183c6f1958e6b6dfdde279f4cee04280710e34 (patch)
tree		c7e1d369b4747816879c5cc694049c791555250f
parent		69054666df0a9b4e8331319f98b6b9a88bc3fcc4 (diff)
RDMA/umem: Move page_shift from ib_umem to ib_odp_umem
This value has always been set to PAGE_SHIFT in the core code; the only
place that did anything different was the ODP path. Move the value into
the ODP struct and keep using it there, but change all the non-ODP code
to use PAGE_SHIFT/PAGE_SIZE/PAGE_MASK directly.

Reviewed-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
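At a glance, the split after this patch looks like the sketch below. This is
only a restatement for readers skimming the diff; it mirrors the helpers the
patch adds to include/rdma/ib_umem_odp.h and the PAGE_SHIFT-only handling in
include/rdma/ib_umem.h, and is not an additional change:

	/* ODP umems now carry the page shift themselves. */
	static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
	{
		return ALIGN_DOWN(umem_odp->umem.address,
				  1UL << umem_odp->page_shift);
	}

	/* Regular umems are always PAGE_SIZE granular. */
	static inline size_t ib_umem_num_pages(struct ib_umem *umem)
	{
		return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
			ALIGN_DOWN(umem->address, PAGE_SIZE)) >> PAGE_SHIFT;
	}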
-rw-r--r--  drivers/infiniband/core/umem.c              3
-rw-r--r--  drivers/infiniband/core/umem_odp.c         79
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c     3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c   10
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c             8
-rw-r--r--  drivers/infiniband/hw/mlx4/srq.c            2
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c           20
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c             5
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c           23
-rw-r--r--  drivers/infiniband/hw/nes/nes_verbs.c       9
-rw-r--r--  include/rdma/ib_umem.h                     19
-rw-r--r--  include/rdma/ib_umem_odp.h                 20
12 files changed, 99 insertions, 102 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index e7ea819fcb11..7edc5839606b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -244,7 +244,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	umem->context = context;
 	umem->length = size;
 	umem->address = addr;
-	umem->page_shift = PAGE_SHIFT;
 	umem->writable = ib_access_writable(access);
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
@@ -385,7 +384,7 @@ int ib_umem_page_count(struct ib_umem *umem)
 
 	n = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
-		n += sg_dma_len(sg) >> umem->page_shift;
+		n += sg_dma_len(sg) >> PAGE_SHIFT;
 
 	return n;
 }
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f962b5bbfa40..c3b3c523401f 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -59,7 +59,7 @@ static u64 node_start(struct umem_odp_node *n)
 	struct ib_umem_odp *umem_odp =
 			container_of(n, struct ib_umem_odp, interval_tree);
 
-	return ib_umem_start(&umem_odp->umem);
+	return ib_umem_start(umem_odp);
 }
 
 /* Note that the representation of the intervals in the interval tree
@@ -72,7 +72,7 @@ static u64 node_last(struct umem_odp_node *n)
 	struct ib_umem_odp *umem_odp =
 			container_of(n, struct ib_umem_odp, interval_tree);
 
-	return ib_umem_end(&umem_odp->umem) - 1;
+	return ib_umem_end(umem_odp) - 1;
 }
 
 INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
@@ -107,8 +107,6 @@ static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
 static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 					       u64 start, u64 end, void *cookie)
 {
-	struct ib_umem *umem = &umem_odp->umem;
-
 	/*
 	 * Increase the number of notifiers running, to
 	 * prevent any further fault handling on this MR.
@@ -119,8 +117,8 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 	 * all pending page faults. */
 	smp_wmb();
 	complete_all(&umem_odp->notifier_completion);
-	umem->context->invalidate_range(umem_odp, ib_umem_start(umem),
-					ib_umem_end(umem));
+	umem_odp->umem.context->invalidate_range(
+		umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
 	return 0;
 }
 
@@ -205,10 +203,9 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
 static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 {
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-	struct ib_umem *umem = &umem_odp->umem;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
 		rbt_ib_umem_insert(&umem_odp->interval_tree,
 				   &per_mm->umem_tree);
 	up_write(&per_mm->umem_rwsem);
@@ -217,10 +214,9 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 {
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-	struct ib_umem *umem = &umem_odp->umem;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
 		rbt_ib_umem_remove(&umem_odp->interval_tree,
 				   &per_mm->umem_tree);
 	complete_all(&umem_odp->notifier_completion);
@@ -351,7 +347,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	umem->context = ctx;
 	umem->length = size;
 	umem->address = addr;
-	umem->page_shift = PAGE_SHIFT;
+	odp_data->page_shift = PAGE_SHIFT;
 	umem->writable = root->umem.writable;
 	umem->is_odp = 1;
 	odp_data->per_mm = per_mm;
@@ -405,18 +401,19 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	struct mm_struct *mm = umem->owning_mm;
 	int ret_val;
 
+	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
 		struct hstate *h;
 
 		down_read(&mm->mmap_sem);
-		vma = find_vma(mm, ib_umem_start(umem));
+		vma = find_vma(mm, ib_umem_start(umem_odp));
 		if (!vma || !is_vm_hugetlb_page(vma)) {
 			up_read(&mm->mmap_sem);
 			return -EINVAL;
 		}
 		h = hstate_vma(vma);
-		umem->page_shift = huge_page_shift(h);
+		umem_odp->page_shift = huge_page_shift(h);
 		up_read(&mm->mmap_sem);
 	}
 
@@ -424,16 +421,16 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 
 	init_completion(&umem_odp->notifier_completion);
 
-	if (ib_umem_num_pages(umem)) {
+	if (ib_umem_odp_num_pages(umem_odp)) {
 		umem_odp->page_list =
 			vzalloc(array_size(sizeof(*umem_odp->page_list),
-					   ib_umem_num_pages(umem)));
+					   ib_umem_odp_num_pages(umem_odp)));
 		if (!umem_odp->page_list)
 			return -ENOMEM;
 
 		umem_odp->dma_list =
 			vzalloc(array_size(sizeof(*umem_odp->dma_list),
-					   ib_umem_num_pages(umem)));
+					   ib_umem_odp_num_pages(umem_odp)));
 		if (!umem_odp->dma_list) {
 			ret_val = -ENOMEM;
 			goto out_page_list;
@@ -456,16 +453,14 @@ out_page_list:
 
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
-	struct ib_umem *umem = &umem_odp->umem;
-
 	/*
 	 * Ensure that no more pages are mapped in the umem.
 	 *
 	 * It is the driver's responsibility to ensure, before calling us,
 	 * that the hardware will not attempt to access the MR any more.
 	 */
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
-				    ib_umem_end(umem));
+	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+				    ib_umem_end(umem_odp));
 
 	remove_umem_from_per_mm(umem_odp);
 	put_per_mm(umem_odp);
@@ -498,8 +493,8 @@ static int ib_umem_odp_map_dma_single_page(
 		u64 access_mask,
 		unsigned long current_seq)
 {
-	struct ib_umem *umem = &umem_odp->umem;
-	struct ib_device *dev = umem->context->device;
+	struct ib_ucontext *context = umem_odp->umem.context;
+	struct ib_device *dev = context->device;
 	dma_addr_t dma_addr;
 	int remove_existing_mapping = 0;
 	int ret = 0;
@@ -514,10 +509,9 @@ static int ib_umem_odp_map_dma_single_page(
 		goto out;
 	}
 	if (!(umem_odp->dma_list[page_index])) {
-		dma_addr = ib_dma_map_page(dev,
-					   page,
-					   0, BIT(umem->page_shift),
-					   DMA_BIDIRECTIONAL);
+		dma_addr =
+			ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift),
+					DMA_BIDIRECTIONAL);
 		if (ib_dma_mapping_error(dev, dma_addr)) {
 			ret = -EFAULT;
 			goto out;
@@ -540,11 +534,12 @@ out:
 
 	if (remove_existing_mapping) {
 		ib_umem_notifier_start_account(umem_odp);
-		umem->context->invalidate_range(
+		context->invalidate_range(
 			umem_odp,
-			ib_umem_start(umem) + (page_index << umem->page_shift),
-			ib_umem_start(umem) +
-			((page_index + 1) << umem->page_shift));
+			ib_umem_start(umem_odp) +
+				(page_index << umem_odp->page_shift),
+			ib_umem_start(umem_odp) +
+				((page_index + 1) << umem_odp->page_shift));
 		ib_umem_notifier_end_account(umem_odp);
 		ret = -EAGAIN;
 	}
@@ -581,27 +576,26 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 			      u64 bcnt, u64 access_mask,
 			      unsigned long current_seq)
 {
-	struct ib_umem *umem = &umem_odp->umem;
 	struct task_struct *owning_process = NULL;
 	struct mm_struct *owning_mm = umem_odp->umem.owning_mm;
 	struct page **local_page_list = NULL;
 	u64 page_mask, off;
-	int j, k, ret = 0, start_idx, npages = 0, page_shift;
-	unsigned int flags = 0;
+	int j, k, ret = 0, start_idx, npages = 0;
+	unsigned int flags = 0, page_shift;
 	phys_addr_t p = 0;
 
 	if (access_mask == 0)
 		return -EINVAL;
 
-	if (user_virt < ib_umem_start(umem) ||
-	    user_virt + bcnt > ib_umem_end(umem))
+	if (user_virt < ib_umem_start(umem_odp) ||
+	    user_virt + bcnt > ib_umem_end(umem_odp))
 		return -EFAULT;
 
 	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
 	if (!local_page_list)
 		return -ENOMEM;
 
-	page_shift = umem->page_shift;
+	page_shift = umem_odp->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
 	off = user_virt & (~page_mask);
 	user_virt = user_virt & page_mask;
@@ -621,7 +615,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 	if (access_mask & ODP_WRITE_ALLOWED_BIT)
 		flags |= FOLL_WRITE;
 
-	start_idx = (user_virt - ib_umem_start(umem)) >> page_shift;
+	start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift;
 	k = start_idx;
 
 	while (bcnt > 0) {
@@ -711,21 +705,20 @@ EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
 void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 				 u64 bound)
 {
-	struct ib_umem *umem = &umem_odp->umem;
 	int idx;
 	u64 addr;
-	struct ib_device *dev = umem->context->device;
+	struct ib_device *dev = umem_odp->umem.context->device;
 
-	virt = max_t(u64, virt, ib_umem_start(umem));
-	bound = min_t(u64, bound, ib_umem_end(umem));
+	virt = max_t(u64, virt, ib_umem_start(umem_odp));
+	bound = min_t(u64, bound, ib_umem_end(umem_odp));
 	/* Note that during the run of this function, the
 	 * notifiers_count of the MR is > 0, preventing any racing
 	 * faults from completion. We might be racing with other
 	 * invalidations, so we must make sure we free each page only
 	 * once. */
 	mutex_lock(&umem_odp->umem_mutex);
-	for (addr = virt; addr < bound; addr += BIT(umem->page_shift)) {
-		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+	for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
+		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
 		if (umem_odp->page_list[idx]) {
 			struct page *page = umem_odp->page_list[idx];
 			dma_addr_t dma = umem_odp->dma_list[idx];
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 9caf35061721..6e81ff3f1813 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -235,8 +235,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
 					&buf->hr_mtt);
 	} else {
 		ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
-					(*umem)->page_shift,
-					&buf->hr_mtt);
+					PAGE_SHIFT, &buf->hr_mtt);
 	}
 	if (ret)
 		goto err_buf;
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index b3421b1f21e0..ad15b41da30a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -264,8 +264,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	} else
 		ret = hns_roce_mtt_init(hr_dev,
 					ib_umem_page_count(srq->umem),
-					srq->umem->page_shift,
-					&srq->mtt);
+					PAGE_SHIFT, &srq->mtt);
 	if (ret)
 		goto err_buf;
 
@@ -291,10 +290,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 		ret = hns_roce_mtt_init(hr_dev, npages,
 					page_shift, &srq->idx_que.mtt);
 	} else {
-		ret = hns_roce_mtt_init(hr_dev,
-					ib_umem_page_count(srq->idx_que.umem),
-					srq->idx_que.umem->page_shift,
-					&srq->idx_que.mtt);
+		ret = hns_roce_mtt_init(
+			hr_dev, ib_umem_page_count(srq->idx_que.umem),
+			PAGE_SHIFT, &srq->idx_que.mtt);
 	}
 
 	if (ret) {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 355205a28544..b0b94dedb848 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -258,7 +258,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
 				       int *num_of_mtts)
 {
 	u64 block_shift = MLX4_MAX_MTT_SHIFT;
-	u64 min_shift = umem->page_shift;
+	u64 min_shift = PAGE_SHIFT;
 	u64 last_block_aligned_end = 0;
 	u64 current_block_start = 0;
 	u64 first_block_start = 0;
@@ -295,8 +295,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
 			 * in access to the wrong data.
 			 */
 			misalignment_bits =
-				(start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
-				^ current_block_start;
+				(start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
+				current_block_start;
 			block_shift = min(alignment_of(misalignment_bits),
 					  block_shift);
 		}
@@ -514,7 +514,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 			goto release_mpt_entry;
 		}
 		n = ib_umem_page_count(mmr->umem);
-		shift = mmr->umem->page_shift;
+		shift = PAGE_SHIFT;
 
 		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
 					      virt_addr, length, n, shift,
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 4bf2946b9759..c9f555e04c9f 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -115,7 +115,7 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
 		return PTR_ERR(srq->umem);
 
 	err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem),
-			    srq->umem->page_shift, &srq->mtt);
+			    PAGE_SHIFT, &srq->mtt);
 	if (err)
 		goto err_buf;
 
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 9f90be296ee0..fe1a76d8531c 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -55,9 +55,10 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	int i = 0;
 	struct scatterlist *sg;
 	int entry;
-	unsigned long page_shift = umem->page_shift;
 
 	if (umem->is_odp) {
+		unsigned int page_shift = to_ib_umem_odp(umem)->page_shift;
+
 		*ncont = ib_umem_page_count(umem);
 		*count = *ncont << (page_shift - PAGE_SHIFT);
 		*shift = page_shift;
@@ -67,15 +68,15 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 		return;
 	}
 
-	addr = addr >> page_shift;
+	addr = addr >> PAGE_SHIFT;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, BITS_PER_LONG);
 	if (max_page_shift)
-		m = min_t(unsigned long, max_page_shift - page_shift, m);
+		m = min_t(unsigned long, max_page_shift - PAGE_SHIFT, m);
 
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> page_shift;
-		pfn = sg_dma_address(sg) >> page_shift;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		pfn = sg_dma_address(sg) >> PAGE_SHIFT;
 		if (base + p != pfn) {
 			/* If either the offset or the new
 			 * base are unaligned update m
@@ -107,7 +108,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 
 		*ncont = 0;
 	}
-	*shift = page_shift + m;
+	*shift = PAGE_SHIFT + m;
 	*count = i;
 }
 
@@ -140,8 +141,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			    int page_shift, size_t offset, size_t num_pages,
 			    __be64 *pas, int access_flags)
 {
-	unsigned long umem_page_shift = umem->page_shift;
-	int shift = page_shift - umem_page_shift;
+	int shift = page_shift - PAGE_SHIFT;
 	int mask = (1 << shift) - 1;
 	int i, k, idx;
 	u64 cur = 0;
@@ -165,7 +165,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> umem_page_shift;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		base = sg_dma_address(sg);
 
 		/* Skip elements below offset */
@@ -184,7 +184,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 
 		for (; k < len; k++) {
 			if (!(i & mask)) {
-				cur = base + (k << umem_page_shift);
+				cur = base + (k << PAGE_SHIFT);
 				cur |= access_flags;
 				idx = (i >> shift) - offset;
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5f09699fab98..4d033796dcfc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1606,8 +1606,9 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
 		if (umem_odp->page_list)
-			mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem),
-						 ib_umem_end(umem));
+			mlx5_ib_invalidate_range(umem_odp,
						 ib_umem_start(umem_odp),
						 ib_umem_end(umem_odp));
 		else
 			mlx5_ib_free_implicit_mr(mr);
 		/*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 91507a2e9290..d0c6f9cc97ef 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -150,7 +150,7 @@ static struct ib_umem_odp *odp_lookup(u64 start, u64 length,
 		if (!rb)
 			goto not_found;
 		odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
-		if (ib_umem_start(&odp->umem) > start + length)
+		if (ib_umem_start(odp) > start + length)
 			goto not_found;
 	}
 not_found:
@@ -200,7 +200,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
 static void mr_leaf_free_action(struct work_struct *work)
 {
 	struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
-	int idx = ib_umem_start(&odp->umem) >> MLX5_IMR_MTT_SHIFT;
+	int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
 	struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
 
 	mr->parent = NULL;
@@ -224,7 +224,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
 				    sizeof(struct mlx5_mtt)) - 1;
 	u64 idx = 0, blk_start_idx = 0;
-	struct ib_umem *umem;
 	int in_block = 0;
 	u64 addr;
 
@@ -232,15 +231,14 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 		pr_err("invalidation called on NULL umem or non-ODP umem\n");
 		return;
 	}
-	umem = &umem_odp->umem;
 
 	mr = umem_odp->private;
 
 	if (!mr || !mr->ibmr.pd)
 		return;
 
-	start = max_t(u64, ib_umem_start(umem), start);
-	end = min_t(u64, ib_umem_end(umem), end);
+	start = max_t(u64, ib_umem_start(umem_odp), start);
+	end = min_t(u64, ib_umem_end(umem_odp), end);
 
 	/*
 	 * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
@@ -249,8 +247,8 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 	 * but they will write 0s as well, so no difference in the end result.
 	 */
 
-	for (addr = start; addr < end; addr += BIT(umem->page_shift)) {
-		idx = (addr - ib_umem_start(umem)) >> umem->page_shift;
+	for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
+		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
 		/*
 		 * Strive to write the MTTs in chunks, but avoid overwriting
 		 * non-existing MTTs. The huristic here can be improved to
@@ -544,13 +542,12 @@ static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
 			void *cookie)
 {
 	struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
-	struct ib_umem *umem = &umem_odp->umem;
 
 	if (mr->parent != imr)
 		return 0;
 
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem),
-				    ib_umem_end(umem));
+	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+				    ib_umem_end(umem_odp));
 
 	if (umem_odp->dying)
 		return 0;
@@ -602,9 +599,9 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	}
 
 next_mr:
-	size = min_t(size_t, bcnt, ib_umem_end(&odp->umem) - io_virt);
+	size = min_t(size_t, bcnt, ib_umem_end(odp) - io_virt);
 
-	page_shift = mr->umem->page_shift;
+	page_shift = odp->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
 	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
 	access_mask = ODP_READ_ALLOWED_BIT;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 49024326a518..ad2b8322cc3f 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2112,10 +2112,11 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return (struct ib_mr *)region;
 	}
 
-	nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
-		  " offset = %u, page size = %lu.\n",
-		  (unsigned long int)start, (unsigned long int)virt, (u32)length,
-		  ib_umem_offset(region), BIT(region->page_shift));
+	nes_debug(
+		NES_DBG_MR,
+		"User base = 0x%lX, Virt base = 0x%lX, length = %u, offset = %u, page size = %lu.\n",
+		(unsigned long)start, (unsigned long)virt, (u32)length,
+		ib_umem_offset(region), PAGE_SIZE);
 
 	skip_pages = ((u32)ib_umem_offset(region)) >> 12;
 
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 040d853077c6..1052d0d62be7 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -46,7 +46,6 @@ struct ib_umem {
 	struct mm_struct       *owning_mm;
 	size_t			length;
 	unsigned long		address;
-	int			page_shift;
 	u32 writable : 1;
 	u32 is_odp : 1;
 	struct work_struct	work;
@@ -58,24 +57,14 @@ struct ib_umem {
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
-	return umem->address & (BIT(umem->page_shift) - 1);
-}
-
-/* Returns the first page of an ODP umem. */
-static inline unsigned long ib_umem_start(struct ib_umem *umem)
-{
-	return umem->address - ib_umem_offset(umem);
-}
-
-/* Returns the address of the page after the last one of an ODP umem. */
-static inline unsigned long ib_umem_end(struct ib_umem *umem)
-{
-	return ALIGN(umem->address + umem->length, BIT(umem->page_shift));
+	return umem->address & ~PAGE_MASK;
 }
 
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift;
+	return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
+		ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
+	       PAGE_SHIFT;
 }
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index eeec4e53c448..479db5c98ff6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -76,6 +76,7 @@ struct ib_umem_odp {
 
 	struct completion	notifier_completion;
 	int			dying;
+	unsigned int		page_shift;
 	struct work_struct	work;
 };
 
@@ -84,6 +85,25 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
 	return container_of(umem, struct ib_umem_odp, umem);
 }
 
+/* Returns the first page of an ODP umem. */
+static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
+{
+	return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift);
+}
+
+/* Returns the address of the page after the last one of an ODP umem. */
+static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
+{
+	return ALIGN(umem_odp->umem.address + umem_odp->umem.length,
+		     1UL << umem_odp->page_shift);
+}
+
+static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
+{
+	return (ib_umem_end(umem_odp) - ib_umem_start(umem_odp)) >>
+	       umem_odp->page_shift;
+}
+
 /*
  * The lower 2 bits of the DMA address signal the R/W permissions for
  * the entry. To upgrade the permissions, provide the appropriate