-rw-r--r--  drivers/gpu/nvgpu/Makefile.nvgpu                   |   1
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c         | 114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c                |  25
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c                 | 109
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c            |  73
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c       | 142
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c                  |  27
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h                    |   9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c                 |  20
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h                 |  43
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.c             |  13
-rw-r--r--  drivers/gpu/nvgpu/gk20a/pramin_gk20a.h             |   6
-rw-r--r--  drivers/gpu/nvgpu/gp10b/gr_gp10b.c                 |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/gmmu.h             |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h  |   2
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/log.h              |   1
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h        |  45
-rw-r--r--  drivers/gpu/nvgpu/include/nvgpu/page_allocator.h   |  22
-rw-r--r--  drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c       |  55
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c                   |   4
20 files changed, 474 insertions(+), 241 deletions(-)
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index d02870fb..6e475fcb 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -55,6 +55,7 @@ nvgpu-y := \
55 common/mm/pd_cache.o \ 55 common/mm/pd_cache.o \
56 common/mm/vm.o \ 56 common/mm/vm.o \
57 common/mm/vm_area.o \ 57 common/mm/vm_area.o \
58 common/mm/nvgpu_mem.o \
58 common/bus.o \ 59 common/bus.o \
59 common/enabled.o \ 60 common/enabled.o \
60 common/pramin.o \ 61 common/pramin.o \
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
21#include <nvgpu/log.h> 21#include <nvgpu/log.h>
22#include <nvgpu/bug.h> 22#include <nvgpu/bug.h>
23#include <nvgpu/enabled.h> 23#include <nvgpu/enabled.h>
24#include <nvgpu/kmem.h>
24 25
25#include <nvgpu/linux/dma.h> 26#include <nvgpu/linux/dma.h>
26 27
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
395 396
396 return 0; 397 return 0;
397} 398}
399
400static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
401 struct nvgpu_mem_sgl *sgl)
402{
403 struct nvgpu_mem_sgl *head, *next;
404
405 head = nvgpu_kzalloc(g, sizeof(*sgl));
406 if (!head)
407 return NULL;
408
409 next = head;
410 while (true) {
411 nvgpu_log(g, gpu_dbg_sgl,
412 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
413 sgl->phys, sgl->dma, sgl->length);
414
415 next->dma = sgl->dma;
416 next->phys = sgl->phys;
417 next->length = sgl->length;
418 next->next = NULL;
419
420 sgl = nvgpu_mem_sgl_next(sgl);
421 if (!sgl)
422 break;
423
424 next->next = nvgpu_kzalloc(g, sizeof(*sgl));
425 if (!next->next) {
426 nvgpu_mem_sgl_free(g, head);
427 return NULL;
428 }
429 next = next->next;
430 }
431
432 return head;
433}
434
435static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
436 struct gk20a *g,
437 struct scatterlist *linux_sgl)
438{
439 struct nvgpu_page_alloc *vidmem_alloc;
440
441 vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
442 if (!vidmem_alloc)
443 return NULL;
444
445 nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
446
447 return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
448}
449
450struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
451 struct sg_table *sgt)
452{
453 struct nvgpu_mem_sgl *head, *sgl, *next;
454 struct scatterlist *linux_sgl = sgt->sgl;
455
456 if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
457 return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
458
459 head = nvgpu_kzalloc(g, sizeof(*sgl));
460 if (!head)
461 return NULL;
462
463 nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
464
465 sgl = head;
466 while (true) {
467 sgl->dma = sg_dma_address(linux_sgl);
468 sgl->phys = sg_phys(linux_sgl);
469 sgl->length = linux_sgl->length;
470
471 /*
472 * We don't like offsets in the pages here. This will cause
473 * problems.
474 */
475 if (WARN_ON(linux_sgl->offset)) {
476 nvgpu_mem_sgl_free(g, head);
477 return NULL;
478 }
479
480 nvgpu_log(g, gpu_dbg_sgl,
481 " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
482 sgl->phys, sgl->dma, sgl->length);
483
484 /*
485 * When there's no more SGL ents for the Linux SGL we are
486 * done. Don't bother making any more SGL ents for the nvgpu
487 * SGL.
488 */
489 linux_sgl = sg_next(linux_sgl);
490 if (!linux_sgl)
491 break;
492
493 next = nvgpu_kzalloc(g, sizeof(*sgl));
494 if (!next) {
495 nvgpu_mem_sgl_free(g, head);
496 return NULL;
497 }
498
499 sgl->next = next;
500 sgl = next;
501 }
502
503 nvgpu_log(g, gpu_dbg_sgl, "Done!");
504 return head;
505}
506
507struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
508 struct nvgpu_mem *mem)
509{
510 return nvgpu_mem_sgl_create(g, mem->priv.sgt);
511}
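The constructor above either duplicates the vidmem allocator's existing chain or walks the Linux sg_table entry by entry, so the rest of the driver only ever sees the OS-agnostic list. A minimal sketch of the intended caller-side usage, using only functions introduced in this change (the dump helper itself is illustrative, not part of the patch):

static void dump_mem_chunks(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_mem_sgl *head, *sgl;

	/* Duplicate the buffer's scatter list into nvgpu's own format. */
	head = nvgpu_mem_sgl_create(g, mem->priv.sgt);
	if (!head)
		return;

	for (sgl = head; sgl; sgl = nvgpu_mem_sgl_next(sgl))
		nvgpu_log(g, gpu_dbg_sgl, "chunk: phys=0x%llx len=0x%llx",
			  nvgpu_mem_sgl_phys(sgl),
			  nvgpu_mem_sgl_length(sgl));

	/* The copy belongs to the caller and must be freed explicitly. */
	nvgpu_mem_sgl_free(g, head);
}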
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
21#include <nvgpu/lock.h> 21#include <nvgpu/lock.h>
22#include <nvgpu/rbtree.h> 22#include <nvgpu/rbtree.h>
23#include <nvgpu/vm_area.h> 23#include <nvgpu/vm_area.h>
24#include <nvgpu/nvgpu_mem.h>
24#include <nvgpu/page_allocator.h> 25#include <nvgpu/page_allocator.h>
25 26
27#include <nvgpu/linux/nvgpu_mem.h>
28
26#include "gk20a/gk20a.h" 29#include "gk20a/gk20a.h"
27#include "gk20a/mm_gk20a.h" 30#include "gk20a/mm_gk20a.h"
28#include "gk20a/kind_gk20a.h" 31#include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
66 69
67 if (aperture == APERTURE_VIDMEM) { 70 if (aperture == APERTURE_VIDMEM) {
68 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); 71 struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
69 struct page_alloc_chunk *chunk = NULL; 72 struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
70 73
71 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 74 while (sgl_vid) {
72 page_alloc_chunk, list_entry) { 75 chunk_align = 1ULL <<
73 chunk_align = 1ULL << __ffs(chunk->base | 76 __ffs(nvgpu_mem_sgl_phys(sgl_vid) |
74 chunk->length); 77 nvgpu_mem_sgl_length(sgl_vid));
75 78
76 if (align) 79 if (align)
77 align = min(align, chunk_align); 80 align = min(align, chunk_align);
78 else 81 else
79 align = chunk_align; 82 align = chunk_align;
83
84 sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
80 } 85 }
81 86
82 return align; 87 return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
237 struct nvgpu_vm_area *vm_area = NULL; 242 struct nvgpu_vm_area *vm_area = NULL;
238 u32 ctag_offset; 243 u32 ctag_offset;
239 enum nvgpu_aperture aperture; 244 enum nvgpu_aperture aperture;
245 struct nvgpu_mem_sgl *nvgpu_sgl;
240 246
241 /* 247 /*
242 * The kind used as part of the key for map caching. HW may 248 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
393 ctag_offset += buffer_offset >> 399 ctag_offset += buffer_offset >>
394 ilog2(g->ops.fb.compression_page_size(g)); 400 ilog2(g->ops.fb.compression_page_size(g));
395 401
402 nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
403
396 /* update gmmu ptes */ 404 /* update gmmu ptes */
397 map_offset = g->ops.mm.gmmu_map(vm, map_offset, 405 map_offset = g->ops.mm.gmmu_map(vm,
398 bfr.sgt, 406 map_offset,
407 nvgpu_sgl,
399 buffer_offset, /* sg offset */ 408 buffer_offset, /* sg offset */
400 mapping_size, 409 mapping_size,
401 bfr.pgsz_idx, 410 bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
410 if (!map_offset) 419 if (!map_offset)
411 goto clean_up; 420 goto clean_up;
412 421
422 nvgpu_mem_sgl_free(g, nvgpu_sgl);
423
413 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); 424 mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
414 if (!mapped_buffer) { 425 if (!mapped_buffer) {
415 nvgpu_warn(g, "oom allocating tracking buffer"); 426 nvgpu_warn(g, "oom allocating tracking buffer");
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
65 struct gk20a *g = gk20a_from_vm(vm); 65 struct gk20a *g = gk20a_from_vm(vm);
66 u64 vaddr; 66 u64 vaddr;
67 67
68 struct sg_table *sgt = mem->priv.sgt; 68 struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
69
70 if (!sgl)
71 return -ENOMEM;
69 72
70 nvgpu_mutex_acquire(&vm->update_gmmu_lock); 73 nvgpu_mutex_acquire(&vm->update_gmmu_lock);
71 vaddr = g->ops.mm.gmmu_map(vm, addr, 74 vaddr = g->ops.mm.gmmu_map(vm, addr,
72 sgt, /* sg table */ 75 sgl, /* sg list */
73 0, /* sg offset */ 76 0, /* sg offset */
74 size, 77 size,
75 gmmu_page_size_kernel, 78 gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
82 NULL, /* mapping_batch handle */ 85 NULL, /* mapping_batch handle */
83 aperture); 86 aperture);
84 nvgpu_mutex_release(&vm->update_gmmu_lock); 87 nvgpu_mutex_release(&vm->update_gmmu_lock);
88
89 nvgpu_mem_sgl_free(g, sgl);
90
85 if (!vaddr) { 91 if (!vaddr) {
86 nvgpu_err(g, "failed to allocate va space"); 92 nvgpu_err(g, "failed to map buffer!");
87 return 0; 93 return 0;
88 } 94 }
89 95
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
91} 97}
92 98
93/* 99/*
94 * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. 100 * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
95 */ 101 */
96u64 nvgpu_gmmu_map(struct vm_gk20a *vm, 102u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
97 struct nvgpu_mem *mem, 103 struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
106} 112}
107 113
108/* 114/*
109 * Like nvgpu_gmmu_map() except it can work on a fixed address instead. 115 * Like nvgpu_gmmu_map() except this can work on a fixed address.
110 */ 116 */
111u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, 117u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
112 struct nvgpu_mem *mem, 118 struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
407 */ 413 */
408 target_addr = next_pd ? 414 target_addr = next_pd ?
409 nvgpu_pde_phys_addr(g, next_pd) : 415 nvgpu_pde_phys_addr(g, next_pd) :
410 g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); 416 phys_addr;
411 417
412 l->update_entry(vm, l, 418 l->update_entry(vm, l,
413 pd, pd_idx, 419 pd, pd_idx,
@@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm,
458 * VIDMEM version of the update_ptes logic. 464 * VIDMEM version of the update_ptes logic.
459 */ 465 */
460static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, 466static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
461 struct sg_table *sgt, 467 struct nvgpu_mem_sgl *sgl,
462 u64 space_to_skip, 468 u64 space_to_skip,
463 u64 virt_addr, 469 u64 virt_addr,
464 u64 length, 470 u64 length,
465 struct nvgpu_gmmu_attrs *attrs) 471 struct nvgpu_gmmu_attrs *attrs)
466{ 472{
467 struct nvgpu_page_alloc *alloc = NULL;
468 struct page_alloc_chunk *chunk = NULL;
469 u64 phys_addr, chunk_length; 473 u64 phys_addr, chunk_length;
470 int err = 0; 474 int err = 0;
471 475
472 if (!sgt) { 476 if (!sgl) {
473 /* 477 /*
474 * This is considered an unmap. Just pass in 0 as the physical 478 * This is considered an unmap. Just pass in 0 as the physical
475 * address for the entire GPU range. 479 * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
482 return err; 486 return err;
483 } 487 }
484 488
485 alloc = get_vidmem_page_alloc(sgt->sgl);
486
487 /* 489 /*
488 * Otherwise iterate across all the chunks in this allocation and 490 * Otherwise iterate across all the chunks in this allocation and
489 * map them. 491 * map them.
490 */ 492 */
491 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 493 while (sgl) {
492 page_alloc_chunk, list_entry) {
493 if (space_to_skip && 494 if (space_to_skip &&
494 space_to_skip >= chunk->length) { 495 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
495 space_to_skip -= chunk->length; 496 space_to_skip -= nvgpu_mem_sgl_length(sgl);
497 sgl = nvgpu_mem_sgl_next(sgl);
496 continue; 498 continue;
497 } 499 }
498 500
499 phys_addr = chunk->base + space_to_skip; 501 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
500 chunk_length = min(length, (chunk->length - space_to_skip)); 502 chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
503 space_to_skip));
501 504
502 err = __set_pd_level(vm, &vm->pdb, 505 err = __set_pd_level(vm, &vm->pdb,
503 0, 506 0,
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
518 521
519 if (length == 0) 522 if (length == 0)
520 break; 523 break;
524
525 sgl = nvgpu_mem_sgl_next(sgl);
521 } 526 }
522 527
523 return err; 528 return err;
524} 529}
525 530
526static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, 531static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
527 struct sg_table *sgt, 532 struct nvgpu_mem_sgl *sgl,
528 u64 space_to_skip, 533 u64 space_to_skip,
529 u64 virt_addr, 534 u64 virt_addr,
530 u64 length, 535 u64 length,
531 struct nvgpu_gmmu_attrs *attrs) 536 struct nvgpu_gmmu_attrs *attrs)
532{ 537{
533 int err; 538 int err;
534 struct scatterlist *sgl;
535 struct gk20a *g = gk20a_from_vm(vm); 539 struct gk20a *g = gk20a_from_vm(vm);
536 540
537 if (!sgt) { 541 if (!sgl) {
538 /* 542 /*
539 * This is considered an unmap. Just pass in 0 as the physical 543 * This is considered an unmap. Just pass in 0 as the physical
540 * address for the entire GPU range. 544 * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
548 } 552 }
549 553
550 /* 554 /*
551 * At this point we have a Linux scatter-gather list pointing to some 555 * At this point we have a scatter-gather list pointing to some number
552 * number of discontiguous chunks of memory. Iterate over that list and 556 * of discontiguous chunks of memory. We must iterate over that list and
553 * generate a GMMU map call for each chunk. There are two possibilities: 557 * generate a GMMU map call for each chunk. There are two possibilities:
554 * either the IOMMU is enabled or not. When the IOMMU is enabled the 558 * either an IOMMU is enabled or not. When an IOMMU is enabled the
555 * mapping is simple since the "physical" address is actually a virtual 559 * mapping is simple since the "physical" address is actually a virtual
556 * IO address and will be contiguous. The no-IOMMU case is more 560 * IO address and will be contiguous.
557 * complicated. We will have to iterate over the SGT and do a separate
558 * map for each chunk of the SGT.
559 */ 561 */
560 sgl = sgt->sgl;
561
562 if (!g->mm.bypass_smmu) { 562 if (!g->mm.bypass_smmu) {
563 u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); 563 u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
564 564
565 io_addr += space_to_skip; 565 io_addr += space_to_skip;
566 566
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
585 /* 585 /*
586 * Cut out sgl ents for space_to_skip. 586 * Cut out sgl ents for space_to_skip.
587 */ 587 */
588 if (space_to_skip && space_to_skip >= sgl->length) { 588 if (space_to_skip &&
589 space_to_skip -= sgl->length; 589 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
590 sgl = sg_next(sgl); 590 space_to_skip -= nvgpu_mem_sgl_length(sgl);
591 sgl = nvgpu_mem_sgl_next(sgl);
591 continue; 592 continue;
592 } 593 }
593 594
594 phys_addr = sg_phys(sgl) + space_to_skip; 595 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
595 chunk_length = min(length, sgl->length - space_to_skip); 596 chunk_length = min(length,
597 nvgpu_mem_sgl_length(sgl) - space_to_skip);
596 598
597 err = __set_pd_level(vm, &vm->pdb, 599 err = __set_pd_level(vm, &vm->pdb,
598 0, 600 0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
600 virt_addr, 602 virt_addr,
601 chunk_length, 603 chunk_length,
602 attrs); 604 attrs);
603 if (err)
604 return err;
605 605
606 space_to_skip = 0; 606 space_to_skip = 0;
607 virt_addr += chunk_length; 607 virt_addr += chunk_length;
608 length -= chunk_length; 608 length -= chunk_length;
609 sgl = sg_next(sgl); 609 sgl = nvgpu_mem_sgl_next(sgl);
610 610
611 if (length == 0) 611 if (length == 0)
612 break; 612 break;
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
624 * implementations. But the logic around that is generic to all chips. Every 624 * implementations. But the logic around that is generic to all chips. Every
625 * chip has some number of PDE levels and then a PTE level. 625 * chip has some number of PDE levels and then a PTE level.
626 * 626 *
627 * Each chunk of the incoming SGT is sent to the chip specific implementation 627 * Each chunk of the incoming SGL is sent to the chip specific implementation
628 * of page table update. 628 * of page table update.
629 * 629 *
630 * [*] Note: the "physical" address may actually be an IO virtual address in the 630 * [*] Note: the "physical" address may actually be an IO virtual address in the
631 * case of SMMU usage. 631 * case of SMMU usage.
632 */ 632 */
633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, 633static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
634 struct sg_table *sgt, 634 struct nvgpu_mem_sgl *sgl,
635 u64 space_to_skip, 635 u64 space_to_skip,
636 u64 virt_addr, 636 u64 virt_addr,
637 u64 length, 637 u64 length,
638 struct nvgpu_gmmu_attrs *attrs) 638 struct nvgpu_gmmu_attrs *attrs)
639{ 639{
640 struct gk20a *g = gk20a_from_vm(vm); 640 struct gk20a *g = gk20a_from_vm(vm);
641 struct nvgpu_page_alloc *alloc;
642 u64 phys_addr = 0;
643 u32 page_size; 641 u32 page_size;
644 int err; 642 int err;
645 643
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
665 return err; 663 return err;
666 } 664 }
667 665
668 if (sgt) {
669 if (attrs->aperture == APERTURE_VIDMEM) {
670 alloc = get_vidmem_page_alloc(sgt->sgl);
671
672 phys_addr = alloc->base;
673 } else
674 phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
675 }
676
677 __gmmu_dbg(g, attrs, 666 __gmmu_dbg(g, attrs,
678 "vm=%s " 667 "vm=%s "
679 "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " 668 "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
680 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " 669 "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
681 "kind=%#02x APT=%-6s %c%c%c%c%c", 670 "kind=%#02x APT=%-6s %c%c%c%c%c",
682 vm->name, 671 vm->name,
683 sgt ? "MAP" : "UNMAP", 672 sgl ? "MAP" : "UNMAP",
684 virt_addr, 673 virt_addr,
685 length, 674 length,
686 phys_addr, 675 sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
687 space_to_skip, 676 space_to_skip,
688 page_size >> 10, 677 page_size >> 10,
689 nvgpu_gmmu_perm_str(attrs->rw_flag), 678 nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
696 attrs->valid ? 'V' : '-'); 685 attrs->valid ? 'V' : '-');
697 686
698 /* 687 /*
699 * Handle VIDMEM progamming. Currently uses a different scatter list 688 * For historical reasons these are separate, but soon these will be
700 * format. 689 * unified.
701 */ 690 */
702 if (attrs->aperture == APERTURE_VIDMEM) 691 if (attrs->aperture == APERTURE_VIDMEM)
703 err = __nvgpu_gmmu_update_page_table_vidmem(vm, 692 err = __nvgpu_gmmu_update_page_table_vidmem(vm,
704 sgt, 693 sgl,
705 space_to_skip, 694 space_to_skip,
706 virt_addr, 695 virt_addr,
707 length, 696 length,
708 attrs); 697 attrs);
709 else 698 else
710 err = __nvgpu_gmmu_update_page_table_sysmem(vm, 699 err = __nvgpu_gmmu_update_page_table_sysmem(vm,
711 sgt, 700 sgl,
712 space_to_skip, 701 space_to_skip,
713 virt_addr, 702 virt_addr,
714 length, 703 length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
717 unmap_gmmu_pages(g, &vm->pdb); 706 unmap_gmmu_pages(g, &vm->pdb);
718 nvgpu_smp_mb(); 707 nvgpu_smp_mb();
719 708
720 __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); 709 __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
721 710
722 return err; 711 return err;
723} 712}
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
736 */ 725 */
737u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 726u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
738 u64 vaddr, 727 u64 vaddr,
739 struct sg_table *sgt, 728 struct nvgpu_mem_sgl *sgl,
740 u64 buffer_offset, 729 u64 buffer_offset,
741 u64 size, 730 u64 size,
742 int pgsz_idx, 731 int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
785 allocated = true; 774 allocated = true;
786 } 775 }
787 776
788 err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, 777 err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
789 vaddr, size, &attrs); 778 vaddr, size, &attrs);
790 if (err) { 779 if (err) {
791 nvgpu_err(g, "failed to update ptes on map"); 780 nvgpu_err(g, "failed to update ptes on map");
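Both update paths now share the same walk: drop list entries until space_to_skip falls inside a chunk, then map min(length, remainder of the chunk) at a time and advance. The bookkeeping, reduced to a sketch (the helper name is hypothetical; the real code calls __set_pd_level() where the log statement sits):

static void walk_map_chunks(struct vm_gk20a *vm, struct nvgpu_mem_sgl *sgl,
			    u64 space_to_skip, u64 virt_addr, u64 length)
{
	struct gk20a *g = gk20a_from_vm(vm);

	while (sgl) {
		u64 chunk_len;

		/* Skip whole chunks that lie before the requested offset. */
		if (space_to_skip &&
		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
			space_to_skip -= nvgpu_mem_sgl_length(sgl);
			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}

		chunk_len = min(length,
				nvgpu_mem_sgl_length(sgl) - space_to_skip);

		/* The real code programs the page tables here. */
		nvgpu_log(g, gpu_dbg_sgl, "map 0x%llx -> 0x%llx (0x%llx)",
			  nvgpu_mem_sgl_phys(sgl) + space_to_skip,
			  virt_addr, chunk_len);

		/* Only the first mapped chunk can start mid-chunk. */
		space_to_skip = 0;
		virt_addr += chunk_len;
		length -= chunk_len;
		sgl = nvgpu_mem_sgl_next(sgl);

		if (length == 0)
			break;
	}
}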
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@
1/*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16
17#include <nvgpu/kmem.h>
18#include <nvgpu/nvgpu_mem.h>
19
20#include "gk20a/gk20a.h"
21
22struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
23{
24 return sgl->next;
25}
26
27u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
28{
29 return sgl->phys;
30}
31
32u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
33{
34 return sgl->dma;
35}
36
37u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
38{
39 return sgl->length;
40}
41
42/*
43 * This builds a GPU address for the %sgl based on whether an IOMMU is present
44 * or not. It also handles turning the physical address into the true GPU
45 * physical address that should be programmed into the page tables.
46 */
47u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
48 struct nvgpu_gmmu_attrs *attrs)
49{
50 if (nvgpu_mem_sgl_dma(sgl) == 0)
51 return g->ops.mm.gpu_phys_addr(g, attrs,
52 nvgpu_mem_sgl_phys(sgl));
53
54 if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
55 return 0;
56
57 return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
58}
59
60void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
61{
62 struct nvgpu_mem_sgl *next;
63
64 /*
65 * Free each of the elements. We expect each element to have been
66 * nvgpu_k[mz]alloc()ed.
67 */
68 while (sgl) {
69 next = nvgpu_mem_sgl_next(sgl);
70 nvgpu_kfree(g, sgl);
71 sgl = next;
72 }
73}
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
147 struct nvgpu_page_alloc *alloc, 147 struct nvgpu_page_alloc *alloc,
148 bool free_buddy_alloc) 148 bool free_buddy_alloc)
149{ 149{
150 struct page_alloc_chunk *chunk; 150 struct nvgpu_mem_sgl *sgl = alloc->sgl;
151 151
152 while (!nvgpu_list_empty(&alloc->alloc_chunks)) { 152 if (free_buddy_alloc) {
153 chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, 153 while (sgl) {
154 page_alloc_chunk, 154 nvgpu_free(&a->source_allocator, sgl->phys);
155 list_entry); 155 sgl = nvgpu_mem_sgl_next(sgl);
156 nvgpu_list_del(&chunk->list_entry); 156 }
157
158 if (free_buddy_alloc)
159 nvgpu_free(&a->source_allocator, chunk->base);
160 nvgpu_kmem_cache_free(a->chunk_cache, chunk);
161 } 157 }
162 158
159 nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
163 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 160 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
164} 161}
165 162
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
243} 240}
244 241
245/* 242/*
246 * This expects @alloc to have 1 empty page_alloc_chunk already added to the 243 * This expects @alloc to have 1 empty sgl_entry ready for usage.
247 * alloc_chunks list.
248 */ 244 */
249static int __do_slab_alloc(struct nvgpu_page_allocator *a, 245static int __do_slab_alloc(struct nvgpu_page_allocator *a,
250 struct page_alloc_slab *slab, 246 struct page_alloc_slab *slab,
251 struct nvgpu_page_alloc *alloc) 247 struct nvgpu_page_alloc *alloc)
252{ 248{
253 struct page_alloc_slab_page *slab_page = NULL; 249 struct page_alloc_slab_page *slab_page = NULL;
254 struct page_alloc_chunk *chunk; 250 struct nvgpu_mem_sgl *sgl;
255 unsigned long offs; 251 unsigned long offs;
256 252
257 /* 253 /*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
302 BUG(); /* Should be impossible to hit this. */ 298 BUG(); /* Should be impossible to hit this. */
303 299
304 /* 300 /*
305 * Handle building the nvgpu_page_alloc struct. We expect one 301 * Handle building the nvgpu_page_alloc struct. We expect one sgl
306 * page_alloc_chunk to be present. 302 * to be present.
307 */ 303 */
308 alloc->slab_page = slab_page; 304 alloc->slab_page = slab_page;
309 alloc->nr_chunks = 1; 305 alloc->nr_chunks = 1;
310 alloc->length = slab_page->slab_size; 306 alloc->length = slab_page->slab_size;
311 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); 307 alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
312 308
313 chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, 309 sgl = alloc->sgl;
314 page_alloc_chunk, list_entry); 310 sgl->phys = alloc->base;
315 chunk->base = alloc->base; 311 sgl->dma = alloc->base;
316 chunk->length = alloc->length; 312 sgl->length = alloc->length;
313 sgl->next = NULL;
317 314
318 return 0; 315 return 0;
319} 316}
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
327 int err, slab_nr; 324 int err, slab_nr;
328 struct page_alloc_slab *slab; 325 struct page_alloc_slab *slab;
329 struct nvgpu_page_alloc *alloc = NULL; 326 struct nvgpu_page_alloc *alloc = NULL;
330 struct page_alloc_chunk *chunk = NULL; 327 struct nvgpu_mem_sgl *sgl = NULL;
331 328
332 /* 329 /*
333 * Align the length to a page and then divide by the page size (4k for 330 * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
341 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); 338 palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
342 goto fail; 339 goto fail;
343 } 340 }
344 chunk = nvgpu_kmem_cache_alloc(a->chunk_cache); 341 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
345 if (!chunk) { 342 if (!sgl) {
346 palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); 343 palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
347 goto fail; 344 goto fail;
348 } 345 }
349 346
350 nvgpu_init_list_node(&alloc->alloc_chunks); 347 alloc->sgl = sgl;
351 nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
352
353 err = __do_slab_alloc(a, slab, alloc); 348 err = __do_slab_alloc(a, slab, alloc);
354 if (err) 349 if (err)
355 goto fail; 350 goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
363fail: 358fail:
364 if (alloc) 359 if (alloc)
365 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 360 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
366 if (chunk) 361 if (sgl)
367 nvgpu_kmem_cache_free(a->chunk_cache, chunk); 362 nvgpu_kfree(a->owner->g, sgl);
368 return NULL; 363 return NULL;
369} 364}
370 365
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
426 struct nvgpu_page_allocator *a, u64 pages) 421 struct nvgpu_page_allocator *a, u64 pages)
427{ 422{
428 struct nvgpu_page_alloc *alloc; 423 struct nvgpu_page_alloc *alloc;
429 struct page_alloc_chunk *c; 424 struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
430 u64 max_chunk_len = pages << a->page_shift; 425 u64 max_chunk_len = pages << a->page_shift;
431 int i = 0; 426 int i = 0;
432 427
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
436 431
437 memset(alloc, 0, sizeof(*alloc)); 432 memset(alloc, 0, sizeof(*alloc));
438 433
439 nvgpu_init_list_node(&alloc->alloc_chunks);
440 alloc->length = pages << a->page_shift; 434 alloc->length = pages << a->page_shift;
441 435
442 while (pages) { 436 while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
482 goto fail_cleanup; 476 goto fail_cleanup;
483 } 477 }
484 478
485 c = nvgpu_kmem_cache_alloc(a->chunk_cache); 479 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
486 if (!c) { 480 if (!sgl) {
487 nvgpu_free(&a->source_allocator, chunk_addr); 481 nvgpu_free(&a->source_allocator, chunk_addr);
488 goto fail_cleanup; 482 goto fail_cleanup;
489 } 483 }
490 484
491 pages -= chunk_pages; 485 pages -= chunk_pages;
492 486
493 c->base = chunk_addr; 487 sgl->phys = chunk_addr;
494 c->length = chunk_len; 488 sgl->dma = chunk_addr;
495 nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); 489 sgl->length = chunk_len;
490
491 /*
492 * Build the singly linked list with a head node that is part of
493 * the list.
494 */
495 if (prev_sgl)
496 prev_sgl->next = sgl;
497 else
498 alloc->sgl = sgl;
499
500 prev_sgl = sgl;
496 501
497 i++; 502 i++;
498 } 503 }
499 504
500 alloc->nr_chunks = i; 505 alloc->nr_chunks = i;
501 c = nvgpu_list_first_entry(&alloc->alloc_chunks, 506 alloc->base = alloc->sgl->phys;
502 page_alloc_chunk, list_entry);
503 alloc->base = c->base;
504 507
505 return alloc; 508 return alloc;
506 509
507fail_cleanup: 510fail_cleanup:
508 while (!nvgpu_list_empty(&alloc->alloc_chunks)) { 511 sgl = alloc->sgl;
509 c = nvgpu_list_first_entry(&alloc->alloc_chunks, 512 while (sgl) {
510 page_alloc_chunk, list_entry); 513 struct nvgpu_mem_sgl *next = sgl->next;
511 nvgpu_list_del(&c->list_entry); 514
512 nvgpu_free(&a->source_allocator, c->base); 515 nvgpu_free(&a->source_allocator, sgl->phys);
513 nvgpu_kmem_cache_free(a->chunk_cache, c); 516 nvgpu_kfree(a->owner->g, sgl);
517
518 sgl = next;
514 } 519 }
520
515 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 521 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
516fail: 522fail:
517 return NULL; 523 return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
521 struct nvgpu_page_allocator *a, u64 len) 527 struct nvgpu_page_allocator *a, u64 len)
522{ 528{
523 struct nvgpu_page_alloc *alloc = NULL; 529 struct nvgpu_page_alloc *alloc = NULL;
524 struct page_alloc_chunk *c; 530 struct nvgpu_mem_sgl *sgl;
525 u64 pages; 531 u64 pages;
526 int i = 0; 532 int i = 0;
527 533
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
536 542
537 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", 543 palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
538 pages << a->page_shift, pages, alloc->base); 544 pages << a->page_shift, pages, alloc->base);
539 nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, 545 sgl = alloc->sgl;
540 page_alloc_chunk, list_entry) { 546 while (sgl) {
541 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 547 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
542 i++, c->base, c->length); 548 i++,
549 nvgpu_mem_sgl_phys(sgl),
550 nvgpu_mem_sgl_length(sgl));
551 sgl = sgl->next;
543 } 552 }
553 palloc_dbg(a, "Alloc done\n");
544 554
545 return alloc; 555 return alloc;
546} 556}
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
638 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) 648 struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
639{ 649{
640 struct nvgpu_page_alloc *alloc; 650 struct nvgpu_page_alloc *alloc;
641 struct page_alloc_chunk *c; 651 struct nvgpu_mem_sgl *sgl;
642 652
643 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); 653 alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
644 c = nvgpu_kmem_cache_alloc(a->chunk_cache); 654 sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
645 if (!alloc || !c) 655 if (!alloc || !sgl)
646 goto fail; 656 goto fail;
647 657
648 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); 658 alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
653 663
654 alloc->nr_chunks = 1; 664 alloc->nr_chunks = 1;
655 alloc->length = length; 665 alloc->length = length;
656 nvgpu_init_list_node(&alloc->alloc_chunks); 666 alloc->sgl = sgl;
657 667
658 c->base = alloc->base; 668 sgl->phys = alloc->base;
659 c->length = length; 669 sgl->dma = alloc->base;
660 nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); 670 sgl->length = length;
671 sgl->next = NULL;
661 672
662 return alloc; 673 return alloc;
663 674
664fail: 675fail:
665 if (c) 676 if (sgl)
666 nvgpu_kmem_cache_free(a->chunk_cache, c); 677 nvgpu_kfree(a->owner->g, sgl);
667 if (alloc) 678 if (alloc)
668 nvgpu_kmem_cache_free(a->alloc_cache, alloc); 679 nvgpu_kmem_cache_free(a->alloc_cache, alloc);
669 return NULL; 680 return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
677{ 688{
678 struct nvgpu_page_allocator *a = page_allocator(__a); 689 struct nvgpu_page_allocator *a = page_allocator(__a);
679 struct nvgpu_page_alloc *alloc = NULL; 690 struct nvgpu_page_alloc *alloc = NULL;
680 struct page_alloc_chunk *c; 691 struct nvgpu_mem_sgl *sgl;
681 u64 aligned_len, pages; 692 u64 aligned_len, pages;
682 int i = 0; 693 int i = 0;
683 694
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
697 708
698 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", 709 palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
699 alloc->base, aligned_len, pages); 710 alloc->base, aligned_len, pages);
700 nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, 711 sgl = alloc->sgl;
701 page_alloc_chunk, list_entry) { 712 while (sgl) {
702 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", 713 palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
703 i++, c->base, c->length); 714 i++,
715 nvgpu_mem_sgl_phys(sgl),
716 nvgpu_mem_sgl_length(sgl));
717 sgl = sgl->next;
704 } 718 }
705 719
706 a->nr_fixed_allocs++; 720 a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
896 910
897 a->alloc_cache = nvgpu_kmem_cache_create(g, 911 a->alloc_cache = nvgpu_kmem_cache_create(g,
898 sizeof(struct nvgpu_page_alloc)); 912 sizeof(struct nvgpu_page_alloc));
899 a->chunk_cache = nvgpu_kmem_cache_create(g,
900 sizeof(struct page_alloc_chunk));
901 a->slab_page_cache = nvgpu_kmem_cache_create(g, 913 a->slab_page_cache = nvgpu_kmem_cache_create(g,
902 sizeof(struct page_alloc_slab_page)); 914 sizeof(struct page_alloc_slab_page));
903 if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) { 915 if (!a->alloc_cache || !a->slab_page_cache) {
904 err = -ENOMEM; 916 err = -ENOMEM;
905 goto fail; 917 goto fail;
906 } 918 }
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
941fail: 953fail:
942 if (a->alloc_cache) 954 if (a->alloc_cache)
943 nvgpu_kmem_cache_destroy(a->alloc_cache); 955 nvgpu_kmem_cache_destroy(a->alloc_cache);
944 if (a->chunk_cache)
945 nvgpu_kmem_cache_destroy(a->chunk_cache);
946 if (a->slab_page_cache) 956 if (a->slab_page_cache)
947 nvgpu_kmem_cache_destroy(a->slab_page_cache); 957 nvgpu_kmem_cache_destroy(a->slab_page_cache);
948 nvgpu_kfree(g, a); 958 nvgpu_kfree(g, a);
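The allocator now strings chunks together as it finds them, keeping a head pointer plus the previously appended node instead of a list_head. A sketch of that append pattern under the same assumptions (the helper is illustrative, not part of the patch; vidmem has no separate IOMMU address, hence dma == phys):

/* Illustrative helper: append one chunk to a singly linked SGL. */
static int sgl_append_chunk(struct gk20a *g, struct nvgpu_mem_sgl **head,
			    struct nvgpu_mem_sgl **prev,
			    u64 addr, u64 length)
{
	struct nvgpu_mem_sgl *sgl = nvgpu_kzalloc(g, sizeof(*sgl));

	if (!sgl)
		return -ENOMEM;

	sgl->phys = addr;
	sgl->dma = addr;	/* Vidmem: no separate IOMMU address. */
	sgl->length = length;

	/* First chunk becomes the head; later chunks hang off prev. */
	if (*prev)
		(*prev)->next = sgl;
	else
		*head = sgl;
	*prev = sgl;

	return 0;
}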
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) 84 u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
85{ 85{
86 struct nvgpu_page_alloc *alloc = NULL; 86 struct nvgpu_page_alloc *alloc = NULL;
87 struct page_alloc_chunk *chunk = NULL; 87 struct nvgpu_mem_sgl *sgl;
88 u32 byteoff, start_reg, until_end, n; 88 u32 byteoff, start_reg, until_end, n;
89 89
90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 90 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
91 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 91 sgl = alloc->sgl;
92 page_alloc_chunk, list_entry) { 92 while (sgl) {
93 if (offset >= chunk->length) 93 if (offset >= nvgpu_mem_sgl_length(sgl)) {
94 offset -= chunk->length; 94 offset -= nvgpu_mem_sgl_length(sgl);
95 else 95 sgl = sgl->next;
96 } else {
96 break; 97 break;
98 }
97 } 99 }
98 100
99 while (size) { 101 while (size) {
100 byteoff = g->ops.pramin.enter(g, mem, chunk, 102 u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
103
104 byteoff = g->ops.pramin.enter(g, mem, sgl,
101 offset / sizeof(u32)); 105 offset / sizeof(u32));
102 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); 106 start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
103 until_end = SZ_1M - (byteoff & (SZ_1M - 1)); 107 until_end = SZ_1M - (byteoff & (SZ_1M - 1));
104 108
105 n = min3(size, until_end, (u32)(chunk->length - offset)); 109 n = min3(size, until_end, (u32)(sgl_len - offset));
106 110
107 loop(g, start_reg, n / sizeof(u32), arg); 111 loop(g, start_reg, n / sizeof(u32), arg);
108 112
109 /* read back to synchronize accesses */ 113 /* read back to synchronize accesses */
110 gk20a_readl(g, start_reg); 114 gk20a_readl(g, start_reg);
111 g->ops.pramin.exit(g, mem, chunk); 115 g->ops.pramin.exit(g, mem, sgl);
112 116
113 size -= n; 117 size -= n;
114 118
115 if (n == (chunk->length - offset)) { 119 if (n == (sgl_len - offset)) {
116 chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk, 120 sgl = nvgpu_mem_sgl_next(sgl);
117 list_entry);
118 offset = 0; 121 offset = 0;
119 } else { 122 } else {
120 offset += n; 123 offset += n;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7eee2d51..355228db 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -34,6 +34,7 @@ struct gk20a_debug_output;
34struct nvgpu_clk_pll_debug_data; 34struct nvgpu_clk_pll_debug_data;
35struct nvgpu_nvhost_dev; 35struct nvgpu_nvhost_dev;
36struct nvgpu_cpu_time_correlation_sample; 36struct nvgpu_cpu_time_correlation_sample;
37struct nvgpu_mem_sgl;
37 38
38#include <nvgpu/lock.h> 39#include <nvgpu/lock.h>
39#include <nvgpu/thread.h> 40#include <nvgpu/thread.h>
@@ -70,8 +71,6 @@ struct nvgpu_cpu_time_correlation_sample;
70#endif 71#endif
71#include "ecc_gk20a.h" 72#include "ecc_gk20a.h"
72 73
73struct page_alloc_chunk;
74
75/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 74/* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds.
76 32 ns is the resolution of ptimer. */ 75 32 ns is the resolution of ptimer. */
77#define PTIMER_REF_FREQ_HZ 31250000 76#define PTIMER_REF_FREQ_HZ 31250000
@@ -701,7 +700,7 @@ struct gpu_ops {
701 bool (*support_sparse)(struct gk20a *g); 700 bool (*support_sparse)(struct gk20a *g);
702 u64 (*gmmu_map)(struct vm_gk20a *vm, 701 u64 (*gmmu_map)(struct vm_gk20a *vm,
703 u64 map_offset, 702 u64 map_offset,
704 struct sg_table *sgt, 703 struct nvgpu_mem_sgl *sgl,
705 u64 buffer_offset, 704 u64 buffer_offset,
706 u64 size, 705 u64 size,
707 int pgsz_idx, 706 int pgsz_idx,
@@ -761,9 +760,9 @@ struct gpu_ops {
761 size_t size); 760 size_t size);
762 struct { 761 struct {
763 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, 762 u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem,
764 struct page_alloc_chunk *chunk, u32 w); 763 struct nvgpu_mem_sgl *sgl, u32 w);
765 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, 764 void (*exit)(struct gk20a *g, struct nvgpu_mem *mem,
766 struct page_alloc_chunk *chunk); 765 struct nvgpu_mem_sgl *sgl);
767 u32 (*data032_r)(u32 i); 766 u32 (*data032_r)(u32 i);
768 } pramin; 767 } pramin;
769 struct { 768 struct {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 97b7aa80..cd34e769 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1151,7 +1151,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1151 struct gk20a_fence *gk20a_fence_out = NULL; 1151 struct gk20a_fence *gk20a_fence_out = NULL;
1152 struct gk20a_fence *gk20a_last_fence = NULL; 1152 struct gk20a_fence *gk20a_last_fence = NULL;
1153 struct nvgpu_page_alloc *alloc = NULL; 1153 struct nvgpu_page_alloc *alloc = NULL;
1154 struct page_alloc_chunk *chunk = NULL; 1154 struct nvgpu_mem_sgl *sgl = NULL;
1155 int err = 0; 1155 int err = 0;
1156 1156
1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0) 1157 if (g->mm.vidmem.ce_ctx_id == (u32)~0)
@@ -1159,16 +1159,16 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1159 1159
1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); 1160 alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
1161 1161
1162 nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, 1162 sgl = alloc->sgl;
1163 page_alloc_chunk, list_entry) { 1163 while (sgl) {
1164 if (gk20a_last_fence) 1164 if (gk20a_last_fence)
1165 gk20a_fence_put(gk20a_last_fence); 1165 gk20a_fence_put(gk20a_last_fence);
1166 1166
1167 err = gk20a_ce_execute_ops(g, 1167 err = gk20a_ce_execute_ops(g,
1168 g->mm.vidmem.ce_ctx_id, 1168 g->mm.vidmem.ce_ctx_id,
1169 0, 1169 0,
1170 chunk->base, 1170 nvgpu_mem_sgl_phys(sgl),
1171 chunk->length, 1171 nvgpu_mem_sgl_length(sgl),
1172 0x00000000, 1172 0x00000000,
1173 NVGPU_CE_DST_LOCATION_LOCAL_FB, 1173 NVGPU_CE_DST_LOCATION_LOCAL_FB,
1174 NVGPU_CE_MEMSET, 1174 NVGPU_CE_MEMSET,
@@ -1183,6 +1183,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
1183 } 1183 }
1184 1184
1185 gk20a_last_fence = gk20a_fence_out; 1185 gk20a_last_fence = gk20a_fence_out;
1186 sgl = nvgpu_mem_sgl_next(sgl);
1186 } 1187 }
1187 1188
1188 if (gk20a_last_fence) { 1189 if (gk20a_last_fence) {
@@ -1262,10 +1263,10 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
1262 return addr; 1263 return addr;
1263} 1264}
1264 1265
1265u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) 1266u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, u64 iova)
1266{ 1267{
1267 /* ensure it is not vidmem allocation */ 1268 /* ensure it is not vidmem allocation */
1268 WARN_ON(is_vidmem_page_alloc((u64)iova)); 1269 WARN_ON(is_vidmem_page_alloc(iova));
1269 1270
1270 if (device_is_iommuable(dev_from_gk20a(g)) && 1271 if (device_is_iommuable(dev_from_gk20a(g)) &&
1271 g->ops.mm.get_physical_addr_bits) 1272 g->ops.mm.get_physical_addr_bits)
@@ -2167,11 +2168,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
2167 return 34; 2168 return 34;
2168} 2169}
2169 2170
2170u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags)
2171{
2172 return phys;
2173}
2174
2175const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, 2171const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
2176 u32 big_page_size) 2172 u32 big_page_size)
2177{ 2173{
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c77bebf8..2fdc1729 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -336,7 +336,6 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
336 336
337int gk20a_mm_suspend(struct gk20a *g); 337int gk20a_mm_suspend(struct gk20a *g);
338 338
339u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags);
340u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); 339u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova);
341 340
342void gk20a_mm_ltc_isr(struct gk20a *g); 341void gk20a_mm_ltc_isr(struct gk20a *g);
@@ -361,29 +360,29 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem)
361} 360}
362 361
363u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, 362u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
364 u64 map_offset, 363 u64 map_offset,
365 struct sg_table *sgt, 364 struct nvgpu_mem_sgl *sgl,
366 u64 buffer_offset, 365 u64 buffer_offset,
367 u64 size, 366 u64 size,
368 int pgsz_idx, 367 int pgsz_idx,
369 u8 kind_v, 368 u8 kind_v,
370 u32 ctag_offset, 369 u32 ctag_offset,
371 u32 flags, 370 u32 flags,
372 int rw_flag, 371 int rw_flag,
373 bool clear_ctags, 372 bool clear_ctags,
374 bool sparse, 373 bool sparse,
375 bool priv, 374 bool priv,
376 struct vm_gk20a_mapping_batch *batch, 375 struct vm_gk20a_mapping_batch *batch,
377 enum nvgpu_aperture aperture); 376 enum nvgpu_aperture aperture);
378 377
379void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, 378void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
380 u64 vaddr, 379 u64 vaddr,
381 u64 size, 380 u64 size,
382 int pgsz_idx, 381 int pgsz_idx,
383 bool va_allocated, 382 bool va_allocated,
384 int rw_flag, 383 int rw_flag,
385 bool sparse, 384 bool sparse,
386 struct vm_gk20a_mapping_batch *batch); 385 struct vm_gk20a_mapping_batch *batch);
387 386
388struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); 387struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf);
389void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, 388void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 9d19e9e5..8a34a63c 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
@@ -26,9 +26,9 @@
26 26
27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ 27/* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */
28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 28u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
29 struct page_alloc_chunk *chunk, u32 w) 29 struct nvgpu_mem_sgl *sgl, u32 w)
30{ 30{
31 u64 bufbase = chunk->base; 31 u64 bufbase = nvgpu_mem_sgl_phys(sgl);
32 u64 addr = bufbase + w * sizeof(u32); 32 u64 addr = bufbase + w * sizeof(u32);
33 u32 hi = (u32)((addr & ~(u64)0xfffff) 33 u32 hi = (u32)((addr & ~(u64)0xfffff)
34 >> bus_bar0_window_target_bar0_window_base_shift_v()); 34 >> bus_bar0_window_target_bar0_window_base_shift_v());
@@ -40,8 +40,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
40 40
41 gk20a_dbg(gpu_dbg_mem, 41 gk20a_dbg(gpu_dbg_mem,
42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", 42 "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)",
43 hi, lo, mem, chunk, bufbase, 43 hi, lo, mem, sgl, bufbase,
44 bufbase + chunk->length, chunk->length); 44 bufbase + nvgpu_mem_sgl_phys(sgl),
45 nvgpu_mem_sgl_length(sgl));
45 46
46 WARN_ON(!bufbase); 47 WARN_ON(!bufbase);
47 48
@@ -57,9 +58,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
57} 58}
58 59
59void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 60void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
60 struct page_alloc_chunk *chunk) 61 struct nvgpu_mem_sgl *sgl)
61{ 62{
62 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); 63 gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl);
63 64
64 nvgpu_spinlock_release(&g->mm.pramin_window_lock); 65 nvgpu_spinlock_release(&g->mm.pramin_window_lock);
65} 66}
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index 1a1ac871..fc5ba919 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
@@ -19,10 +19,10 @@
19 19
20struct gk20a; 20struct gk20a;
21struct nvgpu_mem; 21struct nvgpu_mem;
22struct page_alloc_chunk; 22struct nvgpu_mem_sgl;
23 23
24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, 24u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
25 struct page_alloc_chunk *chunk, u32 w); 25 struct nvgpu_mem_sgl *sgl, u32 w);
26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, 26void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem,
27 struct page_alloc_chunk *chunk); 27 struct nvgpu_mem_sgl *sgl);
28#endif 28#endif
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index fc27b120..c276f5a6 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -904,7 +904,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size,
904 904
905 mem->gpu_va = nvgpu_gmmu_map(vm, 905 mem->gpu_va = nvgpu_gmmu_map(vm,
906 mem, 906 mem,
907 size, 907 mem->aligned_size,
908 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, 908 NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
909 gk20a_mem_flag_none, 909 gk20a_mem_flag_none,
910 false, 910 false,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index de129a5f..11060300 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -27,8 +27,6 @@
27#include <nvgpu/gmmu_t19x.h> 27#include <nvgpu/gmmu_t19x.h>
28#endif 28#endif
29 29
30struct scatterlist;
31
32/* 30/*
33 * This is the GMMU API visible to blocks outside of the GMMU. Basically this 31 * This is the GMMU API visible to blocks outside of the GMMU. Basically this
34 * API supports all the different types of mappings that might be done in the 32 * API supports all the different types of mappings that might be done in the
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
index e2d4d336..f96c2801 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h
@@ -32,6 +32,8 @@ struct nvgpu_mem_priv {
32}; 32};
33 33
34u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); 34u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl);
35struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
36 struct sg_table *sgt);
35 37
36/** 38/**
37 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. 39 * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h
index 4cac3e70..cfce8c5b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/log.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/log.h
@@ -71,6 +71,7 @@ enum nvgpu_log_categories {
71 gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ 71 gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */
72 gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ 72 gpu_dbg_alloc = BIT(21), /* Allocator debugging. */
73 gpu_dbg_dma = BIT(22), /* DMA allocation prints. */ 73 gpu_dbg_dma = BIT(22), /* DMA allocation prints. */
74 gpu_dbg_sgl = BIT(23), /* SGL related traces. */
74 gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ 75 gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */
75}; 76};
76 77
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index a112623e..7d19cf81 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -33,6 +33,8 @@ struct gk20a;
33struct nvgpu_allocator; 33struct nvgpu_allocator;
34struct nvgpu_gmmu_attrs; 34struct nvgpu_gmmu_attrs;
35 35
36#define NVGPU_MEM_DMA_ERROR (~0ULL)
37
36/* 38/*
37 * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be 39 * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be
38 * told to the gpu about the aperture, but this flag designates where the 40 * told to the gpu about the aperture, but this flag designates where the
@@ -44,6 +46,28 @@ enum nvgpu_aperture {
44 APERTURE_VIDMEM 46 APERTURE_VIDMEM
45}; 47};
46 48
49/*
50 * This struct holds the necessary information for describing a struct
51 * nvgpu_mem's scatter gather list.
52 *
53 * These are created in a platform dependent way. As a result the function
54 * definition for allocating these lives in the <nvgpu/_OS_/nvgpu_mem.h> file.
55 */
56struct nvgpu_mem_sgl {
57 /*
58 * Internally this is implemented as a singly linked list.
59 */
60 struct nvgpu_mem_sgl *next;
61
62 /*
63 * There is both a phys address and a DMA address since some systems,
64 * for example ones with an IOMMU, may see these as different addresses.
65 */
66 u64 phys;
67 u64 dma;
68 u64 length;
69};
70
47struct nvgpu_mem { 71struct nvgpu_mem {
48 /* 72 /*
49 * Populated for all nvgpu_mem structs - vidmem or system. 73 * Populated for all nvgpu_mem structs - vidmem or system.
@@ -176,6 +200,27 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
176 struct nvgpu_mem *dest, struct nvgpu_mem *src, 200 struct nvgpu_mem *dest, struct nvgpu_mem *src,
177 int start_page, int nr_pages); 201 int start_page, int nr_pages);
178 202
203/**
204 * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem.
205 *
206 * @g - The GPU.
207 * @mem - The source memory allocation to use.
208 *
209 * Create a scatter gather list from the passed @mem struct. This list lets the
210 * calling code iterate across each chunk of a DMA allocation for when that DMA
211 * allocation is not completely contiguous.
212 */
213struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
214 struct nvgpu_mem *mem);
215void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl);
216
217struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl);
218u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl);
219u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl);
220u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl);
221u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
222 struct nvgpu_gmmu_attrs *attrs);
223
179/* 224/*
180 * Buffer accessors - wrap between begin() and end() if there is no permanent 225 * Buffer accessors - wrap between begin() and end() if there is no permanent
181 * kernel mapping for this buffer. 226 * kernel mapping for this buffer.
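Code outside the Linux backend is expected to go through the accessors declared above rather than touch the struct fields directly, so it stays independent of how the list was built. As an illustration (this helper is hypothetical, not part of the header), summing the bytes a list covers needs nothing but the accessor API:

/* Illustrative only: total number of bytes described by an SGL. */
static u64 sgl_total_length(struct nvgpu_mem_sgl *sgl)
{
	u64 bytes = 0;

	while (sgl) {
		bytes += nvgpu_mem_sgl_length(sgl);
		sgl = nvgpu_mem_sgl_next(sgl);
	}

	return bytes;
}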
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
index 9a5ef8d3..de83ca7f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h
@@ -18,6 +18,7 @@
18#define PAGE_ALLOCATOR_PRIV_H 18#define PAGE_ALLOCATOR_PRIV_H
19 19
20#include <nvgpu/allocator.h> 20#include <nvgpu/allocator.h>
21#include <nvgpu/nvgpu_mem.h>
21#include <nvgpu/kmem.h> 22#include <nvgpu/kmem.h>
22#include <nvgpu/list.h> 23#include <nvgpu/list.h>
23#include <nvgpu/rbtree.h> 24#include <nvgpu/rbtree.h>
@@ -83,27 +84,17 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node)
83 ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); 84 ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry));
84}; 85};
85 86
86struct page_alloc_chunk {
87 struct nvgpu_list_node list_entry;
88
89 u64 base;
90 u64 length;
91};
92
93static inline struct page_alloc_chunk *
94page_alloc_chunk_from_list_entry(struct nvgpu_list_node *node)
95{
96 return (struct page_alloc_chunk *)
97 ((uintptr_t)node - offsetof(struct page_alloc_chunk, list_entry));
98};
99
100/* 87/*
101 * Struct to handle internal management of page allocation. It holds a list 88 * Struct to handle internal management of page allocation. It holds a list
102 * of the chunks of pages that make up the overall allocation - much like a 89 * of the chunks of pages that make up the overall allocation - much like a
103 * scatter gather table. 90 * scatter gather table.
104 */ 91 */
105struct nvgpu_page_alloc { 92struct nvgpu_page_alloc {
106 struct nvgpu_list_node alloc_chunks; 93 /*
94 * nvgpu_mem_sgl for describing the actual allocation. Convenient for
95 * GMMU mapping.
96 */
97 struct nvgpu_mem_sgl *sgl;
107 98
108 int nr_chunks; 99 int nr_chunks;
109 u64 length; 100 u64 length;
@@ -156,7 +147,6 @@ struct nvgpu_page_allocator {
156 int nr_slabs; 147 int nr_slabs;
157 148
158 struct nvgpu_kmem_cache *alloc_cache; 149 struct nvgpu_kmem_cache *alloc_cache;
159 struct nvgpu_kmem_cache *chunk_cache;
160 struct nvgpu_kmem_cache *slab_page_cache; 150 struct nvgpu_kmem_cache *slab_page_cache;
161 151
162 u64 flags; 152 u64 flags;
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 85c436e5..ee9b791a 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -13,7 +13,6 @@
13 * more details. 13 * more details.
14 */ 14 */
15 15
16#include <linux/dma-mapping.h>
17#include "vgpu/vgpu.h" 16#include "vgpu/vgpu.h"
18#include "vgpu_mm_gp10b.h" 17#include "vgpu_mm_gp10b.h"
19#include "gk20a/mm_gk20a.h" 18#include "gk20a/mm_gk20a.h"
@@ -41,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc,
41 40
42static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, 41static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
43 u64 map_offset, 42 u64 map_offset,
44 struct sg_table *sgt, 43 struct nvgpu_mem_sgl *sgl,
45 u64 buffer_offset, 44 u64 buffer_offset,
46 u64 size, 45 u64 size,
47 int pgsz_idx, 46 int pgsz_idx,
@@ -61,10 +60,9 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
61 struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; 60 struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex;
62 struct tegra_vgpu_mem_desc *mem_desc; 61 struct tegra_vgpu_mem_desc *mem_desc;
63 u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; 62 u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
63 u64 buffer_size = PAGE_ALIGN(size);
64 u64 space_to_skip = buffer_offset; 64 u64 space_to_skip = buffer_offset;
65 u64 buffer_size = 0;
66 u32 mem_desc_count = 0, i; 65 u32 mem_desc_count = 0, i;
67 struct scatterlist *sgl;
68 void *handle = NULL; 66 void *handle = NULL;
69 size_t oob_size; 67 size_t oob_size;
70 u8 prot; 68 u8 prot;
@@ -73,7 +71,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
73 71
74 /* FIXME: add support for sparse mappings */ 72 /* FIXME: add support for sparse mappings */
75 73
76 if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) 74 if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu))
77 return 0; 75 return 0;
78 76
79 if (space_to_skip & (page_size - 1)) 77 if (space_to_skip & (page_size - 1))
@@ -100,33 +98,36 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
100 goto fail; 98 goto fail;
101 } 99 }
102 100
103 sgl = sgt->sgl; 101 while (sgl) {
104 while (space_to_skip && sgl && 102 u64 phys_addr;
105 (space_to_skip + page_size > sgl->length)) { 103 u64 chunk_length;
106 space_to_skip -= sgl->length; 104
107 sgl = sg_next(sgl); 105 /*
108 } 106 * Cut out sgl ents for space_to_skip.
109 WARN_ON(!sgl); 107 */
108 if (space_to_skip &&
109 space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
110 space_to_skip -= nvgpu_mem_sgl_length(sgl);
111 sgl = nvgpu_mem_sgl_next(sgl);
112 continue;
113 }
110 114
111 if (add_mem_desc(&mem_desc[mem_desc_count++], 115 phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
112 sg_phys(sgl) + space_to_skip, 116 chunk_length = min(size,
113 sgl->length - space_to_skip, 117 nvgpu_mem_sgl_length(sgl) - space_to_skip);
114 &oob_size)) {
115 err = -ENOMEM;
116 goto fail;
117 }
118 buffer_size += sgl->length - space_to_skip;
119 118
120 sgl = sg_next(sgl); 119 if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr,
121 while (sgl && buffer_size < size) { 120 chunk_length, &oob_size)) {
122 if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl),
123 sgl->length, &oob_size)) {
124 err = -ENOMEM; 121 err = -ENOMEM;
125 goto fail; 122 goto fail;
126 } 123 }
127 124
128 buffer_size += sgl->length; 125 space_to_skip = 0;
129 sgl = sg_next(sgl); 126 size -= chunk_length;
127 sgl = nvgpu_mem_sgl_next(sgl);
128
129 if (size == 0)
130 break;
130 } 131 }
131 132
132 if (rw_flag == gk20a_mem_flag_read_only) 133 if (rw_flag == gk20a_mem_flag_read_only)
@@ -153,7 +154,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
153 msg.handle = vgpu_get_handle(g); 154 msg.handle = vgpu_get_handle(g);
154 p->handle = vm->handle; 155 p->handle = vm->handle;
155 p->gpu_va = map_offset; 156 p->gpu_va = map_offset;
156 p->size = size; 157 p->size = buffer_size;
157 p->mem_desc_count = mem_desc_count; 158 p->mem_desc_count = mem_desc_count;
158 p->pgsz_idx = pgsz_idx; 159 p->pgsz_idx = pgsz_idx;
159 p->iova = 0; 160 p->iova = 0;
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ef9e00c8..5da6f158 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g)
78 78
79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, 79static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
80 u64 map_offset, 80 u64 map_offset,
81 struct sg_table *sgt, 81 struct nvgpu_mem_sgl *sgl,
82 u64 buffer_offset, 82 u64 buffer_offset,
83 u64 size, 83 u64 size,
84 int pgsz_idx, 84 int pgsz_idx,
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); 98 struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
99 struct tegra_vgpu_cmd_msg msg; 99 struct tegra_vgpu_cmd_msg msg;
100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map; 100 struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
101 u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); 101 u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL);
102 u8 prot; 102 u8 prot;
103 103
104 gk20a_dbg_fn(""); 104 gk20a_dbg_fn("");