diff options
author | Sunny He <suhe@nvidia.com> | 2017-08-15 15:01:04 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 15:55:24 -0400 |
commit | 17c581d75514c32d1e8c1e416beb33b3ccce22a5 (patch) | |
tree | a25d063f19b8e1f83f61af418f3aa2ac32fe0cce | |
parent | 0090ee5aca268a3c359f34c74b8c521df3bd8593 (diff) |
gpu: nvgpu: SGL passthrough implementation
The basic nvgpu_mem_sgl implementation provides support
for OS specific scatter-gather list implementations by
simply copying them node by node. This is inefficient,
taking extra time and memory.
This patch implements an nvgpu_sgt struct to act as
a header which is inserted at the front of any scatter-
gather list implementation. This labels every struct
with a set of ops which can be used to interact with
the attached scatter gather list.
Since nvgpu common code only has to interact with these
function pointers, any sgl implementation can be used.
Initialization only requires the allocation of a single
struct, removing the need to copy or iterate through the
sgl being converted.
Jira NVGPU-186
Change-Id: I2994f804a4a4cc141b702e987e9081d8560ba2e8
Signed-off-by: Sunny He <suhe@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1541426
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | 144 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/vm.c | 17 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/gmmu.c | 67 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 47 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/mm/page_allocator.c | 114 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/pramin.c | 21 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 12 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | 10 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/pramin_gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h | 7 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | 80 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/page_allocator.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | 19 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/mm_vgpu.c | 4 |
16 files changed, 320 insertions, 240 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index eb54f3fd..8d8909dd 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c | |||
@@ -397,42 +397,59 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, | |||
397 | return 0; | 397 | return 0; |
398 | } | 398 | } |
399 | 399 | ||
400 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, | 400 | static void *nvgpu_mem_linux_sgl_next(void *sgl) |
401 | struct nvgpu_mem_sgl *sgl) | ||
402 | { | 401 | { |
403 | struct nvgpu_mem_sgl *head, *next; | 402 | return sg_next((struct scatterlist *)sgl); |
403 | } | ||
404 | 404 | ||
405 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | 405 | static u64 nvgpu_mem_linux_sgl_phys(void *sgl) |
406 | if (!head) | 406 | { |
407 | return NULL; | 407 | return (u64)sg_phys((struct scatterlist *)sgl); |
408 | } | ||
408 | 409 | ||
409 | next = head; | 410 | static u64 nvgpu_mem_linux_sgl_dma(void *sgl) |
410 | while (true) { | 411 | { |
411 | nvgpu_log(g, gpu_dbg_sgl, | 412 | return (u64)sg_dma_address((struct scatterlist *)sgl); |
412 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | 413 | } |
413 | sgl->phys, sgl->dma, sgl->length); | ||
414 | |||
415 | next->dma = sgl->dma; | ||
416 | next->phys = sgl->phys; | ||
417 | next->length = sgl->length; | ||
418 | next->next = NULL; | ||
419 | |||
420 | sgl = nvgpu_mem_sgl_next(sgl); | ||
421 | if (!sgl) | ||
422 | break; | ||
423 | |||
424 | next->next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
425 | if (!next->next) { | ||
426 | nvgpu_mem_sgl_free(g, head); | ||
427 | return NULL; | ||
428 | } | ||
429 | next = next->next; | ||
430 | } | ||
431 | 414 | ||
432 | return head; | 415 | static u64 nvgpu_mem_linux_sgl_length(void *sgl) |
416 | { | ||
417 | return (u64)((struct scatterlist *)sgl)->length; | ||
433 | } | 418 | } |
434 | 419 | ||
435 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( | 420 | static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl, |
421 | struct nvgpu_gmmu_attrs *attrs) | ||
422 | { | ||
423 | if (sg_dma_address((struct scatterlist *)sgl) == 0) | ||
424 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
425 | sg_phys((struct scatterlist *)sgl)); | ||
426 | |||
427 | if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) | ||
428 | return 0; | ||
429 | |||
430 | return gk20a_mm_smmu_vaddr_translate(g, | ||
431 | sg_dma_address((struct scatterlist *)sgl)); | ||
432 | } | ||
433 | |||
434 | static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
435 | { | ||
436 | /* | ||
437 | * Free this SGT. All we do is free the passed SGT. The actual Linux | ||
438 | * SGT/SGL needs to be freed separately. | ||
439 | */ | ||
440 | nvgpu_kfree(g, sgt); | ||
441 | } | ||
442 | |||
443 | static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = { | ||
444 | .sgl_next = nvgpu_mem_linux_sgl_next, | ||
445 | .sgl_phys = nvgpu_mem_linux_sgl_phys, | ||
446 | .sgl_dma = nvgpu_mem_linux_sgl_dma, | ||
447 | .sgl_length = nvgpu_mem_linux_sgl_length, | ||
448 | .sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr, | ||
449 | .sgt_free = nvgpu_mem_linux_sgl_free, | ||
450 | }; | ||
451 | |||
452 | static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem( | ||
436 | struct gk20a *g, | 453 | struct gk20a *g, |
437 | struct scatterlist *linux_sgl) | 454 | struct scatterlist *linux_sgl) |
438 | { | 455 | { |
@@ -442,70 +459,31 @@ static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( | |||
442 | if (!vidmem_alloc) | 459 | if (!vidmem_alloc) |
443 | return NULL; | 460 | return NULL; |
444 | 461 | ||
445 | nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); | 462 | return &vidmem_alloc->sgt; |
446 | |||
447 | return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl); | ||
448 | } | 463 | } |
449 | 464 | ||
450 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, | 465 | struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt) |
451 | struct sg_table *sgt) | ||
452 | { | 466 | { |
453 | struct nvgpu_mem_sgl *head, *sgl, *next; | 467 | struct nvgpu_sgt *nvgpu_sgt; |
454 | struct scatterlist *linux_sgl = sgt->sgl; | 468 | struct scatterlist *linux_sgl = sgt->sgl; |
455 | 469 | ||
456 | if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) | 470 | if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) |
457 | return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); | 471 | return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl); |
458 | 472 | ||
459 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | 473 | nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt)); |
460 | if (!head) | 474 | if (!nvgpu_sgt) |
461 | return NULL; | 475 | return NULL; |
462 | 476 | ||
463 | nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); | 477 | nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!"); |
464 | 478 | ||
465 | sgl = head; | 479 | nvgpu_sgt->sgl = sgt->sgl; |
466 | while (true) { | 480 | nvgpu_sgt->ops = &nvgpu_linux_sgt_ops; |
467 | sgl->dma = sg_dma_address(linux_sgl); | ||
468 | sgl->phys = sg_phys(linux_sgl); | ||
469 | sgl->length = linux_sgl->length; | ||
470 | |||
471 | /* | ||
472 | * We don't like offsets in the pages here. This will cause | ||
473 | * problems. | ||
474 | */ | ||
475 | if (WARN_ON(linux_sgl->offset)) { | ||
476 | nvgpu_mem_sgl_free(g, head); | ||
477 | return NULL; | ||
478 | } | ||
479 | |||
480 | nvgpu_log(g, gpu_dbg_sgl, | ||
481 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | ||
482 | sgl->phys, sgl->dma, sgl->length); | ||
483 | |||
484 | /* | ||
485 | * When there's no more SGL ents for the Linux SGL we are | ||
486 | * done. Don't bother making any more SGL ents for the nvgpu | ||
487 | * SGL. | ||
488 | */ | ||
489 | linux_sgl = sg_next(linux_sgl); | ||
490 | if (!linux_sgl) | ||
491 | break; | ||
492 | |||
493 | next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
494 | if (!next) { | ||
495 | nvgpu_mem_sgl_free(g, head); | ||
496 | return NULL; | ||
497 | } | ||
498 | |||
499 | sgl->next = next; | ||
500 | sgl = next; | ||
501 | } | ||
502 | 481 | ||
503 | nvgpu_log(g, gpu_dbg_sgl, "Done!"); | 482 | return nvgpu_sgt; |
504 | return head; | ||
505 | } | 483 | } |
506 | 484 | ||
507 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, | 485 | struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, |
508 | struct nvgpu_mem *mem) | 486 | struct nvgpu_mem *mem) |
509 | { | 487 | { |
510 | return nvgpu_mem_sgl_create(g, mem->priv.sgt); | 488 | return nvgpu_linux_sgt_create(g, mem->priv.sgt); |
511 | } | 489 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 4a4429dc..2e29f0f7 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c | |||
@@ -69,19 +69,20 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, | |||
69 | 69 | ||
70 | if (aperture == APERTURE_VIDMEM) { | 70 | if (aperture == APERTURE_VIDMEM) { |
71 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); | 71 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); |
72 | struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; | 72 | struct nvgpu_sgt *sgt = &alloc->sgt; |
73 | void *sgl_vid = sgt->sgl; | ||
73 | 74 | ||
74 | while (sgl_vid) { | 75 | while (sgl_vid) { |
75 | chunk_align = 1ULL << | 76 | chunk_align = 1ULL << |
76 | __ffs(nvgpu_mem_sgl_phys(sgl_vid) | | 77 | __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) | |
77 | nvgpu_mem_sgl_length(sgl_vid)); | 78 | nvgpu_sgt_get_length(sgt, sgl_vid); |
78 | 79 | ||
79 | if (align) | 80 | if (align) |
80 | align = min(align, chunk_align); | 81 | align = min(align, chunk_align); |
81 | else | 82 | else |
82 | align = chunk_align; | 83 | align = chunk_align; |
83 | 84 | ||
84 | sgl_vid = nvgpu_mem_sgl_next(sgl_vid); | 85 | sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid); |
85 | } | 86 | } |
86 | 87 | ||
87 | return align; | 88 | return align; |
@@ -242,7 +243,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
242 | struct nvgpu_vm_area *vm_area = NULL; | 243 | struct nvgpu_vm_area *vm_area = NULL; |
243 | u32 ctag_offset; | 244 | u32 ctag_offset; |
244 | enum nvgpu_aperture aperture; | 245 | enum nvgpu_aperture aperture; |
245 | struct nvgpu_mem_sgl *nvgpu_sgl; | 246 | struct nvgpu_sgt *nvgpu_sgt; |
246 | 247 | ||
247 | /* | 248 | /* |
248 | * The kind used as part of the key for map caching. HW may | 249 | * The kind used as part of the key for map caching. HW may |
@@ -399,12 +400,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
399 | ctag_offset += buffer_offset >> | 400 | ctag_offset += buffer_offset >> |
400 | ilog2(g->ops.fb.compression_page_size(g)); | 401 | ilog2(g->ops.fb.compression_page_size(g)); |
401 | 402 | ||
402 | nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); | 403 | nvgpu_sgt = nvgpu_linux_sgt_create(g, bfr.sgt); |
403 | 404 | ||
404 | /* update gmmu ptes */ | 405 | /* update gmmu ptes */ |
405 | map_offset = g->ops.mm.gmmu_map(vm, | 406 | map_offset = g->ops.mm.gmmu_map(vm, |
406 | map_offset, | 407 | map_offset, |
407 | nvgpu_sgl, | 408 | nvgpu_sgt, |
408 | buffer_offset, /* sg offset */ | 409 | buffer_offset, /* sg offset */ |
409 | mapping_size, | 410 | mapping_size, |
410 | bfr.pgsz_idx, | 411 | bfr.pgsz_idx, |
@@ -419,7 +420,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
419 | if (!map_offset) | 420 | if (!map_offset) |
420 | goto clean_up; | 421 | goto clean_up; |
421 | 422 | ||
422 | nvgpu_mem_sgl_free(g, nvgpu_sgl); | 423 | nvgpu_sgt_free(nvgpu_sgt, g); |
423 | 424 | ||
424 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); | 425 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); |
425 | if (!mapped_buffer) { | 426 | if (!mapped_buffer) { |
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 41f5acdd..66bce8f0 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c | |||
@@ -65,14 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
65 | struct gk20a *g = gk20a_from_vm(vm); | 65 | struct gk20a *g = gk20a_from_vm(vm); |
66 | u64 vaddr; | 66 | u64 vaddr; |
67 | 67 | ||
68 | struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); | 68 | struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem); |
69 | 69 | ||
70 | if (!sgl) | 70 | if (!sgt) |
71 | return -ENOMEM; | 71 | return -ENOMEM; |
72 | 72 | ||
73 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 73 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
74 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 74 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
75 | sgl, /* sg list */ | 75 | sgt, /* sg list */ |
76 | 0, /* sg offset */ | 76 | 0, /* sg offset */ |
77 | size, | 77 | size, |
78 | gmmu_page_size_kernel, | 78 | gmmu_page_size_kernel, |
@@ -86,7 +86,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
86 | aperture); | 86 | aperture); |
87 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 87 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
88 | 88 | ||
89 | nvgpu_mem_sgl_free(g, sgl); | 89 | nvgpu_sgt_free(sgt, g); |
90 | 90 | ||
91 | if (!vaddr) { | 91 | if (!vaddr) { |
92 | nvgpu_err(g, "failed to map buffer!"); | 92 | nvgpu_err(g, "failed to map buffer!"); |
@@ -464,7 +464,7 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
464 | * VIDMEM version of the update_ptes logic. | 464 | * VIDMEM version of the update_ptes logic. |
465 | */ | 465 | */ |
466 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | 466 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, |
467 | struct nvgpu_mem_sgl *sgl, | 467 | struct nvgpu_sgt *sgt, |
468 | u64 space_to_skip, | 468 | u64 space_to_skip, |
469 | u64 virt_addr, | 469 | u64 virt_addr, |
470 | u64 length, | 470 | u64 length, |
@@ -472,8 +472,9 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
472 | { | 472 | { |
473 | u64 phys_addr, chunk_length; | 473 | u64 phys_addr, chunk_length; |
474 | int err = 0; | 474 | int err = 0; |
475 | void *sgl; | ||
475 | 476 | ||
476 | if (!sgl) { | 477 | if (!sgt) { |
477 | /* | 478 | /* |
478 | * This is considered an unmap. Just pass in 0 as the physical | 479 | * This is considered an unmap. Just pass in 0 as the physical |
479 | * address for the entire GPU range. | 480 | * address for the entire GPU range. |
@@ -490,16 +491,17 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
490 | * Otherwise iterate across all the chunks in this allocation and | 491 | * Otherwise iterate across all the chunks in this allocation and |
491 | * map them. | 492 | * map them. |
492 | */ | 493 | */ |
494 | sgl = sgt->sgl; | ||
493 | while (sgl) { | 495 | while (sgl) { |
494 | if (space_to_skip && | 496 | if (space_to_skip && |
495 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { | 497 | space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { |
496 | space_to_skip -= nvgpu_mem_sgl_length(sgl); | 498 | space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); |
497 | sgl = nvgpu_mem_sgl_next(sgl); | 499 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
498 | continue; | 500 | continue; |
499 | } | 501 | } |
500 | 502 | ||
501 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; | 503 | phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; |
502 | chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - | 504 | chunk_length = min(length, (nvgpu_sgt_get_length(sgt, sgl) - |
503 | space_to_skip)); | 505 | space_to_skip)); |
504 | 506 | ||
505 | err = __set_pd_level(vm, &vm->pdb, | 507 | err = __set_pd_level(vm, &vm->pdb, |
@@ -518,27 +520,27 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
518 | */ | 520 | */ |
519 | virt_addr += chunk_length; | 521 | virt_addr += chunk_length; |
520 | length -= chunk_length; | 522 | length -= chunk_length; |
523 | sgl = nvgpu_sgt_get_next(sgt, sgl); | ||
521 | 524 | ||
522 | if (length == 0) | 525 | if (length == 0) |
523 | break; | 526 | break; |
524 | |||
525 | sgl = nvgpu_mem_sgl_next(sgl); | ||
526 | } | 527 | } |
527 | 528 | ||
528 | return err; | 529 | return err; |
529 | } | 530 | } |
530 | 531 | ||
531 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | 532 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, |
532 | struct nvgpu_mem_sgl *sgl, | 533 | struct nvgpu_sgt *sgt, |
533 | u64 space_to_skip, | 534 | u64 space_to_skip, |
534 | u64 virt_addr, | 535 | u64 virt_addr, |
535 | u64 length, | 536 | u64 length, |
536 | struct nvgpu_gmmu_attrs *attrs) | 537 | struct nvgpu_gmmu_attrs *attrs) |
537 | { | 538 | { |
538 | int err; | ||
539 | struct gk20a *g = gk20a_from_vm(vm); | 539 | struct gk20a *g = gk20a_from_vm(vm); |
540 | void *sgl; | ||
541 | int err; | ||
540 | 542 | ||
541 | if (!sgl) { | 543 | if (!sgt) { |
542 | /* | 544 | /* |
543 | * This is considered an unmap. Just pass in 0 as the physical | 545 | * This is considered an unmap. Just pass in 0 as the physical |
544 | * address for the entire GPU range. | 546 | * address for the entire GPU range. |
@@ -559,8 +561,10 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
559 | * mapping is simple since the "physical" address is actually a virtual | 561 | * mapping is simple since the "physical" address is actually a virtual |
560 | * IO address and will be contiguous. | 562 | * IO address and will be contiguous. |
561 | */ | 563 | */ |
564 | sgl = sgt->sgl; | ||
565 | |||
562 | if (!g->mm.bypass_smmu) { | 566 | if (!g->mm.bypass_smmu) { |
563 | u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); | 567 | u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgl, attrs); |
564 | 568 | ||
565 | io_addr += space_to_skip; | 569 | io_addr += space_to_skip; |
566 | 570 | ||
@@ -586,15 +590,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
586 | * Cut out sgl ents for space_to_skip. | 590 | * Cut out sgl ents for space_to_skip. |
587 | */ | 591 | */ |
588 | if (space_to_skip && | 592 | if (space_to_skip && |
589 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { | 593 | space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { |
590 | space_to_skip -= nvgpu_mem_sgl_length(sgl); | 594 | space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); |
591 | sgl = nvgpu_mem_sgl_next(sgl); | 595 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
592 | continue; | 596 | continue; |
593 | } | 597 | } |
594 | 598 | ||
595 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; | 599 | phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; |
596 | chunk_length = min(length, | 600 | chunk_length = min(length, |
597 | nvgpu_mem_sgl_length(sgl) - space_to_skip); | 601 | nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); |
598 | 602 | ||
599 | err = __set_pd_level(vm, &vm->pdb, | 603 | err = __set_pd_level(vm, &vm->pdb, |
600 | 0, | 604 | 0, |
@@ -606,7 +610,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
606 | space_to_skip = 0; | 610 | space_to_skip = 0; |
607 | virt_addr += chunk_length; | 611 | virt_addr += chunk_length; |
608 | length -= chunk_length; | 612 | length -= chunk_length; |
609 | sgl = nvgpu_mem_sgl_next(sgl); | 613 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
610 | 614 | ||
611 | if (length == 0) | 615 | if (length == 0) |
612 | break; | 616 | break; |
@@ -631,7 +635,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
631 | * case of SMMU usage. | 635 | * case of SMMU usage. |
632 | */ | 636 | */ |
633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | 637 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, |
634 | struct nvgpu_mem_sgl *sgl, | 638 | struct nvgpu_sgt *sgt, |
635 | u64 space_to_skip, | 639 | u64 space_to_skip, |
636 | u64 virt_addr, | 640 | u64 virt_addr, |
637 | u64 length, | 641 | u64 length, |
@@ -669,10 +673,10 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
669 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " | 673 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " |
670 | "kind=%#02x APT=%-6s %c%c%c%c%c", | 674 | "kind=%#02x APT=%-6s %c%c%c%c%c", |
671 | vm->name, | 675 | vm->name, |
672 | sgl ? "MAP" : "UNMAP", | 676 | sgt ? "MAP" : "UNMAP", |
673 | virt_addr, | 677 | virt_addr, |
674 | length, | 678 | length, |
675 | sgl ? nvgpu_mem_sgl_phys(sgl) : 0, | 679 | sgt ? nvgpu_sgt_get_phys(sgt, sgt->sgl) : 0, |
676 | space_to_skip, | 680 | space_to_skip, |
677 | page_size >> 10, | 681 | page_size >> 10, |
678 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 682 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
@@ -690,14 +694,14 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
690 | */ | 694 | */ |
691 | if (attrs->aperture == APERTURE_VIDMEM) | 695 | if (attrs->aperture == APERTURE_VIDMEM) |
692 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, | 696 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, |
693 | sgl, | 697 | sgt, |
694 | space_to_skip, | 698 | space_to_skip, |
695 | virt_addr, | 699 | virt_addr, |
696 | length, | 700 | length, |
697 | attrs); | 701 | attrs); |
698 | else | 702 | else |
699 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, | 703 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, |
700 | sgl, | 704 | sgt, |
701 | space_to_skip, | 705 | space_to_skip, |
702 | virt_addr, | 706 | virt_addr, |
703 | length, | 707 | length, |
@@ -706,7 +710,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
706 | unmap_gmmu_pages(g, &vm->pdb); | 710 | unmap_gmmu_pages(g, &vm->pdb); |
707 | nvgpu_smp_mb(); | 711 | nvgpu_smp_mb(); |
708 | 712 | ||
709 | __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); | 713 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); |
710 | 714 | ||
711 | return err; | 715 | return err; |
712 | } | 716 | } |
@@ -725,7 +729,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
725 | */ | 729 | */ |
726 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 730 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
727 | u64 vaddr, | 731 | u64 vaddr, |
728 | struct nvgpu_mem_sgl *sgl, | 732 | struct nvgpu_sgt *sgt, |
729 | u64 buffer_offset, | 733 | u64 buffer_offset, |
730 | u64 size, | 734 | u64 size, |
731 | int pgsz_idx, | 735 | int pgsz_idx, |
@@ -774,7 +778,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
774 | allocated = true; | 778 | allocated = true; |
775 | } | 779 | } |
776 | 780 | ||
777 | err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, | 781 | err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, |
778 | vaddr, size, &attrs); | 782 | vaddr, size, &attrs); |
779 | if (err) { | 783 | if (err) { |
780 | nvgpu_err(g, "failed to update ptes on map"); | 784 | nvgpu_err(g, "failed to update ptes on map"); |
@@ -787,6 +791,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
787 | batch->need_tlb_invalidate = true; | 791 | batch->need_tlb_invalidate = true; |
788 | 792 | ||
789 | return vaddr; | 793 | return vaddr; |
794 | |||
790 | fail_validate: | 795 | fail_validate: |
791 | if (allocated) | 796 | if (allocated) |
792 | __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); | 797 | __nvgpu_vm_free_va(vm, vaddr, pgsz_idx); |
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 7296c673..6decec24 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | |||
@@ -19,55 +19,34 @@ | |||
19 | 19 | ||
20 | #include "gk20a/gk20a.h" | 20 | #include "gk20a/gk20a.h" |
21 | 21 | ||
22 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) | 22 | void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl) |
23 | { | 23 | { |
24 | return sgl->next; | 24 | return sgt->ops->sgl_next(sgl); |
25 | } | 25 | } |
26 | 26 | ||
27 | u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) | 27 | u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl) |
28 | { | 28 | { |
29 | return sgl->phys; | 29 | return sgt->ops->sgl_phys(sgl); |
30 | } | 30 | } |
31 | 31 | ||
32 | u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) | 32 | u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl) |
33 | { | 33 | { |
34 | return sgl->dma; | 34 | return sgt->ops->sgl_dma(sgl); |
35 | } | 35 | } |
36 | 36 | ||
37 | u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) | 37 | u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl) |
38 | { | 38 | { |
39 | return sgl->length; | 39 | return sgt->ops->sgl_length(sgl); |
40 | } | 40 | } |
41 | 41 | ||
42 | /* | 42 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, |
43 | * This builds a GPU address for the %sgl based on whether an IOMMU is present | ||
44 | * or not. It also handles turning the physical address into the true GPU | ||
45 | * physical address that should be programmed into the page tables. | ||
46 | */ | ||
47 | u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, | ||
48 | struct nvgpu_gmmu_attrs *attrs) | 43 | struct nvgpu_gmmu_attrs *attrs) |
49 | { | 44 | { |
50 | if (nvgpu_mem_sgl_dma(sgl) == 0) | 45 | return sgt->ops->sgl_gpu_addr(g, sgl, attrs); |
51 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
52 | nvgpu_mem_sgl_phys(sgl)); | ||
53 | |||
54 | if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE) | ||
55 | return 0; | ||
56 | |||
57 | return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl)); | ||
58 | } | 46 | } |
59 | 47 | ||
60 | void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) | 48 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g) |
61 | { | 49 | { |
62 | struct nvgpu_mem_sgl *next; | 50 | if (sgt && sgt->ops->sgt_free) |
63 | 51 | sgt->ops->sgt_free(g, sgt); | |
64 | /* | ||
65 | * Free each of the elements. We expect each element to have been | ||
66 | * nvgpu_k[mz]alloc()ed. | ||
67 | */ | ||
68 | while (sgl) { | ||
69 | next = nvgpu_mem_sgl_next(sgl); | ||
70 | nvgpu_kfree(g, sgl); | ||
71 | sgl = next; | ||
72 | } | ||
73 | } | 52 | } |
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c index 6d92b457..9c35f528 100644 --- a/drivers/gpu/nvgpu/common/mm/page_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c | |||
@@ -143,20 +143,93 @@ static void nvgpu_page_release_co(struct nvgpu_allocator *a, | |||
143 | nvgpu_alloc_release_carveout(&va->source_allocator, co); | 143 | nvgpu_alloc_release_carveout(&va->source_allocator, co); |
144 | } | 144 | } |
145 | 145 | ||
146 | static void *nvgpu_page_alloc_sgl_next(void *sgl) | ||
147 | { | ||
148 | struct nvgpu_mem_sgl *nvgpu_sgl = sgl; | ||
149 | |||
150 | return nvgpu_sgl->next; | ||
151 | } | ||
152 | |||
153 | static u64 nvgpu_page_alloc_sgl_phys(void *sgl) | ||
154 | { | ||
155 | struct nvgpu_mem_sgl *nvgpu_sgl = sgl; | ||
156 | |||
157 | return nvgpu_sgl->phys; | ||
158 | } | ||
159 | |||
160 | static u64 nvgpu_page_alloc_sgl_dma(void *sgl) | ||
161 | { | ||
162 | struct nvgpu_mem_sgl *nvgpu_sgl = sgl; | ||
163 | |||
164 | return nvgpu_sgl->dma; | ||
165 | } | ||
166 | |||
167 | static u64 nvgpu_page_alloc_sgl_length(void *sgl) | ||
168 | { | ||
169 | struct nvgpu_mem_sgl *nvgpu_sgl = sgl; | ||
170 | |||
171 | return nvgpu_sgl->length; | ||
172 | } | ||
173 | |||
174 | static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g, void *sgl, | ||
175 | struct nvgpu_gmmu_attrs *attrs) | ||
176 | { | ||
177 | struct nvgpu_mem_sgl *nvgpu_sgl = sgl; | ||
178 | |||
179 | return nvgpu_sgl->phys; | ||
180 | } | ||
181 | |||
182 | static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) | ||
183 | { | ||
184 | /* | ||
185 | * No-op here. The free is handled by the page_alloc free() functions. | ||
186 | */ | ||
187 | } | ||
188 | |||
189 | /* | ||
190 | * These implement the generic scatter gather ops for pages allocated | ||
191 | * by the page allocator. however, the primary aim for this, is of course, | ||
192 | * vidmem. | ||
193 | */ | ||
194 | static const struct nvgpu_sgt_ops page_alloc_sgl_ops = { | ||
195 | .sgl_next = nvgpu_page_alloc_sgl_next, | ||
196 | .sgl_phys = nvgpu_page_alloc_sgl_phys, | ||
197 | .sgl_dma = nvgpu_page_alloc_sgl_dma, | ||
198 | .sgl_length = nvgpu_page_alloc_sgl_length, | ||
199 | .sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr, | ||
200 | .sgt_free = nvgpu_page_alloc_sgt_free, | ||
201 | }; | ||
202 | |||
203 | /* | ||
204 | * This actually frees the sgl memory. Used by the page_alloc free() functions. | ||
205 | */ | ||
206 | static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g, | ||
207 | struct nvgpu_mem_sgl *sgl) | ||
208 | { | ||
209 | struct nvgpu_mem_sgl *next; | ||
210 | |||
211 | while (sgl) { | ||
212 | next = sgl->next; | ||
213 | nvgpu_kfree(g, sgl); | ||
214 | sgl = next; | ||
215 | } | ||
216 | } | ||
217 | |||
146 | static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, | 218 | static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, |
147 | struct nvgpu_page_alloc *alloc, | 219 | struct nvgpu_page_alloc *alloc, |
148 | bool free_buddy_alloc) | 220 | bool free_buddy_alloc) |
149 | { | 221 | { |
150 | struct nvgpu_mem_sgl *sgl = alloc->sgl; | 222 | struct nvgpu_mem_sgl *sgl = alloc->sgt.sgl; |
151 | 223 | ||
152 | if (free_buddy_alloc) { | 224 | if (free_buddy_alloc) { |
153 | while (sgl) { | 225 | while (sgl) { |
154 | nvgpu_free(&a->source_allocator, sgl->phys); | 226 | nvgpu_free(&a->source_allocator, |
155 | sgl = nvgpu_mem_sgl_next(sgl); | 227 | nvgpu_sgt_get_phys(&alloc->sgt, sgl)); |
228 | sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); | ||
156 | } | 229 | } |
157 | } | 230 | } |
158 | 231 | ||
159 | nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); | 232 | nvgpu_page_alloc_sgl_proper_free(a->owner->g, sgl); |
160 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 233 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
161 | } | 234 | } |
162 | 235 | ||
@@ -306,7 +379,7 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a, | |||
306 | alloc->length = slab_page->slab_size; | 379 | alloc->length = slab_page->slab_size; |
307 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); | 380 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); |
308 | 381 | ||
309 | sgl = alloc->sgl; | 382 | sgl = alloc->sgt.sgl; |
310 | sgl->phys = alloc->base; | 383 | sgl->phys = alloc->base; |
311 | sgl->dma = alloc->base; | 384 | sgl->dma = alloc->base; |
312 | sgl->length = alloc->length; | 385 | sgl->length = alloc->length; |
@@ -338,13 +411,16 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
338 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); | 411 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); |
339 | goto fail; | 412 | goto fail; |
340 | } | 413 | } |
414 | |||
415 | alloc->sgt.ops = &page_alloc_sgl_ops; | ||
416 | |||
341 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); | 417 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
342 | if (!sgl) { | 418 | if (!sgl) { |
343 | palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); | 419 | palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); |
344 | goto fail; | 420 | goto fail; |
345 | } | 421 | } |
346 | 422 | ||
347 | alloc->sgl = sgl; | 423 | alloc->sgt.sgl = sgl; |
348 | err = __do_slab_alloc(a, slab, alloc); | 424 | err = __do_slab_alloc(a, slab, alloc); |
349 | if (err) | 425 | if (err) |
350 | goto fail; | 426 | goto fail; |
@@ -432,6 +508,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
432 | memset(alloc, 0, sizeof(*alloc)); | 508 | memset(alloc, 0, sizeof(*alloc)); |
433 | 509 | ||
434 | alloc->length = pages << a->page_shift; | 510 | alloc->length = pages << a->page_shift; |
511 | alloc->sgt.ops = &page_alloc_sgl_ops; | ||
435 | 512 | ||
436 | while (pages) { | 513 | while (pages) { |
437 | u64 chunk_addr = 0; | 514 | u64 chunk_addr = 0; |
@@ -495,7 +572,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
495 | if (prev_sgl) | 572 | if (prev_sgl) |
496 | prev_sgl->next = sgl; | 573 | prev_sgl->next = sgl; |
497 | else | 574 | else |
498 | alloc->sgl = sgl; | 575 | alloc->sgt.sgl = sgl; |
499 | 576 | ||
500 | prev_sgl = sgl; | 577 | prev_sgl = sgl; |
501 | 578 | ||
@@ -503,12 +580,12 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
503 | } | 580 | } |
504 | 581 | ||
505 | alloc->nr_chunks = i; | 582 | alloc->nr_chunks = i; |
506 | alloc->base = alloc->sgl->phys; | 583 | alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys; |
507 | 584 | ||
508 | return alloc; | 585 | return alloc; |
509 | 586 | ||
510 | fail_cleanup: | 587 | fail_cleanup: |
511 | sgl = alloc->sgl; | 588 | sgl = alloc->sgt.sgl; |
512 | while (sgl) { | 589 | while (sgl) { |
513 | struct nvgpu_mem_sgl *next = sgl->next; | 590 | struct nvgpu_mem_sgl *next = sgl->next; |
514 | 591 | ||
@@ -542,13 +619,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | |||
542 | 619 | ||
543 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | 620 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", |
544 | pages << a->page_shift, pages, alloc->base); | 621 | pages << a->page_shift, pages, alloc->base); |
545 | sgl = alloc->sgl; | 622 | sgl = alloc->sgt.sgl; |
546 | while (sgl) { | 623 | while (sgl) { |
547 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 624 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
548 | i++, | 625 | i++, |
549 | nvgpu_mem_sgl_phys(sgl), | 626 | nvgpu_sgt_get_phys(&alloc->sgt, sgl), |
550 | nvgpu_mem_sgl_length(sgl)); | 627 | nvgpu_sgt_get_length(&alloc->sgt, sgl)); |
551 | sgl = sgl->next; | 628 | sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); |
552 | } | 629 | } |
553 | palloc_dbg(a, "Alloc done\n"); | 630 | palloc_dbg(a, "Alloc done\n"); |
554 | 631 | ||
@@ -655,6 +732,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
655 | if (!alloc || !sgl) | 732 | if (!alloc || !sgl) |
656 | goto fail; | 733 | goto fail; |
657 | 734 | ||
735 | alloc->sgt.ops = &page_alloc_sgl_ops; | ||
658 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); | 736 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); |
659 | if (!alloc->base) { | 737 | if (!alloc->base) { |
660 | WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); | 738 | WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base); |
@@ -663,7 +741,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
663 | 741 | ||
664 | alloc->nr_chunks = 1; | 742 | alloc->nr_chunks = 1; |
665 | alloc->length = length; | 743 | alloc->length = length; |
666 | alloc->sgl = sgl; | 744 | alloc->sgt.sgl = sgl; |
667 | 745 | ||
668 | sgl->phys = alloc->base; | 746 | sgl->phys = alloc->base; |
669 | sgl->dma = alloc->base; | 747 | sgl->dma = alloc->base; |
@@ -708,13 +786,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
708 | 786 | ||
709 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | 787 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", |
710 | alloc->base, aligned_len, pages); | 788 | alloc->base, aligned_len, pages); |
711 | sgl = alloc->sgl; | 789 | sgl = alloc->sgt.sgl; |
712 | while (sgl) { | 790 | while (sgl) { |
713 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 791 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
714 | i++, | 792 | i++, |
715 | nvgpu_mem_sgl_phys(sgl), | 793 | nvgpu_sgt_get_phys(&alloc->sgt, sgl), |
716 | nvgpu_mem_sgl_length(sgl)); | 794 | nvgpu_sgt_get_length(&alloc->sgt, sgl)); |
717 | sgl = sgl->next; | 795 | sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl); |
718 | } | 796 | } |
719 | 797 | ||
720 | a->nr_fixed_allocs++; | 798 | a->nr_fixed_allocs++; |
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index bb7d930e..ae9c9b1f 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c | |||
@@ -84,24 +84,23 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, | |||
84 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | 84 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) |
85 | { | 85 | { |
86 | struct nvgpu_page_alloc *alloc = NULL; | 86 | struct nvgpu_page_alloc *alloc = NULL; |
87 | struct nvgpu_mem_sgl *sgl; | 87 | struct nvgpu_sgt *sgt; |
88 | void *sgl; | ||
88 | u32 byteoff, start_reg, until_end, n; | 89 | u32 byteoff, start_reg, until_end, n; |
89 | 90 | ||
90 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | 91 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); |
91 | sgl = alloc->sgl; | 92 | sgt = &alloc->sgt; |
92 | while (sgl) { | 93 | for (sgl = sgt->sgl; sgl; sgl = nvgpu_sgt_get_next(sgt, sgl)) { |
93 | if (offset >= nvgpu_mem_sgl_length(sgl)) { | 94 | if (offset >= nvgpu_sgt_get_length(sgt, sgl)) |
94 | offset -= nvgpu_mem_sgl_length(sgl); | 95 | offset -= nvgpu_sgt_get_length(sgt, sgl); |
95 | sgl = sgl->next; | 96 | else |
96 | } else { | ||
97 | break; | 97 | break; |
98 | } | ||
99 | } | 98 | } |
100 | 99 | ||
101 | while (size) { | 100 | while (size) { |
102 | u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); | 101 | u32 sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl); |
103 | 102 | ||
104 | byteoff = g->ops.pramin.enter(g, mem, sgl, | 103 | byteoff = g->ops.pramin.enter(g, mem, sgt, sgl, |
105 | offset / sizeof(u32)); | 104 | offset / sizeof(u32)); |
106 | start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); | 105 | start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); |
107 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); | 106 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); |
@@ -117,7 +116,7 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, | |||
117 | size -= n; | 116 | size -= n; |
118 | 117 | ||
119 | if (n == (sgl_len - offset)) { | 118 | if (n == (sgl_len - offset)) { |
120 | sgl = nvgpu_mem_sgl_next(sgl); | 119 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
121 | offset = 0; | 120 | offset = 0; |
122 | } else { | 121 | } else { |
123 | offset += n; | 122 | offset += n; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 355228db..13c62691 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -34,7 +34,7 @@ struct gk20a_debug_output; | |||
34 | struct nvgpu_clk_pll_debug_data; | 34 | struct nvgpu_clk_pll_debug_data; |
35 | struct nvgpu_nvhost_dev; | 35 | struct nvgpu_nvhost_dev; |
36 | struct nvgpu_cpu_time_correlation_sample; | 36 | struct nvgpu_cpu_time_correlation_sample; |
37 | struct nvgpu_mem_sgl; | 37 | struct nvgpu_mem_sgt; |
38 | 38 | ||
39 | #include <nvgpu/lock.h> | 39 | #include <nvgpu/lock.h> |
40 | #include <nvgpu/thread.h> | 40 | #include <nvgpu/thread.h> |
@@ -700,7 +700,7 @@ struct gpu_ops { | |||
700 | bool (*support_sparse)(struct gk20a *g); | 700 | bool (*support_sparse)(struct gk20a *g); |
701 | u64 (*gmmu_map)(struct vm_gk20a *vm, | 701 | u64 (*gmmu_map)(struct vm_gk20a *vm, |
702 | u64 map_offset, | 702 | u64 map_offset, |
703 | struct nvgpu_mem_sgl *sgl, | 703 | struct nvgpu_sgt *sgt, |
704 | u64 buffer_offset, | 704 | u64 buffer_offset, |
705 | u64 size, | 705 | u64 size, |
706 | int pgsz_idx, | 706 | int pgsz_idx, |
@@ -760,9 +760,9 @@ struct gpu_ops { | |||
760 | size_t size); | 760 | size_t size); |
761 | struct { | 761 | struct { |
762 | u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, | 762 | u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, |
763 | struct nvgpu_mem_sgl *sgl, u32 w); | 763 | struct nvgpu_sgt *sgt, void *sgl, u32 w); |
764 | void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, | 764 | void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, |
765 | struct nvgpu_mem_sgl *sgl); | 765 | void *sgl); |
766 | u32 (*data032_r)(u32 i); | 766 | u32 (*data032_r)(u32 i); |
767 | } pramin; | 767 | } pramin; |
768 | struct { | 768 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index cd34e769..0e0326dd 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1151,7 +1151,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
1151 | struct gk20a_fence *gk20a_fence_out = NULL; | 1151 | struct gk20a_fence *gk20a_fence_out = NULL; |
1152 | struct gk20a_fence *gk20a_last_fence = NULL; | 1152 | struct gk20a_fence *gk20a_last_fence = NULL; |
1153 | struct nvgpu_page_alloc *alloc = NULL; | 1153 | struct nvgpu_page_alloc *alloc = NULL; |
1154 | struct nvgpu_mem_sgl *sgl = NULL; | 1154 | struct nvgpu_sgt *sgt = NULL; |
1155 | void *sgl = NULL; | ||
1155 | int err = 0; | 1156 | int err = 0; |
1156 | 1157 | ||
1157 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) | 1158 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) |
@@ -1159,7 +1160,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
1159 | 1160 | ||
1160 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | 1161 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); |
1161 | 1162 | ||
1162 | sgl = alloc->sgl; | 1163 | sgt = &alloc->sgt; |
1164 | sgl = sgt->sgl; | ||
1163 | while (sgl) { | 1165 | while (sgl) { |
1164 | if (gk20a_last_fence) | 1166 | if (gk20a_last_fence) |
1165 | gk20a_fence_put(gk20a_last_fence); | 1167 | gk20a_fence_put(gk20a_last_fence); |
@@ -1167,8 +1169,8 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
1167 | err = gk20a_ce_execute_ops(g, | 1169 | err = gk20a_ce_execute_ops(g, |
1168 | g->mm.vidmem.ce_ctx_id, | 1170 | g->mm.vidmem.ce_ctx_id, |
1169 | 0, | 1171 | 0, |
1170 | nvgpu_mem_sgl_phys(sgl), | 1172 | nvgpu_sgt_get_phys(sgt, sgl), |
1171 | nvgpu_mem_sgl_length(sgl), | 1173 | nvgpu_sgt_get_length(sgt, sgl), |
1172 | 0x00000000, | 1174 | 0x00000000, |
1173 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | 1175 | NVGPU_CE_DST_LOCATION_LOCAL_FB, |
1174 | NVGPU_CE_MEMSET, | 1176 | NVGPU_CE_MEMSET, |
@@ -1183,7 +1185,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
1183 | } | 1185 | } |
1184 | 1186 | ||
1185 | gk20a_last_fence = gk20a_fence_out; | 1187 | gk20a_last_fence = gk20a_fence_out; |
1186 | sgl = nvgpu_mem_sgl_next(sgl); | 1188 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
1187 | } | 1189 | } |
1188 | 1190 | ||
1189 | if (gk20a_last_fence) { | 1191 | if (gk20a_last_fence) { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 2fdc1729..9c5e0fae 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -361,7 +361,7 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) | |||
361 | 361 | ||
362 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 362 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
363 | u64 map_offset, | 363 | u64 map_offset, |
364 | struct nvgpu_mem_sgl *sgl, | 364 | struct nvgpu_sgt *sgt, |
365 | u64 buffer_offset, | 365 | u64 buffer_offset, |
366 | u64 size, | 366 | u64 size, |
367 | int pgsz_idx, | 367 | int pgsz_idx, |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c index 8a34a63c..aaba4ffc 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c | |||
@@ -26,9 +26,9 @@ | |||
26 | 26 | ||
27 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ | 27 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ |
28 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | 28 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, |
29 | struct nvgpu_mem_sgl *sgl, u32 w) | 29 | struct nvgpu_sgt *sgt, void *sgl, u32 w) |
30 | { | 30 | { |
31 | u64 bufbase = nvgpu_mem_sgl_phys(sgl); | 31 | u64 bufbase = nvgpu_sgt_get_phys(sgt, sgl); |
32 | u64 addr = bufbase + w * sizeof(u32); | 32 | u64 addr = bufbase + w * sizeof(u32); |
33 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 33 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
34 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 34 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
@@ -41,8 +41,8 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
41 | gk20a_dbg(gpu_dbg_mem, | 41 | gk20a_dbg(gpu_dbg_mem, |
42 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", | 42 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", |
43 | hi, lo, mem, sgl, bufbase, | 43 | hi, lo, mem, sgl, bufbase, |
44 | bufbase + nvgpu_mem_sgl_phys(sgl), | 44 | bufbase + nvgpu_sgt_get_phys(sgt, sgl), |
45 | nvgpu_mem_sgl_length(sgl)); | 45 | nvgpu_sgt_get_length(sgt, sgl)); |
46 | 46 | ||
47 | WARN_ON(!bufbase); | 47 | WARN_ON(!bufbase); |
48 | 48 | ||
@@ -58,7 +58,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
58 | } | 58 | } |
59 | 59 | ||
60 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, | 60 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, |
61 | struct nvgpu_mem_sgl *sgl) | 61 | void *sgl) |
62 | { | 62 | { |
63 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); | 63 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); |
64 | 64 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h index fc5ba919..29e76978 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h | |||
@@ -22,7 +22,7 @@ struct nvgpu_mem; | |||
22 | struct nvgpu_mem_sgl; | 22 | struct nvgpu_mem_sgl; |
23 | 23 | ||
24 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | 24 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, |
25 | struct nvgpu_mem_sgl *sgl, u32 w); | 25 | struct nvgpu_sgt *sgt, void *sgl, u32 w); |
26 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, | 26 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, |
27 | struct nvgpu_mem_sgl *sgl); | 27 | void *sgl); |
28 | #endif | 28 | #endif |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h index f96c2801..517d834c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h | |||
@@ -20,6 +20,7 @@ | |||
20 | struct page; | 20 | struct page; |
21 | struct sg_table; | 21 | struct sg_table; |
22 | struct scatterlist; | 22 | struct scatterlist; |
23 | struct nvgpu_sgt; | ||
23 | 24 | ||
24 | struct gk20a; | 25 | struct gk20a; |
25 | struct nvgpu_mem; | 26 | struct nvgpu_mem; |
@@ -32,9 +33,11 @@ struct nvgpu_mem_priv { | |||
32 | }; | 33 | }; |
33 | 34 | ||
34 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); | 35 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); |
35 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, | 36 | struct nvgpu_sgt *nvgpu_mem_linux_sgt_create(struct gk20a *g, |
37 | struct sg_table *sgt); | ||
38 | void nvgpu_mem_linux_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt); | ||
39 | struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, | ||
36 | struct sg_table *sgt); | 40 | struct sg_table *sgt); |
37 | |||
38 | /** | 41 | /** |
39 | * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. | 42 | * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. |
40 | * | 43 | * |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 7d19cf81..beffbfe8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
@@ -46,12 +46,41 @@ enum nvgpu_aperture { | |||
46 | APERTURE_VIDMEM | 46 | APERTURE_VIDMEM |
47 | }; | 47 | }; |
48 | 48 | ||
49 | struct nvgpu_sgt_ops { | ||
50 | void *(*sgl_next)(void *sgl); | ||
51 | u64 (*sgl_phys)(void *sgl); | ||
52 | u64 (*sgl_dma)(void *sgl); | ||
53 | u64 (*sgl_length)(void *sgl); | ||
54 | u64 (*sgl_gpu_addr)(struct gk20a *g, void *sgl, | ||
55 | struct nvgpu_gmmu_attrs *attrs); | ||
56 | /* | ||
57 | * Note: this operates on the whole SGT not a specific SGL entry. | ||
58 | */ | ||
59 | void (*sgt_free)(struct gk20a *g, struct nvgpu_sgt *sgt); | ||
60 | }; | ||
61 | |||
62 | /* | ||
63 | * Scatter gather table: this is a list of scatter list entries and the ops for | ||
64 | * interacting with those entries. | ||
65 | */ | ||
66 | struct nvgpu_sgt { | ||
67 | /* | ||
68 | * Ops for interacting with the underlying scatter gather list entries. | ||
69 | */ | ||
70 | const struct nvgpu_sgt_ops *ops; | ||
71 | |||
72 | /* | ||
73 | * The first node in the scatter gather list. | ||
74 | */ | ||
75 | void *sgl; | ||
76 | }; | ||
77 | |||
49 | /* | 78 | /* |
50 | * This struct holds the necessary information for describing a struct | 79 | * This struct holds the necessary information for describing a struct |
51 | * nvgpu_mem's scatter gather list. | 80 | * nvgpu_mem's scatter gather list. |
52 | * | 81 | * |
53 | * These are created in a platform dependent way. As a result the function | 82 | * Not all nvgpu_sgt's use this particular implementation. Nor is a given OS |
54 | * definition for allocating these lives in the <nvgpu/_OS_/nvgpu_mem.h> file. | 83 | * required to use this at all. |
55 | */ | 84 | */ |
56 | struct nvgpu_mem_sgl { | 85 | struct nvgpu_mem_sgl { |
57 | /* | 86 | /* |
@@ -164,6 +193,32 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem) | |||
164 | 193 | ||
165 | } | 194 | } |
166 | 195 | ||
196 | /* | ||
197 | * Create a nvgpu_sgt of the default implementation | ||
198 | */ | ||
199 | struct nvgpu_sgt *nvgpu_sgt_create(struct gk20a *g); | ||
200 | |||
201 | /** | ||
202 | * nvgpu_mem_sgt_create_from_mem - Create a scatter list from an nvgpu_mem. | ||
203 | * | ||
204 | * @g - The GPU. | ||
205 | * @mem - The source memory allocation to use. | ||
206 | * | ||
207 | * Create a scatter gather table from the passed @mem struct. This list lets the | ||
208 | * calling code iterate across each chunk of a DMA allocation for when that DMA | ||
209 | * allocation is not completely contiguous. | ||
210 | */ | ||
211 | struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, | ||
212 | struct nvgpu_mem *mem); | ||
213 | |||
214 | void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl); | ||
215 | u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl); | ||
216 | u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl); | ||
217 | u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl); | ||
218 | u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl, | ||
219 | struct nvgpu_gmmu_attrs *attrs); | ||
220 | void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g); | ||
221 | |||
167 | /** | 222 | /** |
168 | * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one. | 223 | * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one. |
169 | * | 224 | * |
@@ -200,27 +255,6 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
200 | struct nvgpu_mem *dest, struct nvgpu_mem *src, | 255 | struct nvgpu_mem *dest, struct nvgpu_mem *src, |
201 | int start_page, int nr_pages); | 256 | int start_page, int nr_pages); |
202 | 257 | ||
203 | /** | ||
204 | * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem. | ||
205 | * | ||
206 | * @g - The GPU. | ||
207 | * @mem - The source memory allocation to use. | ||
208 | * | ||
209 | * Create a scatter gather list from the passed @mem struct. This list lets the | ||
210 | * calling code iterate across each chunk of a DMA allocation for when that DMA | ||
211 | * allocation is not completely contiguous. | ||
212 | */ | ||
213 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, | ||
214 | struct nvgpu_mem *mem); | ||
215 | void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl); | ||
216 | |||
217 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl); | ||
218 | u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl); | ||
219 | u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl); | ||
220 | u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl); | ||
221 | u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, | ||
222 | struct nvgpu_gmmu_attrs *attrs); | ||
223 | |||
224 | /* | 258 | /* |
225 | * Buffer accessors - wrap between begin() and end() if there is no permanent | 259 | * Buffer accessors - wrap between begin() and end() if there is no permanent |
226 | * kernel mapping for this buffer. | 260 | * kernel mapping for this buffer. |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h index de83ca7f..b22c55d0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h | |||
@@ -91,10 +91,10 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node) | |||
91 | */ | 91 | */ |
92 | struct nvgpu_page_alloc { | 92 | struct nvgpu_page_alloc { |
93 | /* | 93 | /* |
94 | * nvgpu_mem_sgl for describing the actual allocation. Convenient for | 94 | * nvgpu_sgt for describing the actual allocation. Convenient for |
95 | * GMMU mapping. | 95 | * GMMU mapping. |
96 | */ | 96 | */ |
97 | struct nvgpu_mem_sgl *sgl; | 97 | struct nvgpu_sgt sgt; |
98 | 98 | ||
99 | int nr_chunks; | 99 | int nr_chunks; |
100 | u64 length; | 100 | u64 length; |
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c index ee9b791a..d9324363 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | |||
@@ -40,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, | |||
40 | 40 | ||
41 | static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | 41 | static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, |
42 | u64 map_offset, | 42 | u64 map_offset, |
43 | struct nvgpu_mem_sgl *sgl, | 43 | struct nvgpu_sgt *sgt, |
44 | u64 buffer_offset, | 44 | u64 buffer_offset, |
45 | u64 size, | 45 | u64 size, |
46 | int pgsz_idx, | 46 | int pgsz_idx, |
@@ -66,12 +66,13 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
66 | void *handle = NULL; | 66 | void *handle = NULL; |
67 | size_t oob_size; | 67 | size_t oob_size; |
68 | u8 prot; | 68 | u8 prot; |
69 | void *sgl; | ||
69 | 70 | ||
70 | gk20a_dbg_fn(""); | 71 | gk20a_dbg_fn(""); |
71 | 72 | ||
72 | /* FIXME: add support for sparse mappings */ | 73 | /* FIXME: add support for sparse mappings */ |
73 | 74 | ||
74 | if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu)) | 75 | if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) |
75 | return 0; | 76 | return 0; |
76 | 77 | ||
77 | if (space_to_skip & (page_size - 1)) | 78 | if (space_to_skip & (page_size - 1)) |
@@ -97,7 +98,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
97 | err = -EINVAL; | 98 | err = -EINVAL; |
98 | goto fail; | 99 | goto fail; |
99 | } | 100 | } |
100 | 101 | sgl = sgt->sgl; | |
101 | while (sgl) { | 102 | while (sgl) { |
102 | u64 phys_addr; | 103 | u64 phys_addr; |
103 | u64 chunk_length; | 104 | u64 chunk_length; |
@@ -106,15 +107,15 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
106 | * Cut out sgl ents for space_to_skip. | 107 | * Cut out sgl ents for space_to_skip. |
107 | */ | 108 | */ |
108 | if (space_to_skip && | 109 | if (space_to_skip && |
109 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { | 110 | space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) { |
110 | space_to_skip -= nvgpu_mem_sgl_length(sgl); | 111 | space_to_skip -= nvgpu_sgt_get_length(sgt, sgl); |
111 | sgl = nvgpu_mem_sgl_next(sgl); | 112 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
112 | continue; | 113 | continue; |
113 | } | 114 | } |
114 | 115 | ||
115 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; | 116 | phys_addr = nvgpu_sgt_get_phys(sgt, sgl) + space_to_skip; |
116 | chunk_length = min(size, | 117 | chunk_length = min(size, |
117 | nvgpu_mem_sgl_length(sgl) - space_to_skip); | 118 | nvgpu_sgt_get_length(sgt, sgl) - space_to_skip); |
118 | 119 | ||
119 | if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, | 120 | if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, |
120 | chunk_length, &oob_size)) { | 121 | chunk_length, &oob_size)) { |
@@ -124,7 +125,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
124 | 125 | ||
125 | space_to_skip = 0; | 126 | space_to_skip = 0; |
126 | size -= chunk_length; | 127 | size -= chunk_length; |
127 | sgl = nvgpu_mem_sgl_next(sgl); | 128 | sgl = nvgpu_sgt_get_next(sgt, sgl); |
128 | 129 | ||
129 | if (size == 0) | 130 | if (size == 0) |
130 | break; | 131 | break; |
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 5da6f158..adb01ae5 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
@@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g) | |||
78 | 78 | ||
79 | static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | 79 | static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, |
80 | u64 map_offset, | 80 | u64 map_offset, |
81 | struct nvgpu_mem_sgl *sgl, | 81 | struct nvgpu_sgt *sgt, |
82 | u64 buffer_offset, | 82 | u64 buffer_offset, |
83 | u64 size, | 83 | u64 size, |
84 | int pgsz_idx, | 84 | int pgsz_idx, |
@@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
98 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | 98 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); |
99 | struct tegra_vgpu_cmd_msg msg; | 99 | struct tegra_vgpu_cmd_msg msg; |
100 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | 100 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; |
101 | u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL); | 101 | u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL); |
102 | u8 prot; | 102 | u8 prot; |
103 | 103 | ||
104 | gk20a_dbg_fn(""); | 104 | gk20a_dbg_fn(""); |