20 files changed, 474 insertions, 241 deletions
diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index d02870fb..6e475fcb 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
| @@ -55,6 +55,7 @@ nvgpu-y := \ | |||
| 55 | common/mm/pd_cache.o \ | 55 | common/mm/pd_cache.o \ |
| 56 | common/mm/vm.o \ | 56 | common/mm/vm.o \ |
| 57 | common/mm/vm_area.o \ | 57 | common/mm/vm_area.o \ |
| 58 | common/mm/nvgpu_mem.o \ | ||
| 58 | common/bus.o \ | 59 | common/bus.o \ |
| 59 | common/enabled.o \ | 60 | common/enabled.o \ |
| 60 | common/pramin.o \ | 61 | common/pramin.o \ |
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <nvgpu/log.h> | 21 | #include <nvgpu/log.h> |
| 22 | #include <nvgpu/bug.h> | 22 | #include <nvgpu/bug.h> |
| 23 | #include <nvgpu/enabled.h> | 23 | #include <nvgpu/enabled.h> |
| 24 | #include <nvgpu/kmem.h> | ||
| 24 | 25 | ||
| 25 | #include <nvgpu/linux/dma.h> | 26 | #include <nvgpu/linux/dma.h> |
| 26 | 27 | ||
| @@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, | |||
| 395 | 396 | ||
| 396 | return 0; | 397 | return 0; |
| 397 | } | 398 | } |
| 399 | |||
| 400 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, | ||
| 401 | struct nvgpu_mem_sgl *sgl) | ||
| 402 | { | ||
| 403 | struct nvgpu_mem_sgl *head, *next; | ||
| 404 | |||
| 405 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
| 406 | if (!head) | ||
| 407 | return NULL; | ||
| 408 | |||
| 409 | next = head; | ||
| 410 | while (true) { | ||
| 411 | nvgpu_log(g, gpu_dbg_sgl, | ||
| 412 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | ||
| 413 | sgl->phys, sgl->dma, sgl->length); | ||
| 414 | |||
| 415 | next->dma = sgl->dma; | ||
| 416 | next->phys = sgl->phys; | ||
| 417 | next->length = sgl->length; | ||
| 418 | next->next = NULL; | ||
| 419 | |||
| 420 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 421 | if (!sgl) | ||
| 422 | break; | ||
| 423 | |||
| 424 | next->next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
| 425 | if (!next->next) { | ||
| 426 | nvgpu_mem_sgl_free(g, head); | ||
| 427 | return NULL; | ||
| 428 | } | ||
| 429 | next = next->next; | ||
| 430 | } | ||
| 431 | |||
| 432 | return head; | ||
| 433 | } | ||
| 434 | |||
| 435 | static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( | ||
| 436 | struct gk20a *g, | ||
| 437 | struct scatterlist *linux_sgl) | ||
| 438 | { | ||
| 439 | struct nvgpu_page_alloc *vidmem_alloc; | ||
| 440 | |||
| 441 | vidmem_alloc = get_vidmem_page_alloc(linux_sgl); | ||
| 442 | if (!vidmem_alloc) | ||
| 443 | return NULL; | ||
| 444 | |||
| 445 | nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); | ||
| 446 | |||
| 447 | return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl); | ||
| 448 | } | ||
| 449 | |||
| 450 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, | ||
| 451 | struct sg_table *sgt) | ||
| 452 | { | ||
| 453 | struct nvgpu_mem_sgl *head, *sgl, *next; | ||
| 454 | struct scatterlist *linux_sgl = sgt->sgl; | ||
| 455 | |||
| 456 | if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) | ||
| 457 | return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); | ||
| 458 | |||
| 459 | head = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
| 460 | if (!head) | ||
| 461 | return NULL; | ||
| 462 | |||
| 463 | nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); | ||
| 464 | |||
| 465 | sgl = head; | ||
| 466 | while (true) { | ||
| 467 | sgl->dma = sg_dma_address(linux_sgl); | ||
| 468 | sgl->phys = sg_phys(linux_sgl); | ||
| 469 | sgl->length = linux_sgl->length; | ||
| 470 | |||
| 471 | /* | ||
| 472 | * We don't like offsets in the pages here. This will cause | ||
| 473 | * problems. | ||
| 474 | */ | ||
| 475 | if (WARN_ON(linux_sgl->offset)) { | ||
| 476 | nvgpu_mem_sgl_free(g, head); | ||
| 477 | return NULL; | ||
| 478 | } | ||
| 479 | |||
| 480 | nvgpu_log(g, gpu_dbg_sgl, | ||
| 481 | " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", | ||
| 482 | sgl->phys, sgl->dma, sgl->length); | ||
| 483 | |||
| 484 | /* | ||
| 485 | * When there's no more SGL ents for the Linux SGL we are | ||
| 486 | * done. Don't bother making any more SGL ents for the nvgpu | ||
| 487 | * SGL. | ||
| 488 | */ | ||
| 489 | linux_sgl = sg_next(linux_sgl); | ||
| 490 | if (!linux_sgl) | ||
| 491 | break; | ||
| 492 | |||
| 493 | next = nvgpu_kzalloc(g, sizeof(*sgl)); | ||
| 494 | if (!next) { | ||
| 495 | nvgpu_mem_sgl_free(g, head); | ||
| 496 | return NULL; | ||
| 497 | } | ||
| 498 | |||
| 499 | sgl->next = next; | ||
| 500 | sgl = next; | ||
| 501 | } | ||
| 502 | |||
| 503 | nvgpu_log(g, gpu_dbg_sgl, "Done!"); | ||
| 504 | return head; | ||
| 505 | } | ||
| 506 | |||
| 507 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, | ||
| 508 | struct nvgpu_mem *mem) | ||
| 509 | { | ||
| 510 | return nvgpu_mem_sgl_create(g, mem->priv.sgt); | ||
| 511 | } | ||
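For orientation, this is roughly the scatter-gather node these helpers build and how such a list is meant to be walked. The field set is inferred from the assignments above and from the accessors added in common/mm/nvgpu_mem.c later in this patch, not copied from <nvgpu/nvgpu_mem.h>, and example_sgl_total_length() is a hypothetical helper, not part of the change:

struct nvgpu_mem_sgl {          /* field set inferred; real layout may differ */
        struct nvgpu_mem_sgl *next;
        u64 phys;
        u64 dma;
        u64 length;
};

static u64 example_sgl_total_length(struct nvgpu_mem_sgl *sgl)
{
        u64 total = 0;

        /* Walk the singly linked list built by nvgpu_mem_sgl_create(). */
        while (sgl) {
                total += nvgpu_mem_sgl_length(sgl);
                sgl = nvgpu_mem_sgl_next(sgl);
        }

        return total;
}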
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
| @@ -21,8 +21,11 @@ | |||
| 21 | #include <nvgpu/lock.h> | 21 | #include <nvgpu/lock.h> |
| 22 | #include <nvgpu/rbtree.h> | 22 | #include <nvgpu/rbtree.h> |
| 23 | #include <nvgpu/vm_area.h> | 23 | #include <nvgpu/vm_area.h> |
| 24 | #include <nvgpu/nvgpu_mem.h> | ||
| 24 | #include <nvgpu/page_allocator.h> | 25 | #include <nvgpu/page_allocator.h> |
| 25 | 26 | ||
| 27 | #include <nvgpu/linux/nvgpu_mem.h> | ||
| 28 | |||
| 26 | #include "gk20a/gk20a.h" | 29 | #include "gk20a/gk20a.h" |
| 27 | #include "gk20a/mm_gk20a.h" | 30 | #include "gk20a/mm_gk20a.h" |
| 28 | #include "gk20a/kind_gk20a.h" | 31 | #include "gk20a/kind_gk20a.h" |
| @@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, | |||
| 66 | 69 | ||
| 67 | if (aperture == APERTURE_VIDMEM) { | 70 | if (aperture == APERTURE_VIDMEM) { |
| 68 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); | 71 | struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); |
| 69 | struct page_alloc_chunk *chunk = NULL; | 72 | struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; |
| 70 | 73 | ||
| 71 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 74 | while (sgl_vid) { |
| 72 | page_alloc_chunk, list_entry) { | 75 | chunk_align = 1ULL << |
| 73 | chunk_align = 1ULL << __ffs(chunk->base | | 76 | __ffs(nvgpu_mem_sgl_phys(sgl_vid) | |
| 74 | chunk->length); | 77 | nvgpu_mem_sgl_length(sgl_vid)); |
| 75 | 78 | ||
| 76 | if (align) | 79 | if (align) |
| 77 | align = min(align, chunk_align); | 80 | align = min(align, chunk_align); |
| 78 | else | 81 | else |
| 79 | align = chunk_align; | 82 | align = chunk_align; |
| 83 | |||
| 84 | sgl_vid = nvgpu_mem_sgl_next(sgl_vid); | ||
| 80 | } | 85 | } |
| 81 | 86 | ||
| 82 | return align; | 87 | return align; |
| @@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
| 237 | struct nvgpu_vm_area *vm_area = NULL; | 242 | struct nvgpu_vm_area *vm_area = NULL; |
| 238 | u32 ctag_offset; | 243 | u32 ctag_offset; |
| 239 | enum nvgpu_aperture aperture; | 244 | enum nvgpu_aperture aperture; |
| 245 | struct nvgpu_mem_sgl *nvgpu_sgl; | ||
| 240 | 246 | ||
| 241 | /* | 247 | /* |
| 242 | * The kind used as part of the key for map caching. HW may | 248 | * The kind used as part of the key for map caching. HW may |
| @@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
| 393 | ctag_offset += buffer_offset >> | 399 | ctag_offset += buffer_offset >> |
| 394 | ilog2(g->ops.fb.compression_page_size(g)); | 400 | ilog2(g->ops.fb.compression_page_size(g)); |
| 395 | 401 | ||
| 402 | nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); | ||
| 403 | |||
| 396 | /* update gmmu ptes */ | 404 | /* update gmmu ptes */ |
| 397 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, | 405 | map_offset = g->ops.mm.gmmu_map(vm, |
| 398 | bfr.sgt, | 406 | map_offset, |
| 407 | nvgpu_sgl, | ||
| 399 | buffer_offset, /* sg offset */ | 408 | buffer_offset, /* sg offset */ |
| 400 | mapping_size, | 409 | mapping_size, |
| 401 | bfr.pgsz_idx, | 410 | bfr.pgsz_idx, |
| @@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, | |||
| 410 | if (!map_offset) | 419 | if (!map_offset) |
| 411 | goto clean_up; | 420 | goto clean_up; |
| 412 | 421 | ||
| 422 | nvgpu_mem_sgl_free(g, nvgpu_sgl); | ||
| 423 | |||
| 413 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); | 424 | mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); |
| 414 | if (!mapped_buffer) { | 425 | if (!mapped_buffer) { |
| 415 | nvgpu_warn(g, "oom allocating tracking buffer"); | 426 | nvgpu_warn(g, "oom allocating tracking buffer"); |
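The per-chunk alignment rule above is easier to see in isolation: a chunk whose base or length has a low set bit limits the usable alignment. For example, a chunk at 0x101000 with length 0x3000 gives __ffs(0x101000 | 0x3000) = 12, i.e. 4 KiB, and the buffer as a whole gets the minimum over all chunks. A hypothetical standalone version of that loop (example_sgl_alignment() is not part of the patch):

static u64 example_sgl_alignment(struct nvgpu_mem_sgl *sgl)
{
        u64 align = 0;

        while (sgl) {
                u64 chunk_align = 1ULL << __ffs(nvgpu_mem_sgl_phys(sgl) |
                                                nvgpu_mem_sgl_length(sgl));

                /* The most constrained chunk decides the final alignment. */
                align = align ? min(align, chunk_align) : chunk_align;

                sgl = nvgpu_mem_sgl_next(sgl);
        }

        return align;
}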
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
| @@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
| 65 | struct gk20a *g = gk20a_from_vm(vm); | 65 | struct gk20a *g = gk20a_from_vm(vm); |
| 66 | u64 vaddr; | 66 | u64 vaddr; |
| 67 | 67 | ||
| 68 | struct sg_table *sgt = mem->priv.sgt; | 68 | struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); |
| 69 | |||
| 70 | if (!sgl) | ||
| 71 | return -ENOMEM; | ||
| 69 | 72 | ||
| 70 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); | 73 | nvgpu_mutex_acquire(&vm->update_gmmu_lock); |
| 71 | vaddr = g->ops.mm.gmmu_map(vm, addr, | 74 | vaddr = g->ops.mm.gmmu_map(vm, addr, |
| 72 | sgt, /* sg table */ | 75 | sgl, /* sg list */ |
| 73 | 0, /* sg offset */ | 76 | 0, /* sg offset */ |
| 74 | size, | 77 | size, |
| 75 | gmmu_page_size_kernel, | 78 | gmmu_page_size_kernel, |
| @@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
| 82 | NULL, /* mapping_batch handle */ | 85 | NULL, /* mapping_batch handle */ |
| 83 | aperture); | 86 | aperture); |
| 84 | nvgpu_mutex_release(&vm->update_gmmu_lock); | 87 | nvgpu_mutex_release(&vm->update_gmmu_lock); |
| 88 | |||
| 89 | nvgpu_mem_sgl_free(g, sgl); | ||
| 90 | |||
| 85 | if (!vaddr) { | 91 | if (!vaddr) { |
| 86 | nvgpu_err(g, "failed to allocate va space"); | 92 | nvgpu_err(g, "failed to map buffer!"); |
| 87 | return 0; | 93 | return 0; |
| 88 | } | 94 | } |
| 89 | 95 | ||
| @@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
| 91 | } | 97 | } |
| 92 | 98 | ||
| 93 | /* | 99 | /* |
| 94 | * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. | 100 | * Map a nvgpu_mem into the GMMU. This is for kernel space to use. |
| 95 | */ | 101 | */ |
| 96 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | 102 | u64 nvgpu_gmmu_map(struct vm_gk20a *vm, |
| 97 | struct nvgpu_mem *mem, | 103 | struct nvgpu_mem *mem, |
| @@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm, | |||
| 106 | } | 112 | } |
| 107 | 113 | ||
| 108 | /* | 114 | /* |
| 109 | * Like nvgpu_gmmu_map() except it can work on a fixed address instead. | 115 | * Like nvgpu_gmmu_map() except this can work on a fixed address. |
| 110 | */ | 116 | */ |
| 111 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, | 117 | u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, |
| 112 | struct nvgpu_mem *mem, | 118 | struct nvgpu_mem *mem, |
| @@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
| 407 | */ | 413 | */ |
| 408 | target_addr = next_pd ? | 414 | target_addr = next_pd ? |
| 409 | nvgpu_pde_phys_addr(g, next_pd) : | 415 | nvgpu_pde_phys_addr(g, next_pd) : |
| 410 | g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); | 416 | phys_addr; |
| 411 | 417 | ||
| 412 | l->update_entry(vm, l, | 418 | l->update_entry(vm, l, |
| 413 | pd, pd_idx, | 419 | pd, pd_idx, |
| @@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm, | |||
| 458 | * VIDMEM version of the update_ptes logic. | 464 | * VIDMEM version of the update_ptes logic. |
| 459 | */ | 465 | */ |
| 460 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | 466 | static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, |
| 461 | struct sg_table *sgt, | 467 | struct nvgpu_mem_sgl *sgl, |
| 462 | u64 space_to_skip, | 468 | u64 space_to_skip, |
| 463 | u64 virt_addr, | 469 | u64 virt_addr, |
| 464 | u64 length, | 470 | u64 length, |
| 465 | struct nvgpu_gmmu_attrs *attrs) | 471 | struct nvgpu_gmmu_attrs *attrs) |
| 466 | { | 472 | { |
| 467 | struct nvgpu_page_alloc *alloc = NULL; | ||
| 468 | struct page_alloc_chunk *chunk = NULL; | ||
| 469 | u64 phys_addr, chunk_length; | 473 | u64 phys_addr, chunk_length; |
| 470 | int err = 0; | 474 | int err = 0; |
| 471 | 475 | ||
| 472 | if (!sgt) { | 476 | if (!sgl) { |
| 473 | /* | 477 | /* |
| 474 | * This is considered an unmap. Just pass in 0 as the physical | 478 | * This is considered an unmap. Just pass in 0 as the physical |
| 475 | * address for the entire GPU range. | 479 | * address for the entire GPU range. |
| @@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
| 482 | return err; | 486 | return err; |
| 483 | } | 487 | } |
| 484 | 488 | ||
| 485 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
| 486 | |||
| 487 | /* | 489 | /* |
| 488 | * Otherwise iterate across all the chunks in this allocation and | 490 | * Otherwise iterate across all the chunks in this allocation and |
| 489 | * map them. | 491 | * map them. |
| 490 | */ | 492 | */ |
| 491 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 493 | while (sgl) { |
| 492 | page_alloc_chunk, list_entry) { | ||
| 493 | if (space_to_skip && | 494 | if (space_to_skip && |
| 494 | space_to_skip >= chunk->length) { | 495 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
| 495 | space_to_skip -= chunk->length; | 496 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
| 497 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 496 | continue; | 498 | continue; |
| 497 | } | 499 | } |
| 498 | 500 | ||
| 499 | phys_addr = chunk->base + space_to_skip; | 501 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
| 500 | chunk_length = min(length, (chunk->length - space_to_skip)); | 502 | chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - |
| 503 | space_to_skip)); | ||
| 501 | 504 | ||
| 502 | err = __set_pd_level(vm, &vm->pdb, | 505 | err = __set_pd_level(vm, &vm->pdb, |
| 503 | 0, | 506 | 0, |
| @@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, | |||
| 518 | 521 | ||
| 519 | if (length == 0) | 522 | if (length == 0) |
| 520 | break; | 523 | break; |
| 524 | |||
| 525 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 521 | } | 526 | } |
| 522 | 527 | ||
| 523 | return err; | 528 | return err; |
| 524 | } | 529 | } |
| 525 | 530 | ||
| 526 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | 531 | static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, |
| 527 | struct sg_table *sgt, | 532 | struct nvgpu_mem_sgl *sgl, |
| 528 | u64 space_to_skip, | 533 | u64 space_to_skip, |
| 529 | u64 virt_addr, | 534 | u64 virt_addr, |
| 530 | u64 length, | 535 | u64 length, |
| 531 | struct nvgpu_gmmu_attrs *attrs) | 536 | struct nvgpu_gmmu_attrs *attrs) |
| 532 | { | 537 | { |
| 533 | int err; | 538 | int err; |
| 534 | struct scatterlist *sgl; | ||
| 535 | struct gk20a *g = gk20a_from_vm(vm); | 539 | struct gk20a *g = gk20a_from_vm(vm); |
| 536 | 540 | ||
| 537 | if (!sgt) { | 541 | if (!sgl) { |
| 538 | /* | 542 | /* |
| 539 | * This is considered an unmap. Just pass in 0 as the physical | 543 | * This is considered an unmap. Just pass in 0 as the physical |
| 540 | * address for the entire GPU range. | 544 | * address for the entire GPU range. |
| @@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
| 548 | } | 552 | } |
| 549 | 553 | ||
| 550 | /* | 554 | /* |
| 551 | * At this point we have a Linux scatter-gather list pointing to some | 555 | * At this point we have a scatter-gather list pointing to some number |
| 552 | * number of discontiguous chunks of memory. Iterate over that list and | 556 | * of discontiguous chunks of memory. We must iterate over that list and |
| 553 | * generate a GMMU map call for each chunk. There are two possibilities: | 557 | * generate a GMMU map call for each chunk. There are two possibilities: |
| 554 | * either the IOMMU is enabled or not. When the IOMMU is enabled the | 558 | * either an IOMMU is enabled or not. When an IOMMU is enabled the |
| 555 | * mapping is simple since the "physical" address is actually a virtual | 559 | * mapping is simple since the "physical" address is actually a virtual |
| 556 | * IO address and will be contiguous. The no-IOMMU case is more | 560 | * IO address and will be contiguous. |
| 557 | * complicated. We will have to iterate over the SGT and do a separate | ||
| 558 | * map for each chunk of the SGT. | ||
| 559 | */ | 561 | */ |
| 560 | sgl = sgt->sgl; | ||
| 561 | |||
| 562 | if (!g->mm.bypass_smmu) { | 562 | if (!g->mm.bypass_smmu) { |
| 563 | u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); | 563 | u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); |
| 564 | 564 | ||
| 565 | io_addr += space_to_skip; | 565 | io_addr += space_to_skip; |
| 566 | 566 | ||
| @@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
| 585 | /* | 585 | /* |
| 586 | * Cut out sgl ents for space_to_skip. | 586 | * Cut out sgl ents for space_to_skip. |
| 587 | */ | 587 | */ |
| 588 | if (space_to_skip && space_to_skip >= sgl->length) { | 588 | if (space_to_skip && |
| 589 | space_to_skip -= sgl->length; | 589 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { |
| 590 | sgl = sg_next(sgl); | 590 | space_to_skip -= nvgpu_mem_sgl_length(sgl); |
| 591 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 591 | continue; | 592 | continue; |
| 592 | } | 593 | } |
| 593 | 594 | ||
| 594 | phys_addr = sg_phys(sgl) + space_to_skip; | 595 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
| 595 | chunk_length = min(length, sgl->length - space_to_skip); | 596 | chunk_length = min(length, |
| 597 | nvgpu_mem_sgl_length(sgl) - space_to_skip); | ||
| 596 | 598 | ||
| 597 | err = __set_pd_level(vm, &vm->pdb, | 599 | err = __set_pd_level(vm, &vm->pdb, |
| 598 | 0, | 600 | 0, |
| @@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
| 600 | virt_addr, | 602 | virt_addr, |
| 601 | chunk_length, | 603 | chunk_length, |
| 602 | attrs); | 604 | attrs); |
| 603 | if (err) | ||
| 604 | return err; | ||
| 605 | 605 | ||
| 606 | space_to_skip = 0; | 606 | space_to_skip = 0; |
| 607 | virt_addr += chunk_length; | 607 | virt_addr += chunk_length; |
| 608 | length -= chunk_length; | 608 | length -= chunk_length; |
| 609 | sgl = sg_next(sgl); | 609 | sgl = nvgpu_mem_sgl_next(sgl); |
| 610 | 610 | ||
| 611 | if (length == 0) | 611 | if (length == 0) |
| 612 | break; | 612 | break; |
| @@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, | |||
| 624 | * implementations. But the logic around that is generic to all chips. Every | 624 | * implementations. But the logic around that is generic to all chips. Every |
| 625 | * chip has some number of PDE levels and then a PTE level. | 625 | * chip has some number of PDE levels and then a PTE level. |
| 626 | * | 626 | * |
| 627 | * Each chunk of the incoming SGT is sent to the chip specific implementation | 627 | * Each chunk of the incoming SGL is sent to the chip specific implementation |
| 628 | * of page table update. | 628 | * of page table update. |
| 629 | * | 629 | * |
| 630 | * [*] Note: the "physical" address may actually be an IO virtual address in the | 630 | * [*] Note: the "physical" address may actually be an IO virtual address in the |
| 631 | * case of SMMU usage. | 631 | * case of SMMU usage. |
| 632 | */ | 632 | */ |
| 633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | 633 | static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, |
| 634 | struct sg_table *sgt, | 634 | struct nvgpu_mem_sgl *sgl, |
| 635 | u64 space_to_skip, | 635 | u64 space_to_skip, |
| 636 | u64 virt_addr, | 636 | u64 virt_addr, |
| 637 | u64 length, | 637 | u64 length, |
| 638 | struct nvgpu_gmmu_attrs *attrs) | 638 | struct nvgpu_gmmu_attrs *attrs) |
| 639 | { | 639 | { |
| 640 | struct gk20a *g = gk20a_from_vm(vm); | 640 | struct gk20a *g = gk20a_from_vm(vm); |
| 641 | struct nvgpu_page_alloc *alloc; | ||
| 642 | u64 phys_addr = 0; | ||
| 643 | u32 page_size; | 641 | u32 page_size; |
| 644 | int err; | 642 | int err; |
| 645 | 643 | ||
| @@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
| 665 | return err; | 663 | return err; |
| 666 | } | 664 | } |
| 667 | 665 | ||
| 668 | if (sgt) { | ||
| 669 | if (attrs->aperture == APERTURE_VIDMEM) { | ||
| 670 | alloc = get_vidmem_page_alloc(sgt->sgl); | ||
| 671 | |||
| 672 | phys_addr = alloc->base; | ||
| 673 | } else | ||
| 674 | phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); | ||
| 675 | } | ||
| 676 | |||
| 677 | __gmmu_dbg(g, attrs, | 666 | __gmmu_dbg(g, attrs, |
| 678 | "vm=%s " | 667 | "vm=%s " |
| 679 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " | 668 | "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " |
| 680 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " | 669 | "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " |
| 681 | "kind=%#02x APT=%-6s %c%c%c%c%c", | 670 | "kind=%#02x APT=%-6s %c%c%c%c%c", |
| 682 | vm->name, | 671 | vm->name, |
| 683 | sgt ? "MAP" : "UNMAP", | 672 | sgl ? "MAP" : "UNMAP", |
| 684 | virt_addr, | 673 | virt_addr, |
| 685 | length, | 674 | length, |
| 686 | phys_addr, | 675 | sgl ? nvgpu_mem_sgl_phys(sgl) : 0, |
| 687 | space_to_skip, | 676 | space_to_skip, |
| 688 | page_size >> 10, | 677 | page_size >> 10, |
| 689 | nvgpu_gmmu_perm_str(attrs->rw_flag), | 678 | nvgpu_gmmu_perm_str(attrs->rw_flag), |
| @@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
| 696 | attrs->valid ? 'V' : '-'); | 685 | attrs->valid ? 'V' : '-'); |
| 697 | 686 | ||
| 698 | /* | 687 | /* |
| 699 | * Handle VIDMEM progamming. Currently uses a different scatter list | 688 | * For historical reasons these are separate, but soon these will be |
| 700 | * format. | 689 | * unified. |
| 701 | */ | 690 | */ |
| 702 | if (attrs->aperture == APERTURE_VIDMEM) | 691 | if (attrs->aperture == APERTURE_VIDMEM) |
| 703 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, | 692 | err = __nvgpu_gmmu_update_page_table_vidmem(vm, |
| 704 | sgt, | 693 | sgl, |
| 705 | space_to_skip, | 694 | space_to_skip, |
| 706 | virt_addr, | 695 | virt_addr, |
| 707 | length, | 696 | length, |
| 708 | attrs); | 697 | attrs); |
| 709 | else | 698 | else |
| 710 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, | 699 | err = __nvgpu_gmmu_update_page_table_sysmem(vm, |
| 711 | sgt, | 700 | sgl, |
| 712 | space_to_skip, | 701 | space_to_skip, |
| 713 | virt_addr, | 702 | virt_addr, |
| 714 | length, | 703 | length, |
| @@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
| 717 | unmap_gmmu_pages(g, &vm->pdb); | 706 | unmap_gmmu_pages(g, &vm->pdb); |
| 718 | nvgpu_smp_mb(); | 707 | nvgpu_smp_mb(); |
| 719 | 708 | ||
| 720 | __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); | 709 | __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP"); |
| 721 | 710 | ||
| 722 | return err; | 711 | return err; |
| 723 | } | 712 | } |
| @@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, | |||
| 736 | */ | 725 | */ |
| 737 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 726 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
| 738 | u64 vaddr, | 727 | u64 vaddr, |
| 739 | struct sg_table *sgt, | 728 | struct nvgpu_mem_sgl *sgl, |
| 740 | u64 buffer_offset, | 729 | u64 buffer_offset, |
| 741 | u64 size, | 730 | u64 size, |
| 742 | int pgsz_idx, | 731 | int pgsz_idx, |
| @@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 785 | allocated = true; | 774 | allocated = true; |
| 786 | } | 775 | } |
| 787 | 776 | ||
| 788 | err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, | 777 | err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, |
| 789 | vaddr, size, &attrs); | 778 | vaddr, size, &attrs); |
| 790 | if (err) { | 779 | if (err) { |
| 791 | nvgpu_err(g, "failed to update ptes on map"); | 780 | nvgpu_err(g, "failed to update ptes on map"); |
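After this change the vidmem and sysmem update paths share the same walking pattern over the new SGL: skip whole chunks that lie before the requested offset, then map the remainder chunk by chunk. A simplified sketch of that shared loop follows; the real code programs PTEs through __set_pd_level() with the GMMU attributes, and example_walk_sgl() is only an illustration, not part of the patch:

static void example_walk_sgl(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
                             u64 space_to_skip, u64 virt_addr, u64 length)
{
        while (sgl) {
                u64 phys_addr, chunk_length;

                /* Chunks that lie entirely inside the skipped region. */
                if (space_to_skip &&
                    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
                        space_to_skip -= nvgpu_mem_sgl_length(sgl);
                        sgl = nvgpu_mem_sgl_next(sgl);
                        continue;
                }

                phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
                chunk_length = min(length,
                                   nvgpu_mem_sgl_length(sgl) - space_to_skip);

                /* The real code programs PTEs here via __set_pd_level(). */
                nvgpu_log(g, gpu_dbg_sgl, "virt 0x%llx -> phys 0x%llx +0x%llx",
                          virt_addr, phys_addr, chunk_length);

                space_to_skip = 0;
                virt_addr += chunk_length;
                length -= chunk_length;
                if (length == 0)
                        break;

                sgl = nvgpu_mem_sgl_next(sgl);
        }
}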
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
| @@ -0,0 +1,73 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify it | ||
| 5 | * under the terms and conditions of the GNU General Public License, | ||
| 6 | * version 2, as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 9 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 10 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 11 | * more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public License | ||
| 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| 15 | */ | ||
| 16 | |||
| 17 | #include <nvgpu/kmem.h> | ||
| 18 | #include <nvgpu/nvgpu_mem.h> | ||
| 19 | |||
| 20 | #include "gk20a/gk20a.h" | ||
| 21 | |||
| 22 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) | ||
| 23 | { | ||
| 24 | return sgl->next; | ||
| 25 | } | ||
| 26 | |||
| 27 | u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) | ||
| 28 | { | ||
| 29 | return sgl->phys; | ||
| 30 | } | ||
| 31 | |||
| 32 | u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) | ||
| 33 | { | ||
| 34 | return sgl->dma; | ||
| 35 | } | ||
| 36 | |||
| 37 | u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) | ||
| 38 | { | ||
| 39 | return sgl->length; | ||
| 40 | } | ||
| 41 | |||
| 42 | /* | ||
| 43 | * This builds a GPU address for the %sgl based on whether an IOMMU is present | ||
| 44 | * or not. It also handles turning the physical address into the true GPU | ||
| 45 | * physical address that should be programmed into the page tables. | ||
| 46 | */ | ||
| 47 | u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, | ||
| 48 | struct nvgpu_gmmu_attrs *attrs) | ||
| 49 | { | ||
| 50 | if (nvgpu_mem_sgl_dma(sgl) == 0) | ||
| 51 | return g->ops.mm.gpu_phys_addr(g, attrs, | ||
| 52 | nvgpu_mem_sgl_phys(sgl)); | ||
| 53 | |||
| 54 | if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE) | ||
| 55 | return 0; | ||
| 56 | |||
| 57 | return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) | ||
| 61 | { | ||
| 62 | struct nvgpu_mem_sgl *next; | ||
| 63 | |||
| 64 | /* | ||
| 65 | * Free each of the elements. We expect each element to have been | ||
| 66 | * nvgpu_k[mz]alloc()ed. | ||
| 67 | */ | ||
| 68 | while (sgl) { | ||
| 69 | next = nvgpu_mem_sgl_next(sgl); | ||
| 70 | nvgpu_kfree(g, sgl); | ||
| 71 | sgl = next; | ||
| 72 | } | ||
| 73 | } | ||
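A usage sketch of the new accessors and of nvgpu_mem_sgl_gpu_addr() above: the GPU-visible address is the raw physical address (run through g->ops.mm.gpu_phys_addr()) when there is no DMA address, 0 when the DMA mapping failed, and the SMMU-translated IOVA otherwise. example_dump_sgl() is hypothetical and not part of the patch:

static void example_dump_sgl(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
                             struct nvgpu_gmmu_attrs *attrs)
{
        while (sgl) {
                /* Resolve whatever address the GMMU would be programmed with. */
                nvgpu_log(g, gpu_dbg_sgl, "gpu: 0x%-12llx len: 0x%llx",
                          nvgpu_mem_sgl_gpu_addr(g, sgl, attrs),
                          nvgpu_mem_sgl_length(sgl));

                sgl = nvgpu_mem_sgl_next(sgl);
        }
}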
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
| @@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, | |||
| 147 | struct nvgpu_page_alloc *alloc, | 147 | struct nvgpu_page_alloc *alloc, |
| 148 | bool free_buddy_alloc) | 148 | bool free_buddy_alloc) |
| 149 | { | 149 | { |
| 150 | struct page_alloc_chunk *chunk; | 150 | struct nvgpu_mem_sgl *sgl = alloc->sgl; |
| 151 | 151 | ||
| 152 | while (!nvgpu_list_empty(&alloc->alloc_chunks)) { | 152 | if (free_buddy_alloc) { |
| 153 | chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, | 153 | while (sgl) { |
| 154 | page_alloc_chunk, | 154 | nvgpu_free(&a->source_allocator, sgl->phys); |
| 155 | list_entry); | 155 | sgl = nvgpu_mem_sgl_next(sgl); |
| 156 | nvgpu_list_del(&chunk->list_entry); | 156 | } |
| 157 | |||
| 158 | if (free_buddy_alloc) | ||
| 159 | nvgpu_free(&a->source_allocator, chunk->base); | ||
| 160 | nvgpu_kmem_cache_free(a->chunk_cache, chunk); | ||
| 161 | } | 157 | } |
| 162 | 158 | ||
| 159 | nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); | ||
| 163 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 160 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
| 164 | } | 161 | } |
| 165 | 162 | ||
| @@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a, | |||
| 243 | } | 240 | } |
| 244 | 241 | ||
| 245 | /* | 242 | /* |
| 246 | * This expects @alloc to have 1 empty page_alloc_chunk already added to the | 243 | * This expects @alloc to have 1 empty sgl_entry ready for usage. |
| 247 | * alloc_chunks list. | ||
| 248 | */ | 244 | */ |
| 249 | static int __do_slab_alloc(struct nvgpu_page_allocator *a, | 245 | static int __do_slab_alloc(struct nvgpu_page_allocator *a, |
| 250 | struct page_alloc_slab *slab, | 246 | struct page_alloc_slab *slab, |
| 251 | struct nvgpu_page_alloc *alloc) | 247 | struct nvgpu_page_alloc *alloc) |
| 252 | { | 248 | { |
| 253 | struct page_alloc_slab_page *slab_page = NULL; | 249 | struct page_alloc_slab_page *slab_page = NULL; |
| 254 | struct page_alloc_chunk *chunk; | 250 | struct nvgpu_mem_sgl *sgl; |
| 255 | unsigned long offs; | 251 | unsigned long offs; |
| 256 | 252 | ||
| 257 | /* | 253 | /* |
| @@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a, | |||
| 302 | BUG(); /* Should be impossible to hit this. */ | 298 | BUG(); /* Should be impossible to hit this. */ |
| 303 | 299 | ||
| 304 | /* | 300 | /* |
| 305 | * Handle building the nvgpu_page_alloc struct. We expect one | 301 | * Handle building the nvgpu_page_alloc struct. We expect one sgl |
| 306 | * page_alloc_chunk to be present. | 302 | * to be present. |
| 307 | */ | 303 | */ |
| 308 | alloc->slab_page = slab_page; | 304 | alloc->slab_page = slab_page; |
| 309 | alloc->nr_chunks = 1; | 305 | alloc->nr_chunks = 1; |
| 310 | alloc->length = slab_page->slab_size; | 306 | alloc->length = slab_page->slab_size; |
| 311 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); | 307 | alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); |
| 312 | 308 | ||
| 313 | chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, | 309 | sgl = alloc->sgl; |
| 314 | page_alloc_chunk, list_entry); | 310 | sgl->phys = alloc->base; |
| 315 | chunk->base = alloc->base; | 311 | sgl->dma = alloc->base; |
| 316 | chunk->length = alloc->length; | 312 | sgl->length = alloc->length; |
| 313 | sgl->next = NULL; | ||
| 317 | 314 | ||
| 318 | return 0; | 315 | return 0; |
| 319 | } | 316 | } |
| @@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
| 327 | int err, slab_nr; | 324 | int err, slab_nr; |
| 328 | struct page_alloc_slab *slab; | 325 | struct page_alloc_slab *slab; |
| 329 | struct nvgpu_page_alloc *alloc = NULL; | 326 | struct nvgpu_page_alloc *alloc = NULL; |
| 330 | struct page_alloc_chunk *chunk = NULL; | 327 | struct nvgpu_mem_sgl *sgl = NULL; |
| 331 | 328 | ||
| 332 | /* | 329 | /* |
| 333 | * Align the length to a page and then divide by the page size (4k for | 330 | * Align the length to a page and then divide by the page size (4k for |
| @@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
| 341 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); | 338 | palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); |
| 342 | goto fail; | 339 | goto fail; |
| 343 | } | 340 | } |
| 344 | chunk = nvgpu_kmem_cache_alloc(a->chunk_cache); | 341 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
| 345 | if (!chunk) { | 342 | if (!sgl) { |
| 346 | palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); | 343 | palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); |
| 347 | goto fail; | 344 | goto fail; |
| 348 | } | 345 | } |
| 349 | 346 | ||
| 350 | nvgpu_init_list_node(&alloc->alloc_chunks); | 347 | alloc->sgl = sgl; |
| 351 | nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks); | ||
| 352 | |||
| 353 | err = __do_slab_alloc(a, slab, alloc); | 348 | err = __do_slab_alloc(a, slab, alloc); |
| 354 | if (err) | 349 | if (err) |
| 355 | goto fail; | 350 | goto fail; |
| @@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( | |||
| 363 | fail: | 358 | fail: |
| 364 | if (alloc) | 359 | if (alloc) |
| 365 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 360 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
| 366 | if (chunk) | 361 | if (sgl) |
| 367 | nvgpu_kmem_cache_free(a->chunk_cache, chunk); | 362 | nvgpu_kfree(a->owner->g, sgl); |
| 368 | return NULL; | 363 | return NULL; |
| 369 | } | 364 | } |
| 370 | 365 | ||
| @@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
| 426 | struct nvgpu_page_allocator *a, u64 pages) | 421 | struct nvgpu_page_allocator *a, u64 pages) |
| 427 | { | 422 | { |
| 428 | struct nvgpu_page_alloc *alloc; | 423 | struct nvgpu_page_alloc *alloc; |
| 429 | struct page_alloc_chunk *c; | 424 | struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL; |
| 430 | u64 max_chunk_len = pages << a->page_shift; | 425 | u64 max_chunk_len = pages << a->page_shift; |
| 431 | int i = 0; | 426 | int i = 0; |
| 432 | 427 | ||
| @@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
| 436 | 431 | ||
| 437 | memset(alloc, 0, sizeof(*alloc)); | 432 | memset(alloc, 0, sizeof(*alloc)); |
| 438 | 433 | ||
| 439 | nvgpu_init_list_node(&alloc->alloc_chunks); | ||
| 440 | alloc->length = pages << a->page_shift; | 434 | alloc->length = pages << a->page_shift; |
| 441 | 435 | ||
| 442 | while (pages) { | 436 | while (pages) { |
| @@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( | |||
| 482 | goto fail_cleanup; | 476 | goto fail_cleanup; |
| 483 | } | 477 | } |
| 484 | 478 | ||
| 485 | c = nvgpu_kmem_cache_alloc(a->chunk_cache); | 479 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
| 486 | if (!c) { | 480 | if (!sgl) { |
| 487 | nvgpu_free(&a->source_allocator, chunk_addr); | 481 | nvgpu_free(&a->source_allocator, chunk_addr); |
| 488 | goto fail_cleanup; | 482 | goto fail_cleanup; |
| 489 | } | 483 | } |
| 490 | 484 | ||
| 491 | pages -= chunk_pages; | 485 | pages -= chunk_pages; |
| 492 | 486 | ||
| 493 | c->base = chunk_addr; | 487 | sgl->phys = chunk_addr; |
| 494 | c->length = chunk_len; | 488 | sgl->dma = chunk_addr; |
| 495 | nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); | 489 | sgl->length = chunk_len; |
| 490 | |||
| 491 | /* | ||
| 492 | * Build the singly linked list with a head node that is part of | ||
| 493 | * the list. | ||
| 494 | */ | ||
| 495 | if (prev_sgl) | ||
| 496 | prev_sgl->next = sgl; | ||
| 497 | else | ||
| 498 | alloc->sgl = sgl; | ||
| 499 | |||
| 500 | prev_sgl = sgl; | ||
| 496 | 501 | ||
| 497 | i++; | 502 | i++; |
| 498 | } | 503 | } |
| 499 | 504 | ||
| 500 | alloc->nr_chunks = i; | 505 | alloc->nr_chunks = i; |
| 501 | c = nvgpu_list_first_entry(&alloc->alloc_chunks, | 506 | alloc->base = alloc->sgl->phys; |
| 502 | page_alloc_chunk, list_entry); | ||
| 503 | alloc->base = c->base; | ||
| 504 | 507 | ||
| 505 | return alloc; | 508 | return alloc; |
| 506 | 509 | ||
| 507 | fail_cleanup: | 510 | fail_cleanup: |
| 508 | while (!nvgpu_list_empty(&alloc->alloc_chunks)) { | 511 | sgl = alloc->sgl; |
| 509 | c = nvgpu_list_first_entry(&alloc->alloc_chunks, | 512 | while (sgl) { |
| 510 | page_alloc_chunk, list_entry); | 513 | struct nvgpu_mem_sgl *next = sgl->next; |
| 511 | nvgpu_list_del(&c->list_entry); | 514 | |
| 512 | nvgpu_free(&a->source_allocator, c->base); | 515 | nvgpu_free(&a->source_allocator, sgl->phys); |
| 513 | nvgpu_kmem_cache_free(a->chunk_cache, c); | 516 | nvgpu_kfree(a->owner->g, sgl); |
| 517 | |||
| 518 | sgl = next; | ||
| 514 | } | 519 | } |
| 520 | |||
| 515 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 521 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
| 516 | fail: | 522 | fail: |
| 517 | return NULL; | 523 | return NULL; |
| @@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | |||
| 521 | struct nvgpu_page_allocator *a, u64 len) | 527 | struct nvgpu_page_allocator *a, u64 len) |
| 522 | { | 528 | { |
| 523 | struct nvgpu_page_alloc *alloc = NULL; | 529 | struct nvgpu_page_alloc *alloc = NULL; |
| 524 | struct page_alloc_chunk *c; | 530 | struct nvgpu_mem_sgl *sgl; |
| 525 | u64 pages; | 531 | u64 pages; |
| 526 | int i = 0; | 532 | int i = 0; |
| 527 | 533 | ||
| @@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( | |||
| 536 | 542 | ||
| 537 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", | 543 | palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", |
| 538 | pages << a->page_shift, pages, alloc->base); | 544 | pages << a->page_shift, pages, alloc->base); |
| 539 | nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, | 545 | sgl = alloc->sgl; |
| 540 | page_alloc_chunk, list_entry) { | 546 | while (sgl) { |
| 541 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 547 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
| 542 | i++, c->base, c->length); | 548 | i++, |
| 549 | nvgpu_mem_sgl_phys(sgl), | ||
| 550 | nvgpu_mem_sgl_length(sgl)); | ||
| 551 | sgl = sgl->next; | ||
| 543 | } | 552 | } |
| 553 | palloc_dbg(a, "Alloc done\n"); | ||
| 544 | 554 | ||
| 545 | return alloc; | 555 | return alloc; |
| 546 | } | 556 | } |
| @@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
| 638 | struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) | 648 | struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) |
| 639 | { | 649 | { |
| 640 | struct nvgpu_page_alloc *alloc; | 650 | struct nvgpu_page_alloc *alloc; |
| 641 | struct page_alloc_chunk *c; | 651 | struct nvgpu_mem_sgl *sgl; |
| 642 | 652 | ||
| 643 | alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); | 653 | alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); |
| 644 | c = nvgpu_kmem_cache_alloc(a->chunk_cache); | 654 | sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); |
| 645 | if (!alloc || !c) | 655 | if (!alloc || !sgl) |
| 646 | goto fail; | 656 | goto fail; |
| 647 | 657 | ||
| 648 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); | 658 | alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); |
| @@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( | |||
| 653 | 663 | ||
| 654 | alloc->nr_chunks = 1; | 664 | alloc->nr_chunks = 1; |
| 655 | alloc->length = length; | 665 | alloc->length = length; |
| 656 | nvgpu_init_list_node(&alloc->alloc_chunks); | 666 | alloc->sgl = sgl; |
| 657 | 667 | ||
| 658 | c->base = alloc->base; | 668 | sgl->phys = alloc->base; |
| 659 | c->length = length; | 669 | sgl->dma = alloc->base; |
| 660 | nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); | 670 | sgl->length = length; |
| 671 | sgl->next = NULL; | ||
| 661 | 672 | ||
| 662 | return alloc; | 673 | return alloc; |
| 663 | 674 | ||
| 664 | fail: | 675 | fail: |
| 665 | if (c) | 676 | if (sgl) |
| 666 | nvgpu_kmem_cache_free(a->chunk_cache, c); | 677 | nvgpu_kfree(a->owner->g, sgl); |
| 667 | if (alloc) | 678 | if (alloc) |
| 668 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); | 679 | nvgpu_kmem_cache_free(a->alloc_cache, alloc); |
| 669 | return NULL; | 680 | return NULL; |
| @@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
| 677 | { | 688 | { |
| 678 | struct nvgpu_page_allocator *a = page_allocator(__a); | 689 | struct nvgpu_page_allocator *a = page_allocator(__a); |
| 679 | struct nvgpu_page_alloc *alloc = NULL; | 690 | struct nvgpu_page_alloc *alloc = NULL; |
| 680 | struct page_alloc_chunk *c; | 691 | struct nvgpu_mem_sgl *sgl; |
| 681 | u64 aligned_len, pages; | 692 | u64 aligned_len, pages; |
| 682 | int i = 0; | 693 | int i = 0; |
| 683 | 694 | ||
| @@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, | |||
| 697 | 708 | ||
| 698 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", | 709 | palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", |
| 699 | alloc->base, aligned_len, pages); | 710 | alloc->base, aligned_len, pages); |
| 700 | nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, | 711 | sgl = alloc->sgl; |
| 701 | page_alloc_chunk, list_entry) { | 712 | while (sgl) { |
| 702 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", | 713 | palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", |
| 703 | i++, c->base, c->length); | 714 | i++, |
| 715 | nvgpu_mem_sgl_phys(sgl), | ||
| 716 | nvgpu_mem_sgl_length(sgl)); | ||
| 717 | sgl = sgl->next; | ||
| 704 | } | 718 | } |
| 705 | 719 | ||
| 706 | a->nr_fixed_allocs++; | 720 | a->nr_fixed_allocs++; |
| @@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
| 896 | 910 | ||
| 897 | a->alloc_cache = nvgpu_kmem_cache_create(g, | 911 | a->alloc_cache = nvgpu_kmem_cache_create(g, |
| 898 | sizeof(struct nvgpu_page_alloc)); | 912 | sizeof(struct nvgpu_page_alloc)); |
| 899 | a->chunk_cache = nvgpu_kmem_cache_create(g, | ||
| 900 | sizeof(struct page_alloc_chunk)); | ||
| 901 | a->slab_page_cache = nvgpu_kmem_cache_create(g, | 913 | a->slab_page_cache = nvgpu_kmem_cache_create(g, |
| 902 | sizeof(struct page_alloc_slab_page)); | 914 | sizeof(struct page_alloc_slab_page)); |
| 903 | if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) { | 915 | if (!a->alloc_cache || !a->slab_page_cache) { |
| 904 | err = -ENOMEM; | 916 | err = -ENOMEM; |
| 905 | goto fail; | 917 | goto fail; |
| 906 | } | 918 | } |
| @@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, | |||
| 941 | fail: | 953 | fail: |
| 942 | if (a->alloc_cache) | 954 | if (a->alloc_cache) |
| 943 | nvgpu_kmem_cache_destroy(a->alloc_cache); | 955 | nvgpu_kmem_cache_destroy(a->alloc_cache); |
| 944 | if (a->chunk_cache) | ||
| 945 | nvgpu_kmem_cache_destroy(a->chunk_cache); | ||
| 946 | if (a->slab_page_cache) | 956 | if (a->slab_page_cache) |
| 947 | nvgpu_kmem_cache_destroy(a->slab_page_cache); | 957 | nvgpu_kmem_cache_destroy(a->slab_page_cache); |
| 948 | nvgpu_kfree(g, a); | 958 | nvgpu_kfree(g, a); |
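The page allocator now builds its SGL with a head pointer plus a trailing prev_sgl instead of a list_head. The same idiom, factored into a hypothetical helper for clarity (example_sgl_append() is not part of the patch; addr and len stand in for the buddy allocator's results):

static struct nvgpu_mem_sgl *example_sgl_append(struct gk20a *g,
                                                struct nvgpu_mem_sgl **head,
                                                struct nvgpu_mem_sgl *prev,
                                                u64 addr, u64 len)
{
        struct nvgpu_mem_sgl *sgl = nvgpu_kzalloc(g, sizeof(*sgl));

        if (!sgl)
                return NULL;

        sgl->phys = addr;
        sgl->dma = addr;
        sgl->length = len;

        /* First chunk becomes the head; later chunks hang off the previous one. */
        if (prev)
                prev->next = sgl;
        else
                *head = sgl;

        return sgl;     /* caller passes this back in as "prev" for the next chunk */
}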
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
| @@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, | |||
| 84 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) | 84 | u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) |
| 85 | { | 85 | { |
| 86 | struct nvgpu_page_alloc *alloc = NULL; | 86 | struct nvgpu_page_alloc *alloc = NULL; |
| 87 | struct page_alloc_chunk *chunk = NULL; | 87 | struct nvgpu_mem_sgl *sgl; |
| 88 | u32 byteoff, start_reg, until_end, n; | 88 | u32 byteoff, start_reg, until_end, n; |
| 89 | 89 | ||
| 90 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | 90 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); |
| 91 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 91 | sgl = alloc->sgl; |
| 92 | page_alloc_chunk, list_entry) { | 92 | while (sgl) { |
| 93 | if (offset >= chunk->length) | 93 | if (offset >= nvgpu_mem_sgl_length(sgl)) { |
| 94 | offset -= chunk->length; | 94 | offset -= nvgpu_mem_sgl_length(sgl); |
| 95 | else | 95 | sgl = sgl->next; |
| 96 | } else { | ||
| 96 | break; | 97 | break; |
| 98 | } | ||
| 97 | } | 99 | } |
| 98 | 100 | ||
| 99 | while (size) { | 101 | while (size) { |
| 100 | byteoff = g->ops.pramin.enter(g, mem, chunk, | 102 | u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); |
| 103 | |||
| 104 | byteoff = g->ops.pramin.enter(g, mem, sgl, | ||
| 101 | offset / sizeof(u32)); | 105 | offset / sizeof(u32)); |
| 102 | start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); | 106 | start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); |
| 103 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); | 107 | until_end = SZ_1M - (byteoff & (SZ_1M - 1)); |
| 104 | 108 | ||
| 105 | n = min3(size, until_end, (u32)(chunk->length - offset)); | 109 | n = min3(size, until_end, (u32)(sgl_len - offset)); |
| 106 | 110 | ||
| 107 | loop(g, start_reg, n / sizeof(u32), arg); | 111 | loop(g, start_reg, n / sizeof(u32), arg); |
| 108 | 112 | ||
| 109 | /* read back to synchronize accesses */ | 113 | /* read back to synchronize accesses */ |
| 110 | gk20a_readl(g, start_reg); | 114 | gk20a_readl(g, start_reg); |
| 111 | g->ops.pramin.exit(g, mem, chunk); | 115 | g->ops.pramin.exit(g, mem, sgl); |
| 112 | 116 | ||
| 113 | size -= n; | 117 | size -= n; |
| 114 | 118 | ||
| 115 | if (n == (chunk->length - offset)) { | 119 | if (n == (sgl_len - offset)) { |
| 116 | chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk, | 120 | sgl = nvgpu_mem_sgl_next(sgl); |
| 117 | list_entry); | ||
| 118 | offset = 0; | 121 | offset = 0; |
| 119 | } else { | 122 | } else { |
| 120 | offset += n; | 123 | offset += n; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 7eee2d51..355228db 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
| @@ -34,6 +34,7 @@ struct gk20a_debug_output; | |||
| 34 | struct nvgpu_clk_pll_debug_data; | 34 | struct nvgpu_clk_pll_debug_data; |
| 35 | struct nvgpu_nvhost_dev; | 35 | struct nvgpu_nvhost_dev; |
| 36 | struct nvgpu_cpu_time_correlation_sample; | 36 | struct nvgpu_cpu_time_correlation_sample; |
| 37 | struct nvgpu_mem_sgl; | ||
| 37 | 38 | ||
| 38 | #include <nvgpu/lock.h> | 39 | #include <nvgpu/lock.h> |
| 39 | #include <nvgpu/thread.h> | 40 | #include <nvgpu/thread.h> |
| @@ -70,8 +71,6 @@ struct nvgpu_cpu_time_correlation_sample; | |||
| 70 | #endif | 71 | #endif |
| 71 | #include "ecc_gk20a.h" | 72 | #include "ecc_gk20a.h" |
| 72 | 73 | ||
| 73 | struct page_alloc_chunk; | ||
| 74 | |||
| 75 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. | 74 | /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. |
| 76 | 32 ns is the resolution of ptimer. */ | 75 | 32 ns is the resolution of ptimer. */ |
| 77 | #define PTIMER_REF_FREQ_HZ 31250000 | 76 | #define PTIMER_REF_FREQ_HZ 31250000 |
| @@ -701,7 +700,7 @@ struct gpu_ops { | |||
| 701 | bool (*support_sparse)(struct gk20a *g); | 700 | bool (*support_sparse)(struct gk20a *g); |
| 702 | u64 (*gmmu_map)(struct vm_gk20a *vm, | 701 | u64 (*gmmu_map)(struct vm_gk20a *vm, |
| 703 | u64 map_offset, | 702 | u64 map_offset, |
| 704 | struct sg_table *sgt, | 703 | struct nvgpu_mem_sgl *sgl, |
| 705 | u64 buffer_offset, | 704 | u64 buffer_offset, |
| 706 | u64 size, | 705 | u64 size, |
| 707 | int pgsz_idx, | 706 | int pgsz_idx, |
| @@ -761,9 +760,9 @@ struct gpu_ops { | |||
| 761 | size_t size); | 760 | size_t size); |
| 762 | struct { | 761 | struct { |
| 763 | u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, | 762 | u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, |
| 764 | struct page_alloc_chunk *chunk, u32 w); | 763 | struct nvgpu_mem_sgl *sgl, u32 w); |
| 765 | void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, | 764 | void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, |
| 766 | struct page_alloc_chunk *chunk); | 765 | struct nvgpu_mem_sgl *sgl); |
| 767 | u32 (*data032_r)(u32 i); | 766 | u32 (*data032_r)(u32 i); |
| 768 | } pramin; | 767 | } pramin; |
| 769 | struct { | 768 | struct { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 97b7aa80..cd34e769 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
| @@ -1151,7 +1151,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
| 1151 | struct gk20a_fence *gk20a_fence_out = NULL; | 1151 | struct gk20a_fence *gk20a_fence_out = NULL; |
| 1152 | struct gk20a_fence *gk20a_last_fence = NULL; | 1152 | struct gk20a_fence *gk20a_last_fence = NULL; |
| 1153 | struct nvgpu_page_alloc *alloc = NULL; | 1153 | struct nvgpu_page_alloc *alloc = NULL; |
| 1154 | struct page_alloc_chunk *chunk = NULL; | 1154 | struct nvgpu_mem_sgl *sgl = NULL; |
| 1155 | int err = 0; | 1155 | int err = 0; |
| 1156 | 1156 | ||
| 1157 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) | 1157 | if (g->mm.vidmem.ce_ctx_id == (u32)~0) |
| @@ -1159,16 +1159,16 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
| 1159 | 1159 | ||
| 1160 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); | 1160 | alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); |
| 1161 | 1161 | ||
| 1162 | nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, | 1162 | sgl = alloc->sgl; |
| 1163 | page_alloc_chunk, list_entry) { | 1163 | while (sgl) { |
| 1164 | if (gk20a_last_fence) | 1164 | if (gk20a_last_fence) |
| 1165 | gk20a_fence_put(gk20a_last_fence); | 1165 | gk20a_fence_put(gk20a_last_fence); |
| 1166 | 1166 | ||
| 1167 | err = gk20a_ce_execute_ops(g, | 1167 | err = gk20a_ce_execute_ops(g, |
| 1168 | g->mm.vidmem.ce_ctx_id, | 1168 | g->mm.vidmem.ce_ctx_id, |
| 1169 | 0, | 1169 | 0, |
| 1170 | chunk->base, | 1170 | nvgpu_mem_sgl_phys(sgl), |
| 1171 | chunk->length, | 1171 | nvgpu_mem_sgl_length(sgl), |
| 1172 | 0x00000000, | 1172 | 0x00000000, |
| 1173 | NVGPU_CE_DST_LOCATION_LOCAL_FB, | 1173 | NVGPU_CE_DST_LOCATION_LOCAL_FB, |
| 1174 | NVGPU_CE_MEMSET, | 1174 | NVGPU_CE_MEMSET, |
| @@ -1183,6 +1183,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) | |||
| 1183 | } | 1183 | } |
| 1184 | 1184 | ||
| 1185 | gk20a_last_fence = gk20a_fence_out; | 1185 | gk20a_last_fence = gk20a_fence_out; |
| 1186 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 1186 | } | 1187 | } |
| 1187 | 1188 | ||
| 1188 | if (gk20a_last_fence) { | 1189 | if (gk20a_last_fence) { |
| @@ -1262,10 +1263,10 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) | |||
| 1262 | return addr; | 1263 | return addr; |
| 1263 | } | 1264 | } |
| 1264 | 1265 | ||
| 1265 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) | 1266 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, u64 iova) |
| 1266 | { | 1267 | { |
| 1267 | /* ensure it is not vidmem allocation */ | 1268 | /* ensure it is not vidmem allocation */ |
| 1268 | WARN_ON(is_vidmem_page_alloc((u64)iova)); | 1269 | WARN_ON(is_vidmem_page_alloc(iova)); |
| 1269 | 1270 | ||
| 1270 | if (device_is_iommuable(dev_from_gk20a(g)) && | 1271 | if (device_is_iommuable(dev_from_gk20a(g)) && |
| 1271 | g->ops.mm.get_physical_addr_bits) | 1272 | g->ops.mm.get_physical_addr_bits) |
| @@ -2167,11 +2168,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) | |||
| 2167 | return 34; | 2168 | return 34; |
| 2168 | } | 2169 | } |
| 2169 | 2170 | ||
| 2170 | u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags) | ||
| 2171 | { | ||
| 2172 | return phys; | ||
| 2173 | } | ||
| 2174 | |||
| 2175 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, | 2171 | const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, |
| 2176 | u32 big_page_size) | 2172 | u32 big_page_size) |
| 2177 | { | 2173 | { |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index c77bebf8..2fdc1729 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
| @@ -336,7 +336,6 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm, | |||
| 336 | 336 | ||
| 337 | int gk20a_mm_suspend(struct gk20a *g); | 337 | int gk20a_mm_suspend(struct gk20a *g); |
| 338 | 338 | ||
| 339 | u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags); | ||
| 340 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); | 339 | u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); |
| 341 | 340 | ||
| 342 | void gk20a_mm_ltc_isr(struct gk20a *g); | 341 | void gk20a_mm_ltc_isr(struct gk20a *g); |
| @@ -361,29 +360,29 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) | |||
| 361 | } | 360 | } |
| 362 | 361 | ||
| 363 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | 362 | u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, |
| 364 | u64 map_offset, | 363 | u64 map_offset, |
| 365 | struct sg_table *sgt, | 364 | struct nvgpu_mem_sgl *sgl, |
| 366 | u64 buffer_offset, | 365 | u64 buffer_offset, |
| 367 | u64 size, | 366 | u64 size, |
| 368 | int pgsz_idx, | 367 | int pgsz_idx, |
| 369 | u8 kind_v, | 368 | u8 kind_v, |
| 370 | u32 ctag_offset, | 369 | u32 ctag_offset, |
| 371 | u32 flags, | 370 | u32 flags, |
| 372 | int rw_flag, | 371 | int rw_flag, |
| 373 | bool clear_ctags, | 372 | bool clear_ctags, |
| 374 | bool sparse, | 373 | bool sparse, |
| 375 | bool priv, | 374 | bool priv, |
| 376 | struct vm_gk20a_mapping_batch *batch, | 375 | struct vm_gk20a_mapping_batch *batch, |
| 377 | enum nvgpu_aperture aperture); | 376 | enum nvgpu_aperture aperture); |
| 378 | 377 | ||
| 379 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, | 378 | void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, |
| 380 | u64 vaddr, | 379 | u64 vaddr, |
| 381 | u64 size, | 380 | u64 size, |
| 382 | int pgsz_idx, | 381 | int pgsz_idx, |
| 383 | bool va_allocated, | 382 | bool va_allocated, |
| 384 | int rw_flag, | 383 | int rw_flag, |
| 385 | bool sparse, | 384 | bool sparse, |
| 386 | struct vm_gk20a_mapping_batch *batch); | 385 | struct vm_gk20a_mapping_batch *batch); |
| 387 | 386 | ||
| 388 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); | 387 | struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); |
| 389 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, | 388 | void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
index 9d19e9e5..8a34a63c 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c
| @@ -26,9 +26,9 @@ | |||
| 26 | 26 | ||
| 27 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ | 27 | /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ |
| 28 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | 28 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, |
| 29 | struct page_alloc_chunk *chunk, u32 w) | 29 | struct nvgpu_mem_sgl *sgl, u32 w) |
| 30 | { | 30 | { |
| 31 | u64 bufbase = chunk->base; | 31 | u64 bufbase = nvgpu_mem_sgl_phys(sgl); |
| 32 | u64 addr = bufbase + w * sizeof(u32); | 32 | u64 addr = bufbase + w * sizeof(u32); |
| 33 | u32 hi = (u32)((addr & ~(u64)0xfffff) | 33 | u32 hi = (u32)((addr & ~(u64)0xfffff) |
| 34 | >> bus_bar0_window_target_bar0_window_base_shift_v()); | 34 | >> bus_bar0_window_target_bar0_window_base_shift_v()); |
| @@ -40,8 +40,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
| 40 | 40 | ||
| 41 | gk20a_dbg(gpu_dbg_mem, | 41 | gk20a_dbg(gpu_dbg_mem, |
| 42 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", | 42 | "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", |
| 43 | hi, lo, mem, chunk, bufbase, | 43 | hi, lo, mem, sgl, bufbase, |
| 44 | bufbase + chunk->length, chunk->length); | 44 | bufbase + nvgpu_mem_sgl_phys(sgl), |
| 45 | nvgpu_mem_sgl_length(sgl)); | ||
| 45 | 46 | ||
| 46 | WARN_ON(!bufbase); | 47 | WARN_ON(!bufbase); |
| 47 | 48 | ||
| @@ -57,9 +58,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | |||
| 57 | } | 58 | } |
| 58 | 59 | ||
| 59 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, | 60 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, |
| 60 | struct page_alloc_chunk *chunk) | 61 | struct nvgpu_mem_sgl *sgl) |
| 61 | { | 62 | { |
| 62 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); | 63 | gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); |
| 63 | 64 | ||
| 64 | nvgpu_spinlock_release(&g->mm.pramin_window_lock); | 65 | nvgpu_spinlock_release(&g->mm.pramin_window_lock); |
| 65 | } | 66 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
index 1a1ac871..fc5ba919 100644
--- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h
| @@ -19,10 +19,10 @@ | |||
| 19 | 19 | ||
| 20 | struct gk20a; | 20 | struct gk20a; |
| 21 | struct nvgpu_mem; | 21 | struct nvgpu_mem; |
| 22 | struct page_alloc_chunk; | 22 | struct nvgpu_mem_sgl; |
| 23 | 23 | ||
| 24 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, | 24 | u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, |
| 25 | struct page_alloc_chunk *chunk, u32 w); | 25 | struct nvgpu_mem_sgl *sgl, u32 w); |
| 26 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, | 26 | void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, |
| 27 | struct page_alloc_chunk *chunk); | 27 | struct nvgpu_mem_sgl *sgl); |
| 28 | #endif | 28 | #endif |
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index fc27b120..c276f5a6 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c | |||
| @@ -904,7 +904,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, | |||
| 904 | 904 | ||
| 905 | mem->gpu_va = nvgpu_gmmu_map(vm, | 905 | mem->gpu_va = nvgpu_gmmu_map(vm, |
| 906 | mem, | 906 | mem, |
| 907 | size, | 907 | mem->aligned_size, |
| 908 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | 908 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, |
| 909 | gk20a_mem_flag_none, | 909 | gk20a_mem_flag_none, |
| 910 | false, | 910 | false, |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index de129a5f..11060300 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h | |||
| @@ -27,8 +27,6 @@ | |||
| 27 | #include <nvgpu/gmmu_t19x.h> | 27 | #include <nvgpu/gmmu_t19x.h> |
| 28 | #endif | 28 | #endif |
| 29 | 29 | ||
| 30 | struct scatterlist; | ||
| 31 | |||
| 32 | /* | 30 | /* |
| 33 | * This is the GMMU API visible to blocks outside of the GMMU. Basically this | 31 | * This is the GMMU API visible to blocks outside of the GMMU. Basically this |
| 34 | * API supports all the different types of mappings that might be done in the | 32 | * API supports all the different types of mappings that might be done in the |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h index e2d4d336..f96c2801 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h | |||
| @@ -32,6 +32,8 @@ struct nvgpu_mem_priv { | |||
| 32 | }; | 32 | }; |
| 33 | 33 | ||
| 34 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); | 34 | u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); |
| 35 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, | ||
| 36 | struct sg_table *sgt); | ||
| 35 | 37 | ||
| 36 | /** | 38 | /** |
| 37 | * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. | 39 | * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. |
diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h index 4cac3e70..cfce8c5b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/log.h +++ b/drivers/gpu/nvgpu/include/nvgpu/log.h | |||
| @@ -71,6 +71,7 @@ enum nvgpu_log_categories { | |||
| 71 | gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ | 71 | gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ |
| 72 | gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ | 72 | gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ |
| 73 | gpu_dbg_dma = BIT(22), /* DMA allocation prints. */ | 73 | gpu_dbg_dma = BIT(22), /* DMA allocation prints. */ |
| 74 | gpu_dbg_sgl = BIT(23), /* SGL related traces. */ | ||
| 74 | gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ | 75 | gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ |
| 75 | }; | 76 | }; |
| 76 | 77 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index a112623e..7d19cf81 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h | |||
| @@ -33,6 +33,8 @@ struct gk20a; | |||
| 33 | struct nvgpu_allocator; | 33 | struct nvgpu_allocator; |
| 34 | struct nvgpu_gmmu_attrs; | 34 | struct nvgpu_gmmu_attrs; |
| 35 | 35 | ||
| 36 | #define NVGPU_MEM_DMA_ERROR (~0ULL) | ||
| 37 | |||
| 36 | /* | 38 | /* |
| 37 | * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be | 39 | * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be |
| 38 | * told to the gpu about the aperture, but this flag designates where the | 40 | * told to the gpu about the aperture, but this flag designates where the |
| @@ -44,6 +46,28 @@ enum nvgpu_aperture { | |||
| 44 | APERTURE_VIDMEM | 46 | APERTURE_VIDMEM |
| 45 | }; | 47 | }; |
| 46 | 48 | ||
| 49 | /* | ||
| 50 | * This struct holds the necessary information for describing a struct | ||
| 51 | * nvgpu_mem's scatter gather list. | ||
| 52 | * | ||
| 53 | * These are created in a platform dependent way. As a result, the function | ||
| 54 | * for allocating these is declared in the <nvgpu/_OS_/nvgpu_mem.h> file. | ||
| 55 | */ | ||
| 56 | struct nvgpu_mem_sgl { | ||
| 57 | /* | ||
| 58 | * Internally this is implemented as a singly linked list. | ||
| 59 | */ | ||
| 60 | struct nvgpu_mem_sgl *next; | ||
| 61 | |||
| 62 | /* | ||
| 63 | * There is both a phys address and a DMA address since some systems, | ||
| 64 | * for example ones with an IOMMU, may see these as different addresses. | ||
| 65 | */ | ||
| 66 | u64 phys; | ||
| 67 | u64 dma; | ||
| 68 | u64 length; | ||
| 69 | }; | ||
| 70 | |||
| 47 | struct nvgpu_mem { | 71 | struct nvgpu_mem { |
| 48 | /* | 72 | /* |
| 49 | * Populated for all nvgpu_mem structs - vidmem or system. | 73 | * Populated for all nvgpu_mem structs - vidmem or system. |
| @@ -176,6 +200,27 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, | |||
| 176 | struct nvgpu_mem *dest, struct nvgpu_mem *src, | 200 | struct nvgpu_mem *dest, struct nvgpu_mem *src, |
| 177 | int start_page, int nr_pages); | 201 | int start_page, int nr_pages); |
| 178 | 202 | ||
| 203 | /** | ||
| 204 | * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem. | ||
| 205 | * | ||
| 206 | * @g - The GPU. | ||
| 207 | * @mem - The source memory allocation to use. | ||
| 208 | * | ||
| 209 | * Create a scatter gather list from the passed @mem struct. This list lets the | ||
| 210 | * calling code iterate across each chunk of a DMA allocation when that DMA | ||
| 211 | * allocation is not completely contiguous. | ||
| 212 | */ | ||
| 213 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, | ||
| 214 | struct nvgpu_mem *mem); | ||
| 215 | void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl); | ||
| 216 | |||
| 217 | struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl); | ||
| 218 | u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl); | ||
| 219 | u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl); | ||
| 220 | u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl); | ||
| 221 | u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, | ||
| 222 | struct nvgpu_gmmu_attrs *attrs); | ||
| 223 | |||
| 179 | /* | 224 | /* |
| 180 | * Buffer accessors - wrap between begin() and end() if there is no permanent | 225 | * Buffer accessors - wrap between begin() and end() if there is no permanent |
| 181 | * kernel mapping for this buffer. | 226 | * kernel mapping for this buffer. |
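As a usage sketch for the accessors declared above (the function below is hypothetical and not part of the patch; error handling is reduced to a simple bail-out), an SGL obtained from nvgpu_mem_sgl_create_from_mem() is walked with nvgpu_mem_sgl_next() and friends, then released with nvgpu_mem_sgl_free():

	/*
	 * Illustrative only: walk an nvgpu_mem_sgl chain with the accessors
	 * declared above and sum the length of every chunk.
	 */
	static u64 example_sgl_total_length(struct gk20a *g, struct nvgpu_mem *mem)
	{
		struct nvgpu_mem_sgl *head, *sgl;
		u64 total = 0;

		head = nvgpu_mem_sgl_create_from_mem(g, mem);
		if (!head)
			return 0;

		for (sgl = head; sgl; sgl = nvgpu_mem_sgl_next(sgl))
			total += nvgpu_mem_sgl_length(sgl);

		nvgpu_mem_sgl_free(g, head);

		return total;
	}
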
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h index 9a5ef8d3..de83ca7f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #define PAGE_ALLOCATOR_PRIV_H | 18 | #define PAGE_ALLOCATOR_PRIV_H |
| 19 | 19 | ||
| 20 | #include <nvgpu/allocator.h> | 20 | #include <nvgpu/allocator.h> |
| 21 | #include <nvgpu/nvgpu_mem.h> | ||
| 21 | #include <nvgpu/kmem.h> | 22 | #include <nvgpu/kmem.h> |
| 22 | #include <nvgpu/list.h> | 23 | #include <nvgpu/list.h> |
| 23 | #include <nvgpu/rbtree.h> | 24 | #include <nvgpu/rbtree.h> |
| @@ -83,27 +84,17 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node) | |||
| 83 | ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); | 84 | ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); |
| 84 | }; | 85 | }; |
| 85 | 86 | ||
| 86 | struct page_alloc_chunk { | ||
| 87 | struct nvgpu_list_node list_entry; | ||
| 88 | |||
| 89 | u64 base; | ||
| 90 | u64 length; | ||
| 91 | }; | ||
| 92 | |||
| 93 | static inline struct page_alloc_chunk * | ||
| 94 | page_alloc_chunk_from_list_entry(struct nvgpu_list_node *node) | ||
| 95 | { | ||
| 96 | return (struct page_alloc_chunk *) | ||
| 97 | ((uintptr_t)node - offsetof(struct page_alloc_chunk, list_entry)); | ||
| 98 | }; | ||
| 99 | |||
| 100 | /* | 87 | /* |
| 101 | * Struct to handle internal management of page allocation. It holds a list | 88 | * Struct to handle internal management of page allocation. It holds a list |
| 102 | * of the chunks of pages that make up the overall allocation - much like a | 89 | * of the chunks of pages that make up the overall allocation - much like a |
| 103 | * scatter gather table. | 90 | * scatter gather table. |
| 104 | */ | 91 | */ |
| 105 | struct nvgpu_page_alloc { | 92 | struct nvgpu_page_alloc { |
| 106 | struct nvgpu_list_node alloc_chunks; | 93 | /* |
| 94 | * nvgpu_mem_sgl for describing the actual allocation. Convenient for | ||
| 95 | * GMMU mapping. | ||
| 96 | */ | ||
| 97 | struct nvgpu_mem_sgl *sgl; | ||
| 107 | 98 | ||
| 108 | int nr_chunks; | 99 | int nr_chunks; |
| 109 | u64 length; | 100 | u64 length; |
| @@ -156,7 +147,6 @@ struct nvgpu_page_allocator { | |||
| 156 | int nr_slabs; | 147 | int nr_slabs; |
| 157 | 148 | ||
| 158 | struct nvgpu_kmem_cache *alloc_cache; | 149 | struct nvgpu_kmem_cache *alloc_cache; |
| 159 | struct nvgpu_kmem_cache *chunk_cache; | ||
| 160 | struct nvgpu_kmem_cache *slab_page_cache; | 150 | struct nvgpu_kmem_cache *slab_page_cache; |
| 161 | 151 | ||
| 162 | u64 flags; | 152 | u64 flags; |
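With page_alloc_chunk and its list iterators gone, allocator code that used to walk alloc_chunks would instead follow the allocation's embedded SGL. A minimal sketch, assuming only the accessors declared in nvgpu_mem.h (the helper name is hypothetical):

	/*
	 * Illustrative only: dump every chunk of a page allocation via the
	 * nvgpu_mem_sgl chain that replaces the old page_alloc_chunk list.
	 */
	static void example_dump_page_alloc(struct gk20a *g,
					    struct nvgpu_page_alloc *alloc)
	{
		struct nvgpu_mem_sgl *sgl;

		for (sgl = alloc->sgl; sgl; sgl = nvgpu_mem_sgl_next(sgl))
			nvgpu_log(g, gpu_dbg_alloc,
				  "  phys: 0x%llx len: 0x%llx",
				  nvgpu_mem_sgl_phys(sgl),
				  nvgpu_mem_sgl_length(sgl));
	}
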
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c index 85c436e5..ee9b791a 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | * more details. | 13 | * more details. |
| 14 | */ | 14 | */ |
| 15 | 15 | ||
| 16 | #include <linux/dma-mapping.h> | ||
| 17 | #include "vgpu/vgpu.h" | 16 | #include "vgpu/vgpu.h" |
| 18 | #include "vgpu_mm_gp10b.h" | 17 | #include "vgpu_mm_gp10b.h" |
| 19 | #include "gk20a/mm_gk20a.h" | 18 | #include "gk20a/mm_gk20a.h" |
| @@ -41,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, | |||
| 41 | 40 | ||
| 42 | static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | 41 | static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, |
| 43 | u64 map_offset, | 42 | u64 map_offset, |
| 44 | struct sg_table *sgt, | 43 | struct nvgpu_mem_sgl *sgl, |
| 45 | u64 buffer_offset, | 44 | u64 buffer_offset, |
| 46 | u64 size, | 45 | u64 size, |
| 47 | int pgsz_idx, | 46 | int pgsz_idx, |
| @@ -61,10 +60,9 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 61 | struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; | 60 | struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; |
| 62 | struct tegra_vgpu_mem_desc *mem_desc; | 61 | struct tegra_vgpu_mem_desc *mem_desc; |
| 63 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | 62 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; |
| 63 | u64 buffer_size = PAGE_ALIGN(size); | ||
| 64 | u64 space_to_skip = buffer_offset; | 64 | u64 space_to_skip = buffer_offset; |
| 65 | u64 buffer_size = 0; | ||
| 66 | u32 mem_desc_count = 0, i; | 65 | u32 mem_desc_count = 0, i; |
| 67 | struct scatterlist *sgl; | ||
| 68 | void *handle = NULL; | 66 | void *handle = NULL; |
| 69 | size_t oob_size; | 67 | size_t oob_size; |
| 70 | u8 prot; | 68 | u8 prot; |
| @@ -73,7 +71,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 73 | 71 | ||
| 74 | /* FIXME: add support for sparse mappings */ | 72 | /* FIXME: add support for sparse mappings */ |
| 75 | 73 | ||
| 76 | if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) | 74 | if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu)) |
| 77 | return 0; | 75 | return 0; |
| 78 | 76 | ||
| 79 | if (space_to_skip & (page_size - 1)) | 77 | if (space_to_skip & (page_size - 1)) |
| @@ -100,33 +98,36 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 100 | goto fail; | 98 | goto fail; |
| 101 | } | 99 | } |
| 102 | 100 | ||
| 103 | sgl = sgt->sgl; | 101 | while (sgl) { |
| 104 | while (space_to_skip && sgl && | 102 | u64 phys_addr; |
| 105 | (space_to_skip + page_size > sgl->length)) { | 103 | u64 chunk_length; |
| 106 | space_to_skip -= sgl->length; | 104 | |
| 107 | sgl = sg_next(sgl); | 105 | /* |
| 108 | } | 106 | * Cut out sgl ents for space_to_skip. |
| 109 | WARN_ON(!sgl); | 107 | */ |
| 108 | if (space_to_skip && | ||
| 109 | space_to_skip >= nvgpu_mem_sgl_length(sgl)) { | ||
| 110 | space_to_skip -= nvgpu_mem_sgl_length(sgl); | ||
| 111 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 112 | continue; | ||
| 113 | } | ||
| 110 | 114 | ||
| 111 | if (add_mem_desc(&mem_desc[mem_desc_count++], | 115 | phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; |
| 112 | sg_phys(sgl) + space_to_skip, | 116 | chunk_length = min(size, |
| 113 | sgl->length - space_to_skip, | 117 | nvgpu_mem_sgl_length(sgl) - space_to_skip); |
| 114 | &oob_size)) { | ||
| 115 | err = -ENOMEM; | ||
| 116 | goto fail; | ||
| 117 | } | ||
| 118 | buffer_size += sgl->length - space_to_skip; | ||
| 119 | 118 | ||
| 120 | sgl = sg_next(sgl); | 119 | if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, |
| 121 | while (sgl && buffer_size < size) { | 120 | chunk_length, &oob_size)) { |
| 122 | if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl), | ||
| 123 | sgl->length, &oob_size)) { | ||
| 124 | err = -ENOMEM; | 121 | err = -ENOMEM; |
| 125 | goto fail; | 122 | goto fail; |
| 126 | } | 123 | } |
| 127 | 124 | ||
| 128 | buffer_size += sgl->length; | 125 | space_to_skip = 0; |
| 129 | sgl = sg_next(sgl); | 126 | size -= chunk_length; |
| 127 | sgl = nvgpu_mem_sgl_next(sgl); | ||
| 128 | |||
| 129 | if (size == 0) | ||
| 130 | break; | ||
| 130 | } | 131 | } |
| 131 | 132 | ||
| 132 | if (rw_flag == gk20a_mem_flag_read_only) | 133 | if (rw_flag == gk20a_mem_flag_read_only) |
| @@ -153,7 +154,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 153 | msg.handle = vgpu_get_handle(g); | 154 | msg.handle = vgpu_get_handle(g); |
| 154 | p->handle = vm->handle; | 155 | p->handle = vm->handle; |
| 155 | p->gpu_va = map_offset; | 156 | p->gpu_va = map_offset; |
| 156 | p->size = size; | 157 | p->size = buffer_size; |
| 157 | p->mem_desc_count = mem_desc_count; | 158 | p->mem_desc_count = mem_desc_count; |
| 158 | p->pgsz_idx = pgsz_idx; | 159 | p->pgsz_idx = pgsz_idx; |
| 159 | p->iova = 0; | 160 | p->iova = 0; |
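The reworked mapping loop above can be read as a standalone skip-and-chunk walk. The sketch below is illustrative only (hypothetical name, no mem_desc or OOB-buffer handling): it computes the physical (address, length) pairs that the vgpu map call would describe to the server.

	/*
	 * Illustrative only: given an SGL, a byte offset into the buffer and
	 * a mapping size, visit the physical chunks the map would cover.
	 */
	static void example_walk_map_chunks(struct nvgpu_mem_sgl *sgl,
					    u64 space_to_skip, u64 size)
	{
		while (sgl && size) {
			u64 phys_addr, chunk_length;

			/* Skip whole chunks that lie entirely before the offset. */
			if (space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
				space_to_skip -= nvgpu_mem_sgl_length(sgl);
				sgl = nvgpu_mem_sgl_next(sgl);
				continue;
			}

			phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
			chunk_length = min(size,
					   nvgpu_mem_sgl_length(sgl) - space_to_skip);

			/* A real implementation would add a mem_desc here. */

			space_to_skip = 0;
			size -= chunk_length;
			sgl = nvgpu_mem_sgl_next(sgl);
		}
	}
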
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index ef9e00c8..5da6f158 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c | |||
| @@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g) | |||
| 78 | 78 | ||
| 79 | static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | 79 | static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, |
| 80 | u64 map_offset, | 80 | u64 map_offset, |
| 81 | struct sg_table *sgt, | 81 | struct nvgpu_mem_sgl *sgl, |
| 82 | u64 buffer_offset, | 82 | u64 buffer_offset, |
| 83 | u64 size, | 83 | u64 size, |
| 84 | int pgsz_idx, | 84 | int pgsz_idx, |
| @@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, | |||
| 98 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); | 98 | struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); |
| 99 | struct tegra_vgpu_cmd_msg msg; | 99 | struct tegra_vgpu_cmd_msg msg; |
| 100 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; | 100 | struct tegra_vgpu_as_map_params *p = &msg.params.as_map; |
| 101 | u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); | 101 | u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL); |
| 102 | u8 prot; | 102 | u8 prot; |
| 103 | 103 | ||
| 104 | gk20a_dbg_fn(""); | 104 | gk20a_dbg_fn(""); |
