Diffstat (limited to 'drivers/gpu/nvgpu/common')
-rw-r--r--  drivers/gpu/nvgpu/common/linux/nvgpu_mem.c    | 114
-rw-r--r--  drivers/gpu/nvgpu/common/linux/vm.c           |  25
-rw-r--r--  drivers/gpu/nvgpu/common/mm/gmmu.c            | 109
-rw-r--r--  drivers/gpu/nvgpu/common/mm/nvgpu_mem.c       |  73
-rw-r--r--  drivers/gpu/nvgpu/common/mm/page_allocator.c  | 142
-rw-r--r--  drivers/gpu/nvgpu/common/pramin.c             |  27
6 files changed, 345 insertions(+), 145 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index e4991d0d..eb54f3fd 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -21,6 +21,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/kmem.h>
 
 #include <nvgpu/linux/dma.h>
 
@@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
 
 	return 0;
 }
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g,
+						 struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *head, *next;
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	next = head;
+	while (true) {
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		next->dma = sgl->dma;
+		next->phys = sgl->phys;
+		next->length = sgl->length;
+		next->next = NULL;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
+		if (!sgl)
+			break;
+
+		next->next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next->next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+		next = next->next;
+	}
+
+	return head;
+}
+
+static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem(
+	struct gk20a *g,
+	struct scatterlist *linux_sgl)
+{
+	struct nvgpu_page_alloc *vidmem_alloc;
+
+	vidmem_alloc = get_vidmem_page_alloc(linux_sgl);
+	if (!vidmem_alloc)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:");
+
+	return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl);
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g,
+					   struct sg_table *sgt)
+{
+	struct nvgpu_mem_sgl *head, *sgl, *next;
+	struct scatterlist *linux_sgl = sgt->sgl;
+
+	if (is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
+		return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl);
+
+	head = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (!head)
+		return NULL;
+
+	nvgpu_log(g, gpu_dbg_sgl, "Making sgl:");
+
+	sgl = head;
+	while (true) {
+		sgl->dma = sg_dma_address(linux_sgl);
+		sgl->phys = sg_phys(linux_sgl);
+		sgl->length = linux_sgl->length;
+
+		/*
+		 * We don't like offsets in the pages here. This will cause
+		 * problems.
+		 */
+		if (WARN_ON(linux_sgl->offset)) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		nvgpu_log(g, gpu_dbg_sgl,
+			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
+			  sgl->phys, sgl->dma, sgl->length);
+
+		/*
+		 * When there are no more SGL ents for the Linux SGL we are
+		 * done. Don't bother making any more SGL ents for the nvgpu
+		 * SGL.
+		 */
+		linux_sgl = sg_next(linux_sgl);
+		if (!linux_sgl)
+			break;
+
+		next = nvgpu_kzalloc(g, sizeof(*sgl));
+		if (!next) {
+			nvgpu_mem_sgl_free(g, head);
+			return NULL;
+		}
+
+		sgl->next = next;
+		sgl = next;
+	}
+
+	nvgpu_log(g, gpu_dbg_sgl, "Done!");
+	return head;
+}
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g,
+						    struct nvgpu_mem *mem)
+{
+	return nvgpu_mem_sgl_create(g, mem->priv.sgt);
+}
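Note: the nvgpu_mem_sgl node itself is declared in a header that is not part of this diff. A minimal sketch of the struct these routines assume, inferred only from the fields they touch above (the authoritative definition lives in the nvgpu_mem headers):

struct nvgpu_mem_sgl {
	/* Singly linked list of discontiguous chunks. */
	struct nvgpu_mem_sgl	*next;

	u64			 phys;		/* Physical address of the chunk. */
	u64			 dma;		/* IOMMU (DMA) address; 0 when unmapped. */
	u64			 length;	/* Chunk length in bytes. */
};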
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 86d8bec9..4a4429dc 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -21,8 +21,11 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/vm_area.h>
+#include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/page_allocator.h>
 
+#include <nvgpu/linux/nvgpu_mem.h>
+
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
 #include "gk20a/kind_gk20a.h"
@@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
 
 	if (aperture == APERTURE_VIDMEM) {
 		struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl);
-		struct page_alloc_chunk *chunk = NULL;
+		struct nvgpu_mem_sgl *sgl_vid = alloc->sgl;
 
-		nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-					  page_alloc_chunk, list_entry) {
-			chunk_align = 1ULL << __ffs(chunk->base |
-						    chunk->length);
+		while (sgl_vid) {
+			chunk_align = 1ULL <<
+				__ffs(nvgpu_mem_sgl_phys(sgl_vid) |
+				      nvgpu_mem_sgl_length(sgl_vid));
 
 			if (align)
 				align = min(align, chunk_align);
 			else
 				align = chunk_align;
+
+			sgl_vid = nvgpu_mem_sgl_next(sgl_vid);
 		}
 
 		return align;
@@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_area *vm_area = NULL;
 	u32 ctag_offset;
 	enum nvgpu_aperture aperture;
+	struct nvgpu_mem_sgl *nvgpu_sgl;
 
 	/*
 	 * The kind used as part of the key for map caching. HW may
@@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 		ctag_offset += buffer_offset >>
 			       ilog2(g->ops.fb.compression_page_size(g));
 
+	nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt);
+
 	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm, map_offset,
-					bfr.sgt,
+	map_offset = g->ops.mm.gmmu_map(vm,
+					map_offset,
+					nvgpu_sgl,
 					buffer_offset, /* sg offset */
 					mapping_size,
 					bfr.pgsz_idx,
@@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	if (!map_offset)
 		goto clean_up;
 
+	nvgpu_mem_sgl_free(g, nvgpu_sgl);
+
 	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
 	if (!mapped_buffer) {
 		nvgpu_warn(g, "oom allocating tracking buffer");
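Note: the rewritten alignment loop preserves the old math. A chunk honors every power-of-two alignment that divides both its base address and its length, so 1ULL << __ffs(base | length) is the largest such alignment, and the buffer-wide value is the minimum over all chunks. A standalone illustration with hypothetical values (user-space C, not driver code):

#include <stdio.h>
#include <stdint.h>

/*
 * Largest power of two dividing both base and length; equivalent to
 * the kernel's 1ULL << __ffs(base | length).
 */
static uint64_t chunk_alignment(uint64_t base, uint64_t length)
{
	uint64_t bits = base | length;

	return bits & ~(bits - 1);
}

int main(void)
{
	/* A chunk at 0x201000 of length 0x3000 supports 4K alignment. */
	printf("align = 0x%llx\n",
	       (unsigned long long)chunk_alignment(0x201000, 0x3000));
	return 0;
}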
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 7f486d68..41f5acdd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 	u64 vaddr;
 
-	struct sg_table *sgt = mem->priv.sgt;
+	struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem);
+
+	if (!sgl)
+		return -ENOMEM;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
-				   sgt,    /* sg table */
+				   sgl,    /* sg list */
 				   0,      /* sg offset */
 				   size,
 				   gmmu_page_size_kernel,
@@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 				   NULL,   /* mapping_batch handle */
 				   aperture);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	nvgpu_mem_sgl_free(g, sgl);
+
 	if (!vaddr) {
-		nvgpu_err(g, "failed to allocate va space");
+		nvgpu_err(g, "failed to map buffer!");
 		return 0;
 	}
 
@@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings.
+ * Map a nvgpu_mem into the GMMU. This is for kernel space to use.
  */
 u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 		   struct nvgpu_mem *mem,
@@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm,
 }
 
 /*
- * Like nvgpu_gmmu_map() except it can work on a fixed address instead.
+ * Like nvgpu_gmmu_map() except this can work on a fixed address.
  */
 u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm,
 			 struct nvgpu_mem *mem,
@@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
 	 */
 	target_addr = next_pd ?
 		nvgpu_pde_phys_addr(g, next_pd) :
-		g->ops.mm.gpu_phys_addr(g, attrs, phys_addr);
+		phys_addr;
 
 	l->update_entry(vm, l,
 			pd, pd_idx,
@@ -458,18 +464,16 @@
  * VIDMEM version of the update_ptes logic.
  */
 static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
 						 u64 space_to_skip,
 						 u64 virt_addr,
 						 u64 length,
 						 struct nvgpu_gmmu_attrs *attrs)
 {
-	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
 	u64 phys_addr, chunk_length;
 	int err = 0;
 
-	if (!sgt) {
+	if (!sgl) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 		return err;
 	}
 
-	alloc = get_vidmem_page_alloc(sgt->sgl);
-
 	/*
 	 * Otherwise iterate across all the chunks in this allocation and
 	 * map them.
 	 */
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	while (sgl) {
 		if (space_to_skip &&
-		    space_to_skip >= chunk->length) {
-			space_to_skip -= chunk->length;
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
 			continue;
 		}
 
-		phys_addr = chunk->base + space_to_skip;
-		chunk_length = min(length, (chunk->length - space_to_skip));
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) -
+					    space_to_skip));
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm,
 
 		if (length == 0)
 			break;
+
+		sgl = nvgpu_mem_sgl_next(sgl);
 	}
 
 	return err;
 }
 
 static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
-						 struct sg_table *sgt,
+						 struct nvgpu_mem_sgl *sgl,
 						 u64 space_to_skip,
 						 u64 virt_addr,
 						 u64 length,
 						 struct nvgpu_gmmu_attrs *attrs)
 {
 	int err;
-	struct scatterlist *sgl;
 	struct gk20a *g = gk20a_from_vm(vm);
 
-	if (!sgt) {
+	if (!sgl) {
 		/*
 		 * This is considered an unmap. Just pass in 0 as the physical
 		 * address for the entire GPU range.
@@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 	}
 
 	/*
-	 * At this point we have a Linux scatter-gather list pointing to some
-	 * number of discontiguous chunks of memory. Iterate over that list and
-	 * generate a GMMU map call for each chunk. There are two possibilities:
-	 * either the IOMMU is enabled or not. When the IOMMU is enabled the
-	 * mapping is simple since the "physical" address is actually a virtual
-	 * IO address and will be contiguous. The no-IOMMU case is more
-	 * complicated. We will have to iterate over the SGT and do a separate
-	 * map for each chunk of the SGT.
+	 * At this point we have a scatter-gather list pointing to some number
+	 * of discontiguous chunks of memory. We must iterate over that list and
+	 * generate a GMMU map call for each chunk. There are two possibilities:
+	 * either an IOMMU is enabled or not. When an IOMMU is enabled the
+	 * mapping is simple since the "physical" address is actually a virtual
+	 * IO address and will be contiguous.
 	 */
-	sgl = sgt->sgl;
-
 	if (!g->mm.bypass_smmu) {
-		u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl);
+		u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs);
 
 		io_addr += space_to_skip;
 
@@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 		/*
 		 * Cut out sgl ents for space_to_skip.
 		 */
-		if (space_to_skip && space_to_skip >= sgl->length) {
-			space_to_skip -= sgl->length;
-			sgl = sg_next(sgl);
+		if (space_to_skip &&
+		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
+			space_to_skip -= nvgpu_mem_sgl_length(sgl);
+			sgl = nvgpu_mem_sgl_next(sgl);
 			continue;
 		}
 
-		phys_addr = sg_phys(sgl) + space_to_skip;
-		chunk_length = min(length, sgl->length - space_to_skip);
+		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
+		chunk_length = min(length,
+				   nvgpu_mem_sgl_length(sgl) - space_to_skip);
 
 		err = __set_pd_level(vm, &vm->pdb,
 				     0,
@@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
 				     virt_addr,
 				     chunk_length,
 				     attrs);
-		if (err)
-			return err;
 
 		space_to_skip = 0;
 		virt_addr += chunk_length;
 		length -= chunk_length;
-		sgl = sg_next(sgl);
+		sgl = nvgpu_mem_sgl_next(sgl);
 
 		if (length == 0)
 			break;
@@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
  * implementations. But the logic around that is generic to all chips. Every
  * chip has some number of PDE levels and then a PTE level.
  *
- * Each chunk of the incoming SGT is sent to the chip specific implementation
+ * Each chunk of the incoming SGL is sent to the chip specific implementation
  * of page table update.
 *
 * [*] Note: the "physical" address may actually be an IO virtual address in the
 *     case of SMMU usage.
 */
 static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
-					  struct sg_table *sgt,
+					  struct nvgpu_mem_sgl *sgl,
 					  u64 space_to_skip,
 					  u64 virt_addr,
 					  u64 length,
 					  struct nvgpu_gmmu_attrs *attrs)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_page_alloc *alloc;
-	u64 phys_addr = 0;
 	u32 page_size;
 	int err;
 
@@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		return err;
 	}
 
-	if (sgt) {
-		if (attrs->aperture == APERTURE_VIDMEM) {
-			alloc = get_vidmem_page_alloc(sgt->sgl);
-
-			phys_addr = alloc->base;
-		} else
-			phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
-	}
-
 	__gmmu_dbg(g, attrs,
 		   "vm=%s "
 		   "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx "
 		   "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | "
 		   "kind=%#02x APT=%-6s %c%c%c%c%c",
 		   vm->name,
-		   sgt ? "MAP" : "UNMAP",
+		   sgl ? "MAP" : "UNMAP",
 		   virt_addr,
 		   length,
-		   phys_addr,
+		   sgl ? nvgpu_mem_sgl_phys(sgl) : 0,
 		   space_to_skip,
 		   page_size >> 10,
 		   nvgpu_gmmu_perm_str(attrs->rw_flag),
@@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 		   attrs->valid ? 'V' : '-');
 
 	/*
-	 * Handle VIDMEM progamming. Currently uses a different scatter list
-	 * format.
+	 * For historical reasons these are separate, but soon these will be
+	 * unified.
 	 */
 	if (attrs->aperture == APERTURE_VIDMEM)
 		err = __nvgpu_gmmu_update_page_table_vidmem(vm,
-							    sgt,
+							    sgl,
 							    space_to_skip,
 							    virt_addr,
 							    length,
 							    attrs);
 	else
 		err = __nvgpu_gmmu_update_page_table_sysmem(vm,
-							    sgt,
+							    sgl,
 							    space_to_skip,
 							    virt_addr,
 							    length,
@@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 	unmap_gmmu_pages(g, &vm->pdb);
 	nvgpu_smp_mb();
 
-	__gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP");
+	__gmmu_dbg(g, attrs, "%-5s Done!", sgl ? "MAP" : "UNMAP");
 
 	return err;
 }
@@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
 */
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 vaddr,
-			  struct sg_table *sgt,
+			  struct nvgpu_mem_sgl *sgl,
 			  u64 buffer_offset,
 			  u64 size,
 			  int pgsz_idx,
@@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		allocated = true;
 	}
 
-	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
+	err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset,
 					     vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update ptes on map");
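Note: after this change the vidmem and sysmem walkers share the same chunk-skipping pattern: whole chunks are consumed while space_to_skip still covers them, then each remaining chunk is mapped, with the residual offset applied only to the first mapped chunk. A condensed sketch of that shared loop (names mirror the diff; this is not the literal driver code):

while (sgl) {
	/* Skip whole chunks that fall entirely inside space_to_skip. */
	if (space_to_skip && space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
		space_to_skip -= nvgpu_mem_sgl_length(sgl);
		sgl = nvgpu_mem_sgl_next(sgl);
		continue;
	}

	phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
	chunk_length = min(length, nvgpu_mem_sgl_length(sgl) - space_to_skip);

	/* ... __set_pd_level() programs this chunk's PTEs ... */

	space_to_skip = 0;	/* Only the first mapped chunk is offset. */
	virt_addr += chunk_length;
	length -= chunk_length;
	sgl = nvgpu_mem_sgl_next(sgl);

	if (length == 0)
		break;
}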
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
new file mode 100644
index 00000000..7296c673
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/kmem.h>
+#include <nvgpu/nvgpu_mem.h>
+
+#include "gk20a/gk20a.h"
+
+struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->next;
+}
+
+u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->phys;
+}
+
+u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->dma;
+}
+
+u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl)
+{
+	return sgl->length;
+}
+
+/*
+ * This builds a GPU address for the %sgl based on whether an IOMMU is present
+ * or not. It also handles turning the physical address into the true GPU
+ * physical address that should be programmed into the page tables.
+ */
+u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
+			   struct nvgpu_gmmu_attrs *attrs)
+{
+	if (nvgpu_mem_sgl_dma(sgl) == 0)
+		return g->ops.mm.gpu_phys_addr(g, attrs,
+					       nvgpu_mem_sgl_phys(sgl));
+
+	if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE)
+		return 0;
+
+	return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl));
+}
+
+void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *next;
+
+	/*
+	 * Free each of the elements. We expect each element to have been
+	 * nvgpu_k[mz]alloc()ed.
+	 */
+	while (sgl) {
+		next = nvgpu_mem_sgl_next(sgl);
+		nvgpu_kfree(g, sgl);
+		sgl = next;
+	}
+}
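Note: code outside this file is expected to go through the accessors above rather than touching struct fields directly. A hypothetical debug helper (not part of the patch) showing the intended read-only usage of the API:

static void dump_sgl(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
{
	/* Walk the list via the accessor functions only. */
	while (sgl) {
		nvgpu_log(g, gpu_dbg_sgl,
			  "  phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx",
			  nvgpu_mem_sgl_phys(sgl),
			  nvgpu_mem_sgl_dma(sgl),
			  nvgpu_mem_sgl_length(sgl));
		sgl = nvgpu_mem_sgl_next(sgl);
	}
}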
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 72ff8f2d..6d92b457 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
 			       struct nvgpu_page_alloc *alloc,
 			       bool free_buddy_alloc)
 {
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl = alloc->sgl;
 
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					       page_alloc_chunk,
-					       list_entry);
-		nvgpu_list_del(&chunk->list_entry);
-
-		if (free_buddy_alloc)
-			nvgpu_free(&a->source_allocator, chunk->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (free_buddy_alloc) {
+		while (sgl) {
+			nvgpu_free(&a->source_allocator, sgl->phys);
+			sgl = nvgpu_mem_sgl_next(sgl);
+		}
 	}
 
+	nvgpu_mem_sgl_free(a->owner->g, alloc->sgl);
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 }
 
@@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a,
 }
 
 /*
- * This expects @alloc to have 1 empty page_alloc_chunk already added to the
- * alloc_chunks list.
+ * This expects @alloc to have 1 empty sgl_entry ready for usage.
 */
 static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 			   struct page_alloc_slab *slab,
 			   struct nvgpu_page_alloc *alloc)
 {
 	struct page_alloc_slab_page *slab_page = NULL;
-	struct page_alloc_chunk *chunk;
+	struct nvgpu_mem_sgl *sgl;
 	unsigned long offs;
 
 	/*
@@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a,
 		BUG(); /* Should be impossible to hit this. */
 
 	/*
-	 * Handle building the nvgpu_page_alloc struct. We expect one
-	 * page_alloc_chunk to be present.
+	 * Handle building the nvgpu_page_alloc struct. We expect one sgl
+	 * to be present.
 	 */
 	alloc->slab_page = slab_page;
 	alloc->nr_chunks = 1;
 	alloc->length = slab_page->slab_size;
 	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);
 
-	chunk = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				       page_alloc_chunk, list_entry);
-	chunk->base = alloc->base;
-	chunk->length = alloc->length;
+	sgl = alloc->sgl;
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = alloc->length;
+	sgl->next = NULL;
 
 	return 0;
 }
@@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 	int err, slab_nr;
 	struct page_alloc_slab *slab;
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl = NULL;
 
 	/*
 	 * Align the length to a page and then divide by the page size (4k for
@@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 		palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
 		goto fail;
 	}
-	chunk = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!chunk) {
-		palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!sgl) {
+		palloc_dbg(a, "OOM: could not alloc sgl struct!\n");
 		goto fail;
 	}
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
-	nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks);
-
+	alloc->sgl = sgl;
 	err = __do_slab_alloc(a, slab, alloc);
 	if (err)
 		goto fail;
@@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
 fail:
 	if (alloc)
 		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
-	if (chunk)
-		nvgpu_kmem_cache_free(a->chunk_cache, chunk);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
 	return NULL;
 }
 
@@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 	struct nvgpu_page_allocator *a, u64 pages)
 {
 	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
 	u64 max_chunk_len = pages << a->page_shift;
 	int i = 0;
 
@@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 
 	memset(alloc, 0, sizeof(*alloc));
 
-	nvgpu_init_list_node(&alloc->alloc_chunks);
 	alloc->length = pages << a->page_shift;
 
 	while (pages) {
@@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
 			goto fail_cleanup;
 		}
 
-		c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-		if (!c) {
+		sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+		if (!sgl) {
 			nvgpu_free(&a->source_allocator, chunk_addr);
 			goto fail_cleanup;
 		}
 
 		pages -= chunk_pages;
 
-		c->base = chunk_addr;
-		c->length = chunk_len;
-		nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+		sgl->phys = chunk_addr;
+		sgl->dma = chunk_addr;
+		sgl->length = chunk_len;
+
+		/*
+		 * Build the singly linked list with a head node that is part of
+		 * the list.
+		 */
+		if (prev_sgl)
+			prev_sgl->next = sgl;
+		else
+			alloc->sgl = sgl;
+
+		prev_sgl = sgl;
 
 		i++;
 	}
 
 	alloc->nr_chunks = i;
-	c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-				   page_alloc_chunk, list_entry);
-	alloc->base = c->base;
+	alloc->base = alloc->sgl->phys;
 
 	return alloc;
 
 fail_cleanup:
-	while (!nvgpu_list_empty(&alloc->alloc_chunks)) {
-		c = nvgpu_list_first_entry(&alloc->alloc_chunks,
-					   page_alloc_chunk, list_entry);
-		nvgpu_list_del(&c->list_entry);
-		nvgpu_free(&a->source_allocator, c->base);
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	sgl = alloc->sgl;
+	while (sgl) {
+		struct nvgpu_mem_sgl *next = sgl->next;
+
+		nvgpu_free(&a->source_allocator, sgl->phys);
+		nvgpu_kfree(a->owner->g, sgl);
+
+		sgl = next;
 	}
+
 	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 fail:
 	return NULL;
@@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 	struct nvgpu_page_allocator *a, u64 len)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 	u64 pages;
 	int i = 0;
 
@@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
 
 	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
 		   pages << a->page_shift, pages, alloc->base);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
 	}
+	palloc_dbg(a, "Alloc done\n");
 
 	return alloc;
 }
@@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
 	struct nvgpu_page_alloc *alloc;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 
 	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
-	c = nvgpu_kmem_cache_alloc(a->chunk_cache);
-	if (!alloc || !c)
+	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
+	if (!alloc || !sgl)
 		goto fail;
 
 	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
@@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 
 	alloc->nr_chunks = 1;
 	alloc->length = length;
-	nvgpu_init_list_node(&alloc->alloc_chunks);
+	alloc->sgl = sgl;
 
-	c->base = alloc->base;
-	c->length = length;
-	nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks);
+	sgl->phys = alloc->base;
+	sgl->dma = alloc->base;
+	sgl->length = length;
+	sgl->next = NULL;
 
 	return alloc;
 
 fail:
-	if (c)
-		nvgpu_kmem_cache_free(a->chunk_cache, c);
+	if (sgl)
+		nvgpu_kfree(a->owner->g, sgl);
 	if (alloc)
 		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
 	return NULL;
@@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 {
 	struct nvgpu_page_allocator *a = page_allocator(__a);
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *c;
+	struct nvgpu_mem_sgl *sgl;
 	u64 aligned_len, pages;
 	int i = 0;
 
@@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
 		   alloc->base, aligned_len, pages);
-	nvgpu_list_for_each_entry(c, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
+	sgl = alloc->sgl;
+	while (sgl) {
 		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx\n",
-			   i++, c->base, c->length);
+			   i++,
+			   nvgpu_mem_sgl_phys(sgl),
+			   nvgpu_mem_sgl_length(sgl));
+		sgl = sgl->next;
 	}
 
 	a->nr_fixed_allocs++;
@@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 
 	a->alloc_cache = nvgpu_kmem_cache_create(g,
 		sizeof(struct nvgpu_page_alloc));
-	a->chunk_cache = nvgpu_kmem_cache_create(g,
-		sizeof(struct page_alloc_chunk));
 	a->slab_page_cache = nvgpu_kmem_cache_create(g,
 		sizeof(struct page_alloc_slab_page));
-	if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) {
+	if (!a->alloc_cache || !a->slab_page_cache) {
 		err = -ENOMEM;
 		goto fail;
 	}
@@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 fail:
 	if (a->alloc_cache)
 		nvgpu_kmem_cache_destroy(a->alloc_cache);
-	if (a->chunk_cache)
-		nvgpu_kmem_cache_destroy(a->chunk_cache);
	if (a->slab_page_cache)
 		nvgpu_kmem_cache_destroy(a->slab_page_cache);
 	nvgpu_kfree(g, a);
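Note: __do_nvgpu_alloc_pages() above builds the SGL with the classic head/prev pattern: the first node becomes alloc->sgl and each later node chains off the previous one, so no dummy head node is needed. An isolated sketch of the pattern, with hypothetical chunk_addr/chunk_len inputs standing in for the buddy allocator calls:

struct nvgpu_mem_sgl *head = NULL, *prev = NULL;
int i;

for (i = 0; i < nr_chunks; i++) {
	struct nvgpu_mem_sgl *node = nvgpu_kzalloc(g, sizeof(*node));

	if (!node)
		break;	/* The real code unwinds the whole list here. */

	node->phys = chunk_addr[i];
	node->dma = chunk_addr[i];
	node->length = chunk_len[i];

	if (prev)
		prev->next = node;	/* Append at the tail. */
	else
		head = node;		/* First node becomes the head. */
	prev = node;
}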
diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c
index 425bfdb4..bb7d930e 100644
--- a/drivers/gpu/nvgpu/common/pramin.c
+++ b/drivers/gpu/nvgpu/common/pramin.c
@@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
-	struct page_alloc_chunk *chunk = NULL;
+	struct nvgpu_mem_sgl *sgl;
 	u32 byteoff, start_reg, until_end, n;
 
 	alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-	nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks,
-				  page_alloc_chunk, list_entry) {
-		if (offset >= chunk->length)
-			offset -= chunk->length;
-		else
+	sgl = alloc->sgl;
+	while (sgl) {
+		if (offset >= nvgpu_mem_sgl_length(sgl)) {
+			offset -= nvgpu_mem_sgl_length(sgl);
+			sgl = sgl->next;
+		} else {
 			break;
+		}
 	}
 
 	while (size) {
-		byteoff = g->ops.pramin.enter(g, mem, chunk,
+		u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl);
+
+		byteoff = g->ops.pramin.enter(g, mem, sgl,
 					      offset / sizeof(u32));
 		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
 		until_end = SZ_1M - (byteoff & (SZ_1M - 1));
 
-		n = min3(size, until_end, (u32)(chunk->length - offset));
+		n = min3(size, until_end, (u32)(sgl_len - offset));
 
 		loop(g, start_reg, n / sizeof(u32), arg);
 
 		/* read back to synchronize accesses */
 		gk20a_readl(g, start_reg);
-		g->ops.pramin.exit(g, mem, chunk);
+		g->ops.pramin.exit(g, mem, sgl);
 
 		size -= n;
 
-		if (n == (chunk->length - offset)) {
-			chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk,
-						      list_entry);
+		if (n == (sgl_len - offset)) {
+			sgl = nvgpu_mem_sgl_next(sgl);
 			offset = 0;
 		} else {
 			offset += n;