diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2016-06-14 07:26:32 -0400 |
---|---|---|
committer | Terje Bergstrom <tbergstrom@nvidia.com> | 2016-06-15 12:34:07 -0400 |
commit | 75f6a1dff48bba1fe08ef82a76a0fa5727f3b6f8 (patch) | |
tree | 1e367a377c1b55da28795d9c3980223e0afefea8 | |
parent | e9fd9e9fe3e80c0437542eb119ddc82e9f85aa43 (diff) |
gpu: nvgpu: add vidmem allocation API
Add in-nvgpu APIs for allocating and freeing mem_descs in video memory.
Changes for gmmu tables etc. will be added in upcoming changes.
Video memory is allocated via nvmap by first registering the aperture
size with it and binding it to a struct device, and then going through
the usual DMA allocation path. This API also allows fixed-address
allocations, meant for reserving special memory areas at boot.
The aperture registration is skipped completely if vidmem isn't found
for the particular device.
gk20a_gmmu_alloc_attr() still uses sysmem, and the unmap/free paths
internally select the correct path based on the mem_desc's aperture.
Video memory allocation is off by default, and can be turned on with
CONFIG_GK20A_VIDMEM.
JIRA DNVGPU-16
Change-Id: I77eae5ea90cbed6f4b5db0da86c5f70ddf2a34f9
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1157216
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/Kconfig | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 166 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 35 |
3 files changed, 205 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/Kconfig b/drivers/gpu/nvgpu/Kconfig index 156b7889..6f3a1f8f 100644 --- a/drivers/gpu/nvgpu/Kconfig +++ b/drivers/gpu/nvgpu/Kconfig | |||
@@ -100,4 +100,13 @@ config GK20A_PCI | |||
100 | help | 100 | help |
101 | Enable support for GPUs on PCIe bus. | 101 | Enable support for GPUs on PCIe bus. |
102 | 102 | ||
103 | config GK20A_VIDMEM | ||
104 | bool "Support separate video memory on nvgpu" | ||
105 | depends on GK20A | ||
106 | default n | ||
107 | help | ||
108 | Enable support for using and allocating buffers in a distinct video | ||
109 | memory aperture (in contrast to general system memory), available on | ||
110 | GPUs that have their own banks. PCIe GPUs have this, for example. | ||
111 | |||
103 | trysource "../nvgpu-t19x/drivers/gpu/nvgpu/Kconfig" | 112 | trysource "../nvgpu-t19x/drivers/gpu/nvgpu/Kconfig" |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 14a3dbc6..3b21e843 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -724,15 +724,28 @@ static void gk20a_init_pramin(struct mm_gk20a *mm) | |||
724 | 724 | ||
725 | static int gk20a_init_vidmem(struct mm_gk20a *mm) | 725 | static int gk20a_init_vidmem(struct mm_gk20a *mm) |
726 | { | 726 | { |
727 | #if defined(CONFIG_GK20A_VIDMEM) | ||
727 | struct gk20a *g = mm->g; | 728 | struct gk20a *g = mm->g; |
729 | struct device *d = dev_from_gk20a(g); | ||
728 | size_t size = g->ops.mm.get_vidmem_size ? | 730 | size_t size = g->ops.mm.get_vidmem_size ? |
729 | g->ops.mm.get_vidmem_size(g) : 0; | 731 | g->ops.mm.get_vidmem_size(g) : 0; |
732 | int err; | ||
730 | 733 | ||
731 | if (!size) | 734 | if (!size) |
732 | return 0; | 735 | return 0; |
733 | 736 | ||
737 | err = nvmap_register_vidmem_carveout(&mm->vidmem_dev, SZ_4K, size); | ||
738 | if (err) { | ||
739 | gk20a_err(d, "Failed to register vidmem for size %zu: %d", | ||
740 | size, err); | ||
741 | return err; | ||
742 | } | ||
743 | |||
734 | mm->vidmem_size = size; | 744 | mm->vidmem_size = size; |
735 | 745 | ||
746 | gk20a_dbg_info("registered vidmem: %zu MB", size / SZ_1M); | ||
747 | |||
748 | #endif | ||
736 | return 0; | 749 | return 0; |
737 | } | 750 | } |
738 | 751 | ||
@@ -760,7 +773,10 @@ int gk20a_init_mm_setup_sw(struct gk20a *g) | |||
760 | (int)(mm->channel.kernel_size >> 20)); | 773 | (int)(mm->channel.kernel_size >> 20)); |
761 | 774 | ||
762 | gk20a_init_pramin(mm); | 775 | gk20a_init_pramin(mm); |
763 | gk20a_init_vidmem(mm); | 776 | |
777 | err = gk20a_init_vidmem(mm); | ||
778 | if (err) | ||
779 | return err; | ||
764 | 780 | ||
765 | err = gk20a_alloc_sysmem_flush(g); | 781 | err = gk20a_alloc_sysmem_flush(g); |
766 | if (err) | 782 | if (err) |
@@ -2332,7 +2348,14 @@ int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct mem_desc *mem) | |||
2332 | return gk20a_gmmu_alloc_attr(g, 0, size, mem); | 2348 | return gk20a_gmmu_alloc_attr(g, 0, size, mem); |
2333 | } | 2349 | } |
2334 | 2350 | ||
2335 | int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, struct mem_desc *mem) | 2351 | int gk20a_gmmu_alloc_attr(struct gk20a *g, enum dma_attr attr, size_t size, |
2352 | struct mem_desc *mem) | ||
2353 | { | ||
2354 | return gk20a_gmmu_alloc_attr_sys(g, attr, size, mem); | ||
2355 | } | ||
2356 | |||
2357 | int gk20a_gmmu_alloc_attr_sys(struct gk20a *g, enum dma_attr attr, | ||
2358 | size_t size, struct mem_desc *mem) | ||
2336 | { | 2359 | { |
2337 | struct device *d = dev_from_gk20a(g); | 2360 | struct device *d = dev_from_gk20a(g); |
2338 | int err; | 2361 | int err; |
@@ -2384,7 +2407,7 @@ fail_free: | |||
2384 | return err; | 2407 | return err; |
2385 | } | 2408 | } |
2386 | 2409 | ||
2387 | void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | 2410 | static void gk20a_gmmu_free_attr_sys(struct gk20a *g, enum dma_attr attr, |
2388 | struct mem_desc *mem) | 2411 | struct mem_desc *mem) |
2389 | { | 2412 | { |
2390 | struct device *d = dev_from_gk20a(g); | 2413 | struct device *d = dev_from_gk20a(g); |
@@ -2421,6 +2444,116 @@ void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | |||
2421 | mem->aperture = APERTURE_INVALID; | 2444 | mem->aperture = APERTURE_INVALID; |
2422 | } | 2445 | } |
2423 | 2446 | ||
2447 | int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct mem_desc *mem) | ||
2448 | { | ||
2449 | return gk20a_gmmu_alloc_attr_vid(g, 0, size, mem); | ||
2450 | } | ||
2451 | |||
2452 | int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, enum dma_attr attr, | ||
2453 | size_t size, struct mem_desc *mem) | ||
2454 | { | ||
2455 | return gk20a_gmmu_alloc_attr_vid_at(g, attr, size, mem, 0); | ||
2456 | } | ||
2457 | |||
2458 | int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, enum dma_attr attr, | ||
2459 | size_t size, struct mem_desc *mem, dma_addr_t at) | ||
2460 | { | ||
2461 | #if defined(CONFIG_GK20A_VIDMEM) | ||
2462 | struct device *d = &g->mm.vidmem_dev; | ||
2463 | int err; | ||
2464 | dma_addr_t iova; | ||
2465 | DEFINE_DMA_ATTRS(attrs); | ||
2466 | |||
2467 | gk20a_dbg_fn(""); | ||
2468 | |||
2469 | if (at) { | ||
2470 | void *va; | ||
2471 | |||
2472 | dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, &attrs); | ||
2473 | va = dma_mark_declared_memory_occupied(d, at, size, | ||
2474 | &attrs); | ||
2475 | |||
2476 | if (IS_ERR(va)) | ||
2477 | return PTR_ERR(va); | ||
2478 | |||
2479 | iova = at; | ||
2480 | mem->fixed = true; | ||
2481 | } else { | ||
2482 | dma_set_attr(attr, &attrs); | ||
2483 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2484 | /* cpuva has no meaning here, the following returns null */ | ||
2485 | dma_alloc_attrs(d, size, &iova, GFP_KERNEL, &attrs); | ||
2486 | |||
2487 | if (iova == DMA_ERROR_CODE) | ||
2488 | return -ENOMEM; | ||
2489 | |||
2490 | mem->fixed = false; | ||
2491 | } | ||
2492 | |||
2493 | err = gk20a_get_sgtable(d, &mem->sgt, NULL, iova, size); | ||
2494 | if (err) | ||
2495 | goto fail_free; | ||
2496 | |||
2497 | mem->size = size; | ||
2498 | mem->aperture = APERTURE_VIDMEM; | ||
2499 | |||
2500 | gk20a_dbg_fn("done"); | ||
2501 | |||
2502 | return 0; | ||
2503 | |||
2504 | fail_free: | ||
2505 | if (at) { | ||
2506 | dma_mark_declared_memory_unoccupied(d, iova, mem->size, | ||
2507 | &attrs); | ||
2508 | } else { | ||
2509 | dma_free_attrs(d, size, NULL, iova, &attrs); | ||
2510 | } | ||
2511 | |||
2512 | return err; | ||
2513 | #else | ||
2514 | return -ENOSYS; | ||
2515 | #endif | ||
2516 | } | ||
2517 | |||
2518 | static void gk20a_gmmu_free_attr_vid(struct gk20a *g, enum dma_attr attr, | ||
2519 | struct mem_desc *mem) | ||
2520 | { | ||
2521 | #if defined(CONFIG_GK20A_VIDMEM) | ||
2522 | struct device *d = &g->mm.vidmem_dev; | ||
2523 | DEFINE_DMA_ATTRS(attrs); | ||
2524 | |||
2525 | if (mem->fixed) { | ||
2526 | dma_set_attr(DMA_ATTR_ALLOC_EXACT_SIZE, &attrs); | ||
2527 | dma_mark_declared_memory_unoccupied(d, | ||
2528 | sg_dma_address(mem->sgt->sgl), mem->size, | ||
2529 | &attrs); | ||
2530 | } else { | ||
2531 | dma_set_attr(attr, &attrs); | ||
2532 | dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs); | ||
2533 | dma_free_attrs(d, mem->size, NULL, | ||
2534 | sg_dma_address(mem->sgt->sgl), | ||
2535 | &attrs); | ||
2536 | gk20a_free_sgtable(&mem->sgt); | ||
2537 | } | ||
2538 | |||
2539 | mem->size = 0; | ||
2540 | mem->aperture = APERTURE_INVALID; | ||
2541 | #endif | ||
2542 | } | ||
2543 | |||
2544 | void gk20a_gmmu_free_attr(struct gk20a *g, enum dma_attr attr, | ||
2545 | struct mem_desc *mem) | ||
2546 | { | ||
2547 | switch (mem->aperture) { | ||
2548 | case APERTURE_SYSMEM: | ||
2549 | return gk20a_gmmu_free_attr_sys(g, attr, mem); | ||
2550 | case APERTURE_VIDMEM: | ||
2551 | return gk20a_gmmu_free_attr_vid(g, attr, mem); | ||
2552 | default: | ||
2553 | break; /* like free() on "null" memory */ | ||
2554 | } | ||
2555 | } | ||
2556 | |||
2424 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) | 2557 | void gk20a_gmmu_free(struct gk20a *g, struct mem_desc *mem) |
2425 | { | 2558 | { |
2426 | return gk20a_gmmu_free_attr(g, 0, mem); | 2559 | return gk20a_gmmu_free_attr(g, 0, mem); |
@@ -2453,6 +2586,33 @@ fail_free: | |||
2453 | return err; | 2586 | return err; |
2454 | } | 2587 | } |
2455 | 2588 | ||
2589 | int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, struct mem_desc *mem) | ||
2590 | { | ||
2591 | return gk20a_gmmu_alloc_map_attr_vid(vm, 0, size, mem); | ||
2592 | } | ||
2593 | |||
2594 | int gk20a_gmmu_alloc_map_attr_vid(struct vm_gk20a *vm, | ||
2595 | enum dma_attr attr, size_t size, struct mem_desc *mem) | ||
2596 | { | ||
2597 | int err = gk20a_gmmu_alloc_attr_vid(vm->mm->g, attr, size, mem); | ||
2598 | |||
2599 | if (err) | ||
2600 | return err; | ||
2601 | |||
2602 | mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, | ||
2603 | gk20a_mem_flag_none, false); | ||
2604 | if (!mem->gpu_va) { | ||
2605 | err = -ENOMEM; | ||
2606 | goto fail_free; | ||
2607 | } | ||
2608 | |||
2609 | return 0; | ||
2610 | |||
2611 | fail_free: | ||
2612 | gk20a_gmmu_free(vm->mm->g, mem); | ||
2613 | return err; | ||
2614 | } | ||
2615 | |||
2456 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) | 2616 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct mem_desc *mem) |
2457 | { | 2617 | { |
2458 | if (mem->gpu_va) | 2618 | if (mem->gpu_va) |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 23420fef..f9c5477e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -47,12 +47,13 @@ enum gk20a_aperture { | |||
47 | }; | 47 | }; |
48 | 48 | ||
49 | struct mem_desc { | 49 | struct mem_desc { |
50 | void *cpu_va; | 50 | void *cpu_va; /* sysmem only */ |
51 | struct page **pages; | 51 | struct page **pages; /* sysmem only */ |
52 | struct sg_table *sgt; | 52 | struct sg_table *sgt; |
53 | enum gk20a_aperture aperture; | 53 | enum gk20a_aperture aperture; |
54 | size_t size; | 54 | size_t size; |
55 | u64 gpu_va; | 55 | u64 gpu_va; |
56 | bool fixed; /* vidmem only */ | ||
56 | }; | 57 | }; |
57 | 58 | ||
58 | struct mem_desc_sub { | 59 | struct mem_desc_sub { |
@@ -371,6 +372,7 @@ struct mm_gk20a { | |||
371 | #endif | 372 | #endif |
372 | 373 | ||
373 | size_t vidmem_size; | 374 | size_t vidmem_size; |
375 | struct device vidmem_dev; | ||
374 | }; | 376 | }; |
375 | 377 | ||
376 | int gk20a_mm_init(struct mm_gk20a *mm); | 378 | int gk20a_mm_init(struct mm_gk20a *mm); |
@@ -526,6 +528,15 @@ int gk20a_gmmu_alloc_map_attr(struct vm_gk20a *vm, | |||
526 | size_t size, | 528 | size_t size, |
527 | struct mem_desc *mem); | 529 | struct mem_desc *mem); |
528 | 530 | ||
531 | int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, | ||
532 | size_t size, | ||
533 | struct mem_desc *mem); | ||
534 | |||
535 | int gk20a_gmmu_alloc_map_attr_vid(struct vm_gk20a *vm, | ||
536 | enum dma_attr attr, | ||
537 | size_t size, | ||
538 | struct mem_desc *mem); | ||
539 | |||
529 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, | 540 | void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, |
530 | struct mem_desc *mem); | 541 | struct mem_desc *mem); |
531 | 542 | ||
@@ -538,6 +549,26 @@ int gk20a_gmmu_alloc_attr(struct gk20a *g, | |||
538 | size_t size, | 549 | size_t size, |
539 | struct mem_desc *mem); | 550 | struct mem_desc *mem); |
540 | 551 | ||
552 | int gk20a_gmmu_alloc_attr_sys(struct gk20a *g, | ||
553 | enum dma_attr attr, | ||
554 | size_t size, | ||
555 | struct mem_desc *mem); | ||
556 | |||
557 | int gk20a_gmmu_alloc_vid(struct gk20a *g, | ||
558 | size_t size, | ||
559 | struct mem_desc *mem); | ||
560 | |||
561 | int gk20a_gmmu_alloc_attr_vid(struct gk20a *g, | ||
562 | enum dma_attr attr, | ||
563 | size_t size, | ||
564 | struct mem_desc *mem); | ||
565 | |||
566 | int gk20a_gmmu_alloc_attr_vid_at(struct gk20a *g, | ||
567 | enum dma_attr attr, | ||
568 | size_t size, | ||
569 | struct mem_desc *mem, | ||
570 | dma_addr_t at); | ||
571 | |||
541 | void gk20a_gmmu_free(struct gk20a *g, | 572 | void gk20a_gmmu_free(struct gk20a *g, |
542 | struct mem_desc *mem); | 573 | struct mem_desc *mem); |
543 | 574 | ||