diff options
author | Kevin Huang <kevinh@nvidia.com> | 2014-04-14 19:53:09 -0400 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-03-18 15:09:59 -0400 |
commit | da56b17de0b360cb020a8831ba82c202057c2968 (patch) | |
tree | 7e759f5ca412775001b00cc4211515c37b3c35ed /drivers/gpu | |
parent | d364553f7c94b4caa649f5e83f709c1c31bc45bb (diff) |
gpu: nvgpu: add generic API for sparse memory
Bug 1442531
Change-Id: I97408b54e27f5ed6411792e73f079a6f86cbe5f6
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hal_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 42 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 29 |
4 files changed, 54 insertions, 23 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 6d3c8225..081ec077 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -211,6 +211,10 @@ struct gpu_ops { | |||
211 | int (*get_netlist_name)(int index, char *name); | 211 | int (*get_netlist_name)(int index, char *name); |
212 | bool (*is_fw_defined)(void); | 212 | bool (*is_fw_defined)(void); |
213 | } gr_ctx; | 213 | } gr_ctx; |
214 | struct { | ||
215 | int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr, | ||
216 | u32 num_pages, u32 pgsz_idx); | ||
217 | } mm; | ||
214 | }; | 218 | }; |
215 | 219 | ||
216 | struct gk20a { | 220 | struct gk20a { |
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index f8a997e2..de6c3713 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include "gk20a_gating_reglist.h" | 22 | #include "gk20a_gating_reglist.h" |
23 | #include "channel_gk20a.h" | 23 | #include "channel_gk20a.h" |
24 | #include "gr_ctx_gk20a.h" | 24 | #include "gr_ctx_gk20a.h" |
25 | #include "mm_gk20a.h" | ||
25 | 26 | ||
26 | struct gpu_ops gk20a_ops = { | 27 | struct gpu_ops gk20a_ops = { |
27 | .clock_gating = { | 28 | .clock_gating = { |
@@ -46,6 +47,7 @@ int gk20a_init_hal(struct gpu_ops *gops) | |||
46 | gk20a_init_fb(gops); | 47 | gk20a_init_fb(gops); |
47 | gk20a_init_fifo(gops); | 48 | gk20a_init_fifo(gops); |
48 | gk20a_init_gr_ctx(gops); | 49 | gk20a_init_gr_ctx(gops); |
50 | gk20a_init_mm(gops); | ||
49 | gops->name = "gk20a"; | 51 | gops->name = "gk20a"; |
50 | 52 | ||
51 | return 0; | 53 | return 0; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index ebd829d3..dd492d92 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/tegra-soc.h> | 29 | #include <linux/tegra-soc.h> |
30 | #include <linux/vmalloc.h> | 30 | #include <linux/vmalloc.h> |
31 | #include <linux/dma-buf.h> | 31 | #include <linux/dma-buf.h> |
32 | #include <asm/cacheflush.h> | ||
33 | 32 | ||
34 | #include "gk20a.h" | 33 | #include "gk20a.h" |
35 | #include "mm_gk20a.h" | 34 | #include "mm_gk20a.h" |
@@ -43,11 +42,6 @@ | |||
43 | 42 | ||
44 | #include "kind_gk20a.h" | 43 | #include "kind_gk20a.h" |
45 | 44 | ||
46 | #ifdef CONFIG_ARM64 | ||
47 | #define outer_flush_range(a, b) | ||
48 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
49 | #endif | ||
50 | |||
51 | /* | 45 | /* |
52 | * GPU mapping life cycle | 46 | * GPU mapping life cycle |
53 | * ====================== | 47 | * ====================== |
@@ -93,12 +87,6 @@ static inline u32 lo32(u64 f) | |||
93 | return (u32)(f & 0xffffffff); | 87 | return (u32)(f & 0xffffffff); |
94 | } | 88 | } |
95 | 89 | ||
96 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
97 | do { \ | ||
98 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
99 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
100 | } while (0) | ||
101 | |||
102 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); | 90 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer); |
103 | static struct mapped_buffer_node *find_mapped_buffer_locked( | 91 | static struct mapped_buffer_node *find_mapped_buffer_locked( |
104 | struct rb_root *root, u64 addr); | 92 | struct rb_root *root, u64 addr); |
@@ -463,7 +451,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | |||
463 | kfree(sgt); | 451 | kfree(sgt); |
464 | } | 452 | } |
465 | 453 | ||
466 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | 454 | int map_gmmu_pages(void *handle, struct sg_table *sgt, |
467 | void **va, size_t size) | 455 | void **va, size_t size) |
468 | { | 456 | { |
469 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | 457 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); |
@@ -471,7 +459,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt, | |||
471 | return 0; | 459 | return 0; |
472 | } | 460 | } |
473 | 461 | ||
474 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | 462 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) |
475 | { | 463 | { |
476 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); | 464 | FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length); |
477 | } | 465 | } |
@@ -571,7 +559,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle, | |||
571 | iova = 0; | 559 | iova = 0; |
572 | } | 560 | } |
573 | 561 | ||
574 | static int map_gmmu_pages(void *handle, struct sg_table *sgt, | 562 | int map_gmmu_pages(void *handle, struct sg_table *sgt, |
575 | void **kva, size_t size) | 563 | void **kva, size_t size) |
576 | { | 564 | { |
577 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; | 565 | int count = PAGE_ALIGN(size) >> PAGE_SHIFT; |
@@ -590,7 +578,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt, | |||
590 | return 0; | 578 | return 0; |
591 | } | 579 | } |
592 | 580 | ||
593 | static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | 581 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) |
594 | { | 582 | { |
595 | gk20a_dbg_fn(""); | 583 | gk20a_dbg_fn(""); |
596 | 584 | ||
@@ -605,7 +593,7 @@ static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va) | |||
605 | * the whole range is zeroed so it's "invalid"/will fault | 593 | * the whole range is zeroed so it's "invalid"/will fault |
606 | */ | 594 | */ |
607 | 595 | ||
608 | static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | 596 | int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, |
609 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | 597 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, |
610 | struct page_table_gk20a *pte) | 598 | struct page_table_gk20a *pte) |
611 | { | 599 | { |
@@ -635,7 +623,7 @@ static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | |||
635 | } | 623 | } |
636 | 624 | ||
637 | /* given address range (inclusive) determine the pdes crossed */ | 625 | /* given address range (inclusive) determine the pdes crossed */ |
638 | static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | 626 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, |
639 | u64 addr_lo, u64 addr_hi, | 627 | u64 addr_lo, u64 addr_hi, |
640 | u32 *pde_lo, u32 *pde_hi) | 628 | u32 *pde_lo, u32 *pde_hi) |
641 | { | 629 | { |
@@ -647,12 +635,12 @@ static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm, | |||
647 | *pde_lo, *pde_hi); | 635 | *pde_lo, *pde_hi); |
648 | } | 636 | } |
649 | 637 | ||
650 | static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i) | 638 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i) |
651 | { | 639 | { |
652 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); | 640 | return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v()); |
653 | } | 641 | } |
654 | 642 | ||
655 | static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm, | 643 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, |
656 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) | 644 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx) |
657 | { | 645 | { |
658 | u32 ret; | 646 | u32 ret; |
@@ -686,7 +674,7 @@ static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page, | |||
686 | * backing store and if not go ahead allocate it and | 674 | * backing store and if not go ahead allocate it and |
687 | * record it in the appropriate pde | 675 | * record it in the appropriate pde |
688 | */ | 676 | */ |
689 | static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | 677 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, |
690 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) | 678 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx) |
691 | { | 679 | { |
692 | int err; | 680 | int err; |
@@ -1981,6 +1969,7 @@ err_unmap: | |||
1981 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | 1969 | static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) |
1982 | { | 1970 | { |
1983 | struct vm_gk20a *vm = mapped_buffer->vm; | 1971 | struct vm_gk20a *vm = mapped_buffer->vm; |
1972 | struct gk20a *g = vm->mm->g; | ||
1984 | 1973 | ||
1985 | if (mapped_buffer->va_node && | 1974 | if (mapped_buffer->va_node && |
1986 | mapped_buffer->va_node->sparse) { | 1975 | mapped_buffer->va_node->sparse) { |
@@ -1990,7 +1979,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
1990 | gmmu_page_shifts[pgsz_idx]; | 1979 | gmmu_page_shifts[pgsz_idx]; |
1991 | 1980 | ||
1992 | /* there is little we can do if this fails... */ | 1981 | /* there is little we can do if this fails... */ |
1993 | gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx); | 1982 | g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx); |
1994 | 1983 | ||
1995 | } else | 1984 | } else |
1996 | __locked_gmmu_unmap(vm, | 1985 | __locked_gmmu_unmap(vm, |
@@ -2259,6 +2248,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2259 | u32 start_page_nr; | 2248 | u32 start_page_nr; |
2260 | struct gk20a_allocator *vma; | 2249 | struct gk20a_allocator *vma; |
2261 | struct vm_gk20a *vm = as_share->vm; | 2250 | struct vm_gk20a *vm = as_share->vm; |
2251 | struct gk20a *g = vm->mm->g; | ||
2262 | struct vm_reserved_va_node *va_node; | 2252 | struct vm_reserved_va_node *va_node; |
2263 | u64 vaddr_start = 0; | 2253 | u64 vaddr_start = 0; |
2264 | 2254 | ||
@@ -2316,7 +2306,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share, | |||
2316 | 2306 | ||
2317 | /* mark that we need to use sparse mappings here */ | 2307 | /* mark that we need to use sparse mappings here */ |
2318 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { | 2308 | if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) { |
2319 | err = gk20a_vm_put_empty(vm, vaddr_start, args->pages, | 2309 | err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages, |
2320 | pgsz_idx); | 2310 | pgsz_idx); |
2321 | if (err) { | 2311 | if (err) { |
2322 | mutex_unlock(&vm->update_gmmu_lock); | 2312 | mutex_unlock(&vm->update_gmmu_lock); |
@@ -3076,3 +3066,9 @@ int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g) | |||
3076 | gk20a_idle(g->dev); | 3066 | gk20a_idle(g->dev); |
3077 | return ret; | 3067 | return ret; |
3078 | } | 3068 | } |
3069 | |||
3070 | void gk20a_init_mm(struct gpu_ops *gops) | ||
3071 | { | ||
3072 | gops->mm.set_sparse = gk20a_vm_put_empty; | ||
3073 | } | ||
3074 | |||
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7f86710b..b0f3536e 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/dma-attrs.h> | 25 | #include <linux/dma-attrs.h> |
26 | #include <linux/iommu.h> | 26 | #include <linux/iommu.h> |
27 | #include <asm/dma-iommu.h> | 27 | #include <asm/dma-iommu.h> |
28 | #include <asm/cacheflush.h> | ||
28 | #include "gk20a_allocator.h" | 29 | #include "gk20a_allocator.h" |
29 | 30 | ||
30 | /* This "address bit" in the gmmu ptes (and other gk20a accesses) | 31 | /* This "address bit" in the gmmu ptes (and other gk20a accesses) |
@@ -41,6 +42,17 @@ | |||
41 | #define NV_GMMU_VA_RANGE 35ULL | 42 | #define NV_GMMU_VA_RANGE 35ULL |
42 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) | 43 | #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1))) |
43 | 44 | ||
45 | #ifdef CONFIG_ARM64 | ||
46 | #define outer_flush_range(a, b) | ||
47 | #define __cpuc_flush_dcache_area __flush_dcache_area | ||
48 | #endif | ||
49 | |||
50 | #define FLUSH_CPU_DCACHE(va, pa, size) \ | ||
51 | do { \ | ||
52 | __cpuc_flush_dcache_area((void *)(va), (size_t)(size)); \ | ||
53 | outer_flush_range(pa, pa + (size_t)(size)); \ | ||
54 | } while (0) | ||
55 | |||
44 | struct mem_desc { | 56 | struct mem_desc { |
45 | struct dma_buf *ref; | 57 | struct dma_buf *ref; |
46 | struct sg_table *sgt; | 58 | struct sg_table *sgt; |
@@ -463,4 +475,21 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share, | |||
463 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); | 475 | int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset); |
464 | 476 | ||
465 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); | 477 | int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev); |
478 | |||
479 | int map_gmmu_pages(void *handle, struct sg_table *sgt, | ||
480 | void **va, size_t size); | ||
481 | void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va); | ||
482 | void pde_range_from_vaddr_range(struct vm_gk20a *vm, | ||
483 | u64 addr_lo, u64 addr_hi, | ||
484 | u32 *pde_lo, u32 *pde_hi); | ||
485 | u32 *pde_from_index(struct vm_gk20a *vm, u32 i); | ||
486 | u32 pte_index_from_vaddr(struct vm_gk20a *vm, | ||
487 | u64 addr, enum gmmu_pgsz_gk20a pgsz_idx); | ||
488 | int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm, | ||
489 | u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); | ||
490 | int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm, | ||
491 | enum gmmu_pgsz_gk20a gmmu_pgsz_idx, | ||
492 | struct page_table_gk20a *pte); | ||
493 | struct gpu_ops; | ||
494 | void gk20a_init_mm(struct gpu_ops *gops); | ||
466 | #endif /*_MM_GK20A_H_ */ | 495 | #endif /*_MM_GK20A_H_ */ |