author    Kevin Huang <kevinh@nvidia.com>        2014-04-14 19:53:09 -0400
committer Dan Willemsen <dwillemsen@nvidia.com>  2015-03-18 15:09:59 -0400
commit    da56b17de0b360cb020a8831ba82c202057c2968
tree      7e759f5ca412775001b00cc4211515c37b3c35ed
parent    d364553f7c94b4caa649f5e83f709c1c31bc45bb
gpu: nvgpu: add generic api for sparse memory

Add an mm struct to gpu_ops with a set_sparse() hook so the sparse-mapping
implementation can be selected per chip through the HAL, and point gk20a at
gk20a_vm_put_empty(). Export the gmmu page-table helpers from mm_gk20a.c so
chip code outside that file can reuse them, and move FLUSH_CPU_DCACHE() into
mm_gk20a.h for the same reason.

Bug 1442531

Change-Id: I97408b54e27f5ed6411792e73f079a6f86cbe5f6
Signed-off-by: Kevin Huang <kevinh@nvidia.com>
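The core of the change is a chip-dispatch pattern: sparse mappings are no longer requested by calling gk20a_vm_put_empty() directly, but through a function pointer that each chip's HAL installs at init time. A minimal sketch of that pattern follows; struct and function names other than set_sparse and gk20a_vm_put_empty are illustrative, not taken from the patch.

#include <linux/types.h>

struct vm_gk20a;        /* opaque here; the real definition lives in mm_gk20a.h */

/* Per-chip memory-management hooks, mirroring the new gpu_ops.mm. */
struct mm_ops_sketch {
        int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
                          u32 num_pages, u32 pgsz_idx);
};

/* gk20a backs sparse ranges with "empty" PTEs; see gk20a_init_mm() below. */
int gk20a_vm_put_empty(struct vm_gk20a *vm, u64 vaddr,
                       u32 num_pages, u32 pgsz_idx);

/* HAL init fills in the hook; common code only ever calls the pointer. */
static void init_mm_sketch(struct mm_ops_sketch *ops)
{
        ops->set_sparse = gk20a_vm_put_empty;
}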
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h       4
-rw-r--r--  drivers/gpu/nvgpu/gk20a/hal_gk20a.c   2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c   42
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   29
4 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 6d3c8225..081ec077 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -211,6 +211,10 @@ struct gpu_ops {
                int (*get_netlist_name)(int index, char *name);
                bool (*is_fw_defined)(void);
        } gr_ctx;
+       struct {
+               int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
+                              u32 num_pages, u32 pgsz_idx);
+       } mm;
 };
 
 struct gk20a {
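With the hook living in gpu_ops, a later chip can substitute its own sparse implementation without touching common code. A hypothetical example, not part of this patch ("newchip" is a placeholder name):

/* Hypothetical: a newer chip installs its own handler at HAL init. */
static int newchip_vm_set_sparse(struct vm_gk20a *vm, u64 vaddr,
                                 u32 num_pages, u32 pgsz_idx)
{
        /* chip-specific sparse PTE programming would go here */
        return 0;
}

void newchip_init_mm(struct gpu_ops *gops)
{
        gops->mm.set_sparse = newchip_vm_set_sparse;
}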
diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
index f8a997e2..de6c3713 100644
--- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c
@@ -22,6 +22,7 @@
 #include "gk20a_gating_reglist.h"
 #include "channel_gk20a.h"
 #include "gr_ctx_gk20a.h"
+#include "mm_gk20a.h"
 
 struct gpu_ops gk20a_ops = {
        .clock_gating = {
@@ -46,6 +47,7 @@ int gk20a_init_hal(struct gpu_ops *gops)
        gk20a_init_fb(gops);
        gk20a_init_fifo(gops);
        gk20a_init_gr_ctx(gops);
+       gk20a_init_mm(gops);
        gops->name = "gk20a";
 
        return 0;
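gk20a_init_mm() runs alongside the other per-unit init calls inside gk20a_init_hal(), so the pointer is populated before any address-space ioctl can reach it. A chip HAL that skipped this call would leave ops->mm.set_sparse NULL; a defensive caller could guard the dereference with a helper like the following (hypothetical, not in the patch):

#include <linux/errno.h>

static int vm_set_sparse_checked(struct gk20a *g, struct vm_gk20a *vm,
                                 u64 vaddr, u32 num_pages, u32 pgsz_idx)
{
        if (!g->ops.mm.set_sparse)
                return -ENOSYS; /* no sparse support wired up for this chip */
        return g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
}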
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index ebd829d3..dd492d92 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -29,7 +29,6 @@
 #include <linux/tegra-soc.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-buf.h>
-#include <asm/cacheflush.h>
 
 #include "gk20a.h"
 #include "mm_gk20a.h"
@@ -43,11 +42,6 @@
 
 #include "kind_gk20a.h"
 
-#ifdef CONFIG_ARM64
-#define outer_flush_range(a, b)
-#define __cpuc_flush_dcache_area __flush_dcache_area
-#endif
-
 /*
  * GPU mapping life cycle
  * ======================
@@ -93,12 +87,6 @@ static inline u32 lo32(u64 f)
        return (u32)(f & 0xffffffff);
 }
 
-#define FLUSH_CPU_DCACHE(va, pa, size)  \
-       do {    \
-               __cpuc_flush_dcache_area((void *)(va), (size_t)(size));        \
-               outer_flush_range(pa, pa + (size_t)(size));     \
-       } while (0)
-
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer);
 static struct mapped_buffer_node *find_mapped_buffer_locked(
                                struct rb_root *root, u64 addr);
@@ -463,7 +451,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
        kfree(sgt);
 }
 
-static int map_gmmu_pages(void *handle, struct sg_table *sgt,
+int map_gmmu_pages(void *handle, struct sg_table *sgt,
                          void **va, size_t size)
 {
        FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
@@ -471,7 +459,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt,
        return 0;
 }
 
-static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
+void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
 {
        FLUSH_CPU_DCACHE(handle, sg_phys(sgt->sgl), sgt->sgl->length);
 }
@@ -571,7 +559,7 @@ static void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
        iova = 0;
 }
 
-static int map_gmmu_pages(void *handle, struct sg_table *sgt,
+int map_gmmu_pages(void *handle, struct sg_table *sgt,
                          void **kva, size_t size)
 {
        int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -590,7 +578,7 @@ static int map_gmmu_pages(void *handle, struct sg_table *sgt,
        return 0;
 }
 
-static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
+void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
 {
        gk20a_dbg_fn("");
 
@@ -605,7 +593,7 @@ static void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va)
  * the whole range is zeroed so it's "invalid"/will fault
  */
 
-static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
+int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
                        enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
                        struct page_table_gk20a *pte)
 {
@@ -635,7 +623,7 @@ static int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 }
 
 /* given address range (inclusive) determine the pdes crossed */
-static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
+void pde_range_from_vaddr_range(struct vm_gk20a *vm,
                        u64 addr_lo, u64 addr_hi,
                        u32 *pde_lo, u32 *pde_hi)
 {
@@ -647,12 +635,12 @@ static inline void pde_range_from_vaddr_range(struct vm_gk20a *vm,
                   *pde_lo, *pde_hi);
 }
 
-static inline u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
+u32 *pde_from_index(struct vm_gk20a *vm, u32 i)
 {
        return (u32 *) (((u8 *)vm->pdes.kv) + i*gmmu_pde__size_v());
 }
 
-static inline u32 pte_index_from_vaddr(struct vm_gk20a *vm,
+u32 pte_index_from_vaddr(struct vm_gk20a *vm,
                        u64 addr, enum gmmu_pgsz_gk20a pgsz_idx)
 {
        u32 ret;
@@ -686,7 +674,7 @@ static inline void pte_space_page_offset_from_index(u32 i, u32 *pte_page,
  * backing store and if not go ahead allocate it and
  * record it in the appropriate pde
  */
-static int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
+int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
                        u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 {
        int err;
@@ -1981,6 +1969,7 @@ err_unmap:
 static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 {
        struct vm_gk20a *vm = mapped_buffer->vm;
+       struct gk20a *g = vm->mm->g;
 
        if (mapped_buffer->va_node &&
            mapped_buffer->va_node->sparse) {
@@ -1990,7 +1979,7 @@ static void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
                        gmmu_page_shifts[pgsz_idx];
 
                /* there is little we can do if this fails... */
-               gk20a_vm_put_empty(vm, vaddr, num_pages, pgsz_idx);
+               g->ops.mm.set_sparse(vm, vaddr, num_pages, pgsz_idx);
 
        } else
                __locked_gmmu_unmap(vm,
@@ -2259,6 +2248,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
        u32 start_page_nr;
        struct gk20a_allocator *vma;
        struct vm_gk20a *vm = as_share->vm;
+       struct gk20a *g = vm->mm->g;
        struct vm_reserved_va_node *va_node;
        u64 vaddr_start = 0;
 
@@ -2316,7 +2306,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 
        /* mark that we need to use sparse mappings here */
        if (args->flags & NVHOST_AS_ALLOC_SPACE_FLAGS_SPARSE) {
-               err = gk20a_vm_put_empty(vm, vaddr_start, args->pages,
+               err = g->ops.mm.set_sparse(vm, vaddr_start, args->pages,
                                         pgsz_idx);
                if (err) {
                        mutex_unlock(&vm->update_gmmu_lock);
@@ -3076,3 +3066,9 @@ int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g)
        gk20a_idle(g->dev);
        return ret;
 }
+
+void gk20a_init_mm(struct gpu_ops *gops)
+{
+       gops->mm.set_sparse = gk20a_vm_put_empty;
+}
+
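Both converted call sites above fetch g through vm->mm->g and dereference the hook; the unmap path derives the page count for set_sparse from the buffer's byte size via gmmu_page_shifts[pgsz_idx]. A self-contained sketch of that conversion (the shift values are assumptions based on gk20a's 4K small and 128K big pages, and the array name is suffixed to mark it as illustrative):

#include <linux/types.h>

/* Assumed log2 page sizes: index 0 = 4K small, index 1 = 128K big. */
static const u32 gmmu_page_shifts_sketch[2] = { 12, 17 };

/* Byte size of a mapping -> number of GMMU pages for set_sparse(). */
static u32 size_to_num_pages(u64 size, u32 pgsz_idx)
{
        return (u32)(size >> gmmu_page_shifts_sketch[pgsz_idx]);
}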
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 7f86710b..b0f3536e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -25,6 +25,7 @@
 #include <linux/dma-attrs.h>
 #include <linux/iommu.h>
 #include <asm/dma-iommu.h>
+#include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
 /* This "address bit" in the gmmu ptes (and other gk20a accesses)
30/* This "address bit" in the gmmu ptes (and other gk20a accesses) 31/* This "address bit" in the gmmu ptes (and other gk20a accesses)
@@ -41,6 +42,17 @@
 #define NV_GMMU_VA_RANGE       35ULL
 #define NV_GMMU_VA_IS_UPPER(x) ((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
 
+#ifdef CONFIG_ARM64
+#define outer_flush_range(a, b)
+#define __cpuc_flush_dcache_area __flush_dcache_area
+#endif
+
+#define FLUSH_CPU_DCACHE(va, pa, size)  \
+       do {    \
+               __cpuc_flush_dcache_area((void *)(va), (size_t)(size));        \
+               outer_flush_range(pa, pa + (size_t)(size));     \
+       } while (0)
+
 struct mem_desc {
        struct dma_buf *ref;
        struct sg_table *sgt;
@@ -463,4 +475,21 @@ int gk20a_vm_map_buffer(struct gk20a_as_share *as_share,
 int gk20a_vm_unmap_buffer(struct gk20a_as_share *, u64 offset);
 
 int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
+
+int map_gmmu_pages(void *handle, struct sg_table *sgt,
+                  void **va, size_t size);
+void unmap_gmmu_pages(void *handle, struct sg_table *sgt, void *va);
+void pde_range_from_vaddr_range(struct vm_gk20a *vm,
+                       u64 addr_lo, u64 addr_hi,
+                       u32 *pde_lo, u32 *pde_hi);
+u32 *pde_from_index(struct vm_gk20a *vm, u32 i);
+u32 pte_index_from_vaddr(struct vm_gk20a *vm,
+                       u64 addr, enum gmmu_pgsz_gk20a pgsz_idx);
+int validate_gmmu_page_table_gk20a_locked(struct vm_gk20a *vm,
+                       u32 i, enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
+int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
+                       enum gmmu_pgsz_gk20a gmmu_pgsz_idx,
+                       struct page_table_gk20a *pte);
+struct gpu_ops;
+void gk20a_init_mm(struct gpu_ops *gops);
 #endif /*_MM_GK20A_H_ */
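With map_gmmu_pages(), pde_range_from_vaddr_range() and the other helpers now exported, chip code outside mm_gk20a.c can drive the GMMU page tables directly. A hypothetical caller built only from the prototypes above; per the _locked suffix it would have to run with vm->update_gmmu_lock held:

/* Hypothetical: make sure page tables back a VA range before mapping it. */
static int ensure_page_tables(struct vm_gk20a *vm,
                              u64 addr_lo, u64 addr_hi,
                              enum gmmu_pgsz_gk20a pgsz_idx)
{
        u32 pde_lo, pde_hi, i;
        int err;

        /* Which PDEs does [addr_lo, addr_hi] touch? */
        pde_range_from_vaddr_range(vm, addr_lo, addr_hi, &pde_lo, &pde_hi);

        /* Allocate backing page tables for any PDE that lacks them. */
        for (i = pde_lo; i <= pde_hi; i++) {
                err = validate_gmmu_page_table_gk20a_locked(vm, i, pgsz_idx);
                if (err)
                        return err;
        }
        return 0;
}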