diff options
author | Terje Bergstrom <tbergstrom@nvidia.com> | 2015-01-19 17:50:57 -0500 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2015-04-04 21:04:45 -0400 |
commit | 4aef10c9507a19fb288936b88b0faeb62a520817 (patch) | |
tree | 0c773b9c18e3c9d318783c0cd575b6bb94f2bf30 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |
parent | f4883ab97af69610c0507c245f69eef00d203a28 (diff) |
gpu: nvgpu: Set compression page per SoC
Compression page size varies depending on architecture. Make it
128kB on gk20a and gm20b.
Also export some common functions from gm20b.
Bug 1592495
Change-Id: Ifb1c5b15d25fa961dab097021080055fc385fecd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/673790
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 22 |
1 file changed, 11 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 3bce3c74..6b7f84a3 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -1001,7 +1001,9 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
1001 | enum gmmu_pgsz_gk20a pgsz_idx) | 1001 | enum gmmu_pgsz_gk20a pgsz_idx) |
1002 | { | 1002 | { |
1003 | bool kind_compressible; | 1003 | bool kind_compressible; |
1004 | struct device *d = dev_from_gk20a(vm->mm->g); | 1004 | struct gk20a *g = gk20a_from_vm(vm); |
1005 | struct device *d = dev_from_gk20a(g); | ||
1006 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1005 | 1007 | ||
1006 | if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) | 1008 | if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v())) |
1007 | bfr->kind_v = gmmu_pte_kind_pitch_v(); | 1009 | bfr->kind_v = gmmu_pte_kind_pitch_v(); |
@@ -1036,8 +1038,7 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm, | |||
1036 | kind_compressible = false; | 1038 | kind_compressible = false; |
1037 | } | 1039 | } |
1038 | if (kind_compressible) | 1040 | if (kind_compressible) |
1039 | bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >> | 1041 | bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity); |
1040 | COMP_TAG_LINE_SIZE_SHIFT; | ||
1041 | else | 1042 | else |
1042 | bfr->ctag_lines = 0; | 1043 | bfr->ctag_lines = 0; |
1043 | 1044 | ||
@@ -1113,10 +1114,10 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, | |||
1113 | u32 pde_lo, pde_hi; | 1114 | u32 pde_lo, pde_hi; |
1114 | struct device *d = dev_from_vm(vm); | 1115 | struct device *d = dev_from_vm(vm); |
1115 | struct gk20a *g = gk20a_from_vm(vm); | 1116 | struct gk20a *g = gk20a_from_vm(vm); |
1117 | int ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1116 | 1118 | ||
1117 | if (clear_ctags && ctag_offset) { | 1119 | if (clear_ctags && ctag_offset) { |
1118 | u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >> | 1120 | u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity); |
1119 | COMP_TAG_LINE_SIZE_SHIFT; | ||
1120 | 1121 | ||
1121 | /* init/clear the ctag buffer */ | 1122 | /* init/clear the ctag buffer */ |
1122 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | 1123 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, |
@@ -1756,7 +1757,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1756 | struct scatterlist *cur_chunk; | 1757 | struct scatterlist *cur_chunk; |
1757 | unsigned int cur_offset; | 1758 | unsigned int cur_offset; |
1758 | u32 pte_w[2] = {0, 0}; /* invalid pte */ | 1759 | u32 pte_w[2] = {0, 0}; /* invalid pte */ |
1759 | u32 ctag = ctag_offset * SZ_128K; | 1760 | struct gk20a *g = gk20a_from_vm(vm); |
1761 | u32 ctag_granularity = g->ops.fb.compression_page_size(g); | ||
1762 | u32 ctag = ctag_offset * ctag_granularity; | ||
1760 | u32 ctag_incr; | 1763 | u32 ctag_incr; |
1761 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; | 1764 | u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; |
1762 | u64 addr = 0; | 1765 | u64 addr = 0; |
@@ -1768,9 +1771,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1768 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", | 1771 | gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d", |
1769 | pgsz_idx, pde_lo, pde_hi); | 1772 | pgsz_idx, pde_lo, pde_hi); |
1770 | 1773 | ||
1771 | /* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch | ||
1772 | * below (per-pte). Note: this doesn't work unless page size (when | ||
1773 | * comptags are active) is 128KB. We have checks elsewhere for that. */ | ||
1774 | ctag_incr = ctag_offset ? page_size : 0; | 1774 | ctag_incr = ctag_offset ? page_size : 0; |
1775 | 1775 | ||
1776 | cur_offset = 0; | 1776 | cur_offset = 0; |
@@ -1843,7 +1843,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, | |||
1843 | >> gmmu_pte_address_shift_v()); | 1843 | >> gmmu_pte_address_shift_v()); |
1844 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | | 1844 | pte_w[1] = gmmu_pte_aperture_video_memory_f() | |
1845 | gmmu_pte_kind_f(kind_v) | | 1845 | gmmu_pte_kind_f(kind_v) | |
1846 | gmmu_pte_comptagline_f(ctag / SZ_128K); | 1846 | gmmu_pte_comptagline_f(ctag |
1847 | / ctag_granularity); | ||
1847 | 1848 | ||
1848 | if (rw_flag == gk20a_mem_flag_read_only) { | 1849 | if (rw_flag == gk20a_mem_flag_read_only) { |
1849 | pte_w[0] |= gmmu_pte_read_only_true_f(); | 1850 | pte_w[0] |= gmmu_pte_read_only_true_f(); |
@@ -2161,7 +2162,6 @@ int gk20a_init_vm(struct mm_gk20a *mm, | |||
2161 | vm->big_pages = big_pages; | 2162 | vm->big_pages = big_pages; |
2162 | 2163 | ||
2163 | vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; | 2164 | vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big]; |
2164 | vm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big]; | ||
2165 | vm->pde_stride = vm->big_page_size << 10; | 2165 | vm->pde_stride = vm->big_page_size << 10; |
2166 | vm->pde_stride_shift = ilog2(vm->pde_stride); | 2166 | vm->pde_stride_shift = ilog2(vm->pde_stride); |
2167 | 2167 | ||