author     Terje Bergstrom <tbergstrom@nvidia.com>  2015-01-19 17:50:57 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>    2015-04-04 21:04:45 -0400
commit     4aef10c9507a19fb288936b88b0faeb62a520817 (patch)
tree       0c773b9c18e3c9d318783c0cd575b6bb94f2bf30
parent     f4883ab97af69610c0507c245f69eef00d203a28 (diff)
gpu: nvgpu: Set compression page per SoC
Compression page size varies depending on architecture. Make it 128kB
on gk20a and gm20b. Also export some common functions from gm20b.

Bug 1592495

Change-Id: Ifb1c5b15d25fa961dab097021080055fc385fecd
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/673790
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fb_gk20a.c    6
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c       2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.h       1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gr_gk20a.h    3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c   22
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h    1
-rw-r--r--  drivers/gpu/nvgpu/gm20b/fb_gm20b.c    6
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.c  14
-rw-r--r--  drivers/gpu/nvgpu/gm20b/ltc_gm20b.h   7
-rw-r--r--  drivers/gpu/nvgpu/vgpu/mm_vgpu.c      1
10 files changed, 39 insertions, 24 deletions
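The shape of the change, in isolation: a per-chip fb.compression_page_size
hook replaces the hard-coded 128K constant, and common code derives comptag
line counts from whatever the hook returns. Below is a minimal standalone
sketch of that pattern; the struct layouts and the div_round_up_ull helper
are simplified stand-ins for the real nvgpu definitions, not the driver's
actual code.

/* build: cc -o cps_sketch cps_sketch.c */
#include <stdio.h>

#define SZ_128K (128 * 1024)

struct gk20a;

/* Simplified stand-in for the nvgpu HAL table: only the new hook. */
struct gpu_ops {
        struct {
                int (*compression_page_size)(struct gk20a *g);
        } fb;
};

struct gk20a {
        struct gpu_ops ops;
};

/* gk20a and gm20b both return 128K today; a future chip can return a
 * different value without touching the common mm code below. */
static int gk20a_fb_compression_page_size(struct gk20a *g)
{
        (void)g;
        return SZ_128K;
}

/* Stand-in for the kernel's DIV_ROUND_UP_ULL(). */
static unsigned long long div_round_up_ull(unsigned long long n,
                                           unsigned long long d)
{
        return (n + d - 1) / d;
}

int main(void)
{
        struct gk20a g;
        unsigned long long buf_size = 300 * 1024; /* a 300K buffer */
        int granularity;
        unsigned long long ctag_lines;

        /* HAL wiring, as gk20a_init_fb()/gm20b_init_fb() now do. */
        g.ops.fb.compression_page_size = gk20a_fb_compression_page_size;

        /* Common code asks the chip instead of assuming SZ_128K:
         * one comptag line covers one compression page. */
        granularity = g.ops.fb.compression_page_size(&g);
        ctag_lines = div_round_up_ull(buf_size, granularity);

        printf("granularity=%d ctag_lines=%llu\n", granularity, ctag_lines);
        return 0;       /* prints granularity=131072 ctag_lines=3 */
}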
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index d5b3fd87..568aed7a 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -42,10 +42,16 @@ static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
 	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
 }
 
+static int gk20a_fb_compression_page_size(struct gk20a *g)
+{
+	return SZ_128K;
+}
+
 void gk20a_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.reset = fb_gk20a_reset;
 	gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
+	gops->fb.compression_page_size = gk20a_fb_compression_page_size;
 	gk20a_init_uncompressed_kind_map();
 	gk20a_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 6c18c895..57d5f09a 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1788,7 +1788,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
 	gpu->big_page_size = g->mm.pmu.vm.big_page_size;
-	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
+	gpu->compression_page_size = g->ops.fb.compression_page_size(g);
 	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 
 	gpu->available_big_page_sizes = gpu->big_page_size;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index b9796faa..4fbc25be 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -167,6 +167,7 @@ struct gpu_ops {
 		void (*init_uncompressed_kind_map)(struct gk20a *g);
 		void (*init_kind_attr)(struct gk20a *g);
 		void (*set_mmu_page_size)(struct gk20a *g);
+		int (*compression_page_size)(struct gk20a *g);
 	} fb;
 	struct {
 		void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index f130b830..cd6fe9cb 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -258,9 +258,6 @@ struct gr_gk20a {
 	u32 map_tile_count;
 	u32 map_row_offset;
 
-#define COMP_TAG_LINE_SIZE_SHIFT (17) /* one tag covers 128K */
-#define COMP_TAG_LINE_SIZE (1 << COMP_TAG_LINE_SIZE_SHIFT)
-
 	u32 max_comptag_mem; /* max memory size (MB) for comptag */
 	struct compbit_store_desc compbit_store;
 	struct gk20a_allocator comp_tags;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3bce3c74..6b7f84a3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,9 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 			enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	bool kind_compressible;
-	struct device *d = dev_from_gk20a(vm->mm->g);
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_gk20a(g);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
 
 	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
 		bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -1036,8 +1038,7 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 		kind_compressible = false;
 	}
 	if (kind_compressible)
-		bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
-			COMP_TAG_LINE_SIZE_SHIFT;
+		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
 	else
 		bfr->ctag_lines = 0;
 
@@ -1113,10 +1114,10 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	u32 pde_lo, pde_hi;
 	struct device *d = dev_from_vm(vm);
 	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
 
 	if (clear_ctags && ctag_offset) {
-		u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >>
-				COMP_TAG_LINE_SIZE_SHIFT;
+		u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
 
 		/* init/clear the ctag buffer */
 		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
@@ -1756,7 +1757,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	struct scatterlist *cur_chunk;
 	unsigned int cur_offset;
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
-	u32 ctag = ctag_offset * SZ_128K;
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
+	u32 ctag = ctag_offset * ctag_granularity;
 	u32 ctag_incr;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	u64 addr = 0;
@@ -1768,9 +1771,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
 		   pgsz_idx, pde_lo, pde_hi);
 
-	/* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
-	 * below (per-pte). Note: this doesn't work unless page size (when
-	 * comptags are active) is 128KB. We have checks elsewhere for that. */
 	ctag_incr = ctag_offset ? page_size : 0;
 
 	cur_offset = 0;
@@ -1843,7 +1843,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			>> gmmu_pte_address_shift_v());
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
 			gmmu_pte_kind_f(kind_v) |
-			gmmu_pte_comptagline_f(ctag / SZ_128K);
+			gmmu_pte_comptagline_f(ctag
+					 / ctag_granularity);
 
 		if (rw_flag == gk20a_mem_flag_read_only) {
 			pte_w[0] |= gmmu_pte_read_only_true_f();
@@ -2161,7 +2162,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	vm->big_pages = big_pages;
 
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
-	vm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
 	vm->pde_stride = vm->big_page_size << 10;
 	vm->pde_stride_shift = ilog2(vm->pde_stride);
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 8470a7ac..e4fc3085 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -293,7 +293,6 @@ struct vm_gk20a {
 	bool tlb_dirty;
 	bool mapped;
 
-	u32 compression_page_size;
 	u32 big_page_size;
 	u32 pde_stride;
 	u32 pde_stride_shift;
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
index 7cdd776e..deef7896 100644
--- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -90,10 +90,16 @@ static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
 	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
 }
 
+static int gm20b_fb_compression_page_size(struct gk20a *g)
+{
+	return SZ_128K;
+}
+
 void gm20b_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.init_fs_state = fb_gm20b_init_fs_state;
 	gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
+	gops->fb.compression_page_size = gm20b_fb_compression_page_size;
 	gm20b_init_uncompressed_kind_map();
 	gm20b_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index fe2e06d5..0a0efe41 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -101,8 +101,8 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
-static int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
-			      u32 min, u32 max)
+int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		       u32 min, u32 max)
 {
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
@@ -170,7 +170,7 @@ out:
 	return 0;
 }
 
-static void gm20b_ltc_init_fs_state(struct gk20a *g)
+void gm20b_ltc_init_fs_state(struct gk20a *g)
 {
 	u32 reg;
 
@@ -196,7 +196,7 @@ static void gm20b_ltc_init_fs_state(struct gk20a *g)
 	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
 }
 
-static void gm20b_ltc_isr(struct gk20a *g)
+void gm20b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
 	int ltc, slice;
@@ -221,7 +221,7 @@ static void gm20b_ltc_isr(struct gk20a *g)
 	}
 }
 
-static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 {
 	u32 data;
 	bool done[g->ltc_count];
@@ -265,7 +265,7 @@ static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
265 "g_elpg_flush too many retries"); 265 "g_elpg_flush too many retries");
266} 266}
267 267
268static u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) 268u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
269{ 269{
270 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); 270 u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
271 if (val == 2) { 271 if (val == 2) {
@@ -281,7 +281,7 @@ static u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
 /*
  * Performs a full flush of the L2 cache.
  */
-static void gm20b_flush_ltc(struct gk20a *g)
+void gm20b_flush_ltc(struct gk20a *g)
 {
 	u32 op_pending;
 	unsigned long now, timeout;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
index c7524264..288e193a 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
@@ -18,4 +18,11 @@
 struct gpu_ops;
 
 void gm20b_init_ltc(struct gpu_ops *gops);
+void gm20b_ltc_init_fs_state(struct gk20a *g);
+int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		u32 min, u32 max);
+void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g);
+void gm20b_ltc_isr(struct gk20a *g);
+u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base);
+void gm20b_flush_ltc(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 2dd8cb68..6817b107 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -39,7 +39,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 
 	/* gk20a_init_gpu_characteristics expects this to be populated */
 	vm->big_page_size = big_page_size;
-	vm->compression_page_size = big_page_size;
 	vm->pde_stride = vm->big_page_size << 10;
 	vm->pde_stride_shift = ilog2(vm->pde_stride);
 