author	Sami Kiminki <skiminki@nvidia.com>	2017-08-17 13:57:59 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2017-08-24 11:24:30 -0400
commit	82ba1277f3da7379ed6b8288c04bb91db008549c (patch)
tree	2ee45afb7c07468218ea56b6e662d933887f197b /drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
parent	3fa47b877db1edc16018d662e7b9915d92354745 (diff)
gpu: nvgpu: Limit max CBC clear job size to 16384 lines
Limit the maximum job size of CBC ctrl clear to 16 klines. This avoids
timeouts and excessive lock hold duration when clearing comptags for a
huge surface. 16 klines corresponds to a 1-GB surface for a 64-kB
compression page size. If the requested CBC ctrl job is larger than
16 klines, split it into chunks of at most 16 klines each.

Bug 1860962
Bug 200334740

Change-Id: Ibc69adc8bf59527b1acec5b2097b5aefa2169960
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1540432
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
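To make the splitting arithmetic concrete, here is a minimal standalone sketch of the chunking pattern the commit describes, assuming min <= max and an inclusive line range. This is not the driver code: process_chunk() is a hypothetical stand-in for one bounded CBC ctrl job issued under the l2_op_lock.

#include <stdint.h>
#include <stdio.h>

#define MAX_LINES 16384U

/* Hypothetical stand-in for one bounded CBC ctrl clear job. */
static void process_chunk(uint32_t lo, uint32_t hi)
{
	printf("clear comptag lines %u..%u (%u lines)\n",
	       lo, hi, hi - lo + 1U);
}

/* Walk the inclusive range [min, max] in chunks of at most MAX_LINES. */
static void clear_range(uint32_t min, uint32_t max)
{
	while (1) {
		uint32_t iter_max = min + MAX_LINES - 1U;

		/* Cap at max; the second test guards u32 wraparound. */
		if (iter_max > max || iter_max < min)
			iter_max = max;

		process_chunk(min, iter_max);

		if (iter_max == max)
			break;

		/* iter_max is an inclusive upper bound. */
		min = iter_max + 1U;
	}
}

int main(void)
{
	/* 40001 lines split as 16384 + 16384 + 7233. */
	clear_range(0U, 40000U);
	return 0;
}

For scale: at a 64-kB compression page size, one full 16384-line chunk covers 16384 x 64 kB = 2^30 bytes, which is where the 1-GB figure in the commit message comes from.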
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b/ltc_gm20b.c')
-rw-r--r--	drivers/gpu/nvgpu/gm20b/ltc_gm20b.c	102
1 file changed, 65 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 74c56487..b96f0b5c 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -113,6 +113,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 		gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
 	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
 	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+	const u32 max_lines = 16384;
 
 	gk20a_dbg_fn("");
 
@@ -121,45 +122,72 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
 	if (gr->compbit_store.mem.size == 0)
 		return 0;
 
-	nvgpu_mutex_acquire(&g->mm.l2_op_lock);
-
-	if (op == gk20a_cbc_op_clear) {
-		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(),
-			ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min));
-		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(),
-			ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max));
-		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
-	} else if (op == gk20a_cbc_op_clean) {
-		hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
-	} else if (op == gk20a_cbc_op_invalidate) {
-		hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
-	} else {
-		BUG_ON(1);
-	}
-	gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
-		     gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
-
-	for (ltc = 0; ltc < g->ltc_count; ltc++) {
-		for (slice = 0; slice < slices_per_ltc; slice++) {
-
-			ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
-				ltc * ltc_stride + slice * lts_stride;
-
-			nvgpu_timeout_init(g, &timeout, 2000,
-					   NVGPU_TIMER_RETRY_TIMER);
-			do {
-				val = gk20a_readl(g, ctrl1);
-				if (!(val & hw_op))
-					break;
-				nvgpu_udelay(5);
-			} while (!nvgpu_timeout_expired(&timeout));
-
-			if (nvgpu_timeout_peek_expired(&timeout)) {
-				nvgpu_err(g, "comp tag clear timeout");
-				err = -EBUSY;
-				goto out;
+	while (1) {
+		const u32 iter_max = min(min + max_lines - 1, max);
+		bool full_cache_op = true;
+
+		nvgpu_mutex_acquire(&g->mm.l2_op_lock);
+
+		gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max);
+
+		if (op == gk20a_cbc_op_clear) {
+			gk20a_writel(
+				g, ltc_ltcs_ltss_cbc_ctrl2_r(),
+				ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
+					min));
+			gk20a_writel(
+				g, ltc_ltcs_ltss_cbc_ctrl3_r(),
+				ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
+					iter_max));
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
+			full_cache_op = false;
+		} else if (op == gk20a_cbc_op_clean) {
+			/* this is full-cache op */
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
+		} else if (op == gk20a_cbc_op_invalidate) {
+			/* this is full-cache op */
+			hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
+		} else {
+			nvgpu_err(g, "Unknown op: %u", (unsigned)op);
+			err = -EINVAL;
+			goto out;
+		}
+		gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
+			     gk20a_readl(g,
+					 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
+
+		for (ltc = 0; ltc < g->ltc_count; ltc++) {
+			for (slice = 0; slice < slices_per_ltc; slice++) {
+
+				ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
+					ltc * ltc_stride + slice * lts_stride;
+
+				nvgpu_timeout_init(g, &timeout, 2000,
+						   NVGPU_TIMER_RETRY_TIMER);
+				do {
+					val = gk20a_readl(g, ctrl1);
+					if (!(val & hw_op))
+						break;
+					nvgpu_udelay(5);
+				} while (!nvgpu_timeout_expired(&timeout));
+
+				if (nvgpu_timeout_peek_expired(&timeout)) {
+					nvgpu_err(g, "comp tag clear timeout");
+					err = -EBUSY;
+					goto out;
+				}
 			}
 		}
+
+		/* are we done? */
+		if (full_cache_op || iter_max == max)
+			break;
+
+		/* note: iter_max is inclusive upper bound */
+		min = iter_max + 1;
+
+		/* give a chance for higher-priority threads to progress */
+		nvgpu_mutex_release(&g->mm.l2_op_lock);
 	}
 out:
 	trace_gk20a_ltc_cbc_ctrl_done(g->name);