diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2017-08-17 13:57:59 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-08-24 11:24:30 -0400 |
commit | 82ba1277f3da7379ed6b8288c04bb91db008549c (patch) | |
tree | 2ee45afb7c07468218ea56b6e662d933887f197b | |
parent | 3fa47b877db1edc16018d662e7b9915d92354745 (diff) |
gpu: nvgpu: Limit max CBC clear job size to 16384 lines
Limit the maximum job size of CBC ctrl clear to 16 klines. This avoids
timeouts and excessive lock hold duration when clearing comptags for
huge surfaces. 16 klines corresponds to a 1-GB surface for a 64-kB
compression page size.
If the requested CBC ctrl job is larger than 16 klines, split it into
chunks of at most 16 klines each.
Bug 1860962
Bug 200334740
Change-Id: Ibc69adc8bf59527b1acec5b2097b5aefa2169960
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1540432
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 102 |
1 file changed, 65 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 74c56487..b96f0b5c 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -113,6 +113,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
113 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 113 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
114 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | 114 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); |
115 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | 115 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); |
116 | const u32 max_lines = 16384; | ||
116 | 117 | ||
117 | gk20a_dbg_fn(""); | 118 | gk20a_dbg_fn(""); |
118 | 119 | ||
@@ -121,45 +122,72 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
121 | if (gr->compbit_store.mem.size == 0) | 122 | if (gr->compbit_store.mem.size == 0) |
122 | return 0; | 123 | return 0; |
123 | 124 | ||
124 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | 125 | while (1) { |
125 | 126 | const u32 iter_max = min(min + max_lines - 1, max); | |
126 | if (op == gk20a_cbc_op_clear) { | 127 | bool full_cache_op = true; |
127 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), | 128 | |
128 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); | 129 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); |
129 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), | 130 | |
130 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); | 131 | gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max); |
131 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | 132 | |
132 | } else if (op == gk20a_cbc_op_clean) { | 133 | if (op == gk20a_cbc_op_clear) { |
133 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | 134 | gk20a_writel( |
134 | } else if (op == gk20a_cbc_op_invalidate) { | 135 | g, ltc_ltcs_ltss_cbc_ctrl2_r(), |
135 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | 136 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( |
136 | } else { | 137 | min)); |
137 | BUG_ON(1); | 138 | gk20a_writel( |
138 | } | 139 | g, ltc_ltcs_ltss_cbc_ctrl3_r(), |
139 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | 140 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( |
140 | gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | 141 | iter_max)); |
141 | 142 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | |
142 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 143 | full_cache_op = false; |
143 | for (slice = 0; slice < slices_per_ltc; slice++) { | 144 | } else if (op == gk20a_cbc_op_clean) { |
144 | 145 | /* this is full-cache op */ | |
145 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 146 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); |
146 | ltc * ltc_stride + slice * lts_stride; | 147 | } else if (op == gk20a_cbc_op_invalidate) { |
147 | 148 | /* this is full-cache op */ | |
148 | nvgpu_timeout_init(g, &timeout, 2000, | 149 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); |
149 | NVGPU_TIMER_RETRY_TIMER); | 150 | } else { |
150 | do { | 151 | nvgpu_err(g, "Unknown op: %u", (unsigned)op); |
151 | val = gk20a_readl(g, ctrl1); | 152 | err = -EINVAL; |
152 | if (!(val & hw_op)) | 153 | goto out; |
153 | break; | 154 | } |
154 | nvgpu_udelay(5); | 155 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), |
155 | } while (!nvgpu_timeout_expired(&timeout)); | 156 | gk20a_readl(g, |
156 | 157 | ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | |
157 | if (nvgpu_timeout_peek_expired(&timeout)) { | 158 | |
158 | nvgpu_err(g, "comp tag clear timeout"); | 159 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
159 | err = -EBUSY; | 160 | for (slice = 0; slice < slices_per_ltc; slice++) { |
160 | goto out; | 161 | |
162 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
163 | ltc * ltc_stride + slice * lts_stride; | ||
164 | |||
165 | nvgpu_timeout_init(g, &timeout, 2000, | ||
166 | NVGPU_TIMER_RETRY_TIMER); | ||
167 | do { | ||
168 | val = gk20a_readl(g, ctrl1); | ||
169 | if (!(val & hw_op)) | ||
170 | break; | ||
171 | nvgpu_udelay(5); | ||
172 | } while (!nvgpu_timeout_expired(&timeout)); | ||
173 | |||
174 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
175 | nvgpu_err(g, "comp tag clear timeout"); | ||
176 | err = -EBUSY; | ||
177 | goto out; | ||
178 | } | ||
161 | } | 179 | } |
162 | } | 180 | } |
181 | |||
182 | /* are we done? */ | ||
183 | if (full_cache_op || iter_max == max) | ||
184 | break; | ||
185 | |||
186 | /* note: iter_max is inclusive upper bound */ | ||
187 | min = iter_max + 1; | ||
188 | |||
189 | /* give a chance for higher-priority threads to progress */ | ||
190 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
163 | } | 191 | } |
164 | out: | 192 | out: |
165 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | 193 | trace_gk20a_ltc_cbc_ctrl_done(g->name); |