diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 102 |
1 files changed, 65 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 74c56487..b96f0b5c 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | |||
@@ -113,6 +113,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
113 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); | 113 | gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); |
114 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | 114 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); |
115 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | 115 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); |
116 | const u32 max_lines = 16384; | ||
116 | 117 | ||
117 | gk20a_dbg_fn(""); | 118 | gk20a_dbg_fn(""); |
118 | 119 | ||
@@ -121,45 +122,72 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, | |||
121 | if (gr->compbit_store.mem.size == 0) | 122 | if (gr->compbit_store.mem.size == 0) |
122 | return 0; | 123 | return 0; |
123 | 124 | ||
124 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); | 125 | while (1) { |
125 | 126 | const u32 iter_max = min(min + max_lines - 1, max); | |
126 | if (op == gk20a_cbc_op_clear) { | 127 | bool full_cache_op = true; |
127 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), | 128 | |
128 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); | 129 | nvgpu_mutex_acquire(&g->mm.l2_op_lock); |
129 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), | 130 | |
130 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); | 131 | gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max); |
131 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | 132 | |
132 | } else if (op == gk20a_cbc_op_clean) { | 133 | if (op == gk20a_cbc_op_clear) { |
133 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); | 134 | gk20a_writel( |
134 | } else if (op == gk20a_cbc_op_invalidate) { | 135 | g, ltc_ltcs_ltss_cbc_ctrl2_r(), |
135 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); | 136 | ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( |
136 | } else { | 137 | min)); |
137 | BUG_ON(1); | 138 | gk20a_writel( |
138 | } | 139 | g, ltc_ltcs_ltss_cbc_ctrl3_r(), |
139 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), | 140 | ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( |
140 | gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | 141 | iter_max)); |
141 | 142 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); | |
142 | for (ltc = 0; ltc < g->ltc_count; ltc++) { | 143 | full_cache_op = false; |
143 | for (slice = 0; slice < slices_per_ltc; slice++) { | 144 | } else if (op == gk20a_cbc_op_clean) { |
144 | 145 | /* this is full-cache op */ | |
145 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | 146 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); |
146 | ltc * ltc_stride + slice * lts_stride; | 147 | } else if (op == gk20a_cbc_op_invalidate) { |
147 | 148 | /* this is full-cache op */ | |
148 | nvgpu_timeout_init(g, &timeout, 2000, | 149 | hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); |
149 | NVGPU_TIMER_RETRY_TIMER); | 150 | } else { |
150 | do { | 151 | nvgpu_err(g, "Unknown op: %u", (unsigned)op); |
151 | val = gk20a_readl(g, ctrl1); | 152 | err = -EINVAL; |
152 | if (!(val & hw_op)) | 153 | goto out; |
153 | break; | 154 | } |
154 | nvgpu_udelay(5); | 155 | gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), |
155 | } while (!nvgpu_timeout_expired(&timeout)); | 156 | gk20a_readl(g, |
156 | 157 | ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); | |
157 | if (nvgpu_timeout_peek_expired(&timeout)) { | 158 | |
158 | nvgpu_err(g, "comp tag clear timeout"); | 159 | for (ltc = 0; ltc < g->ltc_count; ltc++) { |
159 | err = -EBUSY; | 160 | for (slice = 0; slice < slices_per_ltc; slice++) { |
160 | goto out; | 161 | |
162 | ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + | ||
163 | ltc * ltc_stride + slice * lts_stride; | ||
164 | |||
165 | nvgpu_timeout_init(g, &timeout, 2000, | ||
166 | NVGPU_TIMER_RETRY_TIMER); | ||
167 | do { | ||
168 | val = gk20a_readl(g, ctrl1); | ||
169 | if (!(val & hw_op)) | ||
170 | break; | ||
171 | nvgpu_udelay(5); | ||
172 | } while (!nvgpu_timeout_expired(&timeout)); | ||
173 | |||
174 | if (nvgpu_timeout_peek_expired(&timeout)) { | ||
175 | nvgpu_err(g, "comp tag clear timeout"); | ||
176 | err = -EBUSY; | ||
177 | goto out; | ||
178 | } | ||
161 | } | 179 | } |
162 | } | 180 | } |
181 | |||
182 | /* are we done? */ | ||
183 | if (full_cache_op || iter_max == max) | ||
184 | break; | ||
185 | |||
186 | /* note: iter_max is inclusive upper bound */ | ||
187 | min = iter_max + 1; | ||
188 | |||
189 | /* give a chance for higher-priority threads to progress */ | ||
190 | nvgpu_mutex_release(&g->mm.l2_op_lock); | ||
163 | } | 191 | } |
164 | out: | 192 | out: |
165 | trace_gk20a_ltc_cbc_ctrl_done(g->name); | 193 | trace_gk20a_ltc_cbc_ctrl_done(g->name); |