summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gm20b
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/nvgpu/gm20b')
-rw-r--r-- drivers/gpu/nvgpu/gm20b/ltc_gm20b.c 102
1 files changed, 65 insertions, 37 deletions
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index 74c56487..b96f0b5c 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -113,6 +113,7 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
113 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); 113 gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()));
114 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); 114 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
115 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); 115 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
116 const u32 max_lines = 16384;
116 117
117 gk20a_dbg_fn(""); 118 gk20a_dbg_fn("");
118 119
@@ -121,45 +122,72 @@ int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
121 if (gr->compbit_store.mem.size == 0) 122 if (gr->compbit_store.mem.size == 0)
122 return 0; 123 return 0;
123 124
124 nvgpu_mutex_acquire(&g->mm.l2_op_lock); 125 while (1) {
125 126 const u32 iter_max = min(min + max_lines - 1, max);
126 if (op == gk20a_cbc_op_clear) { 127 bool full_cache_op = true;
127 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl2_r(), 128
128 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(min)); 129 nvgpu_mutex_acquire(&g->mm.l2_op_lock);
129 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl3_r(), 130
130 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(max)); 131 gk20a_dbg_info("clearing CBC lines %u..%u", min, iter_max);
131 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); 132
132 } else if (op == gk20a_cbc_op_clean) { 133 if (op == gk20a_cbc_op_clear) {
133 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); 134 gk20a_writel(
134 } else if (op == gk20a_cbc_op_invalidate) { 135 g, ltc_ltcs_ltss_cbc_ctrl2_r(),
135 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); 136 ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f(
136 } else { 137 min));
137 BUG_ON(1); 138 gk20a_writel(
138 } 139 g, ltc_ltcs_ltss_cbc_ctrl3_r(),
139 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), 140 ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f(
140 gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); 141 iter_max));
141 142 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f();
142 for (ltc = 0; ltc < g->ltc_count; ltc++) { 143 full_cache_op = false;
143 for (slice = 0; slice < slices_per_ltc; slice++) { 144 } else if (op == gk20a_cbc_op_clean) {
144 145 /* this is full-cache op */
145 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + 146 hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f();
146 ltc * ltc_stride + slice * lts_stride; 147 } else if (op == gk20a_cbc_op_invalidate) {
147 148 /* this is full-cache op */
148 nvgpu_timeout_init(g, &timeout, 2000, 149 hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f();
149 NVGPU_TIMER_RETRY_TIMER); 150 } else {
150 do { 151 nvgpu_err(g, "Unknown op: %u", (unsigned)op);
151 val = gk20a_readl(g, ctrl1); 152 err = -EINVAL;
152 if (!(val & hw_op)) 153 goto out;
153 break; 154 }
154 nvgpu_udelay(5); 155 gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(),
155 } while (!nvgpu_timeout_expired(&timeout)); 156 gk20a_readl(g,
156 157 ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op);
157 if (nvgpu_timeout_peek_expired(&timeout)) { 158
158 nvgpu_err(g, "comp tag clear timeout"); 159 for (ltc = 0; ltc < g->ltc_count; ltc++) {
159 err = -EBUSY; 160 for (slice = 0; slice < slices_per_ltc; slice++) {
160 goto out; 161
162 ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() +
163 ltc * ltc_stride + slice * lts_stride;
164
165 nvgpu_timeout_init(g, &timeout, 2000,
166 NVGPU_TIMER_RETRY_TIMER);
167 do {
168 val = gk20a_readl(g, ctrl1);
169 if (!(val & hw_op))
170 break;
171 nvgpu_udelay(5);
172 } while (!nvgpu_timeout_expired(&timeout));
173
174 if (nvgpu_timeout_peek_expired(&timeout)) {
175 nvgpu_err(g, "comp tag clear timeout");
176 err = -EBUSY;
177 goto out;
178 }
161 } 179 }
162 } 180 }
181
182 /* are we done? */
183 if (full_cache_op || iter_max == max)
184 break;
185
186 /* note: iter_max is inclusive upper bound */
187 min = iter_max + 1;
188
189 /* give a chance for higher-priority threads to progress */
190 nvgpu_mutex_release(&g->mm.l2_op_lock);
163 } 191 }
164out: 192out:
165 trace_gk20a_ltc_cbc_ctrl_done(g->name); 193 trace_gk20a_ltc_cbc_ctrl_done(g->name);