diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 137 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 5 |
2 files changed, 131 insertions, 11 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 37d47c18..30bfd5a2 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/tegra-soc.h> | 26 | #include <linux/tegra-soc.h> |
27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
28 | #include <linux/dma-buf.h> | 28 | #include <linux/dma-buf.h> |
29 | #include <linux/lcm.h> | ||
29 | #include <uapi/linux/nvgpu.h> | 30 | #include <uapi/linux/nvgpu.h> |
30 | #include <trace/events/gk20a.h> | 31 | #include <trace/events/gk20a.h> |
31 | 32 | ||
@@ -133,7 +134,8 @@ static void gk20a_mm_delete_priv(void *_priv) | |||
133 | BUG_ON(!priv->comptag_allocator); | 134 | BUG_ON(!priv->comptag_allocator); |
134 | priv->comptag_allocator->free(priv->comptag_allocator, | 135 | priv->comptag_allocator->free(priv->comptag_allocator, |
135 | priv->comptags.offset, | 136 | priv->comptags.offset, |
136 | priv->comptags.lines, 1); | 137 | priv->comptags.allocated_lines, |
138 | 1); | ||
137 | } | 139 | } |
138 | 140 | ||
139 | /* Free buffer states */ | 141 | /* Free buffer states */ |
@@ -208,22 +210,28 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, | |||
208 | return; | 210 | return; |
209 | 211 | ||
210 | if (!priv) { | 212 | if (!priv) { |
211 | comptags->lines = 0; | 213 | memset(comptags, 0, sizeof(*comptags)); |
212 | comptags->offset = 0; | ||
213 | return; | 214 | return; |
214 | } | 215 | } |
215 | 216 | ||
216 | *comptags = priv->comptags; | 217 | *comptags = priv->comptags; |
217 | } | 218 | } |
218 | 219 | ||
219 | static int gk20a_alloc_comptags(struct device *dev, | 220 | static int gk20a_alloc_comptags(struct gk20a *g, |
221 | struct device *dev, | ||
220 | struct dma_buf *dmabuf, | 222 | struct dma_buf *dmabuf, |
221 | struct gk20a_allocator *allocator, | 223 | struct gk20a_allocator *allocator, |
222 | int lines) | 224 | u32 lines, bool user_mappable) |
223 | { | 225 | { |
224 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
225 | u32 offset = 0; | 227 | u32 offset = 0; |
226 | int err; | 228 | int err; |
229 | u32 ctaglines_to_allocate; | ||
230 | u32 ctagline_align; | ||
231 | const u32 aggregate_cacheline_sz = | ||
232 | g->gr.cacheline_size * g->gr.slices_per_ltc * | ||
233 | g->ltc_count; | ||
234 | const u32 small_pgsz = 4096; | ||
227 | 235 | ||
228 | if (!priv) | 236 | if (!priv) |
229 | return -ENOSYS; | 237 | return -ENOSYS; |
@@ -231,12 +239,99 @@ static int gk20a_alloc_comptags(struct device *dev, | |||
231 | if (!lines) | 239 | if (!lines) |
232 | return -EINVAL; | 240 | return -EINVAL; |
233 | 241 | ||
242 | if (!user_mappable) { | ||
243 | ctaglines_to_allocate = lines; | ||
244 | ctagline_align = 1; | ||
245 | } else { | ||
246 | /* Unfortunately, we cannot use allocation alignment | ||
247 | * here, since compbits per cacheline is not always a | ||
248 | * power of two. So, we just have to allocate enough | ||
249 | * extra that we're guaranteed to find a ctagline | ||
250 | * inside the allocation so that: 1) it is the first | ||
251 | * ctagline in a cacheline that starts at a page | ||
252 | * boundary, and 2) we can add enough overallocation | ||
253 | * that the ctaglines of the succeeding allocation | ||
254 | * are on a different page than ours | ||
255 | */ | ||
256 | |||
257 | ctagline_align = | ||
258 | (lcm(aggregate_cacheline_sz, small_pgsz) / | ||
259 | aggregate_cacheline_sz) * | ||
260 | g->gr.comptags_per_cacheline; | ||
261 | |||
262 | ctaglines_to_allocate = | ||
263 | /* for alignment */ | ||
264 | ctagline_align + | ||
265 | |||
266 | /* lines rounded up to cachelines */ | ||
267 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) * | ||
268 | g->gr.comptags_per_cacheline + | ||
269 | |||
270 | /* trail-padding */ | ||
271 | DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) * | ||
272 | g->gr.comptags_per_cacheline; | ||
273 | |||
274 | if (ctaglines_to_allocate < lines) | ||
275 | return -EINVAL; /* integer overflow */ | ||
276 | } | ||
277 | |||
234 | /* store the allocator so we can use it when we free the ctags */ | 278 | /* store the allocator so we can use it when we free the ctags */ |
235 | priv->comptag_allocator = allocator; | 279 | priv->comptag_allocator = allocator; |
236 | err = allocator->alloc(allocator, &offset, lines, 1); | 280 | err = allocator->alloc(allocator, &offset, |
281 | ctaglines_to_allocate, 1); | ||
237 | if (!err) { | 282 | if (!err) { |
238 | priv->comptags.lines = lines; | 283 | const u32 alignment_lines = |
284 | DIV_ROUND_UP(offset, ctagline_align) * ctagline_align - | ||
285 | offset; | ||
286 | |||
287 | /* prune the preceding ctaglines that were allocated | ||
288 | for alignment */ | ||
289 | if (alignment_lines) { | ||
290 | /* free alignment lines */ | ||
291 | int tmp= | ||
292 | allocator->free(allocator, offset, | ||
293 | alignment_lines, | ||
294 | 1); | ||
295 | WARN_ON(tmp); | ||
296 | |||
297 | offset += alignment_lines; | ||
298 | ctaglines_to_allocate -= alignment_lines; | ||
299 | } | ||
300 | |||
301 | /* check if we can prune the trailing, too */ | ||
302 | if (user_mappable) | ||
303 | { | ||
304 | u32 needed_cachelines = | ||
305 | DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline); | ||
306 | |||
307 | u32 first_unneeded_cacheline = | ||
308 | DIV_ROUND_UP(round_up(needed_cachelines * | ||
309 | aggregate_cacheline_sz, | ||
310 | small_pgsz), | ||
311 | aggregate_cacheline_sz); | ||
312 | u32 needed_ctaglines = | ||
313 | first_unneeded_cacheline * | ||
314 | g->gr.comptags_per_cacheline; | ||
315 | |||
316 | if (needed_ctaglines < ctaglines_to_allocate) { | ||
317 | /* free trailing lines */ | ||
318 | int tmp= | ||
319 | allocator->free( | ||
320 | allocator, | ||
321 | offset + needed_ctaglines, | ||
322 | (ctaglines_to_allocate - | ||
323 | needed_ctaglines), | ||
324 | 1); | ||
325 | WARN_ON(tmp); | ||
326 | |||
327 | ctaglines_to_allocate = needed_ctaglines; | ||
328 | } | ||
329 | } | ||
330 | |||
239 | priv->comptags.offset = offset; | 331 | priv->comptags.offset = offset; |
332 | priv->comptags.lines = lines; | ||
333 | priv->comptags.allocated_lines = ctaglines_to_allocate; | ||
334 | priv->comptags.user_mappable = user_mappable; | ||
240 | } | 335 | } |
241 | return err; | 336 | return err; |
242 | } | 337 | } |
@@ -955,9 +1050,11 @@ struct buffer_attrs { | |||
955 | u64 align; | 1050 | u64 align; |
956 | u32 ctag_offset; | 1051 | u32 ctag_offset; |
957 | u32 ctag_lines; | 1052 | u32 ctag_lines; |
1053 | u32 ctag_allocated_lines; | ||
958 | int pgsz_idx; | 1054 | int pgsz_idx; |
959 | u8 kind_v; | 1055 | u8 kind_v; |
960 | u8 uc_kind_v; | 1056 | u8 uc_kind_v; |
1057 | bool ctag_user_mappable; | ||
961 | }; | 1058 | }; |
962 | 1059 | ||
963 | static void gmmu_select_page_size(struct vm_gk20a *vm, | 1060 | static void gmmu_select_page_size(struct vm_gk20a *vm, |
@@ -1399,22 +1496,37 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1399 | g->ops.fb.compression_page_size(g)); | 1496 | g->ops.fb.compression_page_size(g)); |
1400 | 1497 | ||
1401 | if (bfr.ctag_lines && !comptags.lines) { | 1498 | if (bfr.ctag_lines && !comptags.lines) { |
1499 | const bool user_mappable = | ||
1500 | !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS); | ||
1501 | |||
1402 | /* allocate compression resources if needed */ | 1502 | /* allocate compression resources if needed */ |
1403 | err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator, | 1503 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, |
1404 | bfr.ctag_lines); | 1504 | bfr.ctag_lines, user_mappable); |
1405 | if (err) { | 1505 | if (err) { |
1406 | /* ok to fall back here if we ran out */ | 1506 | /* ok to fall back here if we ran out */ |
1407 | /* TBD: we can partially alloc ctags as well... */ | 1507 | /* TBD: we can partially alloc ctags as well... */ |
1408 | bfr.ctag_lines = bfr.ctag_offset = 0; | ||
1409 | bfr.kind_v = bfr.uc_kind_v; | 1508 | bfr.kind_v = bfr.uc_kind_v; |
1410 | } else { | 1509 | } else { |
1411 | gk20a_get_comptags(d, dmabuf, &comptags); | 1510 | gk20a_get_comptags(d, dmabuf, &comptags); |
1412 | clear_ctags = true; | 1511 | clear_ctags = true; |
1512 | |||
1513 | if (comptags.lines < comptags.allocated_lines) { | ||
1514 | /* clear tail-padding comptags */ | ||
1515 | u32 ctagmin = comptags.offset + comptags.lines; | ||
1516 | u32 ctagmax = comptags.offset + | ||
1517 | comptags.allocated_lines - 1; | ||
1518 | |||
1519 | g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear, | ||
1520 | ctagmin, ctagmax); | ||
1521 | } | ||
1413 | } | 1522 | } |
1414 | } | 1523 | } |
1415 | 1524 | ||
1416 | /* store the comptag info */ | 1525 | /* store the comptag info */ |
1417 | bfr.ctag_offset = comptags.offset; | 1526 | bfr.ctag_offset = comptags.offset; |
1527 | bfr.ctag_lines = comptags.lines; | ||
1528 | bfr.ctag_allocated_lines = comptags.allocated_lines; | ||
1529 | bfr.ctag_user_mappable = comptags.user_mappable; | ||
1418 | 1530 | ||
1419 | /* update gmmu ptes */ | 1531 | /* update gmmu ptes */ |
1420 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, | 1532 | map_offset = g->ops.mm.gmmu_map(vm, map_offset, |
@@ -1433,10 +1545,11 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1433 | gk20a_dbg(gpu_dbg_map, | 1545 | gk20a_dbg(gpu_dbg_map, |
1434 | "as=%d pgsz=%d " | 1546 | "as=%d pgsz=%d " |
1435 | "kind=0x%x kind_uc=0x%x flags=0x%x " | 1547 | "kind=0x%x kind_uc=0x%x flags=0x%x " |
1436 | "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x", | 1548 | "ctags=%d start=%d ctags_allocated=%d ctags_mappable=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x", |
1437 | vm_aspace_id(vm), gmmu_page_size, | 1549 | vm_aspace_id(vm), gmmu_page_size, |
1438 | bfr.kind_v, bfr.uc_kind_v, flags, | 1550 | bfr.kind_v, bfr.uc_kind_v, flags, |
1439 | bfr.ctag_lines, bfr.ctag_offset, | 1551 | bfr.ctag_lines, bfr.ctag_offset, |
1552 | bfr.ctag_allocated_lines, bfr.ctag_user_mappable, | ||
1440 | hi32(map_offset), lo32(map_offset), | 1553 | hi32(map_offset), lo32(map_offset), |
1441 | hi32((u64)sg_dma_address(bfr.sgt->sgl)), | 1554 | hi32((u64)sg_dma_address(bfr.sgt->sgl)), |
1442 | lo32((u64)sg_dma_address(bfr.sgt->sgl)), | 1555 | lo32((u64)sg_dma_address(bfr.sgt->sgl)), |
@@ -1473,6 +1586,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1473 | mapped_buffer->pgsz_idx = bfr.pgsz_idx; | 1586 | mapped_buffer->pgsz_idx = bfr.pgsz_idx; |
1474 | mapped_buffer->ctag_offset = bfr.ctag_offset; | 1587 | mapped_buffer->ctag_offset = bfr.ctag_offset; |
1475 | mapped_buffer->ctag_lines = bfr.ctag_lines; | 1588 | mapped_buffer->ctag_lines = bfr.ctag_lines; |
1589 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; | ||
1590 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; | ||
1476 | mapped_buffer->vm = vm; | 1591 | mapped_buffer->vm = vm; |
1477 | mapped_buffer->flags = flags; | 1592 | mapped_buffer->flags = flags; |
1478 | mapped_buffer->kind = kind; | 1593 | mapped_buffer->kind = kind; |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 895e52ff..8d1dba66 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -140,6 +140,8 @@ enum gmmu_pgsz_gk20a { | |||
140 | struct gk20a_comptags { | 140 | struct gk20a_comptags { |
141 | u32 offset; | 141 | u32 offset; |
142 | u32 lines; | 142 | u32 lines; |
143 | u32 allocated_lines; | ||
144 | bool user_mappable; | ||
143 | }; | 145 | }; |
144 | 146 | ||
145 | struct gk20a_mm_entry { | 147 | struct gk20a_mm_entry { |
@@ -190,6 +192,9 @@ struct mapped_buffer_node { | |||
190 | u32 pgsz_idx; | 192 | u32 pgsz_idx; |
191 | u32 ctag_offset; | 193 | u32 ctag_offset; |
192 | u32 ctag_lines; | 194 | u32 ctag_lines; |
195 | u32 ctag_allocated_lines; | ||
196 | bool ctags_mappable; | ||
197 | |||
193 | u32 flags; | 198 | u32 flags; |
194 | u32 kind; | 199 | u32 kind; |
195 | bool va_allocated; | 200 | bool va_allocated; |