author    Sami Kiminki <skiminki@nvidia.com>        2015-04-10 08:40:46 -0400
committer Terje Bergstrom <tbergstrom@nvidia.com>   2015-05-11 11:50:49 -0400
commit    8d6fe0f2ef2d52c077263a0621d05953b19320ff (patch)
tree      8d7512c1b0e3ab86f6e73fbf64072bbc3f9e6300 /drivers/gpu/nvgpu/gk20a/mm_gk20a.c
parent    5a5662fffbce050adf59ba4fac60d217b5651f90 (diff)
gpu: nvgpu: Implement compbits padding for mapping
Implement NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS, which adds extra
alignment to the compbits allocation for safe compbits mapping.

Bug 200077571

Change-Id: I3a74ebb81412e4e1e69501debeb9ef4e2056ef1a
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/730763
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/740693
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
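The padding arithmetic this patch adds to gk20a_alloc_comptags() can be
illustrated standalone. The sketch below is an illustration only: the
hardware parameters (cacheline size, slices per LTC, LTC count, comptags
per cacheline) are hypothetical example values, not taken from any
particular GPU, and gcd()/lcm()/DIV_ROUND_UP are local stand-ins for the
kernel helpers the patch uses.

/* Standalone sketch of the compbits padding math; all hardware
 * parameters below are hypothetical example values. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

static unsigned gcd(unsigned a, unsigned b)
{
        while (b) {
                unsigned t = a % b;
                a = b;
                b = t;
        }
        return a;
}

static unsigned lcm(unsigned a, unsigned b)
{
        return a / gcd(a, b) * b;
}

int main(void)
{
        /* hypothetical configuration */
        const unsigned cacheline_size = 512;
        const unsigned slices_per_ltc = 2;
        const unsigned ltc_count = 2;
        const unsigned comptags_per_cacheline = 128;
        const unsigned small_pgsz = 4096;
        const unsigned lines = 100;     /* ctaglines the buffer needs */

        const unsigned aggregate_cacheline_sz =
                cacheline_size * slices_per_ltc * ltc_count;    /* 2048 */

        /* smallest ctagline stride whose backing store spans a whole
         * number of small pages: lcm(2048, 4096) / 2048 = 2 cachelines,
         * i.e. 256 ctaglines */
        unsigned ctagline_align =
                (lcm(aggregate_cacheline_sz, small_pgsz) /
                 aggregate_cacheline_sz) * comptags_per_cacheline;

        unsigned ctaglines_to_allocate =
                ctagline_align +                        /* alignment slack */
                DIV_ROUND_UP(lines, comptags_per_cacheline) *
                comptags_per_cacheline +                /* payload, rounded up */
                DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
                comptags_per_cacheline;                 /* trail padding */

        /* prints: align=256 allocate=512 */
        printf("align=%u allocate=%u\n", ctagline_align,
               ctaglines_to_allocate);
        return 0;
}

Because comptags_per_cacheline need not be a power of two, alignment
cannot simply be delegated to the allocator; the patch instead
over-allocates by ctagline_align ctaglines and frees the surplus
afterwards.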
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/mm_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 137
1 file changed, 126 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 37d47c18..30bfd5a2 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -26,6 +26,7 @@
 #include <linux/tegra-soc.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-buf.h>
+#include <linux/lcm.h>
 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
 
@@ -133,7 +134,8 @@ static void gk20a_mm_delete_priv(void *_priv)
                BUG_ON(!priv->comptag_allocator);
                priv->comptag_allocator->free(priv->comptag_allocator,
                                              priv->comptags.offset,
-                                             priv->comptags.lines, 1);
+                                             priv->comptags.allocated_lines,
+                                             1);
        }
 
        /* Free buffer states */
@@ -208,22 +210,28 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
                return;
 
        if (!priv) {
-               comptags->lines = 0;
-               comptags->offset = 0;
+               memset(comptags, 0, sizeof(*comptags));
                return;
        }
 
        *comptags = priv->comptags;
 }
 
-static int gk20a_alloc_comptags(struct device *dev,
+static int gk20a_alloc_comptags(struct gk20a *g,
+                               struct device *dev,
                                struct dma_buf *dmabuf,
                                struct gk20a_allocator *allocator,
-                               int lines)
+                               u32 lines, bool user_mappable)
 {
        struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
        u32 offset = 0;
        int err;
+       u32 ctaglines_to_allocate;
+       u32 ctagline_align;
+       const u32 aggregate_cacheline_sz =
+               g->gr.cacheline_size * g->gr.slices_per_ltc *
+               g->ltc_count;
+       const u32 small_pgsz = 4096;
 
        if (!priv)
                return -ENOSYS;
@@ -231,12 +239,99 @@ static int gk20a_alloc_comptags(struct device *dev,
        if (!lines)
                return -EINVAL;
 
+       if (!user_mappable) {
+               ctaglines_to_allocate = lines;
+               ctagline_align = 1;
+       } else {
+               /* Unfortunately, we cannot use allocation alignment
+                * here, since compbits per cacheline is not always a
+                * power of two. So, we just have to allocate enough
+                * extra that we're guaranteed to find a ctagline
+                * inside the allocation so that: 1) it is the first
+                * ctagline in a cacheline that starts at a page
+                * boundary, and 2) we can add enough overallocation
+                * that the ctaglines of the succeeding allocation
+                * are on a different page than ours.
+                */
+
+               ctagline_align =
+                       (lcm(aggregate_cacheline_sz, small_pgsz) /
+                        aggregate_cacheline_sz) *
+                       g->gr.comptags_per_cacheline;
+
+               ctaglines_to_allocate =
+                       /* for alignment */
+                       ctagline_align +
+
+                       /* lines rounded up to cachelines */
+                       DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
+                       g->gr.comptags_per_cacheline +
+
+                       /* trail-padding */
+                       DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
+                       g->gr.comptags_per_cacheline;
+
+               if (ctaglines_to_allocate < lines)
+                       return -EINVAL; /* integer overflow */
+       }
+
        /* store the allocator so we can use it when we free the ctags */
        priv->comptag_allocator = allocator;
-       err = allocator->alloc(allocator, &offset, lines, 1);
+       err = allocator->alloc(allocator, &offset,
+                              ctaglines_to_allocate, 1);
        if (!err) {
-               priv->comptags.lines = lines;
+               const u32 alignment_lines =
+                       DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+                       offset;
+
+               /* prune the preceding ctaglines that were allocated
+                  for alignment */
+               if (alignment_lines) {
+                       /* free alignment lines */
+                       int tmp =
+                               allocator->free(allocator, offset,
+                                               alignment_lines,
+                                               1);
+                       WARN_ON(tmp);
+
+                       offset += alignment_lines;
+                       ctaglines_to_allocate -= alignment_lines;
+               }
+
+               /* check if we can prune the trailing, too */
+               if (user_mappable) {
+                       u32 needed_cachelines =
+                               DIV_ROUND_UP(lines,
+                                            g->gr.comptags_per_cacheline);
+
+                       u32 first_unneeded_cacheline =
+                               DIV_ROUND_UP(round_up(needed_cachelines *
+                                                     aggregate_cacheline_sz,
+                                                     small_pgsz),
+                                            aggregate_cacheline_sz);
+                       u32 needed_ctaglines =
+                               first_unneeded_cacheline *
+                               g->gr.comptags_per_cacheline;
+
+                       if (needed_ctaglines < ctaglines_to_allocate) {
+                               /* free trailing lines */
+                               int tmp =
+                                       allocator->free(
+                                               allocator,
+                                               offset + needed_ctaglines,
+                                               (ctaglines_to_allocate -
+                                                needed_ctaglines),
+                                               1);
+                               WARN_ON(tmp);
+
+                               ctaglines_to_allocate = needed_ctaglines;
+                       }
+               }
+
                priv->comptags.offset = offset;
+               priv->comptags.lines = lines;
+               priv->comptags.allocated_lines = ctaglines_to_allocate;
+               priv->comptags.user_mappable = user_mappable;
        }
        return err;
 }
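With the earlier sketch's hypothetical parameters (ctagline_align = 256,
ctaglines_to_allocate = 512 for lines = 100), the pruning above works out
as follows. If the allocator returns offset = 300, then alignment_lines =
DIV_ROUND_UP(300, 256) * 256 - 300 = 212, so ctaglines [300, 512) are
freed back and the aligned region starts at offset 512 with 300 ctaglines
left. The trailing prune then computes needed_cachelines =
DIV_ROUND_UP(100, 128) = 1, rounds that one 2048-byte cacheline up to a
full 4096-byte page (first_unneeded_cacheline = 2, needed_ctaglines =
256), and frees the surplus 44 ctaglines. The buffer ends up with
offset = 512, lines = 100, allocated_lines = 256; ctagline 512 maps to
compbits cacheline 4, which begins exactly on a small-page boundary.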
@@ -955,9 +1050,11 @@ struct buffer_attrs {
        u64 align;
        u32 ctag_offset;
        u32 ctag_lines;
+       u32 ctag_allocated_lines;
        int pgsz_idx;
        u8 kind_v;
        u8 uc_kind_v;
+       bool ctag_user_mappable;
 };
 
 static void gmmu_select_page_size(struct vm_gk20a *vm,
@@ -1399,22 +1496,37 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                         g->ops.fb.compression_page_size(g));
 
        if (bfr.ctag_lines && !comptags.lines) {
+               const bool user_mappable =
+                       !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
                /* allocate compression resources if needed */
-               err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
-                                          bfr.ctag_lines);
+               err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
+                                          bfr.ctag_lines, user_mappable);
                if (err) {
                        /* ok to fall back here if we ran out */
                        /* TBD: we can partially alloc ctags as well... */
-                       bfr.ctag_lines = bfr.ctag_offset = 0;
                        bfr.kind_v = bfr.uc_kind_v;
                } else {
                        gk20a_get_comptags(d, dmabuf, &comptags);
                        clear_ctags = true;
+
+                       if (comptags.lines < comptags.allocated_lines) {
+                               /* clear tail-padding comptags */
+                               u32 ctagmin = comptags.offset + comptags.lines;
+                               u32 ctagmax = comptags.offset +
+                                       comptags.allocated_lines - 1;
+
+                               g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                                                   ctagmin, ctagmax);
+                       }
                }
        }
 
        /* store the comptag info */
        bfr.ctag_offset = comptags.offset;
+       bfr.ctag_lines = comptags.lines;
+       bfr.ctag_allocated_lines = comptags.allocated_lines;
+       bfr.ctag_user_mappable = comptags.user_mappable;
 
        /* update gmmu ptes */
        map_offset = g->ops.mm.gmmu_map(vm, map_offset,
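Continuing the same hypothetical numbers, the tail-padding clear above
covers ctaglines 612 through 767, i.e. comptags.offset + comptags.lines
up to comptags.offset + comptags.allocated_lines - 1. The padding
ctaglines can fall inside the page-aligned window that is later mapped
to userspace, so they are scrubbed here rather than left carrying stale
compression state.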
@@ -1433,10 +1545,11 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        gk20a_dbg(gpu_dbg_map,
                  "as=%d pgsz=%d "
                  "kind=0x%x kind_uc=0x%x flags=0x%x "
-                 "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
+                 "ctags=%d start=%d ctags_allocated=%d ctags_mappable=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
                  vm_aspace_id(vm), gmmu_page_size,
                  bfr.kind_v, bfr.uc_kind_v, flags,
                  bfr.ctag_lines, bfr.ctag_offset,
+                 bfr.ctag_allocated_lines, bfr.ctag_user_mappable,
                  hi32(map_offset), lo32(map_offset),
                  hi32((u64)sg_dma_address(bfr.sgt->sgl)),
                  lo32((u64)sg_dma_address(bfr.sgt->sgl)),
@@ -1473,6 +1586,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
        mapped_buffer->pgsz_idx = bfr.pgsz_idx;
        mapped_buffer->ctag_offset = bfr.ctag_offset;
        mapped_buffer->ctag_lines = bfr.ctag_lines;
+       mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+       mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
        mapped_buffer->vm = vm;
        mapped_buffer->flags = flags;
        mapped_buffer->kind = kind;