author		Sami Kiminki <skiminki@nvidia.com>	2015-03-19 15:28:34 -0400
committer	Ishan Mittal <imittal@nvidia.com>	2015-05-18 02:03:19 -0400
commit		520ff00e870eadc98a50f58ecd514ced53a8612f (patch)
tree		6822d8ad2a51f98c2df421ba9cc7727e06757fcb /drivers/gpu
parent		069accc8571716dc616c9f96776d54bf657afaee (diff)
gpu: nvgpu: Implement compbits mapping
Implement NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO for requesting info
on compbits-mappable buffers, and NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS,
which enables mapping compbits to the GPU address space of said
buffers. This, subsequently, enables moving comptag swizzling from GPU
to CDEH/CDEV formats to userspace.

Compbits mapping is conservative and may map more than what is
strictly needed. This is for two reasons: 1) mapping must be done on
small-page alignment (4kB), and 2) GPU comptags are swizzled all
around the aggregate cache line, which means that the whole cache line
must be visible even if only some comptag lines are required from it.
The cache line size is not necessarily a multiple of the small page
size.

Bug 200077571

Change-Id: I5ae88fe6b616e5ea37d3bff0dff46c07e9c9267e
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/719710
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
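To make the sizing rule above concrete, here is a minimal sketch of the
conservative window computation; all identifiers are illustrative
stand-ins, not the driver's own:

#include <stdint.h>

#define SMALL_PAGE_SIZE 4096u	/* mapping granularity noted above */

/* Hypothetical helper: bytes of compbits backing store that must be
 * mapped to cover 'ctag_lines' comptag lines. */
static uint64_t compbits_win_bytes(uint32_t ctag_lines,
				   uint32_t comptags_per_cacheline,
				   uint64_t aggregate_cacheline_sz)
{
	/* reason 2: every touched aggregate cache line is mapped whole */
	uint64_t cachelines = (ctag_lines + comptags_per_cacheline - 1) /
			      comptags_per_cacheline;
	uint64_t win = cachelines * aggregate_cacheline_sz;

	/* reason 1: the window is rounded up to small-page alignment */
	return (win + SMALL_PAGE_SIZE - 1) &
	       ~(uint64_t)(SMALL_PAGE_SIZE - 1);
}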
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c   35
-rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c       9
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c  168
-rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h   19
4 files changed, 226 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 038fa4c8..63569008 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -225,6 +225,31 @@ static int gk20a_as_ioctl_get_va_regions(
 	return 0;
 }
 
+static int gk20a_as_ioctl_get_buffer_compbits_info(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_get_buffer_compbits_info_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_get_compbits_info(as_share->vm,
+					  args->mapping_gva,
+					  &args->compbits_win_size,
+					  &args->compbits_win_ctagline,
+					  &args->mapping_ctagline,
+					  &args->flags);
+}
+
+static int gk20a_as_ioctl_map_buffer_compbits(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_map_buffer_compbits_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_map_compbits(as_share->vm,
+				     args->mapping_gva,
+				     &args->compbits_win_gva,
+				     &args->mapping_iova,
+				     args->flags);
+}
+
 int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a_as_share *as_share;
@@ -334,6 +359,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_get_va_regions(as_share,
 			(struct nvgpu_as_get_va_regions_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO:
+		err = gk20a_as_ioctl_get_buffer_compbits_info(as_share,
+			(struct nvgpu_as_get_buffer_compbits_info_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS:
+		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
+			(struct nvgpu_as_map_buffer_compbits_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;
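For context, the two new ioctls are intended to be used in sequence:
query first, then map. Below is a minimal userspace sketch, assuming
uapi argument structs whose fields mirror the names visible in the
handlers above; it is illustrative only, not part of this change.

#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nvgpu.h>	/* assumed location of the nvgpu uapi */

static int map_compbits(int as_fd, __u64 mapping_gva, __u64 *win_gva)
{
	struct nvgpu_as_get_buffer_compbits_info_args info = {
		.mapping_gva = mapping_gva,
	};
	struct nvgpu_as_map_buffer_compbits_args map = {
		.mapping_gva = mapping_gva,
	};

	if (ioctl(as_fd, NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO, &info))
		return -1;

	/* only buffers whose comptags are user-mappable qualify */
	if (!(info.flags & NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE))
		return -1;

	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS, &map))
		return -1;

	*win_gva = map.compbits_win_gva;	/* read-only compbits window */
	return 0;
}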
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 470729b7..d3114ecd 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2016,8 +2016,13 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
 	gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
 	g->ops.gr.get_rop_l2_en_mask(g);
-
-	gpu->reserved = 0;
+	gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
+	gpu->gr_gobs_per_comptagline_per_slice =
+		g->gr.gobs_per_comptagline_per_slice;
+	gpu->num_ltc = g->ltc_count;
+	gpu->lts_per_ltc = g->gr.slices_per_ltc;
+	gpu->cbc_cache_line_size = g->gr.cacheline_size;
+	gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
 	return 0;
 }
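The characteristics fields added above give userspace the cache
geometry it needs for compbits swizzling. In particular, the aggregate
cache line size used by the mapping code in mm_gk20a.c can be
recomputed from three of them. A hedged sketch, assuming a struct
layout that matches the field names assigned above:

#include <linux/types.h>

/* Illustrative only: mirrors the kernel-side product
 * cacheline_size * slices_per_ltc * ltc_count. */
static __u64
aggregate_cacheline_size(const struct nvgpu_gpu_characteristics *c)
{
	return (__u64)c->cbc_cache_line_size * c->lts_per_ltc * c->num_ltc;
}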
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..d896d783 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       struct device *dev,
 			       struct dma_buf *dmabuf,
 			       struct gk20a_allocator *allocator,
-			       u32 lines, bool user_mappable)
+			       u32 lines, bool user_mappable,
+			       u64 *ctag_map_win_size,
+			       u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 				first_unneeded_cacheline *
 				g->gr.comptags_per_cacheline;
 
+			u64 win_size;
+
 			if (needed_ctaglines < ctaglines_to_allocate) {
 				/* free alignment lines */
 				int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 			ctaglines_to_allocate = needed_ctaglines;
 		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
+			DIV_ROUND_UP(lines,
+				     g->gr.comptags_per_cacheline) *
+			aggregate_cacheline_sz;
+
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
 	priv->comptags.offset = offset;
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* allocate compression resources if needed */
 	err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-				   bfr.ctag_lines, user_mappable);
+				   bfr.ctag_lines, user_mappable,
+				   &ctag_map_win_size,
+				   &ctag_map_win_ctagline);
 	if (err) {
 		/* ok to fall back here if we ran out */
 		/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped)
+	{
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable)
+	{
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,       /* page size 4k */
+				     true,    /* va allocated */
+				     gk20a_mem_flag_none,
+				     false);  /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
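For illustration: once the window is mapped, userspace doing CDEH/CDEV
swizzling needs to find a comptag line's aggregate cache line inside
it. A sketch follows, under the assumption (matching how
gk20a_vm_map_compbits() above computes cacheline_offset_start) that
the window begins at the cache line containing the window-start
ctagline; all names here are hypothetical:

#include <linux/types.h>

/* Hypothetical helper: byte offset of the aggregate cache line holding
 * 'ctagline', relative to the start of the mapped compbits window. */
static __u64 ctagline_offset_in_win(__u32 ctagline,
				    __u32 win_start_ctagline,
				    __u32 comptags_per_cacheline,
				    __u64 aggregate_cacheline_sz)
{
	__u32 first_cl = win_start_ctagline / comptags_per_cacheline;
	__u32 cl = ctagline / comptags_per_cacheline;

	return (__u64)(cl - first_cl) * aggregate_cacheline_sz;
}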
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0ff11d09..e07b95fe 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -186,7 +186,13 @@ struct mapped_buffer_node {
 	u32 ctag_offset;
 	u32 ctag_lines;
 	u32 ctag_allocated_lines;
+
+	/* For comptag mapping, these are the mapping window parameters */
 	bool ctags_mappable;
+	u64 ctag_map_win_addr; /* non-zero if mapped */
+	u64 ctag_map_win_size; /* non-zero if ctags_mappable */
+	u32 ctag_map_win_ctagline; /* ctagline at win start, set if
+				    * ctags_mappable */
 
 	u32 flags;
 	u32 kind;
@@ -504,6 +510,19 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 			u64 buffer_offset,
 			u64 mapping_size);
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags);
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags);
+
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 
509 528