Diffstat (limited to 'drivers/gpu/nvgpu')
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/as_gk20a.c |  35
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/gk20a.c    |   9
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 168
 -rw-r--r--  drivers/gpu/nvgpu/gk20a/mm_gk20a.h |  19
 4 files changed, 226 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 038fa4c8..63569008 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -225,6 +225,31 @@ static int gk20a_as_ioctl_get_va_regions(
 	return 0;
 }
 
+static int gk20a_as_ioctl_get_buffer_compbits_info(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_get_buffer_compbits_info_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_get_compbits_info(as_share->vm,
+					  args->mapping_gva,
+					  &args->compbits_win_size,
+					  &args->compbits_win_ctagline,
+					  &args->mapping_ctagline,
+					  &args->flags);
+}
+
+static int gk20a_as_ioctl_map_buffer_compbits(
+		struct gk20a_as_share *as_share,
+		struct nvgpu_as_map_buffer_compbits_args *args)
+{
+	gk20a_dbg_fn("");
+	return gk20a_vm_map_compbits(as_share->vm,
+				     args->mapping_gva,
+				     &args->compbits_win_gva,
+				     &args->mapping_iova,
+				     args->flags);
+}
+
 int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a_as_share *as_share;
@@ -334,6 +359,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		err = gk20a_as_ioctl_get_va_regions(as_share,
 			(struct nvgpu_as_get_va_regions_args *)buf);
 		break;
+	case NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO:
+		err = gk20a_as_ioctl_get_buffer_compbits_info(as_share,
+			(struct nvgpu_as_get_buffer_compbits_info_args *)buf);
+		break;
+	case NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS:
+		err = gk20a_as_ioctl_map_buffer_compbits(as_share,
+			(struct nvgpu_as_map_buffer_compbits_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd);
 		err = -ENOTTY;
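Taken together, the two new ioctls give userspace a query/map pair: GET_BUFFER_COMPBITS_INFO reports whether a mapping has comptags and whether they are mappable, and MAP_BUFFER_COMPBITS creates the window on demand. Below is a minimal userspace sketch, assuming the argument-struct layouts and ioctl numbers come from the nvgpu UAPI header (<linux/nvgpu.h>), which is not part of this patch; the helper name is hypothetical.

/* Hypothetical userspace sketch; UAPI definitions assumed from <linux/nvgpu.h>. */
#include <linux/nvgpu.h>
#include <linux/types.h>
#include <stdio.h>
#include <sys/ioctl.h>

static int map_compbits_window(int as_fd, __u64 mapping_gva)
{
	struct nvgpu_as_get_buffer_compbits_info_args info = {
		.mapping_gva = mapping_gva,
	};
	struct nvgpu_as_map_buffer_compbits_args map = {
		.mapping_gva = mapping_gva,
	};

	if (ioctl(as_fd, NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO, &info) < 0)
		return -1;

	/* Only user-mapped buffers with mappable comptags qualify. */
	if (!(info.flags & NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE))
		return -1;

	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS, &map) < 0)
		return -1;

	printf("compbits window: gva 0x%llx, size 0x%llx, first ctagline %u\n",
	       (unsigned long long)map.compbits_win_gva,
	       (unsigned long long)info.compbits_win_size,
	       (unsigned)info.compbits_win_ctagline);
	return 0;
}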
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 470729b7..d3114ecd 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -2016,8 +2016,13 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
 	gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
 	g->ops.gr.get_rop_l2_en_mask(g);
-
-	gpu->reserved = 0;
+	gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw;
+	gpu->gr_gobs_per_comptagline_per_slice =
+		g->gr.gobs_per_comptagline_per_slice;
+	gpu->num_ltc = g->ltc_count;
+	gpu->lts_per_ltc = g->gr.slices_per_ltc;
+	gpu->cbc_cache_line_size = g->gr.cacheline_size;
+	gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline;
 
 	return 0;
 }
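The new characteristics fields export the compbit backing store (CBC) geometry — LTC count, slices per LTC, CBC cacheline size, and comptags per cacheline — that a client needs in order to interpret a mapped compbits window. As a sketch, assuming the struct nvgpu_gpu_characteristics layout populated above, the kernel's aggregate cacheline size can be recomputed client-side:

/* Sketch, assuming struct nvgpu_gpu_characteristics from <linux/nvgpu.h>. */
#include <linux/nvgpu.h>
#include <linux/types.h>

static __u64 aggregate_cacheline_sz(const struct nvgpu_gpu_characteristics *gpu)
{
	/* mirrors the kernel's cacheline_size * slices_per_ltc * ltc_count */
	return (__u64)gpu->cbc_cache_line_size *
	       gpu->lts_per_ltc *
	       gpu->num_ltc;
}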
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5d1ff563..d896d783 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			       struct device *dev,
 			       struct dma_buf *dmabuf,
 			       struct gk20a_allocator *allocator,
-			       u32 lines, bool user_mappable)
+			       u32 lines, bool user_mappable,
+			       u64 *ctag_map_win_size,
+			       u32 *ctag_map_win_ctagline)
 {
 	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
 	u32 offset = 0;
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 			first_unneeded_cacheline *
 			g->gr.comptags_per_cacheline;
 
+		u64 win_size;
+
 		if (needed_ctaglines < ctaglines_to_allocate) {
 			/* free alignment lines */
 			int tmp=
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g,
 
 			ctaglines_to_allocate = needed_ctaglines;
 		}
+
+		*ctag_map_win_ctagline = offset;
+		win_size =
+			DIV_ROUND_UP(lines,
+				     g->gr.comptags_per_cacheline) *
+			aggregate_cacheline_sz;
+
+		*ctag_map_win_size = round_up(win_size, small_pgsz);
 	}
 
 	priv->comptags.offset = offset;
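For user-mappable allocations the window is sized in whole aggregate cachelines and then rounded up to the small page size. A self-contained sketch of the same arithmetic (the helper name and the sample numbers below are hypothetical):

#include <stdint.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirror of the win_size computation above; not part of the patch. */
static uint64_t compbits_win_size(uint32_t lines,
				  uint32_t comptags_per_cacheline,
				  uint64_t aggregate_cacheline_sz,
				  uint64_t small_pgsz)
{
	uint64_t win_size = DIV_ROUND_UP((uint64_t)lines,
					 (uint64_t)comptags_per_cacheline) *
			    aggregate_cacheline_sz;

	/* round_up() equivalent for a power-of-two page size */
	return (win_size + small_pgsz - 1) & ~(small_pgsz - 1);
}

For example, with hypothetical values of lines = 100, 128 comptags per cacheline, and a 2048-byte aggregate cacheline, DIV_ROUND_UP(100, 128) = 1 cacheline gives win_size = 2048, rounded up to 4096 bytes, i.e. one 4K page.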
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	bool clear_ctags = false;
 	struct scatterlist *sgl;
 	u64 buf_addr;
+	u64 ctag_map_win_size = 0;
+	u32 ctag_map_win_ctagline = 0;
 
 	mutex_lock(&vm->update_gmmu_lock);
 
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	/* allocate compression resources if needed */
 	err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
-				   bfr.ctag_lines, user_mappable);
+				   bfr.ctag_lines, user_mappable,
+				   &ctag_map_win_size,
+				   &ctag_map_win_ctagline);
 	if (err) {
 		/* ok to fall back here if we ran out */
 		/* TBD: we can partially alloc ctags as well... */
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->ctag_lines = bfr.ctag_lines;
 	mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
 	mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
+	mapped_buffer->ctag_map_win_size = ctag_map_win_size;
+	mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline;
 	mapped_buffer->vm = vm;
 	mapped_buffer->flags = flags;
 	mapped_buffer->kind = kind;
@@ -1640,6 +1658,140 @@ clean_up:
 	return 0;
 }
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct device *d = dev_from_vm(vm);
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped)
+	{
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	*compbits_win_size = 0;
+	*compbits_win_ctagline = 0;
+	*mapping_ctagline = 0;
+	*flags = 0;
+
+	if (mapped_buffer->ctag_offset)
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS;
+
+	if (mapped_buffer->ctags_mappable)
+	{
+		*flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE;
+		*compbits_win_size = mapped_buffer->ctag_map_win_size;
+		*compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline;
+		*mapping_ctagline = mapped_buffer->ctag_offset;
+	}
+
+	mutex_unlock(&vm->update_gmmu_lock);
+	return 0;
+}
+
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_vm(vm);
+
+	if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
+		/* This will be implemented later */
+		gk20a_err(d,
+			  "%s: fixed-offset compbits mapping not yet supported",
+			  __func__);
+		return -EFAULT;
+	}
+
+	mutex_lock(&vm->update_gmmu_lock);
+
+	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+
+	if (!mapped_buffer || !mapped_buffer->user_mapped) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctags_mappable) {
+		mutex_unlock(&vm->update_gmmu_lock);
+		gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
+		return -EFAULT;
+	}
+
+	if (!mapped_buffer->ctag_map_win_addr) {
+		const u32 small_pgsz_index = 0; /* small pages, 4K */
+		const u32 aggregate_cacheline_sz =
+			g->gr.cacheline_size * g->gr.slices_per_ltc *
+			g->ltc_count;
+
+		/* first aggregate cacheline to map */
+		u32 cacheline_start; /* inclusive */
+
+		/* offset of the start cacheline (will be page aligned) */
+		u64 cacheline_offset_start;
+
+		if (!mapped_buffer->ctag_map_win_size) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: mapping 0x%llx does not have "
+				  "mappable comptags",
+				  __func__, mapping_gva);
+			return -EFAULT;
+		}
+
+		cacheline_start = mapped_buffer->ctag_offset /
+			g->gr.comptags_per_cacheline;
+		cacheline_offset_start =
+			cacheline_start * aggregate_cacheline_sz;
+
+		mapped_buffer->ctag_map_win_addr =
+			g->ops.mm.gmmu_map(
+				vm,
+				0,
+				g->gr.compbit_store.mem.sgt,
+				cacheline_offset_start, /* sg offset */
+				mapped_buffer->ctag_map_win_size, /* size */
+				small_pgsz_index,
+				0, /* kind */
+				0, /* ctag_offset */
+				NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE,
+				gk20a_mem_flag_read_only,
+				false,
+				false);
+
+		if (!mapped_buffer->ctag_map_win_addr) {
+			mutex_unlock(&vm->update_gmmu_lock);
+			gk20a_err(d,
+				  "%s: failed to map comptags for mapping 0x%llx",
+				  __func__, mapping_gva);
+			return -ENOMEM;
+		}
+	}
+
+	*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+	*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
+
+	mutex_unlock(&vm->update_gmmu_lock);
+
+	return 0;
+}
+
 u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		   struct sg_table **sgt,
 		   u64 size,
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer)
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
+	if (mapped_buffer->ctag_map_win_addr) {
+		/* unmap compbits */
+
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->ctag_map_win_addr,
+				     mapped_buffer->ctag_map_win_size,
+				     0,       /* page size 4k */
+				     true,    /* va allocated */
+				     gk20a_mem_flag_none,
+				     false);  /* not sparse */
+	}
+
 	g->ops.mm.gmmu_unmap(vm,
 			     mapped_buffer->addr,
 			     mapped_buffer->size,
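gk20a_vm_map_compbits creates the window lazily: on first use it is GMMU-mapped read-only from the compbit backing store, starting at the aggregate cacheline that holds the mapping's first ctagline, and it is torn down again in gk20a_vm_unmap_locked. A hedged sketch of how a client might turn a ctagline into a byte offset inside that window (the helper name is hypothetical, and how compbits are packed within a single cacheline is hardware-specific and not defined by this patch):

#include <stdint.h>

/* Hypothetical client-side helper; assumes the window begins at the
 * aggregate cacheline containing win_first_ctagline, as mapped above. */
static uint64_t ctagline_window_offset(uint32_t ctagline,
				       uint32_t win_first_ctagline,
				       uint32_t comptags_per_line,
				       uint64_t aggregate_cacheline_sz)
{
	uint32_t win_first_cl = win_first_ctagline / comptags_per_line;
	uint32_t cl = ctagline / comptags_per_line;

	return (uint64_t)(cl - win_first_cl) * aggregate_cacheline_sz;
}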
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 0ff11d09..e07b95fe 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -186,7 +186,13 @@ struct mapped_buffer_node {
 	u32 ctag_offset;
 	u32 ctag_lines;
 	u32 ctag_allocated_lines;
+
+	/* For comptag mapping, these are the mapping window parameters */
 	bool ctags_mappable;
+	u64 ctag_map_win_addr; /* non-zero if mapped */
+	u64 ctag_map_win_size; /* non-zero if ctags_mappable */
+	u32 ctag_map_win_ctagline; /* ctagline at win start, set if
+				    * ctags_mappable */
 
 	u32 flags;
 	u32 kind;
@@ -504,6 +510,19 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 		u64 buffer_offset,
 		u64 mapping_size);
 
+int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
+			       u64 mapping_gva,
+			       u64 *compbits_win_size,
+			       u32 *compbits_win_ctagline,
+			       u32 *mapping_ctagline,
+			       u32 *flags);
+
+int gk20a_vm_map_compbits(struct vm_gk20a *vm,
+			  u64 mapping_gva,
+			  u64 *compbits_win_gva,
+			  u64 *mapping_iova,
+			  u32 flags);
+
 /* unmap handle from kernel */
 void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset);
 