diff options
Diffstat (limited to 'drivers/gpu/nvgpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/as_gk20a.c | 35 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 168 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 19 |
4 files changed, 226 insertions, 5 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c index 038fa4c8..63569008 100644 --- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A Address Spaces | 2 | * GK20A Address Spaces |
3 | * | 3 | * |
4 | * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -225,6 +225,31 @@ static int gk20a_as_ioctl_get_va_regions( | |||
225 | return 0; | 225 | return 0; |
226 | } | 226 | } |
227 | 227 | ||
228 | static int gk20a_as_ioctl_get_buffer_compbits_info( | ||
229 | struct gk20a_as_share *as_share, | ||
230 | struct nvgpu_as_get_buffer_compbits_info_args *args) | ||
231 | { | ||
232 | gk20a_dbg_fn(""); | ||
233 | return gk20a_vm_get_compbits_info(as_share->vm, | ||
234 | args->mapping_gva, | ||
235 | &args->compbits_win_size, | ||
236 | &args->compbits_win_ctagline, | ||
237 | &args->mapping_ctagline, | ||
238 | &args->flags); | ||
239 | } | ||
240 | |||
241 | static int gk20a_as_ioctl_map_buffer_compbits( | ||
242 | struct gk20a_as_share *as_share, | ||
243 | struct nvgpu_as_map_buffer_compbits_args *args) | ||
244 | { | ||
245 | gk20a_dbg_fn(""); | ||
246 | return gk20a_vm_map_compbits(as_share->vm, | ||
247 | args->mapping_gva, | ||
248 | &args->compbits_win_gva, | ||
249 | &args->mapping_iova, | ||
250 | args->flags); | ||
251 | } | ||
252 | |||
228 | int gk20a_as_dev_open(struct inode *inode, struct file *filp) | 253 | int gk20a_as_dev_open(struct inode *inode, struct file *filp) |
229 | { | 254 | { |
230 | struct gk20a_as_share *as_share; | 255 | struct gk20a_as_share *as_share; |
@@ -334,6 +359,14 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
334 | err = gk20a_as_ioctl_get_va_regions(as_share, | 359 | err = gk20a_as_ioctl_get_va_regions(as_share, |
335 | (struct nvgpu_as_get_va_regions_args *)buf); | 360 | (struct nvgpu_as_get_va_regions_args *)buf); |
336 | break; | 361 | break; |
362 | case NVGPU_AS_IOCTL_GET_BUFFER_COMPBITS_INFO: | ||
363 | err = gk20a_as_ioctl_get_buffer_compbits_info(as_share, | ||
364 | (struct nvgpu_as_get_buffer_compbits_info_args *)buf); | ||
365 | break; | ||
366 | case NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS: | ||
367 | err = gk20a_as_ioctl_map_buffer_compbits(as_share, | ||
368 | (struct nvgpu_as_map_buffer_compbits_args *)buf); | ||
369 | break; | ||
337 | default: | 370 | default: |
338 | dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd); | 371 | dev_dbg(dev_from_gk20a(g), "unrecognized as ioctl: 0x%x", cmd); |
339 | err = -ENOTTY; | 372 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 470729b7..d3114ecd 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -2016,8 +2016,13 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
2016 | gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); | 2016 | gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g); |
2017 | gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); | 2017 | gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g); |
2018 | g->ops.gr.get_rop_l2_en_mask(g); | 2018 | g->ops.gr.get_rop_l2_en_mask(g); |
2019 | 2019 | gpu->gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; | |
2020 | gpu->reserved = 0; | 2020 | gpu->gr_gobs_per_comptagline_per_slice = |
2021 | g->gr.gobs_per_comptagline_per_slice; | ||
2022 | gpu->num_ltc = g->ltc_count; | ||
2023 | gpu->lts_per_ltc = g->gr.slices_per_ltc; | ||
2024 | gpu->cbc_cache_line_size = g->gr.cacheline_size; | ||
2025 | gpu->cbc_comptags_per_line = g->gr.comptags_per_cacheline; | ||
2021 | 2026 | ||
2022 | return 0; | 2027 | return 0; |
2023 | } | 2028 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 5d1ff563..d896d783 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c | |||
@@ -221,7 +221,9 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
221 | struct device *dev, | 221 | struct device *dev, |
222 | struct dma_buf *dmabuf, | 222 | struct dma_buf *dmabuf, |
223 | struct gk20a_allocator *allocator, | 223 | struct gk20a_allocator *allocator, |
224 | u32 lines, bool user_mappable) | 224 | u32 lines, bool user_mappable, |
225 | u64 *ctag_map_win_size, | ||
226 | u32 *ctag_map_win_ctagline) | ||
225 | { | 227 | { |
226 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); | 228 | struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev); |
227 | u32 offset = 0; | 229 | u32 offset = 0; |
@@ -313,6 +315,8 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
313 | first_unneeded_cacheline * | 315 | first_unneeded_cacheline * |
314 | g->gr.comptags_per_cacheline; | 316 | g->gr.comptags_per_cacheline; |
315 | 317 | ||
318 | u64 win_size; | ||
319 | |||
316 | if (needed_ctaglines < ctaglines_to_allocate) { | 320 | if (needed_ctaglines < ctaglines_to_allocate) { |
317 | /* free alignment lines */ | 321 | /* free alignment lines */ |
318 | int tmp= | 322 | int tmp= |
@@ -326,6 +330,14 @@ static int gk20a_alloc_comptags(struct gk20a *g, | |||
326 | 330 | ||
327 | ctaglines_to_allocate = needed_ctaglines; | 331 | ctaglines_to_allocate = needed_ctaglines; |
328 | } | 332 | } |
333 | |||
334 | *ctag_map_win_ctagline = offset; | ||
335 | win_size = | ||
336 | DIV_ROUND_UP(lines, | ||
337 | g->gr.comptags_per_cacheline) * | ||
338 | aggregate_cacheline_sz; | ||
339 | |||
340 | *ctag_map_win_size = round_up(win_size, small_pgsz); | ||
329 | } | 341 | } |
330 | 342 | ||
331 | priv->comptags.offset = offset; | 343 | priv->comptags.offset = offset; |
@@ -1374,6 +1386,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1374 | bool clear_ctags = false; | 1386 | bool clear_ctags = false; |
1375 | struct scatterlist *sgl; | 1387 | struct scatterlist *sgl; |
1376 | u64 buf_addr; | 1388 | u64 buf_addr; |
1389 | u64 ctag_map_win_size = 0; | ||
1390 | u32 ctag_map_win_ctagline = 0; | ||
1377 | 1391 | ||
1378 | mutex_lock(&vm->update_gmmu_lock); | 1392 | mutex_lock(&vm->update_gmmu_lock); |
1379 | 1393 | ||
@@ -1501,7 +1515,9 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1501 | 1515 | ||
1502 | /* allocate compression resources if needed */ | 1516 | /* allocate compression resources if needed */ |
1503 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, | 1517 | err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator, |
1504 | bfr.ctag_lines, user_mappable); | 1518 | bfr.ctag_lines, user_mappable, |
1519 | &ctag_map_win_size, | ||
1520 | &ctag_map_win_ctagline); | ||
1505 | if (err) { | 1521 | if (err) { |
1506 | /* ok to fall back here if we ran out */ | 1522 | /* ok to fall back here if we ran out */ |
1507 | /* TBD: we can partially alloc ctags as well... */ | 1523 | /* TBD: we can partially alloc ctags as well... */ |
@@ -1588,6 +1604,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
1588 | mapped_buffer->ctag_lines = bfr.ctag_lines; | 1604 | mapped_buffer->ctag_lines = bfr.ctag_lines; |
1589 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; | 1605 | mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines; |
1590 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; | 1606 | mapped_buffer->ctags_mappable = bfr.ctag_user_mappable; |
1607 | mapped_buffer->ctag_map_win_size = ctag_map_win_size; | ||
1608 | mapped_buffer->ctag_map_win_ctagline = ctag_map_win_ctagline; | ||
1591 | mapped_buffer->vm = vm; | 1609 | mapped_buffer->vm = vm; |
1592 | mapped_buffer->flags = flags; | 1610 | mapped_buffer->flags = flags; |
1593 | mapped_buffer->kind = kind; | 1611 | mapped_buffer->kind = kind; |
@@ -1640,6 +1658,140 @@ clean_up: | |||
1640 | return 0; | 1658 | return 0; |
1641 | } | 1659 | } |
1642 | 1660 | ||
1661 | int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, | ||
1662 | u64 mapping_gva, | ||
1663 | u64 *compbits_win_size, | ||
1664 | u32 *compbits_win_ctagline, | ||
1665 | u32 *mapping_ctagline, | ||
1666 | u32 *flags) | ||
1667 | { | ||
1668 | struct mapped_buffer_node *mapped_buffer; | ||
1669 | struct device *d = dev_from_vm(vm); | ||
1670 | |||
1671 | mutex_lock(&vm->update_gmmu_lock); | ||
1672 | |||
1673 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); | ||
1674 | |||
1675 | if (!mapped_buffer || !mapped_buffer->user_mapped) | ||
1676 | { | ||
1677 | mutex_unlock(&vm->update_gmmu_lock); | ||
1678 | gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); | ||
1679 | return -EFAULT; | ||
1680 | } | ||
1681 | |||
1682 | *compbits_win_size = 0; | ||
1683 | *compbits_win_ctagline = 0; | ||
1684 | *mapping_ctagline = 0; | ||
1685 | *flags = 0; | ||
1686 | |||
1687 | if (mapped_buffer->ctag_offset) | ||
1688 | *flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_HAS_COMPBITS; | ||
1689 | |||
1690 | if (mapped_buffer->ctags_mappable) | ||
1691 | { | ||
1692 | *flags |= NVGPU_AS_GET_BUFFER_COMPBITS_INFO_FLAGS_MAPPABLE; | ||
1693 | *compbits_win_size = mapped_buffer->ctag_map_win_size; | ||
1694 | *compbits_win_ctagline = mapped_buffer->ctag_map_win_ctagline; | ||
1695 | *mapping_ctagline = mapped_buffer->ctag_offset; | ||
1696 | } | ||
1697 | |||
1698 | mutex_unlock(&vm->update_gmmu_lock); | ||
1699 | return 0; | ||
1700 | } | ||
1701 | |||
1702 | |||
1703 | int gk20a_vm_map_compbits(struct vm_gk20a *vm, | ||
1704 | u64 mapping_gva, | ||
1705 | u64 *compbits_win_gva, | ||
1706 | u64 *mapping_iova, | ||
1707 | u32 flags) | ||
1708 | { | ||
1709 | struct mapped_buffer_node *mapped_buffer; | ||
1710 | struct gk20a *g = gk20a_from_vm(vm); | ||
1711 | struct device *d = dev_from_vm(vm); | ||
1712 | |||
1713 | if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) { | ||
1714 | /* This will be implemented later */ | ||
1715 | gk20a_err(d, | ||
1716 | "%s: fixed-offset compbits mapping not yet supported", | ||
1717 | __func__); | ||
1718 | return -EFAULT; | ||
1719 | } | ||
1720 | |||
1721 | mutex_lock(&vm->update_gmmu_lock); | ||
1722 | |||
1723 | mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva); | ||
1724 | |||
1725 | if (!mapped_buffer || !mapped_buffer->user_mapped) { | ||
1726 | mutex_unlock(&vm->update_gmmu_lock); | ||
1727 | gk20a_err(d, "%s: bad offset 0x%llx", __func__, mapping_gva); | ||
1728 | return -EFAULT; | ||
1729 | } | ||
1730 | |||
1731 | if (!mapped_buffer->ctags_mappable) { | ||
1732 | mutex_unlock(&vm->update_gmmu_lock); | ||
1733 | gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva); | ||
1734 | return -EFAULT; | ||
1735 | } | ||
1736 | |||
1737 | if (!mapped_buffer->ctag_map_win_addr) { | ||
1738 | const u32 small_pgsz_index = 0; /* small pages, 4K */ | ||
1739 | const u32 aggregate_cacheline_sz = | ||
1740 | g->gr.cacheline_size * g->gr.slices_per_ltc * | ||
1741 | g->ltc_count; | ||
1742 | |||
1743 | /* first aggregate cacheline to map */ | ||
1744 | u32 cacheline_start; /* inclusive */ | ||
1745 | |||
1746 | /* offset of the start cacheline (will be page aligned) */ | ||
1747 | u64 cacheline_offset_start; | ||
1748 | |||
1749 | if (!mapped_buffer->ctag_map_win_size) { | ||
1750 | mutex_unlock(&vm->update_gmmu_lock); | ||
1751 | gk20a_err(d, | ||
1752 | "%s: mapping 0x%llx does not have " | ||
1753 | "mappable comptags", | ||
1754 | __func__, mapping_gva); | ||
1755 | return -EFAULT; | ||
1756 | } | ||
1757 | |||
1758 | cacheline_start = mapped_buffer->ctag_offset / | ||
1759 | g->gr.comptags_per_cacheline; | ||
1760 | cacheline_offset_start = | ||
1761 | cacheline_start * aggregate_cacheline_sz; | ||
1762 | |||
1763 | mapped_buffer->ctag_map_win_addr = | ||
1764 | g->ops.mm.gmmu_map( | ||
1765 | vm, | ||
1766 | 0, | ||
1767 | g->gr.compbit_store.mem.sgt, | ||
1768 | cacheline_offset_start, /* sg offset */ | ||
1769 | mapped_buffer->ctag_map_win_size, /* size */ | ||
1770 | small_pgsz_index, | ||
1771 | 0, /* kind */ | ||
1772 | 0, /* ctag_offset */ | ||
1773 | NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, | ||
1774 | gk20a_mem_flag_read_only, | ||
1775 | false, | ||
1776 | false); | ||
1777 | |||
1778 | if (!mapped_buffer->ctag_map_win_addr) { | ||
1779 | mutex_unlock(&vm->update_gmmu_lock); | ||
1780 | gk20a_err(d, | ||
1781 | "%s: failed to map comptags for mapping 0x%llx", | ||
1782 | __func__, mapping_gva); | ||
1783 | return -ENOMEM; | ||
1784 | } | ||
1785 | } | ||
1786 | |||
1787 | *mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0); | ||
1788 | *compbits_win_gva = mapped_buffer->ctag_map_win_addr; | ||
1789 | |||
1790 | mutex_unlock(&vm->update_gmmu_lock); | ||
1791 | |||
1792 | return 0; | ||
1793 | } | ||
1794 | |||
1643 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, | 1795 | u64 gk20a_gmmu_map(struct vm_gk20a *vm, |
1644 | struct sg_table **sgt, | 1796 | struct sg_table **sgt, |
1645 | u64 size, | 1797 | u64 size, |
@@ -2276,6 +2428,18 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer) | |||
2276 | struct vm_gk20a *vm = mapped_buffer->vm; | 2428 | struct vm_gk20a *vm = mapped_buffer->vm; |
2277 | struct gk20a *g = vm->mm->g; | 2429 | struct gk20a *g = vm->mm->g; |
2278 | 2430 | ||
2431 | if (mapped_buffer->ctag_map_win_addr) { | ||
2432 | /* unmap compbits */ | ||
2433 | |||
2434 | g->ops.mm.gmmu_unmap(vm, | ||
2435 | mapped_buffer->ctag_map_win_addr, | ||
2436 | mapped_buffer->ctag_map_win_size, | ||
2437 | 0, /* page size 4k */ | ||
2438 | true, /* va allocated */ | ||
2439 | gk20a_mem_flag_none, | ||
2440 | false); /* not sparse */ | ||
2441 | } | ||
2442 | |||
2279 | g->ops.mm.gmmu_unmap(vm, | 2443 | g->ops.mm.gmmu_unmap(vm, |
2280 | mapped_buffer->addr, | 2444 | mapped_buffer->addr, |
2281 | mapped_buffer->size, | 2445 | mapped_buffer->size, |
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 0ff11d09..e07b95fe 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h | |||
@@ -186,7 +186,13 @@ struct mapped_buffer_node { | |||
186 | u32 ctag_offset; | 186 | u32 ctag_offset; |
187 | u32 ctag_lines; | 187 | u32 ctag_lines; |
188 | u32 ctag_allocated_lines; | 188 | u32 ctag_allocated_lines; |
189 | |||
190 | /* For comptag mapping, these are the mapping window parameters */ | ||
189 | bool ctags_mappable; | 191 | bool ctags_mappable; |
192 | u64 ctag_map_win_addr; /* non-zero if mapped */ | ||
193 | u64 ctag_map_win_size; /* non-zero if ctags_mappable */ | ||
194 | u32 ctag_map_win_ctagline; /* ctagline at win start, set if | ||
195 | * ctags_mappable */ | ||
190 | 196 | ||
191 | u32 flags; | 197 | u32 flags; |
192 | u32 kind; | 198 | u32 kind; |
@@ -504,6 +510,19 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, | |||
504 | u64 buffer_offset, | 510 | u64 buffer_offset, |
505 | u64 mapping_size); | 511 | u64 mapping_size); |
506 | 512 | ||
513 | int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, | ||
514 | u64 mapping_gva, | ||
515 | u64 *compbits_win_size, | ||
516 | u32 *compbits_win_ctagline, | ||
517 | u32 *mapping_ctagline, | ||
518 | u32 *flags); | ||
519 | |||
520 | int gk20a_vm_map_compbits(struct vm_gk20a *vm, | ||
521 | u64 mapping_gva, | ||
522 | u64 *compbits_win_gva, | ||
523 | u64 *mapping_iova, | ||
524 | u32 flags); | ||
525 | |||
507 | /* unmap handle from kernel */ | 526 | /* unmap handle from kernel */ |
508 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); | 527 | void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); |
509 | 528 | ||