about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2016-08-19 11:54:28 -0400
committer: Chris Wilson <chris@chris-wilson.co.uk> 2016-08-19 12:13:36 -0400
commit: c58305af1835095ddc25ee6f548ac05915e66ac5 (patch)
tree: c397de7bd3efb5dc92ba0feab1a5e0be2b325caa
parent: f7bbe7883c3f119714fd09a8ceaac8075ba04dfe (diff)
drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
Very old numbers indicate this is a 66% improvement when remapping the
entire object for fence contention - due to the elimination of
track_pfn_insert and its strcmp.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160819155428.1670-6-chris@chris-wilson.co.uk
-rw-r--r-- drivers/gpu/drm/Makefile | 2
-rw-r--r-- drivers/gpu/drm/i915/Makefile | 3
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h | 5
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 50
-rw-r--r-- drivers/gpu/drm/i915/i915_mm.c | 84
5 files changed, 99 insertions, 45 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 0238bf8bc8c3..3ff094171ee5 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -46,7 +46,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/
46obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ 46obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
47obj-$(CONFIG_DRM_MGA) += mga/ 47obj-$(CONFIG_DRM_MGA) += mga/
48obj-$(CONFIG_DRM_I810) += i810/ 48obj-$(CONFIG_DRM_I810) += i810/
49obj-$(CONFIG_DRM_I915) += i915/ 49obj-$(CONFIG_DRM_I915) += i915/
50obj-$(CONFIG_DRM_MGAG200) += mgag200/ 50obj-$(CONFIG_DRM_MGAG200) += mgag200/
51obj-$(CONFIG_DRM_VC4) += vc4/ 51obj-$(CONFIG_DRM_VC4) += vc4/
52obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ 52obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 3412413408c0..a7da24640e88 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -12,6 +12,7 @@ subdir-ccflags-y += \
12i915-y := i915_drv.o \ 12i915-y := i915_drv.o \
13 i915_irq.o \ 13 i915_irq.o \
14 i915_memcpy.o \ 14 i915_memcpy.o \
15 i915_mm.o \
15 i915_params.o \ 16 i915_params.o \
16 i915_pci.o \ 17 i915_pci.o \
17 i915_suspend.o \ 18 i915_suspend.o \
@@ -113,6 +114,6 @@ i915-y += intel_gvt.o
113include $(src)/gvt/Makefile 114include $(src)/gvt/Makefile
114endif 115endif
115 116
116obj-$(CONFIG_DRM_I915) += i915.o 117obj-$(CONFIG_DRM_I915) += i915.o
117 118
118CFLAGS_i915_trace_points.o := -I$(src) 119CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 016425c0b475..9cd102cd931e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3931,6 +3931,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
3931void i915_memcpy_init_early(struct drm_i915_private *dev_priv); 3931void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
3932bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); 3932bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
3933 3933
3934/* i915_mm.c */
3935int remap_io_mapping(struct vm_area_struct *vma,
3936 unsigned long addr, unsigned long pfn, unsigned long size,
3937 struct io_mapping *iomap);
3938
3934#define ptr_mask_bits(ptr) ({ \ 3939#define ptr_mask_bits(ptr) ({ \
3935 unsigned long __v = (unsigned long)(ptr); \ 3940 unsigned long __v = (unsigned long)(ptr); \
3936 (typeof(ptr))(__v & PAGE_MASK); \ 3941 (typeof(ptr))(__v & PAGE_MASK); \
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5398af7f7580..04607d4115d6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1705,7 +1705,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1705 bool write = !!(vmf->flags & FAULT_FLAG_WRITE); 1705 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1706 struct i915_vma *vma; 1706 struct i915_vma *vma;
1707 pgoff_t page_offset; 1707 pgoff_t page_offset;
1708 unsigned long pfn;
1709 unsigned int flags; 1708 unsigned int flags;
1710 int ret; 1709 int ret;
1711 1710
@@ -1790,48 +1789,13 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
1790 goto err_unpin; 1789 goto err_unpin;
1791 1790
1792 /* Finally, remap it using the new GTT offset */ 1791 /* Finally, remap it using the new GTT offset */
1793 pfn = ggtt->mappable_base + i915_ggtt_offset(vma); 1792 ret = remap_io_mapping(area,
1794 pfn >>= PAGE_SHIFT; 1793 area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT),
1795 1794 (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
1796 if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { 1795 min_t(u64, vma->size, area->vm_end - area->vm_start),
1797 if (!obj->fault_mappable) { 1796 &ggtt->mappable);
1798 unsigned long size = 1797 if (ret)
1799 min_t(unsigned long, 1798 goto err_unpin;
1800 area->vm_end - area->vm_start,
1801 obj->base.size) >> PAGE_SHIFT;
1802 unsigned long base = area->vm_start;
1803 int i;
1804
1805 for (i = 0; i < size; i++) {
1806 ret = vm_insert_pfn(area,
1807 base + i * PAGE_SIZE,
1808 pfn + i);
1809 if (ret)
1810 break;
1811 }
1812 } else
1813 ret = vm_insert_pfn(area,
1814 (unsigned long)vmf->virtual_address,
1815 pfn + page_offset);
1816 } else {
1817 /* Overriding existing pages in partial view does not cause
1818 * us any trouble as TLBs are still valid because the fault
1819 * is due to userspace losing part of the mapping or never
1820 * having accessed it before (at this partials' range).
1821 */
1822 const struct i915_ggtt_view *view = &vma->ggtt_view;
1823 unsigned long base = area->vm_start +
1824 (view->params.partial.offset << PAGE_SHIFT);
1825 unsigned int i;
1826
1827 for (i = 0; i < view->params.partial.size; i++) {
1828 ret = vm_insert_pfn(area,
1829 base + i * PAGE_SIZE,
1830 pfn + i);
1831 if (ret)
1832 break;
1833 }
1834 }
1835 1799
1836 obj->fault_mappable = true; 1800 obj->fault_mappable = true;
1837err_unpin: 1801err_unpin:
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
new file mode 100644
index 000000000000..e4935dd1fd37
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -0,0 +1,84 @@
1/*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/mm.h>
26#include <linux/io-mapping.h>
27
28#include <asm/pgtable.h>
29
30#include "i915_drv.h"
31
32struct remap_pfn {
33 struct mm_struct *mm;
34 unsigned long pfn;
35 pgprot_t prot;
36};
37
38static int remap_pfn(pte_t *pte, pgtable_t token,
39 unsigned long addr, void *data)
40{
41 struct remap_pfn *r = data;
42
43 /* Special PTE are not associated with any struct page */
44 set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
45 r->pfn++;
46
47 return 0;
48}
49
50/**
51 * remap_io_mapping - remap an IO mapping to userspace
52 * @vma: user vma to map to
53 * @addr: target user address to start at
54 * @pfn: physical address of kernel memory
55 * @size: size of map area
56 * @iomap: the source io_mapping
57 *
58 * Note: this is only safe if the mm semaphore is held when called.
59 */
60int remap_io_mapping(struct vm_area_struct *vma,
61 unsigned long addr, unsigned long pfn, unsigned long size,
62 struct io_mapping *iomap)
63{
64 struct remap_pfn r;
65 int err;
66
67 GEM_BUG_ON((vma->vm_flags &
68 (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) !=
69 (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP));
70
71 /* We rely on prevalidation of the io-mapping to skip track_pfn(). */
72 r.mm = vma->vm_mm;
73 r.pfn = pfn;
74 r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
75 (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
76
77 err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
78 if (unlikely(err)) {
79 zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
80 return err;
81 }
82
83 return 0;
84}