diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-08-19 11:54:28 -0400 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-08-19 12:13:36 -0400 |
commit | c58305af1835095ddc25ee6f548ac05915e66ac5 (patch) | |
tree | c397de7bd3efb5dc92ba0feab1a5e0be2b325caa | |
parent | f7bbe7883c3f119714fd09a8ceaac8075ba04dfe (diff) |
drm/i915: Use remap_io_mapping() to prefault all PTE in a single pass
Very old numbers indicate this is a 66% improvement when remapping the
entire object for fence contention - due to the elimination of
track_pfn_insert and its strcmp.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Testcase: igt/gem_fence_upload/performance
Testcase: igt/gem_mmap_gtt
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160819155428.1670-6-chris@chris-wilson.co.uk
-rw-r--r-- | drivers/gpu/drm/Makefile | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/Makefile | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 50 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_mm.c | 84 |
5 files changed, 99 insertions, 45 deletions
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 0238bf8bc8c3..3ff094171ee5 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile | |||
@@ -46,7 +46,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ | |||
46 | obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ | 46 | obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ |
47 | obj-$(CONFIG_DRM_MGA) += mga/ | 47 | obj-$(CONFIG_DRM_MGA) += mga/ |
48 | obj-$(CONFIG_DRM_I810) += i810/ | 48 | obj-$(CONFIG_DRM_I810) += i810/ |
49 | obj-$(CONFIG_DRM_I915) += i915/ | 49 | obj-$(CONFIG_DRM_I915) += i915/ |
50 | obj-$(CONFIG_DRM_MGAG200) += mgag200/ | 50 | obj-$(CONFIG_DRM_MGAG200) += mgag200/ |
51 | obj-$(CONFIG_DRM_VC4) += vc4/ | 51 | obj-$(CONFIG_DRM_VC4) += vc4/ |
52 | obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ | 52 | obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ |
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 3412413408c0..a7da24640e88 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile | |||
@@ -12,6 +12,7 @@ subdir-ccflags-y += \ | |||
12 | i915-y := i915_drv.o \ | 12 | i915-y := i915_drv.o \ |
13 | i915_irq.o \ | 13 | i915_irq.o \ |
14 | i915_memcpy.o \ | 14 | i915_memcpy.o \ |
15 | i915_mm.o \ | ||
15 | i915_params.o \ | 16 | i915_params.o \ |
16 | i915_pci.o \ | 17 | i915_pci.o \ |
17 | i915_suspend.o \ | 18 | i915_suspend.o \ |
@@ -113,6 +114,6 @@ i915-y += intel_gvt.o | |||
113 | include $(src)/gvt/Makefile | 114 | include $(src)/gvt/Makefile |
114 | endif | 115 | endif |
115 | 116 | ||
116 | obj-$(CONFIG_DRM_I915) += i915.o | 117 | obj-$(CONFIG_DRM_I915) += i915.o |
117 | 118 | ||
118 | CFLAGS_i915_trace_points.o := -I$(src) | 119 | CFLAGS_i915_trace_points.o := -I$(src) |
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 016425c0b475..9cd102cd931e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h | |||
@@ -3931,6 +3931,11 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) | |||
3931 | void i915_memcpy_init_early(struct drm_i915_private *dev_priv); | 3931 | void i915_memcpy_init_early(struct drm_i915_private *dev_priv); |
3932 | bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); | 3932 | bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); |
3933 | 3933 | ||
3934 | /* i915_mm.c */ | ||
3935 | int remap_io_mapping(struct vm_area_struct *vma, | ||
3936 | unsigned long addr, unsigned long pfn, unsigned long size, | ||
3937 | struct io_mapping *iomap); | ||
3938 | |||
3934 | #define ptr_mask_bits(ptr) ({ \ | 3939 | #define ptr_mask_bits(ptr) ({ \ |
3935 | unsigned long __v = (unsigned long)(ptr); \ | 3940 | unsigned long __v = (unsigned long)(ptr); \ |
3936 | (typeof(ptr))(__v & PAGE_MASK); \ | 3941 | (typeof(ptr))(__v & PAGE_MASK); \ |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5398af7f7580..04607d4115d6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c | |||
@@ -1705,7 +1705,6 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
1705 | bool write = !!(vmf->flags & FAULT_FLAG_WRITE); | 1705 | bool write = !!(vmf->flags & FAULT_FLAG_WRITE); |
1706 | struct i915_vma *vma; | 1706 | struct i915_vma *vma; |
1707 | pgoff_t page_offset; | 1707 | pgoff_t page_offset; |
1708 | unsigned long pfn; | ||
1709 | unsigned int flags; | 1708 | unsigned int flags; |
1710 | int ret; | 1709 | int ret; |
1711 | 1710 | ||
@@ -1790,48 +1789,13 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
1790 | goto err_unpin; | 1789 | goto err_unpin; |
1791 | 1790 | ||
1792 | /* Finally, remap it using the new GTT offset */ | 1791 | /* Finally, remap it using the new GTT offset */ |
1793 | pfn = ggtt->mappable_base + i915_ggtt_offset(vma); | 1792 | ret = remap_io_mapping(area, |
1794 | pfn >>= PAGE_SHIFT; | 1793 | area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), |
1795 | 1794 | (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, | |
1796 | if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { | 1795 | min_t(u64, vma->size, area->vm_end - area->vm_start), |
1797 | if (!obj->fault_mappable) { | 1796 | &ggtt->mappable); |
1798 | unsigned long size = | 1797 | if (ret) |
1799 | min_t(unsigned long, | 1798 | goto err_unpin; |
1800 | area->vm_end - area->vm_start, | ||
1801 | obj->base.size) >> PAGE_SHIFT; | ||
1802 | unsigned long base = area->vm_start; | ||
1803 | int i; | ||
1804 | |||
1805 | for (i = 0; i < size; i++) { | ||
1806 | ret = vm_insert_pfn(area, | ||
1807 | base + i * PAGE_SIZE, | ||
1808 | pfn + i); | ||
1809 | if (ret) | ||
1810 | break; | ||
1811 | } | ||
1812 | } else | ||
1813 | ret = vm_insert_pfn(area, | ||
1814 | (unsigned long)vmf->virtual_address, | ||
1815 | pfn + page_offset); | ||
1816 | } else { | ||
1817 | /* Overriding existing pages in partial view does not cause | ||
1818 | * us any trouble as TLBs are still valid because the fault | ||
1819 | * is due to userspace losing part of the mapping or never | ||
1820 | * having accessed it before (at this partials' range). | ||
1821 | */ | ||
1822 | const struct i915_ggtt_view *view = &vma->ggtt_view; | ||
1823 | unsigned long base = area->vm_start + | ||
1824 | (view->params.partial.offset << PAGE_SHIFT); | ||
1825 | unsigned int i; | ||
1826 | |||
1827 | for (i = 0; i < view->params.partial.size; i++) { | ||
1828 | ret = vm_insert_pfn(area, | ||
1829 | base + i * PAGE_SIZE, | ||
1830 | pfn + i); | ||
1831 | if (ret) | ||
1832 | break; | ||
1833 | } | ||
1834 | } | ||
1835 | 1799 | ||
1836 | obj->fault_mappable = true; | 1800 | obj->fault_mappable = true; |
1837 | err_unpin: | 1801 | err_unpin: |
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c new file mode 100644 index 000000000000..e4935dd1fd37 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mm.c | |||
@@ -0,0 +1,84 @@ | |||
1 | /* | ||
2 | * Copyright © 2014 Intel Corporation | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a | ||
5 | * copy of this software and associated documentation files (the "Software"), | ||
6 | * to deal in the Software without restriction, including without limitation | ||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | ||
8 | * and/or sell copies of the Software, and to permit persons to whom the | ||
9 | * Software is furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice (including the next | ||
12 | * paragraph) shall be included in all copies or substantial portions of the | ||
13 | * Software. | ||
14 | * | ||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | ||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||
21 | * IN THE SOFTWARE. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/mm.h> | ||
26 | #include <linux/io-mapping.h> | ||
27 | |||
28 | #include <asm/pgtable.h> | ||
29 | |||
30 | #include "i915_drv.h" | ||
31 | |||
32 | struct remap_pfn { | ||
33 | struct mm_struct *mm; | ||
34 | unsigned long pfn; | ||
35 | pgprot_t prot; | ||
36 | }; | ||
37 | |||
38 | static int remap_pfn(pte_t *pte, pgtable_t token, | ||
39 | unsigned long addr, void *data) | ||
40 | { | ||
41 | struct remap_pfn *r = data; | ||
42 | |||
43 | /* Special PTEs are not associated with any struct page */ | |
44 | set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); | ||
45 | r->pfn++; | ||
46 | |||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | /** | ||
51 | * remap_io_mapping - remap an IO mapping to userspace | ||
52 | * @vma: user vma to map to | ||
53 | * @addr: target user address to start at | ||
54 | * @pfn: page frame number of the first page of IO memory to map | |
55 | * @size: size of map area, in bytes | |
56 | * @iomap: the source io_mapping | ||
57 | * | ||
58 | * Note: this is only safe if the mm semaphore is held when called. | ||
59 | */ | ||
60 | int remap_io_mapping(struct vm_area_struct *vma, | ||
61 | unsigned long addr, unsigned long pfn, unsigned long size, | ||
62 | struct io_mapping *iomap) | ||
63 | { | ||
64 | struct remap_pfn r; | ||
65 | int err; | ||
66 | |||
67 | GEM_BUG_ON((vma->vm_flags & | ||
68 | (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) != | ||
69 | (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)); | ||
70 | |||
71 | /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ | ||
72 | r.mm = vma->vm_mm; | ||
73 | r.pfn = pfn; | ||
74 | r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | | ||
75 | (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); | ||
76 | |||
77 | err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r); | ||
78 | if (unlikely(err)) { | ||
79 | zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT); | ||
80 | return err; | ||
81 | } | ||
82 | |||
83 | return 0; | ||
84 | } | ||