author    Chris Wilson <chris@chris-wilson.co.uk>    2016-08-18 12:17:13 -0400
committer Chris Wilson <chris@chris-wilson.co.uk>    2016-08-18 17:36:59 -0400
commit    ed13033f0287051577bc1678cde63a42fa419f3c (patch)
tree      eb373a58eaa673846e9fc24f324b62cc7587a6ca /drivers/gpu
parent    0b5372727be37944239100ff05a63df9771c8484 (diff)
drm/i915/cmdparser: Only cache the dst vmap
For simplicity, we want to continue using a contiguous mapping of the
command buffer, but we can reduce the number of vmappings we hold by
switching over to a page-by-page copy from the user batch buffer to
the shadow. The cost for saving one linear mapping is about 5% in
trivial workloads - which is more or less the overhead in calling
kmap_atomic().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-34-chris@chris-wilson.co.uk
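As an aside, the page-by-page copy described above follows a common pattern: walk the source object one page at a time, map each page briefly with kmap_atomic(), and memcpy() into the already-contiguous shadow mapping. The sketch below pulls that pattern out of context; the helper name copy_user_pages_to_shadow is hypothetical, and the authoritative version is copy_batch() in the diff that follows.

/*
 * Hypothetical helper illustrating the page-by-page copy described in the
 * commit message: instead of vmapping the whole source object, map one
 * source page at a time with kmap_atomic() and memcpy() it into the
 * already-contiguous shadow (dst) mapping. Only the first page may start
 * mid-page, so the intra-page offset is reset to 0 after the first pass.
 */
static void copy_user_pages_to_shadow(struct drm_i915_gem_object *src_obj,
				      void *dst, u32 start, u32 length,
				      bool needs_clflush)
{
	unsigned int offset = offset_in_page(start);
	void *ptr = dst;
	int n;

	for (n = start >> PAGE_SHIFT; length; n++) {
		int len = min_t(int, length, PAGE_SIZE - offset);
		void *vaddr;

		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
		if (needs_clflush)
			drm_clflush_virt_range(vaddr + offset, len);
		memcpy(ptr, vaddr + offset, len);
		kunmap_atomic(vaddr);

		ptr += len;
		length -= len;
		offset = 0;
	}
}

The trade-off named in the message is visible here: each iteration pays for a kmap_atomic()/kunmap_atomic() pair (roughly the quoted 5% in trivial workloads) in exchange for never holding a second full vmapping of the source object.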
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 33
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5d9ea163d1c8..d1858f80d64c 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,7 +946,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *src, *dst;
+	void *dst, *ptr;
+	int offset, n;
 	int ret;
 
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -959,19 +960,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		goto unpin_src;
 	}
 
-	src = i915_gem_object_pin_map(src_obj, I915_MAP_WB);
-	if (IS_ERR(src)) {
-		dst = src;
-		goto unpin_dst;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
 	if (IS_ERR(dst))
-		goto unmap_src;
+		goto unpin_dst;
 
-	src += batch_start_offset;
-	if (src_needs_clflush)
-		drm_clflush_virt_range(src, batch_len);
+	ptr = dst;
+	offset = offset_in_page(batch_start_offset);
 
 	/* We can avoid clflushing partial cachelines before the write if we
 	 * only every write full cache-lines. Since we know that both the
@@ -982,13 +976,24 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	if (dst_needs_clflush & CLFLUSH_BEFORE)
 		batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
 
-	memcpy(dst, src, batch_len);
+	for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+		int len = min_t(int, batch_len, PAGE_SIZE - offset);
+		void *vaddr;
+
+		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+		if (src_needs_clflush)
+			drm_clflush_virt_range(vaddr + offset, len);
+		memcpy(ptr, vaddr + offset, len);
+		kunmap_atomic(vaddr);
+
+		ptr += len;
+		batch_len -= len;
+		offset = 0;
+	}
 
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unmap_src:
-	i915_gem_object_unpin_map(src_obj);
 unpin_dst:
 	i915_gem_obj_finish_shmem_access(dst_obj);
 unpin_src: