| author | Chris Wilson <chris@chris-wilson.co.uk> | 2016-08-18 12:17:13 -0400 |
|---|---|---|
| committer | Chris Wilson <chris@chris-wilson.co.uk> | 2016-08-18 17:36:59 -0400 |
| commit | ed13033f0287051577bc1678cde63a42fa419f3c | |
| tree | eb373a58eaa673846e9fc24f324b62cc7587a6ca /drivers/gpu | |
| parent | 0b5372727be37944239100ff05a63df9771c8484 | |
drm/i915/cmdparser: Only cache the dst vmap
For simplicity, we want to continue using a contiguous mapping of the
command buffer, but we can reduce the number of vmappings we hold by
switching over to a page-by-page copy from the user batch buffer to the
shadow. The cost of saving one linear mapping is about 5% in trivial
workloads, which is more or less the overhead of calling kmap_atomic().
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-34-chris@chris-wilson.co.uk
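The loop the patch introduces walks the source one page at a time: map page n, copy up to the page boundary, unmap, and reset the intra-page offset to zero for every page after the first. Below is a minimal user-space sketch of that pattern, assuming a fixed 4 KiB page size; `get_src_page()` is a made-up stand-in for the `i915_gem_object_get_page()`/`kmap_atomic()` pair, and `copy_pagewise()` is an illustrative name, not kernel API.

```c
#include <stddef.h>
#include <string.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

/* Hypothetical stand-in for i915_gem_object_get_page() + kmap_atomic():
 * here the "object" is just a flat buffer, so page n starts at a fixed
 * offset within it. */
static char *get_src_page(char *src_base, unsigned int n)
{
	return src_base + ((size_t)n << PAGE_SHIFT);
}

/* Copy 'len' bytes starting at byte 'start' of the source, touching one
 * source page at a time, mirroring the loop the patch adds. */
static void copy_pagewise(char *dst, char *src_base, size_t start, size_t len)
{
	size_t offset = start & (PAGE_SIZE - 1);  /* offset_in_page() */
	unsigned int n = start >> PAGE_SHIFT;     /* first page index */
	char *ptr = dst;

	while (len) {
		/* Never read past the end of the current source page. */
		size_t chunk = len < PAGE_SIZE - offset ? len : PAGE_SIZE - offset;
		char *vaddr = get_src_page(src_base, n++);

		memcpy(ptr, vaddr + offset, chunk);
		ptr += chunk;
		len -= chunk;
		offset = 0;  /* later pages are read from their start */
	}
}

int main(void)
{
	static char src[3 * PAGE_SIZE], dst[2 * PAGE_SIZE];

	memset(src, 0xab, sizeof(src));
	/* A copy that starts mid-page and crosses two page boundaries. */
	copy_pagewise(dst, src, PAGE_SIZE / 2, 2 * PAGE_SIZE);
	return memcmp(dst, src + PAGE_SIZE / 2, 2 * PAGE_SIZE) != 0;
}
```

The only state carried across iterations is the destination cursor, the remaining length, and the page index, which is why no contiguous mapping of the source is needed; only the destination keeps a single linear (vmapped) view.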
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 33
1 file changed, 19 insertions(+), 14 deletions(-)
```diff
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5d9ea163d1c8..d1858f80d64c 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,7 +946,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *src, *dst;
+	void *dst, *ptr;
+	int offset, n;
 	int ret;
 
 	ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -959,19 +960,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		goto unpin_src;
 	}
 
-	src = i915_gem_object_pin_map(src_obj, I915_MAP_WB);
-	if (IS_ERR(src)) {
-		dst = src;
-		goto unpin_dst;
-	}
-
 	dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
 	if (IS_ERR(dst))
-		goto unmap_src;
+		goto unpin_dst;
 
-	src += batch_start_offset;
-	if (src_needs_clflush)
-		drm_clflush_virt_range(src, batch_len);
+	ptr = dst;
+	offset = offset_in_page(batch_start_offset);
 
 	/* We can avoid clflushing partial cachelines before the write if we
 	 * only every write full cache-lines. Since we know that both the
@@ -982,13 +976,24 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 	if (dst_needs_clflush & CLFLUSH_BEFORE)
 		batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
 
-	memcpy(dst, src, batch_len);
+	for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+		int len = min_t(int, batch_len, PAGE_SIZE - offset);
+		void *vaddr;
+
+		vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+		if (src_needs_clflush)
+			drm_clflush_virt_range(vaddr + offset, len);
+		memcpy(ptr, vaddr + offset, len);
+		kunmap_atomic(vaddr);
+
+		ptr += len;
+		batch_len -= len;
+		offset = 0;
+	}
 
 	/* dst_obj is returned with vmap pinned */
 	*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
 
-unmap_src:
-	i915_gem_object_unpin_map(src_obj);
 unpin_dst:
 	i915_gem_obj_finish_shmem_access(dst_obj);
 unpin_src:
```
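The last hunk also keeps the pre-existing trick of rounding `batch_len` up to a whole cacheline when `CLFLUSH_BEFORE` is set, so the copy only ever writes full cache-lines and no partial-line flush is needed before the write. A small worked example of that round-up, assuming a 64-byte flush granularity (what `boot_cpu_data.x86_clflush_size` typically reports on x86); `CLFLUSH_SIZE` and `roundup_clflush()` are illustrative names for this sketch, not kernel identifiers.

```c
#include <stdio.h>

/* Assumed flush granularity; the real code reads it at runtime from
 * boot_cpu_data.x86_clflush_size. */
#define CLFLUSH_SIZE 64u

/* Mirrors what the kernel's roundup(x, y) macro computes here. */
static unsigned int roundup_clflush(unsigned int len)
{
	return (len + CLFLUSH_SIZE - 1) / CLFLUSH_SIZE * CLFLUSH_SIZE;
}

int main(void)
{
	/* A 100-byte batch is widened to 128 bytes, so every line written
	 * is a whole cacheline; copying slightly past the requested end is
	 * harmless because parsing stops at the true end of the batch. */
	printf("batch_len %u -> %u\n", 100u, roundup_clflush(100u));
	/* An already-aligned length is unchanged. */
	printf("batch_len %u -> %u\n", 4096u, roundup_clflush(4096u));
	return 0;
}
```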