author     Daniel Vetter <daniel.vetter@ffwll.ch>    2012-03-25 13:47:31 -0400
committer  Daniel Vetter <daniel.vetter@ffwll.ch>    2012-03-27 07:20:01 -0400
commit     8489731c9bd22c27ab17a2190cd7444604abf95f (patch)
tree       3d1a3ea9b7b0e47c5a4ba73a5457ee4439344786 /drivers/gpu/drm/i915/i915_gem.c
parent     6d5cd9cb1e32e4f4e4468704430b26bcb0bfb129 (diff)
drm/i915: move clflushing into shmem_pread
This is obviously gonna slow down pread. But for a half-way realistic micro-benchmark, it doesn't matter: non-broken userspace reads back data from the gpu once before the gpu again dirties it.

So all this ranged clflush tracking is just a waste of time.

No pread performance change (neglecting the dumb benchmark of constantly reading the same data) measured.

As an added bonus, this avoids clflush on read on coherent objects, which means that partial preads on snb are now roughly 4x as fast. This will be useful for e.g. the libva encoder - when I finally get around to fixing that up.

v2: Properly sync with the gpu on LLC machines.

Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
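For context on the path being changed: userspace reaches i915_gem_shmem_pread() through the GEM pread ioctl. Below is a minimal userspace sketch (not part of the patch) of the read-back pattern the commit message describes, assuming an already-open i915 DRM fd, a valid GEM handle, and the uapi i915_drm.h header on the include path - all of which are assumptions about the caller's setup.

#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* assumed to come from libdrm's include path */

/* Sketch only: read "size" bytes at byte "offset" out of GEM object
 * "handle" into "dst".  With this patch, the kernel clflushes inside
 * shmem_pread when the object is uncached, and skips the flush entirely
 * for LLC-coherent objects. */
static int gem_read_back(int fd, uint32_t handle, void *dst,
                         uint64_t offset, uint64_t size)
{
        struct drm_i915_gem_pread pread;

        memset(&pread, 0, sizeof(pread));
        pread.handle = handle;
        pread.offset = offset;
        pread.size = size;
        pread.data_ptr = (uintptr_t)dst;

        return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}

Before this patch, the pread ioctl first did a ranged flush via i915_gem_object_set_cpu_read_domain_range(); afterwards the flush (when needed at all) happens per page inside shmem_pread, as the hunks below show.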
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--    drivers/gpu/drm/i915/i915_gem.c    26
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1855e72859a8..9cdeeef5d6d7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -304,12 +304,25 @@ i915_gem_shmem_pread(struct drm_device *dev,
         int shmem_page_offset, page_length, ret = 0;
         int obj_do_bit17_swizzling, page_do_bit17_swizzling;
         int hit_slowpath = 0;
+        int needs_clflush = 0;
 
         user_data = (char __user *) (uintptr_t) args->data_ptr;
         remain = args->size;
 
         obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
+        if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
+                /* If we're not in the cpu read domain, set ourself into the gtt
+                 * read domain and manually flush cachelines (if required). This
+                 * optimizes for the case when the gpu will dirty the data
+                 * anyway again before the next pread happens. */
+                if (obj->cache_level == I915_CACHE_NONE)
+                        needs_clflush = 1;
+                ret = i915_gem_object_set_to_gtt_domain(obj, false);
+                if (ret)
+                        return ret;
+        }
+
         offset = args->offset;
 
         while (remain > 0) {
@@ -337,6 +350,9 @@ i915_gem_shmem_pread(struct drm_device *dev,
 
                 if (!page_do_bit17_swizzling) {
                         vaddr = kmap_atomic(page);
+                        if (needs_clflush)
+                                drm_clflush_virt_range(vaddr + shmem_page_offset,
+                                                       page_length);
                         ret = __copy_to_user_inatomic(user_data,
                                                       vaddr + shmem_page_offset,
                                                       page_length);
@@ -350,6 +366,10 @@ i915_gem_shmem_pread(struct drm_device *dev,
                 mutex_unlock(&dev->struct_mutex);
 
                 vaddr = kmap(page);
+                if (needs_clflush)
+                        drm_clflush_virt_range(vaddr + shmem_page_offset,
+                                               page_length);
+
                 if (page_do_bit17_swizzling)
                         ret = __copy_to_user_swizzled(user_data,
                                                       vaddr, shmem_page_offset,
@@ -430,12 +450,6 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
         trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-        ret = i915_gem_object_set_cpu_read_domain_range(obj,
-                                                         args->offset,
-                                                         args->size);
-        if (ret)
-                goto out;
-
         ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 out: