aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2012-03-24 16:12:53 -0400
committerDaniel Vetter <daniel.vetter@ffwll.ch>2012-03-26 03:59:05 -0400
commit1d83f4426fa0555c98c989915be6df01a8125aca (patch)
tree20252ea39d4e57cec394d8fcffd160a6f38128fd
parent0fb3f969c8683505fb7323c06bf8a999a5a45a15 (diff)
drm/i915: Batch copy_from_user for relocation processing
Originally the code tried to allocate a large enough array to perform the copy using vmalloc, performance wasn't great and throughput was improved by processing each individual relocation entry separately. This too is not as efficient as one would desire.

A compromise would be to allocate a single page, or to allocate a few entries on the stack, and process the copy in batches. The latter gives simpler code and more consistent performance due to a lack of heuristic.

x11perf -copywinwin10:
          n450/pnv   i3-330m   i5-2520m (cpu)
before:     249000    785000    1280000 (80%)
page:       264000    896000    1280000 (65%)
on-stack:   264000    902000    1280000 (67%)

v2: Use 512-bytes of stack for batching rather than allocate a page.
v3: Tidy the code slightly with more descriptive variable names

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c42
1 files changed, 29 insertions, 13 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0e051eca3639..1fa01313d89f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -405,30 +405,46 @@ static int
405i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, 405i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
406 struct eb_objects *eb) 406 struct eb_objects *eb)
407{ 407{
408#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
409 struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
408 struct drm_i915_gem_relocation_entry __user *user_relocs; 410 struct drm_i915_gem_relocation_entry __user *user_relocs;
409 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; 411 struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
410 int i, ret; 412 int remain, ret;
411 413
412 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr; 414 user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
413 for (i = 0; i < entry->relocation_count; i++) {
414 struct drm_i915_gem_relocation_entry reloc;
415 415
416 if (__copy_from_user_inatomic(&reloc, 416 remain = entry->relocation_count;
417 user_relocs+i, 417 while (remain) {
418 sizeof(reloc))) 418 struct drm_i915_gem_relocation_entry *r = stack_reloc;
419 int count = remain;
420 if (count > ARRAY_SIZE(stack_reloc))
421 count = ARRAY_SIZE(stack_reloc);
422 remain -= count;
423
424 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
419 return -EFAULT; 425 return -EFAULT;
420 426
421 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc); 427 do {
422 if (ret) 428 u64 offset = r->presumed_offset;
423 return ret;
424 429
425 if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset, 430 ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
426 &reloc.presumed_offset, 431 if (ret)
427 sizeof(reloc.presumed_offset))) 432 return ret;
428 return -EFAULT; 433
434 if (r->presumed_offset != offset &&
435 __copy_to_user_inatomic(&user_relocs->presumed_offset,
436 &r->presumed_offset,
437 sizeof(r->presumed_offset))) {
438 return -EFAULT;
439 }
440
441 user_relocs++;
442 r++;
443 } while (--count);
429 } 444 }
430 445
431 return 0; 446 return 0;
447#undef N_RELOC
432} 448}
433 449
434static int 450static int