author    Daniel Vetter <daniel.vetter@ffwll.ch>    2012-03-25 13:47:42 -0400
committer Daniel Vetter <daniel.vetter@ffwll.ch>    2012-03-27 07:40:57 -0400
commit    23c18c71da801fb7ce11acc3041e4f10a1bb5cb0 (patch)
tree      a0e4a48c5736670aef88dceb18fec82036a518e6 /drivers/gpu/drm/i915/i915_gem.c
parent    f56f821feb7b36223f309e0ec05986bb137ce418 (diff)
drm/i915: fixup in-line clflushing on bit17 swizzled bos
The issue is that with in-line clflushing, the flushing isn't properly
swizzled. Fix this by
- always clflushing entire 128-byte chunks, and
- unconditionally flushing before writes when swizzling a given page.
We could be clever and check whether we pwrite a partial 128-byte
chunk instead of a partial cacheline, but I figured that's not
worth it.
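For illustration only, here is a minimal userspace sketch (not part of the
patch) of why rounding the flush range out to 128-byte boundaries suffices.
It assumes the usual reading of bit17 swizzling, namely that it amounts to
flipping address bit 6 within a 128-byte pair of cachelines;
round_down_ul()/round_up_ul() are local stand-ins for the kernel's
round_down()/round_up() macros.

#include <assert.h>
#include <stdio.h>

#define CACHELINE	64UL
#define CHUNK		128UL	/* one pair of possibly-swapped cachelines */

static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

static unsigned long round_up_ul(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	/* A partial pwrite of 20 bytes at offset 0x1e50 inside an object. */
	unsigned long addr = 0x1e50, len = 20;

	unsigned long start = round_down_ul(addr, CHUNK);
	unsigned long end   = round_up_ul(addr + len, CHUNK);

	/* Assumption: the swizzled location of any byte is its address with
	 * bit 6 flipped, i.e. the partner cacheline of the same 128-byte
	 * pair.  Both candidates must land inside the flushed range. */
	for (unsigned long p = addr; p < addr + len; p++) {
		unsigned long plain    = round_down_ul(p, CACHELINE);
		unsigned long swizzled = plain ^ CACHELINE;

		assert(plain >= start && plain < end);
		assert(swizzled >= start && swizzled < end);
	}

	printf("flush [%#lx, %#lx) covers both cachelines of each 128B pair\n",
	       start, end);
	return 0;
}

Because start and end are both 128-byte aligned, every pair that contains a
touched byte lies entirely inside [start, end), so the flush hits the data
regardless of which half of the pair it was swizzled into.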
The usual approach would be to fold this into the original patch series, but
I've opted against that because
- this fixes a corner case that only very old userspace relies on, and
- I'd rather not invalidate all the testing the pwrite rewrite has already received.
This fixes the regression noticed by tests/gem_tiled_partial_pwrite_pread
from i-g-t. Unfortunately it doesn't fix the issues with partial pwrites to
tiled buffers on bit17 swizzling machines. But that is also broken without
the pwrite patches, so it is likely a different issue (or a problem with the
testcase).
v2: Simplify the patch by dropping the overly clever partial write
logic for swizzled pages.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 39
1 file changed, 32 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6dc832902f53..c964dfbdb577 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -313,6 +313,28 @@ shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
 	return ret;
 }
 
+static void
+shmem_clflush_swizzled_range(char *addr, unsigned long length,
+			     bool swizzled)
+{
+	if (swizzled) {
+		unsigned long start = (unsigned long) addr;
+		unsigned long end = (unsigned long) addr + length;
+
+		/* For swizzling simply ensure that we always flush both
+		 * channels. Lame, but simple and it works. Swizzled
+		 * pwrite/pread is far from a hotpath - current userspace
+		 * doesn't use it at all. */
+		start = round_down(start, 128);
+		end = round_up(end, 128);
+
+		drm_clflush_virt_range((void *)start, end - start);
+	} else {
+		drm_clflush_virt_range(addr, length);
+	}
+
+}
+
 /* Only difference to the fast-path function is that this can handle bit17
  * and uses non-atomic copy and kmap functions. */
 static int
@@ -325,8 +347,9 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
 
 	vaddr = kmap(page);
 	if (needs_clflush)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 
 	if (page_do_bit17_swizzling)
 		ret = __copy_to_user_swizzled(user_data,
@@ -637,9 +660,10 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 	int ret;
 
 	vaddr = kmap(page);
-	if (needs_clflush_before)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+	if (needs_clflush_before || page_do_bit17_swizzling)
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 	if (page_do_bit17_swizzling)
 		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
 						user_data,
@@ -649,8 +673,9 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
 						 user_data,
 						 page_length);
 	if (needs_clflush_after)
-		drm_clflush_virt_range(vaddr + shmem_page_offset,
-				       page_length);
+		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
+					     page_length,
+					     page_do_bit17_swizzling);
 	kunmap(page);
 
 	return ret;