diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 152 |
1 files changed, 133 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1aef516cc6fa..ecbc5c5dbbbc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
@@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring, | |||
261 | return 0; | 261 | return 0; |
262 | } | 262 | } |
263 | 263 | ||
264 | static int | ||
265 | gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) | ||
266 | { | ||
267 | int ret; | ||
268 | |||
269 | ret = intel_ring_begin(ring, 4); | ||
270 | if (ret) | ||
271 | return ret; | ||
272 | |||
273 | intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); | ||
274 | intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | | ||
275 | PIPE_CONTROL_STALL_AT_SCOREBOARD); | ||
276 | intel_ring_emit(ring, 0); | ||
277 | intel_ring_emit(ring, 0); | ||
278 | intel_ring_advance(ring); | ||
279 | |||
280 | return 0; | ||
281 | } | ||
282 | |||
283 | static int | ||
284 | gen7_render_ring_flush(struct intel_ring_buffer *ring, | ||
285 | u32 invalidate_domains, u32 flush_domains) | ||
286 | { | ||
287 | u32 flags = 0; | ||
288 | struct pipe_control *pc = ring->private; | ||
289 | u32 scratch_addr = pc->gtt_offset + 128; | ||
290 | int ret; | ||
291 | |||
292 | /* | ||
293 | * Ensure that any following seqno writes only happen when the render | ||
294 | * cache is indeed flushed. | ||
295 | * | ||
296 | * Workaround: 4th PIPE_CONTROL command (except the ones with only | ||
297 | * read-cache invalidate bits set) must have the CS_STALL bit set. We | ||
298 | * don't try to be clever and just set it unconditionally. | ||
299 | */ | ||
300 | flags |= PIPE_CONTROL_CS_STALL; | ||
301 | |||
302 | /* Just flush everything. Experiments have shown that reducing the | ||
303 | * number of bits based on the write domains has little performance | ||
304 | * impact. | ||
305 | */ | ||
306 | if (flush_domains) { | ||
307 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; | ||
308 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | ||
309 | } | ||
310 | if (invalidate_domains) { | ||
311 | flags |= PIPE_CONTROL_TLB_INVALIDATE; | ||
312 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; | ||
313 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; | ||
314 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; | ||
315 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; | ||
316 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; | ||
317 | /* | ||
318 | * TLB invalidate requires a post-sync write. | ||
319 | */ | ||
320 | flags |= PIPE_CONTROL_QW_WRITE; | ||
321 | |||
322 | /* Workaround: we must issue a pipe_control with CS-stall bit | ||
323 | * set before a pipe_control command that has the state cache | ||
324 | * invalidate bit set. */ | ||
325 | gen7_render_ring_cs_stall_wa(ring); | ||
326 | } | ||
327 | |||
328 | ret = intel_ring_begin(ring, 4); | ||
329 | if (ret) | ||
330 | return ret; | ||
331 | |||
332 | intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); | ||
333 | intel_ring_emit(ring, flags); | ||
334 | intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); | ||
335 | intel_ring_emit(ring, 0); | ||
336 | intel_ring_advance(ring); | ||
337 | |||
338 | return 0; | ||
339 | } | ||
340 | |||
264 | static void ring_write_tail(struct intel_ring_buffer *ring, | 341 | static void ring_write_tail(struct intel_ring_buffer *ring, |
265 | u32 value) | 342 | u32 value) |
266 | { | 343 | { |
@@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring) | |||
381 | 458 | ||
382 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); | 459 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); |
383 | 460 | ||
384 | ret = i915_gem_object_pin(obj, 4096, true); | 461 | ret = i915_gem_object_pin(obj, 4096, true, false); |
385 | if (ret) | 462 | if (ret) |
386 | goto err_unref; | 463 | goto err_unref; |
387 | 464 | ||
388 | pc->gtt_offset = obj->gtt_offset; | 465 | pc->gtt_offset = obj->gtt_offset; |
389 | pc->cpu_page = kmap(obj->pages[0]); | 466 | pc->cpu_page = kmap(sg_page(obj->pages->sgl)); |
390 | if (pc->cpu_page == NULL) | 467 | if (pc->cpu_page == NULL) |
391 | goto err_unpin; | 468 | goto err_unpin; |
392 | 469 | ||
@@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) | |||
413 | return; | 490 | return; |
414 | 491 | ||
415 | obj = pc->obj; | 492 | obj = pc->obj; |
416 | kunmap(obj->pages[0]); | 493 | |
494 | kunmap(sg_page(obj->pages->sgl)); | ||
417 | i915_gem_object_unpin(obj); | 495 | i915_gem_object_unpin(obj); |
418 | drm_gem_object_unreference(&obj->base); | 496 | drm_gem_object_unreference(&obj->base); |
419 | 497 | ||
@@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) | |||
461 | if (INTEL_INFO(dev)->gen >= 6) | 539 | if (INTEL_INFO(dev)->gen >= 6) |
462 | I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); | 540 | I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); |
463 | 541 | ||
464 | if (IS_IVYBRIDGE(dev)) | 542 | if (HAS_L3_GPU_CACHE(dev)) |
465 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); | 543 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); |
466 | 544 | ||
467 | return ret; | 545 | return ret; |
@@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, | |||
627 | } | 705 | } |
628 | 706 | ||
629 | static u32 | 707 | static u32 |
630 | gen6_ring_get_seqno(struct intel_ring_buffer *ring) | 708 | gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
631 | { | 709 | { |
632 | struct drm_device *dev = ring->dev; | ||
633 | |||
634 | /* Workaround to force correct ordering between irq and seqno writes on | 710 | /* Workaround to force correct ordering between irq and seqno writes on |
635 | * ivb (and maybe also on snb) by reading from a CS register (like | 711 | * ivb (and maybe also on snb) by reading from a CS register (like |
636 | * ACTHD) before reading the status page. */ | 712 | * ACTHD) before reading the status page. */ |
637 | if (IS_GEN6(dev) || IS_GEN7(dev)) | 713 | if (!lazy_coherency) |
638 | intel_ring_get_active_head(ring); | 714 | intel_ring_get_active_head(ring); |
639 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); | 715 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
640 | } | 716 | } |
641 | 717 | ||
642 | static u32 | 718 | static u32 |
643 | ring_get_seqno(struct intel_ring_buffer *ring) | 719 | ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
644 | { | 720 | { |
645 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); | 721 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
646 | } | 722 | } |
647 | 723 | ||
648 | static u32 | 724 | static u32 |
649 | pc_render_get_seqno(struct intel_ring_buffer *ring) | 725 | pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
650 | { | 726 | { |
651 | struct pipe_control *pc = ring->private; | 727 | struct pipe_control *pc = ring->private; |
652 | return pc->cpu_page[0]; | 728 | return pc->cpu_page[0]; |
@@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) | |||
851 | 927 | ||
852 | spin_lock_irqsave(&dev_priv->irq_lock, flags); | 928 | spin_lock_irqsave(&dev_priv->irq_lock, flags); |
853 | if (ring->irq_refcount++ == 0) { | 929 | if (ring->irq_refcount++ == 0) { |
854 | if (IS_IVYBRIDGE(dev) && ring->id == RCS) | 930 | if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) |
855 | I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | | 931 | I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | |
856 | GEN6_RENDER_L3_PARITY_ERROR)); | 932 | GEN6_RENDER_L3_PARITY_ERROR)); |
857 | else | 933 | else |
@@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) | |||
874 | 950 | ||
875 | spin_lock_irqsave(&dev_priv->irq_lock, flags); | 951 | spin_lock_irqsave(&dev_priv->irq_lock, flags); |
876 | if (--ring->irq_refcount == 0) { | 952 | if (--ring->irq_refcount == 0) { |
877 | if (IS_IVYBRIDGE(dev) && ring->id == RCS) | 953 | if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) |
878 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); | 954 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); |
879 | else | 955 | else |
880 | I915_WRITE_IMR(ring, ~0); | 956 | I915_WRITE_IMR(ring, ~0); |
@@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) | |||
950 | if (obj == NULL) | 1026 | if (obj == NULL) |
951 | return; | 1027 | return; |
952 | 1028 | ||
953 | kunmap(obj->pages[0]); | 1029 | kunmap(sg_page(obj->pages->sgl)); |
954 | i915_gem_object_unpin(obj); | 1030 | i915_gem_object_unpin(obj); |
955 | drm_gem_object_unreference(&obj->base); | 1031 | drm_gem_object_unreference(&obj->base); |
956 | ring->status_page.obj = NULL; | 1032 | ring->status_page.obj = NULL; |
@@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring) | |||
971 | 1047 | ||
972 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); | 1048 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); |
973 | 1049 | ||
974 | ret = i915_gem_object_pin(obj, 4096, true); | 1050 | ret = i915_gem_object_pin(obj, 4096, true, false); |
975 | if (ret != 0) { | 1051 | if (ret != 0) { |
976 | goto err_unref; | 1052 | goto err_unref; |
977 | } | 1053 | } |
978 | 1054 | ||
979 | ring->status_page.gfx_addr = obj->gtt_offset; | 1055 | ring->status_page.gfx_addr = obj->gtt_offset; |
980 | ring->status_page.page_addr = kmap(obj->pages[0]); | 1056 | ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); |
981 | if (ring->status_page.page_addr == NULL) { | 1057 | if (ring->status_page.page_addr == NULL) { |
982 | ret = -ENOMEM; | 1058 | ret = -ENOMEM; |
983 | goto err_unpin; | 1059 | goto err_unpin; |
@@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, | |||
1009 | ring->dev = dev; | 1085 | ring->dev = dev; |
1010 | INIT_LIST_HEAD(&ring->active_list); | 1086 | INIT_LIST_HEAD(&ring->active_list); |
1011 | INIT_LIST_HEAD(&ring->request_list); | 1087 | INIT_LIST_HEAD(&ring->request_list); |
1012 | INIT_LIST_HEAD(&ring->gpu_write_list); | ||
1013 | ring->size = 32 * PAGE_SIZE; | 1088 | ring->size = 32 * PAGE_SIZE; |
1014 | 1089 | ||
1015 | init_waitqueue_head(&ring->irq_queue); | 1090 | init_waitqueue_head(&ring->irq_queue); |
@@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, | |||
1029 | 1104 | ||
1030 | ring->obj = obj; | 1105 | ring->obj = obj; |
1031 | 1106 | ||
1032 | ret = i915_gem_object_pin(obj, PAGE_SIZE, true); | 1107 | ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false); |
1033 | if (ret) | 1108 | if (ret) |
1034 | goto err_unref; | 1109 | goto err_unref; |
1035 | 1110 | ||
@@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) | |||
1378 | 1453 | ||
1379 | if (INTEL_INFO(dev)->gen >= 6) { | 1454 | if (INTEL_INFO(dev)->gen >= 6) { |
1380 | ring->add_request = gen6_add_request; | 1455 | ring->add_request = gen6_add_request; |
1381 | ring->flush = gen6_render_ring_flush; | 1456 | ring->flush = gen7_render_ring_flush; |
1457 | if (INTEL_INFO(dev)->gen == 6) | ||
1458 | ring->flush = gen6_render_ring_flush; | ||
1382 | ring->irq_get = gen6_ring_get_irq; | 1459 | ring->irq_get = gen6_ring_get_irq; |
1383 | ring->irq_put = gen6_ring_put_irq; | 1460 | ring->irq_put = gen6_ring_put_irq; |
1384 | ring->irq_enable_mask = GT_USER_INTERRUPT; | 1461 | ring->irq_enable_mask = GT_USER_INTERRUPT; |
@@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) | |||
1480 | ring->dev = dev; | 1557 | ring->dev = dev; |
1481 | INIT_LIST_HEAD(&ring->active_list); | 1558 | INIT_LIST_HEAD(&ring->active_list); |
1482 | INIT_LIST_HEAD(&ring->request_list); | 1559 | INIT_LIST_HEAD(&ring->request_list); |
1483 | INIT_LIST_HEAD(&ring->gpu_write_list); | ||
1484 | 1560 | ||
1485 | ring->size = size; | 1561 | ring->size = size; |
1486 | ring->effective_size = ring->size; | 1562 | ring->effective_size = ring->size; |
@@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) | |||
1573 | 1649 | ||
1574 | return intel_init_ring_buffer(dev, ring); | 1650 | return intel_init_ring_buffer(dev, ring); |
1575 | } | 1651 | } |
1652 | |||
1653 | int | ||
1654 | intel_ring_flush_all_caches(struct intel_ring_buffer *ring) | ||
1655 | { | ||
1656 | int ret; | ||
1657 | |||
1658 | if (!ring->gpu_caches_dirty) | ||
1659 | return 0; | ||
1660 | |||
1661 | ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); | ||
1662 | if (ret) | ||
1663 | return ret; | ||
1664 | |||
1665 | trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); | ||
1666 | |||
1667 | ring->gpu_caches_dirty = false; | ||
1668 | return 0; | ||
1669 | } | ||
1670 | |||
1671 | int | ||
1672 | intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) | ||
1673 | { | ||
1674 | uint32_t flush_domains; | ||
1675 | int ret; | ||
1676 | |||
1677 | flush_domains = 0; | ||
1678 | if (ring->gpu_caches_dirty) | ||
1679 | flush_domains = I915_GEM_GPU_DOMAINS; | ||
1680 | |||
1681 | ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); | ||
1682 | if (ret) | ||
1683 | return ret; | ||
1684 | |||
1685 | trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); | ||
1686 | |||
1687 | ring->gpu_caches_dirty = false; | ||
1688 | return 0; | ||
1689 | } | ||