Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  152
1 file changed, 133 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1aef516cc6fa..ecbc5c5dbbbc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
         return 0;
 }
 
+static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+        int ret;
+
+        ret = intel_ring_begin(ring, 4);
+        if (ret)
+                return ret;
+
+        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+        intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+                              PIPE_CONTROL_STALL_AT_SCOREBOARD);
+        intel_ring_emit(ring, 0);
+        intel_ring_emit(ring, 0);
+        intel_ring_advance(ring);
+
+        return 0;
+}
+
+static int
+gen7_render_ring_flush(struct intel_ring_buffer *ring,
+                       u32 invalidate_domains, u32 flush_domains)
+{
+        u32 flags = 0;
+        struct pipe_control *pc = ring->private;
+        u32 scratch_addr = pc->gtt_offset + 128;
+        int ret;
+
+        /*
+         * Ensure that any following seqno writes only happen when the render
+         * cache is indeed flushed.
+         *
+         * Workaround: 4th PIPE_CONTROL command (except the ones with only
+         * read-cache invalidate bits set) must have the CS_STALL bit set. We
+         * don't try to be clever and just set it unconditionally.
+         */
+        flags |= PIPE_CONTROL_CS_STALL;
+
+        /* Just flush everything.  Experiments have shown that reducing the
+         * number of bits based on the write domains has little performance
+         * impact.
+         */
+        if (flush_domains) {
+                flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+                flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+        }
+        if (invalidate_domains) {
+                flags |= PIPE_CONTROL_TLB_INVALIDATE;
+                flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+                flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+                flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+                flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+                flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+                /*
+                 * TLB invalidate requires a post-sync write.
+                 */
+                flags |= PIPE_CONTROL_QW_WRITE;
+
+                /* Workaround: we must issue a pipe_control with CS-stall bit
+                 * set before a pipe_control command that has the state cache
+                 * invalidate bit set. */
+                gen7_render_ring_cs_stall_wa(ring);
+        }
+
+        ret = intel_ring_begin(ring, 4);
+        if (ret)
+                return ret;
+
+        intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+        intel_ring_emit(ring, flags);
+        intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+        intel_ring_emit(ring, 0);
+        intel_ring_advance(ring);
+
+        return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
                             u32 value)
 {
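The comments in the new gen7 path carry two ordering rules: every PIPE_CONTROL that does real work gets CS_STALL set, a TLB invalidate needs a post-sync write, and a state-cache invalidate must be preceded by a CS-stall-only PIPE_CONTROL. Below is a compact, self-contained user-space sketch of that flag selection; the PC_* values are placeholder bits for illustration, not the hardware encodings, and only a subset of the invalidate bits is modelled.

#include <stdint.h>
#include <stdio.h>

/* Placeholder bit assignments for illustration only. */
#define PC_CS_STALL                (1u << 0)
#define PC_RT_CACHE_FLUSH          (1u << 1)
#define PC_DEPTH_CACHE_FLUSH       (1u << 2)
#define PC_TLB_INVALIDATE          (1u << 3)
#define PC_STATE_CACHE_INVALIDATE  (1u << 4)
#define PC_QW_WRITE                (1u << 5)

/* Fills *flags for the main PIPE_CONTROL, mirroring the logic of
 * gen7_render_ring_flush() above (most invalidate bits elided), and
 * returns nonzero when a CS-stall-only PIPE_CONTROL must go first. */
static int gen7_flush_flags(uint32_t invalidate, uint32_t flush, uint32_t *flags)
{
        *flags = PC_CS_STALL;   /* workaround: set unconditionally */

        if (flush)
                *flags |= PC_RT_CACHE_FLUSH | PC_DEPTH_CACHE_FLUSH;
        if (invalidate)
                /* TLB invalidate requires a post-sync write. */
                *flags |= PC_TLB_INVALIDATE | PC_STATE_CACHE_INVALIDATE | PC_QW_WRITE;

        return invalidate != 0; /* state-cache invalidate => CS-stall w/a first */
}

int main(void)
{
        uint32_t flags;
        int need_stall_wa = gen7_flush_flags(1, 0, &flags);

        printf("stall w/a first: %d, flags: %#x\n", need_stall_wa, flags);
        return 0;
}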
@@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring)
 
         i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-        ret = i915_gem_object_pin(obj, 4096, true);
+        ret = i915_gem_object_pin(obj, 4096, true, false);
         if (ret)
                 goto err_unref;
 
         pc->gtt_offset = obj->gtt_offset;
-        pc->cpu_page = kmap(obj->pages[0]);
+        pc->cpu_page = kmap(sg_page(obj->pages->sgl));
         if (pc->cpu_page == NULL)
                 goto err_unpin;
 
@@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
                 return;
 
         obj = pc->obj;
-        kunmap(obj->pages[0]);
+
+        kunmap(sg_page(obj->pages->sgl));
         i915_gem_object_unpin(obj);
         drm_gem_object_unreference(&obj->base);
 
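The obj->pages[0] to sg_page(obj->pages->sgl) conversions in this patch follow from i915 tracking backing storage in a scatter-gather table rather than a flat page array. A minimal kernel-style sketch of that access pattern is below; struct toy_obj is a hypothetical stand-in holding an sg_table, not the real drm_i915_gem_object.

#include <linux/highmem.h>      /* kmap()/kunmap() */
#include <linux/scatterlist.h>  /* struct sg_table, sg_page() */

/* Hypothetical stand-in for an object whose backing pages live in an
 * sg_table, as the i915 GEM object does after this series. */
struct toy_obj {
        struct sg_table *pages;
};

/* Map the first backing page: the array lookup obj->pages[0] becomes a
 * walk to the first scatterlist entry. */
static void *toy_map_first_page(struct toy_obj *obj)
{
        return kmap(sg_page(obj->pages->sgl));
}

static void toy_unmap_first_page(struct toy_obj *obj)
{
        kunmap(sg_page(obj->pages->sgl));
}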
@@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
         if (INTEL_INFO(dev)->gen >= 6)
                 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-        if (IS_IVYBRIDGE(dev))
+        if (HAS_L3_GPU_CACHE(dev))
                 I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
 
         return ret;
@@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
-        struct drm_device *dev = ring->dev;
-
         /* Workaround to force correct ordering between irq and seqno writes on
          * ivb (and maybe also on snb) by reading from a CS register (like
          * ACTHD) before reading the status page. */
-        if (IS_GEN6(dev) || IS_GEN7(dev))
+        if (!lazy_coherency)
                 intel_ring_get_active_head(ring);
         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static u32
-ring_get_seqno(struct intel_ring_buffer *ring)
+ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring)
+pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
         struct pipe_control *pc = ring->private;
         return pc->cpu_page[0];
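The new lazy_coherency parameter lets callers decide per-read whether the ACTHD ordering workaround is worth paying for. The sketch below is a hypothetical, self-contained user-space illustration of that caller pattern, not code from this patch: poll lazily, and only force a coherent read before concluding the seqno really has not landed. The fake_ring type and fake_get_seqno are invented scaffolding.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for the ring: a seqno source that is cheap to read lazily and
 * more expensive to read coherently. */
struct fake_ring {
        uint32_t hw_seqno;
        uint32_t (*get_seqno)(struct fake_ring *ring, bool lazy_coherency);
};

static uint32_t fake_get_seqno(struct fake_ring *ring, bool lazy_coherency)
{
        if (!lazy_coherency) {
                /* The real gen6/gen7 path reads ACTHD here to order the
                 * seqno write against the interrupt; we only note the cost. */
                printf("coherent read (workaround applied)\n");
        }
        return ring->hw_seqno;
}

static bool seqno_passed(uint32_t seqno, uint32_t target)
{
        return (int32_t)(seqno - target) >= 0; /* wrap-safe comparison */
}

int main(void)
{
        struct fake_ring ring = { .hw_seqno = 41, .get_seqno = fake_get_seqno };

        /* Fast path: lazy read while polling ... */
        bool done = seqno_passed(ring.get_seqno(&ring, true), 42);
        /* ... coherent re-check only before deciding to sleep. */
        if (!done)
                done = seqno_passed(ring.get_seqno(&ring, false), 42);

        printf("done = %d\n", done);
        return 0;
}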
@@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
 
         spin_lock_irqsave(&dev_priv->irq_lock, flags);
         if (ring->irq_refcount++ == 0) {
-                if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
                         I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
                                                GEN6_RENDER_L3_PARITY_ERROR));
                 else
@@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 
         spin_lock_irqsave(&dev_priv->irq_lock, flags);
         if (--ring->irq_refcount == 0) {
-                if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+                if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
                         I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
                 else
                         I915_WRITE_IMR(ring, ~0);
@@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
         if (obj == NULL)
                 return;
 
-        kunmap(obj->pages[0]);
+        kunmap(sg_page(obj->pages->sgl));
         i915_gem_object_unpin(obj);
         drm_gem_object_unreference(&obj->base);
         ring->status_page.obj = NULL;
@@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
 
         i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-        ret = i915_gem_object_pin(obj, 4096, true);
+        ret = i915_gem_object_pin(obj, 4096, true, false);
         if (ret != 0) {
                 goto err_unref;
         }
 
         ring->status_page.gfx_addr = obj->gtt_offset;
-        ring->status_page.page_addr = kmap(obj->pages[0]);
+        ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
         if (ring->status_page.page_addr == NULL) {
                 ret = -ENOMEM;
                 goto err_unpin;
@@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
         ring->dev = dev;
         INIT_LIST_HEAD(&ring->active_list);
         INIT_LIST_HEAD(&ring->request_list);
-        INIT_LIST_HEAD(&ring->gpu_write_list);
         ring->size = 32 * PAGE_SIZE;
 
         init_waitqueue_head(&ring->irq_queue);
@@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 
         ring->obj = obj;
 
-        ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+        ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
         if (ret)
                 goto err_unref;
 
@@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
         if (INTEL_INFO(dev)->gen >= 6) {
                 ring->add_request = gen6_add_request;
-                ring->flush = gen6_render_ring_flush;
+                ring->flush = gen7_render_ring_flush;
+                if (INTEL_INFO(dev)->gen == 6)
+                        ring->flush = gen6_render_ring_flush;
                 ring->irq_get = gen6_ring_get_irq;
                 ring->irq_put = gen6_ring_put_irq;
                 ring->irq_enable_mask = GT_USER_INTERRUPT;
@@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
         ring->dev = dev;
         INIT_LIST_HEAD(&ring->active_list);
         INIT_LIST_HEAD(&ring->request_list);
-        INIT_LIST_HEAD(&ring->gpu_write_list);
 
         ring->size = size;
         ring->effective_size = ring->size;
@@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
         return intel_init_ring_buffer(dev, ring);
 }
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+        int ret;
+
+        if (!ring->gpu_caches_dirty)
+                return 0;
+
+        ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+        if (ret)
+                return ret;
+
+        trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+
+        ring->gpu_caches_dirty = false;
+        return 0;
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+        uint32_t flush_domains;
+        int ret;
+
+        flush_domains = 0;
+        if (ring->gpu_caches_dirty)
+                flush_domains = I915_GEM_GPU_DOMAINS;
+
+        ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+        if (ret)
+                return ret;
+
+        trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+
+        ring->gpu_caches_dirty = false;
+        return 0;
+}
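The two exported helpers formalize a lazy cache-maintenance protocol around the ring's gpu_caches_dirty flag: invalidate before a batch reads, mark the caches dirty once a batch has written, and write back only when the flag is still set. The following self-contained user-space sketch models that protocol; struct toy_ring, toy_flush and GPU_DOMAINS are stand-ins for the driver's types and are not part of this patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GPU_DOMAINS 0x3f        /* stand-in for I915_GEM_GPU_DOMAINS */

struct toy_ring {
        bool gpu_caches_dirty;
        int (*flush)(struct toy_ring *ring, uint32_t invalidate, uint32_t flush);
};

static int toy_flush(struct toy_ring *ring, uint32_t invalidate, uint32_t flush)
{
        printf("flush: invalidate=%#x flush=%#x\n", invalidate, flush);
        return 0;
}

/* Mirrors intel_ring_flush_all_caches(): only emit a flush if a batch has
 * dirtied the GPU caches since the last one. */
static int flush_all_caches(struct toy_ring *ring)
{
        int ret;

        if (!ring->gpu_caches_dirty)
                return 0;
        ret = ring->flush(ring, 0, GPU_DOMAINS);
        if (ret)
                return ret;
        ring->gpu_caches_dirty = false;
        return 0;
}

/* Mirrors intel_ring_invalidate_all_caches(): always invalidate before a
 * new batch reads, folding in a write-back flush if the caches are dirty. */
static int invalidate_all_caches(struct toy_ring *ring)
{
        uint32_t flush = ring->gpu_caches_dirty ? GPU_DOMAINS : 0;
        int ret;

        ret = ring->flush(ring, GPU_DOMAINS, flush);
        if (ret)
                return ret;
        ring->gpu_caches_dirty = false;
        return 0;
}

int main(void)
{
        struct toy_ring ring = { .flush = toy_flush };

        invalidate_all_caches(&ring);   /* before dispatching a batch */
        ring.gpu_caches_dirty = true;   /* the batch wrote through GPU caches */
        flush_all_caches(&ring);        /* before emitting the request/seqno */
        return 0;
}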