diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 152 |
1 files changed, 133 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1aef516cc6fa..ecbc5c5dbbbc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c | |||
| @@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring, | |||
| 261 | return 0; | 261 | return 0; |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | static int | ||
| 265 | gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) | ||
| 266 | { | ||
| 267 | int ret; | ||
| 268 | |||
| 269 | ret = intel_ring_begin(ring, 4); | ||
| 270 | if (ret) | ||
| 271 | return ret; | ||
| 272 | |||
| 273 | intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); | ||
| 274 | intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | | ||
| 275 | PIPE_CONTROL_STALL_AT_SCOREBOARD); | ||
| 276 | intel_ring_emit(ring, 0); | ||
| 277 | intel_ring_emit(ring, 0); | ||
| 278 | intel_ring_advance(ring); | ||
| 279 | |||
| 280 | return 0; | ||
| 281 | } | ||
| 282 | |||
| 283 | static int | ||
| 284 | gen7_render_ring_flush(struct intel_ring_buffer *ring, | ||
| 285 | u32 invalidate_domains, u32 flush_domains) | ||
| 286 | { | ||
| 287 | u32 flags = 0; | ||
| 288 | struct pipe_control *pc = ring->private; | ||
| 289 | u32 scratch_addr = pc->gtt_offset + 128; | ||
| 290 | int ret; | ||
| 291 | |||
| 292 | /* | ||
| 293 | * Ensure that any following seqno writes only happen when the render | ||
| 294 | * cache is indeed flushed. | ||
| 295 | * | ||
| 296 | * Workaround: 4th PIPE_CONTROL command (except the ones with only | ||
| 297 | * read-cache invalidate bits set) must have the CS_STALL bit set. We | ||
| 298 | * don't try to be clever and just set it unconditionally. | ||
| 299 | */ | ||
| 300 | flags |= PIPE_CONTROL_CS_STALL; | ||
| 301 | |||
| 302 | /* Just flush everything. Experiments have shown that reducing the | ||
| 303 | * number of bits based on the write domains has little performance | ||
| 304 | * impact. | ||
| 305 | */ | ||
| 306 | if (flush_domains) { | ||
| 307 | flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; | ||
| 308 | flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; | ||
| 309 | } | ||
| 310 | if (invalidate_domains) { | ||
| 311 | flags |= PIPE_CONTROL_TLB_INVALIDATE; | ||
| 312 | flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; | ||
| 313 | flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; | ||
| 314 | flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; | ||
| 315 | flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; | ||
| 316 | flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; | ||
| 317 | /* | ||
| 318 | * TLB invalidate requires a post-sync write. | ||
| 319 | */ | ||
| 320 | flags |= PIPE_CONTROL_QW_WRITE; | ||
| 321 | |||
| 322 | /* Workaround: we must issue a pipe_control with CS-stall bit | ||
| 323 | * set before a pipe_control command that has the state cache | ||
| 324 | * invalidate bit set. */ | ||
| 325 | gen7_render_ring_cs_stall_wa(ring); | ||
| 326 | } | ||
| 327 | |||
| 328 | ret = intel_ring_begin(ring, 4); | ||
| 329 | if (ret) | ||
| 330 | return ret; | ||
| 331 | |||
| 332 | intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); | ||
| 333 | intel_ring_emit(ring, flags); | ||
| 334 | intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); | ||
| 335 | intel_ring_emit(ring, 0); | ||
| 336 | intel_ring_advance(ring); | ||
| 337 | |||
| 338 | return 0; | ||
| 339 | } | ||
| 340 | |||
| 264 | static void ring_write_tail(struct intel_ring_buffer *ring, | 341 | static void ring_write_tail(struct intel_ring_buffer *ring, |
| 265 | u32 value) | 342 | u32 value) |
| 266 | { | 343 | { |
| @@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring) | |||
| 381 | 458 | ||
| 382 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); | 459 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); |
| 383 | 460 | ||
| 384 | ret = i915_gem_object_pin(obj, 4096, true); | 461 | ret = i915_gem_object_pin(obj, 4096, true, false); |
| 385 | if (ret) | 462 | if (ret) |
| 386 | goto err_unref; | 463 | goto err_unref; |
| 387 | 464 | ||
| 388 | pc->gtt_offset = obj->gtt_offset; | 465 | pc->gtt_offset = obj->gtt_offset; |
| 389 | pc->cpu_page = kmap(obj->pages[0]); | 466 | pc->cpu_page = kmap(sg_page(obj->pages->sgl)); |
| 390 | if (pc->cpu_page == NULL) | 467 | if (pc->cpu_page == NULL) |
| 391 | goto err_unpin; | 468 | goto err_unpin; |
| 392 | 469 | ||
| @@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) | |||
| 413 | return; | 490 | return; |
| 414 | 491 | ||
| 415 | obj = pc->obj; | 492 | obj = pc->obj; |
| 416 | kunmap(obj->pages[0]); | 493 | |
| 494 | kunmap(sg_page(obj->pages->sgl)); | ||
| 417 | i915_gem_object_unpin(obj); | 495 | i915_gem_object_unpin(obj); |
| 418 | drm_gem_object_unreference(&obj->base); | 496 | drm_gem_object_unreference(&obj->base); |
| 419 | 497 | ||
| @@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) | |||
| 461 | if (INTEL_INFO(dev)->gen >= 6) | 539 | if (INTEL_INFO(dev)->gen >= 6) |
| 462 | I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); | 540 | I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); |
| 463 | 541 | ||
| 464 | if (IS_IVYBRIDGE(dev)) | 542 | if (HAS_L3_GPU_CACHE(dev)) |
| 465 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); | 543 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); |
| 466 | 544 | ||
| 467 | return ret; | 545 | return ret; |
| @@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, | |||
| 627 | } | 705 | } |
| 628 | 706 | ||
| 629 | static u32 | 707 | static u32 |
| 630 | gen6_ring_get_seqno(struct intel_ring_buffer *ring) | 708 | gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
| 631 | { | 709 | { |
| 632 | struct drm_device *dev = ring->dev; | ||
| 633 | |||
| 634 | /* Workaround to force correct ordering between irq and seqno writes on | 710 | /* Workaround to force correct ordering between irq and seqno writes on |
| 635 | * ivb (and maybe also on snb) by reading from a CS register (like | 711 | * ivb (and maybe also on snb) by reading from a CS register (like |
| 636 | * ACTHD) before reading the status page. */ | 712 | * ACTHD) before reading the status page. */ |
| 637 | if (IS_GEN6(dev) || IS_GEN7(dev)) | 713 | if (!lazy_coherency) |
| 638 | intel_ring_get_active_head(ring); | 714 | intel_ring_get_active_head(ring); |
| 639 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); | 715 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
| 640 | } | 716 | } |
| 641 | 717 | ||
| 642 | static u32 | 718 | static u32 |
| 643 | ring_get_seqno(struct intel_ring_buffer *ring) | 719 | ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
| 644 | { | 720 | { |
| 645 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); | 721 | return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
| 646 | } | 722 | } |
| 647 | 723 | ||
| 648 | static u32 | 724 | static u32 |
| 649 | pc_render_get_seqno(struct intel_ring_buffer *ring) | 725 | pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) |
| 650 | { | 726 | { |
| 651 | struct pipe_control *pc = ring->private; | 727 | struct pipe_control *pc = ring->private; |
| 652 | return pc->cpu_page[0]; | 728 | return pc->cpu_page[0]; |
| @@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) | |||
| 851 | 927 | ||
| 852 | spin_lock_irqsave(&dev_priv->irq_lock, flags); | 928 | spin_lock_irqsave(&dev_priv->irq_lock, flags); |
| 853 | if (ring->irq_refcount++ == 0) { | 929 | if (ring->irq_refcount++ == 0) { |
| 854 | if (IS_IVYBRIDGE(dev) && ring->id == RCS) | 930 | if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) |
| 855 | I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | | 931 | I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | |
| 856 | GEN6_RENDER_L3_PARITY_ERROR)); | 932 | GEN6_RENDER_L3_PARITY_ERROR)); |
| 857 | else | 933 | else |
| @@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) | |||
| 874 | 950 | ||
| 875 | spin_lock_irqsave(&dev_priv->irq_lock, flags); | 951 | spin_lock_irqsave(&dev_priv->irq_lock, flags); |
| 876 | if (--ring->irq_refcount == 0) { | 952 | if (--ring->irq_refcount == 0) { |
| 877 | if (IS_IVYBRIDGE(dev) && ring->id == RCS) | 953 | if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) |
| 878 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); | 954 | I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); |
| 879 | else | 955 | else |
| 880 | I915_WRITE_IMR(ring, ~0); | 956 | I915_WRITE_IMR(ring, ~0); |
| @@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) | |||
| 950 | if (obj == NULL) | 1026 | if (obj == NULL) |
| 951 | return; | 1027 | return; |
| 952 | 1028 | ||
| 953 | kunmap(obj->pages[0]); | 1029 | kunmap(sg_page(obj->pages->sgl)); |
| 954 | i915_gem_object_unpin(obj); | 1030 | i915_gem_object_unpin(obj); |
| 955 | drm_gem_object_unreference(&obj->base); | 1031 | drm_gem_object_unreference(&obj->base); |
| 956 | ring->status_page.obj = NULL; | 1032 | ring->status_page.obj = NULL; |
| @@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring) | |||
| 971 | 1047 | ||
| 972 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); | 1048 | i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); |
| 973 | 1049 | ||
| 974 | ret = i915_gem_object_pin(obj, 4096, true); | 1050 | ret = i915_gem_object_pin(obj, 4096, true, false); |
| 975 | if (ret != 0) { | 1051 | if (ret != 0) { |
| 976 | goto err_unref; | 1052 | goto err_unref; |
| 977 | } | 1053 | } |
| 978 | 1054 | ||
| 979 | ring->status_page.gfx_addr = obj->gtt_offset; | 1055 | ring->status_page.gfx_addr = obj->gtt_offset; |
| 980 | ring->status_page.page_addr = kmap(obj->pages[0]); | 1056 | ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); |
| 981 | if (ring->status_page.page_addr == NULL) { | 1057 | if (ring->status_page.page_addr == NULL) { |
| 982 | ret = -ENOMEM; | 1058 | ret = -ENOMEM; |
| 983 | goto err_unpin; | 1059 | goto err_unpin; |
| @@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, | |||
| 1009 | ring->dev = dev; | 1085 | ring->dev = dev; |
| 1010 | INIT_LIST_HEAD(&ring->active_list); | 1086 | INIT_LIST_HEAD(&ring->active_list); |
| 1011 | INIT_LIST_HEAD(&ring->request_list); | 1087 | INIT_LIST_HEAD(&ring->request_list); |
| 1012 | INIT_LIST_HEAD(&ring->gpu_write_list); | ||
| 1013 | ring->size = 32 * PAGE_SIZE; | 1088 | ring->size = 32 * PAGE_SIZE; |
| 1014 | 1089 | ||
| 1015 | init_waitqueue_head(&ring->irq_queue); | 1090 | init_waitqueue_head(&ring->irq_queue); |
| @@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, | |||
| 1029 | 1104 | ||
| 1030 | ring->obj = obj; | 1105 | ring->obj = obj; |
| 1031 | 1106 | ||
| 1032 | ret = i915_gem_object_pin(obj, PAGE_SIZE, true); | 1107 | ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false); |
| 1033 | if (ret) | 1108 | if (ret) |
| 1034 | goto err_unref; | 1109 | goto err_unref; |
| 1035 | 1110 | ||
| @@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) | |||
| 1378 | 1453 | ||
| 1379 | if (INTEL_INFO(dev)->gen >= 6) { | 1454 | if (INTEL_INFO(dev)->gen >= 6) { |
| 1380 | ring->add_request = gen6_add_request; | 1455 | ring->add_request = gen6_add_request; |
| 1381 | ring->flush = gen6_render_ring_flush; | 1456 | ring->flush = gen7_render_ring_flush; |
| 1457 | if (INTEL_INFO(dev)->gen == 6) | ||
| 1458 | ring->flush = gen6_render_ring_flush; | ||
| 1382 | ring->irq_get = gen6_ring_get_irq; | 1459 | ring->irq_get = gen6_ring_get_irq; |
| 1383 | ring->irq_put = gen6_ring_put_irq; | 1460 | ring->irq_put = gen6_ring_put_irq; |
| 1384 | ring->irq_enable_mask = GT_USER_INTERRUPT; | 1461 | ring->irq_enable_mask = GT_USER_INTERRUPT; |
| @@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) | |||
| 1480 | ring->dev = dev; | 1557 | ring->dev = dev; |
| 1481 | INIT_LIST_HEAD(&ring->active_list); | 1558 | INIT_LIST_HEAD(&ring->active_list); |
| 1482 | INIT_LIST_HEAD(&ring->request_list); | 1559 | INIT_LIST_HEAD(&ring->request_list); |
| 1483 | INIT_LIST_HEAD(&ring->gpu_write_list); | ||
| 1484 | 1560 | ||
| 1485 | ring->size = size; | 1561 | ring->size = size; |
| 1486 | ring->effective_size = ring->size; | 1562 | ring->effective_size = ring->size; |
| @@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) | |||
| 1573 | 1649 | ||
| 1574 | return intel_init_ring_buffer(dev, ring); | 1650 | return intel_init_ring_buffer(dev, ring); |
| 1575 | } | 1651 | } |
| 1652 | |||
| 1653 | int | ||
| 1654 | intel_ring_flush_all_caches(struct intel_ring_buffer *ring) | ||
| 1655 | { | ||
| 1656 | int ret; | ||
| 1657 | |||
| 1658 | if (!ring->gpu_caches_dirty) | ||
| 1659 | return 0; | ||
| 1660 | |||
| 1661 | ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); | ||
| 1662 | if (ret) | ||
| 1663 | return ret; | ||
| 1664 | |||
| 1665 | trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); | ||
| 1666 | |||
| 1667 | ring->gpu_caches_dirty = false; | ||
| 1668 | return 0; | ||
| 1669 | } | ||
| 1670 | |||
| 1671 | int | ||
| 1672 | intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) | ||
| 1673 | { | ||
| 1674 | uint32_t flush_domains; | ||
| 1675 | int ret; | ||
| 1676 | |||
| 1677 | flush_domains = 0; | ||
| 1678 | if (ring->gpu_caches_dirty) | ||
| 1679 | flush_domains = I915_GEM_GPU_DOMAINS; | ||
| 1680 | |||
| 1681 | ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); | ||
| 1682 | if (ret) | ||
| 1683 | return ret; | ||
| 1684 | |||
| 1685 | trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); | ||
| 1686 | |||
| 1687 | ring->gpu_caches_dirty = false; | ||
| 1688 | return 0; | ||
| 1689 | } | ||
