author    Linus Torvalds <torvalds@linux-foundation.org>    2012-10-04 02:29:23 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2012-10-04 02:29:23 -0400
commit    612a9aab56a93533e76e3ad91642db7033e03b69 (patch)
tree      8402096973f67af941f9392f7da06cca03e0b58a /drivers/gpu/drm/i915/intel_ringbuffer.c
parent    3a494318b14b1bc0f59d2d6ce84c505c74d82d2a (diff)
parent    268d28371cd326be4dfcd7eba5917bf4b9d30c8f (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm merge (part 1) from Dave Airlie:
 "So first of all, my tree and the uapi stuff have a conflict mess; it's
  my fault, as the nouveau stuff didn't hit -next while we were trying
  to rebase regressions out of it before we merged.

  Highlights:

   - SH mobile modesetting driver and associated helpers
   - some DRM core documentation
   - i915 modesetting rework, haswell hdmi, haswell and vlv fixes,
     write-combined pte writing, ilk rc6 support
   - nouveau: major driver rework into a hw core driver, makes features
     like SLI a lot saner to implement
   - psb: add eDP/DP support for Cedarview
   - radeon: 2-layer page tables, async VM pte updates, better PLL
     selection for > 2 screens, better ACPI interactions

  The rest is a general grab bag of fixes.

  So why part 1? Well, I have the exynos pull request, which came in a
  bit late but was waiting for me to do something they shouldn't have,
  and it looks fairly safe; and David Howells has some more header
  cleanups he'd like me to pull, which seem like a good idea. But I'd
  like to get this merge out of the way so -next doesn't get blocked."

Tons of conflicts, mostly due to silly include line changes, but mostly
mindless. A few other small semantic conflicts too, noted from Dave's
pre-merged branch.

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (447 commits)
  drm/nv98/crypt: fix fuc build with latest envyas
  drm/nouveau/devinit: fixup various issues with subdev ctor/init ordering
  drm/nv41/vm: fix and enable use of "real" pciegart
  drm/nv44/vm: fix and enable use of "real" pciegart
  drm/nv04/dmaobj: fixup vm target handling in preparation for nv4x pcie
  drm/nouveau: store supported dma mask in vmmgr
  drm/nvc0/ibus: initial implementation of subdev
  drm/nouveau/therm: add support for fan-control modes
  drm/nouveau/hwmon: rename pwm0* to pwm1* to follow hwmon's rules
  drm/nouveau/therm: calculate the pwm divisor on nv50+
  drm/nouveau/fan: rewrite the fan tachometer driver to get more precision, faster
  drm/nouveau/therm: move thermal-related functions to the therm subdev
  drm/nouveau/bios: parse the pwm divisor from the perf table
  drm/nouveau/therm: use the EXTDEV table to detect i2c monitoring devices
  drm/nouveau/therm: rework thermal table parsing
  drm/nouveau/gpio: expose the PWM/TOGGLE parameter found in the gpio vbios table
  drm/nouveau: fix pm initialization order
  drm/nouveau/bios: check that fixed tvdac gpio data is valid before using it
  drm/nouveau: log channel debug/error messages from client object rather than drm client
  drm/nouveau: have drm debugging macros build on top of core macros
  ...
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 152
1 files changed, 133 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1aef516cc6fa..ecbc5c5dbbbc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+	int ret;
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen7_render_ring_flush(struct intel_ring_buffer *ring,
+		       u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/*
+	 * Ensure that any following seqno writes only happen when the render
+	 * cache is indeed flushed.
+	 *
+	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
+	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
+	 * don't try to be clever and just set it unconditionally.
+	 */
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	if (flush_domains) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	}
+	if (invalidate_domains) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		/*
+		 * TLB invalidate requires a post-sync write.
+		 */
+		flags |= PIPE_CONTROL_QW_WRITE;
+
+		/* Workaround: we must issue a pipe_control with CS-stall bit
+		 * set before a pipe_control command that has the state cache
+		 * invalidate bit set. */
+		gen7_render_ring_cs_stall_wa(ring);
+	}
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
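
[The workaround logic above is easiest to see from the caller's side. A minimal, hypothetical sketch — example_invalidate_render_caches is not part of this commit; ring->flush and I915_GEM_GPU_DOMAINS are the driver's own, and ring->flush is pointed at the gen7 function by the init change further down:]

/* Hypothetical caller sketch: request an invalidating flush on the
 * render ring.  On gen7 this reaches gen7_render_ring_flush() above,
 * which first emits the CS-stall workaround PIPE_CONTROL and only then
 * the real invalidating PIPE_CONTROL. */
static int example_invalidate_render_caches(struct intel_ring_buffer *ring)
{
	/* invalidate all GPU read caches, flush no write caches */
	return ring->flush(ring, I915_GEM_GPU_DOMAINS, 0);
}
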
@@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring)
 
 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret)
 		goto err_unref;
 
 	pc->gtt_offset = obj->gtt_offset;
-	pc->cpu_page = kmap(obj->pages[0]);
+	pc->cpu_page = kmap(sg_page(obj->pages->sgl));
 	if (pc->cpu_page == NULL)
 		goto err_unpin;
 
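
[Two API changes meet in this hunk: i915_gem_object_pin() grows a fourth boolean (a non-blocking flag in this era's i915 API, passed as false here), and obj->pages changes from a page array to a scatter/gather table. A minimal sketch of the mapping change, assuming struct drm_i915_gem_object as of this merge:]

/* Sketch only: obj->pages used to be indexable as obj->pages[0]; as a
 * struct sg_table *, the first backing page now comes from the first
 * scatterlist entry instead. */
static void *map_first_backing_page(struct drm_i915_gem_object *obj)
{
	return kmap(sg_page(obj->pages->sgl));	/* was kmap(obj->pages[0]) */
}
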
@@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
 		return;
 
 	obj = pc->obj;
-	kunmap(obj->pages[0]);
+
+	kunmap(sg_page(obj->pages->sgl));
 	i915_gem_object_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 
@@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 	if (INTEL_INFO(dev)->gen >= 6)
 		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	if (IS_IVYBRIDGE(dev))
+	if (HAS_L3_GPU_CACHE(dev))
 		I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
 
 	return ret;
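
[The new predicate generalizes the old Ivybridge-only check to every platform with an L3 GPU cache. A plausible definition consistent with this change — the authoritative form lives in i915_drv.h, so treat this as approximate:]

/* approximate definition from this merge window's i915_drv.h; Haswell
 * also has an L3 GPU cache, hence the move away from IS_IVYBRIDGE */
#define HAS_L3_GPU_CACHE(dev) (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
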
@@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 }
 
 static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
-	struct drm_device *dev = ring->dev;
-
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
 	 * ACTHD) before reading the status page. */
-	if (IS_GEN6(dev) || IS_GEN7(dev))
+	if (!lazy_coherency)
 		intel_ring_get_active_head(ring);
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static u32
-ring_get_seqno(struct intel_ring_buffer *ring)
+ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
 	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 }
 
 static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring)
+pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
 {
 	struct pipe_control *pc = ring->private;
 	return pc->cpu_page[0];
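
[The new lazy_coherency argument lets each caller choose between a cheap, possibly stale status-page read and one that forces the ordering workaround. A hypothetical wait-path sketch — example_seqno_passed is illustrative only; i915_seqno_passed is the driver's wrap-safe seqno comparison helper:]

/* Hypothetical sketch: poll cheaply first, then force a coherent read
 * before concluding the seqno really hasn't landed. */
static bool example_seqno_passed(struct intel_ring_buffer *ring, u32 seqno)
{
	/* lazy_coherency=true: plain status-page read, may be stale */
	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
		return true;

	/* lazy_coherency=false: reads a CS register (ACTHD) first, per
	 * the workaround comment above */
	return i915_seqno_passed(ring->get_seqno(ring, false), seqno);
}
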
@@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 	if (ring->irq_refcount++ == 0) {
-		if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
 						GEN6_RENDER_L3_PARITY_ERROR));
 		else
@@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 	if (--ring->irq_refcount == 0) {
-		if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
 		else
 			I915_WRITE_IMR(ring, ~0);
@@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
 	if (obj == NULL)
 		return;
 
-	kunmap(obj->pages[0]);
+	kunmap(sg_page(obj->pages->sgl));
 	i915_gem_object_unpin(obj);
 	drm_gem_object_unreference(&obj->base);
 	ring->status_page.obj = NULL;
@@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
 
 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret != 0) {
 		goto err_unref;
 	}
 
 	ring->status_page.gfx_addr = obj->gtt_offset;
-	ring->status_page.page_addr = kmap(obj->pages[0]);
+	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
 	if (ring->status_page.page_addr == NULL) {
 		ret = -ENOMEM;
 		goto err_unpin;
@@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 	ring->size = 32 * PAGE_SIZE;
 
 	init_waitqueue_head(&ring->irq_queue);
@@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 
 	ring->obj = obj;
 
-	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
 	if (ret)
 		goto err_unref;
 
@@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
-		ring->flush = gen6_render_ring_flush;
+		ring->flush = gen7_render_ring_flush;
+		if (INTEL_INFO(dev)->gen == 6)
+			ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_ring_get_irq;
 		ring->irq_put = gen6_ring_put_irq;
 		ring->irq_enable_mask = GT_USER_INTERRUPT;
@@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	INIT_LIST_HEAD(&ring->gpu_write_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
@@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
 
 	return intel_init_ring_buffer(dev, ring);
 }
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+	int ret;
+
+	if (!ring->gpu_caches_dirty)
+		return 0;
+
+	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+	uint32_t flush_domains;
+	int ret;
+
+	flush_domains = 0;
+	if (ring->gpu_caches_dirty)
+		flush_domains = I915_GEM_GPU_DOMAINS;
+
+	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+	if (ret)
+		return ret;
+
+	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
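
[These two exported helpers pair with the new ring->gpu_caches_dirty flag to make cache flushing lazy, replacing the gpu_write_list machinery whose INIT_LIST_HEAD removals appear in the hunks above. A hypothetical sketch of the intended pattern around batch submission, with the actual emission step elided — the real flow lives in the execbuffer path:]

/* Hypothetical sketch: invalidate read caches going in, mark write
 * caches dirty coming out; the next invalidate (or an explicit
 * intel_ring_flush_all_caches() call) then flushes lazily. */
static int example_submit_batch(struct intel_ring_buffer *ring)
{
	int ret;

	/* also flushes first if a previous batch left caches dirty */
	ret = intel_ring_invalidate_all_caches(ring);
	if (ret)
		return ret;

	/* ... emit and kick the batchbuffer here ... */

	/* assume the batch wrote through some GPU write domain */
	ring->gpu_caches_dirty = true;
	return 0;
}
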