Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 342
1 file changed, 253 insertions, 89 deletions

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ecbc5c5dbbbc..42ff97d667d2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -45,7 +45,7 @@ struct pipe_control {
 
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
-	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
+	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
 	if (space < 0)
 		space += ring->size;
 	return space;
@@ -245,7 +245,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
 	ret = intel_ring_begin(ring, 4);
@@ -505,13 +505,25 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = init_ring_common(ring);
 
-	if (INTEL_INFO(dev)->gen > 3) {
+	if (INTEL_INFO(dev)->gen > 3)
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-		if (IS_GEN7(dev))
-			I915_WRITE(GFX_MODE_GEN7,
-				   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
-				   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-	}
+
+	/* We need to disable the AsyncFlip performance optimisations in order
+	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+	 * programmed to '1' on all products.
+	 */
+	if (INTEL_INFO(dev)->gen >= 6)
+		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+	/* Required for the hardware to program scanline values for waiting */
+	if (INTEL_INFO(dev)->gen == 6)
+		I915_WRITE(GFX_MODE,
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
+
+	if (IS_GEN7(dev))
+		I915_WRITE(GFX_MODE_GEN7,
+			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
+			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
 	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
@@ -547,23 +559,24 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev))
+		drm_gem_object_unreference(to_gem_object(ring->private));
+
 	cleanup_pipe_control(ring);
 }
 
 static void
 update_mboxes(struct intel_ring_buffer *ring,
-	      u32 seqno,
 	      u32 mmio_offset)
 {
-	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
-			      MI_SEMAPHORE_GLOBAL_GTT |
-			      MI_SEMAPHORE_REGISTER |
-			      MI_SEMAPHORE_UPDATE);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, mmio_offset);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 }
 
 /**
@@ -576,8 +589,7 @@ update_mboxes(struct intel_ring_buffer *ring,
  * This acts like a signal in the canonical semaphore.
  */
 static int
-gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *seqno)
+gen6_add_request(struct intel_ring_buffer *ring)
 {
 	u32 mbox1_reg;
 	u32 mbox2_reg;
@@ -590,13 +602,11 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	mbox1_reg = ring->signal_mbox[0];
 	mbox2_reg = ring->signal_mbox[1];
 
-	*seqno = i915_gem_next_request_seqno(ring);
-
-	update_mboxes(ring, *seqno, mbox1_reg);
-	update_mboxes(ring, *seqno, mbox2_reg);
+	update_mboxes(ring, mbox1_reg);
+	update_mboxes(ring, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, *seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
@@ -653,10 +663,8 @@ do { \
 } while (0)
 
 static int
-pc_render_add_request(struct intel_ring_buffer *ring,
-		      u32 *result)
+pc_render_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno = i915_gem_next_request_seqno(ring);
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
@@ -677,7 +685,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128; /* write to separate cachelines */
@@ -696,11 +704,10 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -888,25 +895,20 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
 }
 
 static int
-i9xx_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+i9xx_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno;
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_next_request_seqno(ring);
-
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -964,7 +966,9 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
+i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			 u32 offset, u32 length,
+			 unsigned flags)
 {
 	int ret;
 
@@ -975,35 +979,71 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
 			MI_BATCH_GTT |
-			MI_BATCH_NON_SECURE_I965);
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
 
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
-				u32 offset, u32 len)
+				u32 offset, u32 len,
+				unsigned flags)
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags & I915_DISPATCH_PINNED) {
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset + len - 8);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		struct drm_i915_gem_object *obj = ring->private;
+		u32 cs_offset = obj->gtt_offset;
+
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		ret = intel_ring_begin(ring, 9+3);
+		if (ret)
+			return ret;
+		/* Blit the batch (which has now all relocs applied) to the stable batch
+		 * scratch bo area (so that the CS never stumbles over its tlb
+		 * invalidation bug) ... */
+		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+				      XY_SRC_COPY_BLT_WRITE_ALPHA |
+				      XY_SRC_COPY_BLT_WRITE_RGB);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+		intel_ring_emit(ring, cs_offset);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, 4096);
+		intel_ring_emit(ring, offset);
+		intel_ring_emit(ring, MI_FLUSH);
+
+		/* ... and execute it. */
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, cs_offset + len - 8);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
 
 static int
 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			 u32 offset, u32 len)
+			 u32 offset, u32 len,
+			 unsigned flags)
 {
 	int ret;
 
@@ -1012,7 +1052,7 @@ i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
 		return ret;
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
+	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -1075,6 +1115,29 @@ err:
 	return ret;
 }
 
+static int init_phys_hws_pga(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 addr;
+
+	if (!dev_priv->status_page_dmah) {
+		dev_priv->status_page_dmah =
+			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
+		if (!dev_priv->status_page_dmah)
+			return -ENOMEM;
+	}
+
+	addr = dev_priv->status_page_dmah->busaddr;
+	if (INTEL_INFO(ring->dev)->gen >= 4)
+		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+	I915_WRITE(HWS_PGA, addr);
+
+	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
+	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+
+	return 0;
+}
+
 static int intel_init_ring_buffer(struct drm_device *dev,
 				  struct intel_ring_buffer *ring)
 {
@@ -1086,6 +1149,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ring->size = 32 * PAGE_SIZE;
+	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
 
@@ -1093,6 +1157,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		ret = init_status_page(ring);
 		if (ret)
 			return ret;
+	} else {
+		BUG_ON(ring->id != RCS);
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
 	}
 
 	obj = i915_gem_alloc_object(dev, ring->size);
@@ -1157,7 +1226,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 
 	/* Disable the ring buffer. The ring must be idle at this point */
 	dev_priv = ring->dev->dev_private;
-	ret = intel_wait_ring_idle(ring);
+	ret = intel_ring_idle(ring);
 	if (ret)
 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
 			  ring->name, ret);
@@ -1176,28 +1245,6 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 	cleanup_status_page(ring);
 }
 
-static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
-{
-	uint32_t __iomem *virt;
-	int rem = ring->size - ring->tail;
-
-	if (ring->space < rem) {
-		int ret = intel_wait_ring_buffer(ring, rem);
-		if (ret)
-			return ret;
-	}
-
-	virt = ring->virtual_start + ring->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ring->tail = 0;
-	ring->space = ring_space(ring);
-
-	return 0;
-}
-
 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
@@ -1231,7 +1278,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
 		if (request->tail == -1)
 			continue;
 
-		space = request->tail - (ring->tail + 8);
+		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
 		if (space < 0)
 			space += ring->size;
 		if (space >= n) {
@@ -1266,7 +1313,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
 	return 0;
 }
 
-int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
+static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1309,6 +1356,60 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 	return -EBUSY;
 }
 
+static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
+{
+	uint32_t __iomem *virt;
+	int rem = ring->size - ring->tail;
+
+	if (ring->space < rem) {
+		int ret = ring_wait_for_space(ring, rem);
+		if (ret)
+			return ret;
+	}
+
+	virt = ring->virtual_start + ring->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ring->tail = 0;
+	ring->space = ring_space(ring);
+
+	return 0;
+}
+
+int intel_ring_idle(struct intel_ring_buffer *ring)
+{
+	u32 seqno;
+	int ret;
+
+	/* We need to add any requests required to flush the objects and ring */
+	if (ring->outstanding_lazy_request) {
+		ret = i915_add_request(ring, NULL, NULL);
+		if (ret)
+			return ret;
+	}
+
+	/* Wait upon the last request to be completed */
+	if (list_empty(&ring->request_list))
+		return 0;
+
+	seqno = list_entry(ring->request_list.prev,
+			   struct drm_i915_gem_request,
+			   list)->seqno;
+
+	return i915_wait_seqno(ring, seqno);
+}
+
+static int
+intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
+{
+	if (ring->outstanding_lazy_request)
+		return 0;
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
+}
+
 int intel_ring_begin(struct intel_ring_buffer *ring,
 		     int num_dwords)
 {
@@ -1320,6 +1421,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
+	/* Preallocate the olr before touching the ring */
+	ret = intel_ring_alloc_seqno(ring);
+	if (ret)
+		return ret;
+
 	if (unlikely(ring->tail + n > ring->effective_size)) {
 		ret = intel_wrap_ring_buffer(ring);
 		if (unlikely(ret))
@@ -1327,7 +1433,7 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
 	}
 
 	if (unlikely(ring->space < n)) {
-		ret = intel_wait_ring_buffer(ring, n);
+		ret = ring_wait_for_space(ring, n);
 		if (unlikely(ret))
 			return ret;
 	}
@@ -1391,10 +1497,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 *   "If ENABLED, all TLBs will be invalidated once the flush
+	 *    operation is complete. This bit is only valid when the
+	 *    Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1402,8 +1515,30 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 }
 
 static int
+hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			     u32 offset, u32 len,
+			     unsigned flags)
+{
+	int ret;
+
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len)
+			      u32 offset, u32 len,
+			      unsigned flags)
 {
 	int ret;
 
@@ -1411,7 +1546,9 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
@@ -1432,10 +1569,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 *   "If ENABLED, all TLBs will be invalidated once the flush
+	 *    operation is complete. This bit is only valid when the
+	 *    Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB;
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1490,7 +1634,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 	ring->write_tail = ring_write_tail;
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (IS_HASWELL(dev))
+		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
+	else if (INTEL_INFO(dev)->gen >= 6)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	else if (INTEL_INFO(dev)->gen >= 4)
 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
@@ -1501,10 +1647,25 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
 
-	if (!I915_NEED_GFX_HWS(dev)) {
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 0, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
 	}
 
 	return intel_init_ring_buffer(dev, ring);
@@ -1514,6 +1675,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	int ret;
 
 	ring->name = "render ring";
 	ring->id = RCS;
@@ -1551,16 +1713,13 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
-	if (!I915_NEED_GFX_HWS(dev))
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev))
+	if (IS_I830(ring->dev) || IS_845G(ring->dev))
 		ring->effective_size -= 128;
 
 	ring->virtual_start = ioremap_wc(start, size);
@@ -1570,6 +1729,12 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 		return -ENOMEM;
 	}
 
+	if (!I915_NEED_GFX_HWS(dev)) {
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
1575 | 1740 | ||
@@ -1618,7 +1783,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) | |||
1618 | } | 1783 | } |
1619 | ring->init = init_ring_common; | 1784 | ring->init = init_ring_common; |
1620 | 1785 | ||
1621 | |||
1622 | return intel_init_ring_buffer(dev, ring); | 1786 | return intel_init_ring_buffer(dev, ring); |
1623 | } | 1787 | } |
1624 | 1788 | ||