Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--	drivers/gpu/drm/i915/intel_ringbuffer.c	342
1 file changed, 253 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ecbc5c5dbbbc..42ff97d667d2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -45,7 +45,7 @@ struct pipe_control {
 
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
-	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
+	int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
 	if (space < 0)
 		space += ring->size;
 	return space;
@@ -245,7 +245,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
-		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
 	ret = intel_ring_begin(ring, 4);
@@ -505,13 +505,25 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret = init_ring_common(ring);
 
-	if (INTEL_INFO(dev)->gen > 3) {
+	if (INTEL_INFO(dev)->gen > 3)
 		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
-		if (IS_GEN7(dev))
-			I915_WRITE(GFX_MODE_GEN7,
-				   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
-				   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
-	}
+
+	/* We need to disable the AsyncFlip performance optimisations in order
+	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+	 * programmed to '1' on all products.
+	 */
+	if (INTEL_INFO(dev)->gen >= 6)
+		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+	/* Required for the hardware to program scanline values for waiting */
+	if (INTEL_INFO(dev)->gen == 6)
+		I915_WRITE(GFX_MODE,
+			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_ALWAYS));
+
+	if (IS_GEN7(dev))
+		I915_WRITE(GFX_MODE_GEN7,
+			   _MASKED_BIT_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) |
+			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
 	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
@@ -547,23 +559,24 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev))
+		drm_gem_object_unreference(to_gem_object(ring->private));
+
 	cleanup_pipe_control(ring);
 }
 
 static void
 update_mboxes(struct intel_ring_buffer *ring,
-	      u32 seqno,
 	      u32 mmio_offset)
 {
-	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_GLOBAL_GTT |
-			MI_SEMAPHORE_REGISTER |
-			MI_SEMAPHORE_UPDATE);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
 	intel_ring_emit(ring, mmio_offset);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 }
 
 /**
@@ -576,8 +589,7 @@ update_mboxes(struct intel_ring_buffer *ring,
  * This acts like a signal in the canonical semaphore.
  */
 static int
-gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *seqno)
+gen6_add_request(struct intel_ring_buffer *ring)
 {
 	u32 mbox1_reg;
 	u32 mbox2_reg;
@@ -590,13 +602,11 @@ gen6_add_request(struct intel_ring_buffer *ring,
 	mbox1_reg = ring->signal_mbox[0];
 	mbox2_reg = ring->signal_mbox[1];
 
-	*seqno = i915_gem_next_request_seqno(ring);
-
-	update_mboxes(ring, *seqno, mbox1_reg);
-	update_mboxes(ring, *seqno, mbox2_reg);
+	update_mboxes(ring, mbox1_reg);
+	update_mboxes(ring, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, *seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
@@ -653,10 +663,8 @@ do { \
 } while (0)
 
 static int
-pc_render_add_request(struct intel_ring_buffer *ring,
-		      u32 *result)
+pc_render_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno = i915_gem_next_request_seqno(ring);
 	struct pipe_control *pc = ring->private;
 	u32 scratch_addr = pc->gtt_offset + 128;
 	int ret;
@@ -677,7 +685,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 			PIPE_CONTROL_WRITE_FLUSH |
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128; /* write to separate cachelines */
@@ -696,11 +704,10 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -888,25 +895,20 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
 }
 
 static int
-i9xx_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+i9xx_add_request(struct intel_ring_buffer *ring)
 {
-	u32 seqno;
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_next_request_seqno(ring);
-
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, ring->outstanding_lazy_request);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
@@ -964,7 +966,9 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
 }
 
 static int
-i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
+i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			 u32 offset, u32 length,
+			 unsigned flags)
 {
 	int ret;
 
@@ -975,35 +979,71 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
 	intel_ring_emit(ring,
 			MI_BATCH_BUFFER_START |
 			MI_BATCH_GTT |
-			MI_BATCH_NON_SECURE_I965);
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
 
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
-				u32 offset, u32 len)
+				u32 offset, u32 len,
+				unsigned flags)
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags & I915_DISPATCH_PINNED) {
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset + len - 8);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		struct drm_i915_gem_object *obj = ring->private;
+		u32 cs_offset = obj->gtt_offset;
+
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		ret = intel_ring_begin(ring, 9+3);
+		if (ret)
+			return ret;
+		/* Blit the batch (which has now all relocs applied) to the stable batch
+		 * scratch bo area (so that the CS never stumbles over its tlb
+		 * invalidation bug) ... */
+		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+				XY_SRC_COPY_BLT_WRITE_ALPHA |
+				XY_SRC_COPY_BLT_WRITE_RGB);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+		intel_ring_emit(ring, cs_offset);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, 4096);
+		intel_ring_emit(ring, offset);
+		intel_ring_emit(ring, MI_FLUSH);
+
+		/* ... and execute it. */
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, cs_offset + len - 8);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
 
 static int
 i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			 u32 offset, u32 len)
+			 u32 offset, u32 len,
+			 unsigned flags)
 {
 	int ret;
 
@@ -1012,7 +1052,7 @@ i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
 		return ret;
 
 	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
-	intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE);
+	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
 	intel_ring_advance(ring);
 
 	return 0;
@@ -1075,6 +1115,29 @@ err:
 	return ret;
 }
 
+static int init_phys_hws_pga(struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 addr;
+
+	if (!dev_priv->status_page_dmah) {
+		dev_priv->status_page_dmah =
+			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
+		if (!dev_priv->status_page_dmah)
+			return -ENOMEM;
+	}
+
+	addr = dev_priv->status_page_dmah->busaddr;
+	if (INTEL_INFO(ring->dev)->gen >= 4)
+		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+	I915_WRITE(HWS_PGA, addr);
+
+	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
+	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+
+	return 0;
+}
+
 static int intel_init_ring_buffer(struct drm_device *dev,
 				  struct intel_ring_buffer *ring)
 {
@@ -1086,6 +1149,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 	ring->size = 32 * PAGE_SIZE;
+	memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
 
@@ -1093,6 +1157,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		ret = init_status_page(ring);
 		if (ret)
 			return ret;
+	} else {
+		BUG_ON(ring->id != RCS);
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
 	}
 
 	obj = i915_gem_alloc_object(dev, ring->size);
@@ -1157,7 +1226,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 
 	/* Disable the ring buffer. The ring must be idle at this point */
 	dev_priv = ring->dev->dev_private;
-	ret = intel_wait_ring_idle(ring);
+	ret = intel_ring_idle(ring);
 	if (ret)
 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
 			  ring->name, ret);
@@ -1176,28 +1245,6 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 	cleanup_status_page(ring);
 }
 
-static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
-{
-	uint32_t __iomem *virt;
-	int rem = ring->size - ring->tail;
-
-	if (ring->space < rem) {
-		int ret = intel_wait_ring_buffer(ring, rem);
-		if (ret)
-			return ret;
-	}
-
-	virt = ring->virtual_start + ring->tail;
-	rem /= 4;
-	while (rem--)
-		iowrite32(MI_NOOP, virt++);
-
-	ring->tail = 0;
-	ring->space = ring_space(ring);
-
-	return 0;
-}
-
 static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
 {
 	int ret;
@@ -1231,7 +1278,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
 		if (request->tail == -1)
 			continue;
 
-		space = request->tail - (ring->tail + 8);
+		space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
 		if (space < 0)
 			space += ring->size;
 		if (space >= n) {
@@ -1266,7 +1313,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
 	return 0;
 }
 
-int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
+static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
 {
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1309,6 +1356,60 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
 	return -EBUSY;
 }
 
+static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
+{
+	uint32_t __iomem *virt;
+	int rem = ring->size - ring->tail;
+
+	if (ring->space < rem) {
+		int ret = ring_wait_for_space(ring, rem);
+		if (ret)
+			return ret;
+	}
+
+	virt = ring->virtual_start + ring->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ring->tail = 0;
+	ring->space = ring_space(ring);
+
+	return 0;
+}
+
+int intel_ring_idle(struct intel_ring_buffer *ring)
+{
+	u32 seqno;
+	int ret;
+
+	/* We need to add any requests required to flush the objects and ring */
+	if (ring->outstanding_lazy_request) {
+		ret = i915_add_request(ring, NULL, NULL);
+		if (ret)
+			return ret;
+	}
+
+	/* Wait upon the last request to be completed */
+	if (list_empty(&ring->request_list))
+		return 0;
+
+	seqno = list_entry(ring->request_list.prev,
+			   struct drm_i915_gem_request,
+			   list)->seqno;
+
+	return i915_wait_seqno(ring, seqno);
+}
+
+static int
+intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
+{
+	if (ring->outstanding_lazy_request)
+		return 0;
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
+}
+
 int intel_ring_begin(struct intel_ring_buffer *ring,
 		     int num_dwords)
 {
@@ -1320,6 +1421,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
+	/* Preallocate the olr before touching the ring */
+	ret = intel_ring_alloc_seqno(ring);
+	if (ret)
+		return ret;
+
 	if (unlikely(ring->tail + n > ring->effective_size)) {
 		ret = intel_wrap_ring_buffer(ring);
 		if (unlikely(ret))
@@ -1327,7 +1433,7 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
 	}
 
 	if (unlikely(ring->space < n)) {
-		ret = intel_wait_ring_buffer(ring, n);
+		ret = ring_wait_for_space(ring, n);
 		if (unlikely(ret))
 			return ret;
 	}
@@ -1391,10 +1497,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_GPU_DOMAINS)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1402,8 +1515,30 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
 }
 
 static int
+hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
+			     u32 offset, u32 len,
+			     unsigned flags)
+{
+	int ret;
+
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
-			      u32 offset, u32 len)
+			      u32 offset, u32 len,
+			      unsigned flags)
 {
 	int ret;
 
@@ -1411,7 +1546,9 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	intel_ring_emit(ring,
+			MI_BATCH_BUFFER_START |
+			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
 	/* bit0-7 is the length on GEN6+ */
 	intel_ring_emit(ring, offset);
 	intel_ring_advance(ring);
@@ -1432,10 +1569,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
 	if (invalidate & I915_GEM_DOMAIN_RENDER)
-		cmd |= MI_INVALIDATE_TLB;
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
-	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
 	intel_ring_emit(ring, 0);
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
@@ -1490,7 +1634,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 		ring->irq_enable_mask = I915_USER_INTERRUPT;
 	}
 	ring->write_tail = ring_write_tail;
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (IS_HASWELL(dev))
+		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
+	else if (INTEL_INFO(dev)->gen >= 6)
 		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
 	else if (INTEL_INFO(dev)->gen >= 4)
 		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
@@ -1501,10 +1647,25 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
 
-	if (!I915_NEED_GFX_HWS(dev)) {
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-		memset(ring->status_page.page_addr, 0, PAGE_SIZE);
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 0, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
 	}
 
 	return intel_init_ring_buffer(dev, ring);
@@ -1514,6 +1675,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
+	int ret;
 
 	ring->name = "render ring";
 	ring->id = RCS;
@@ -1551,16 +1713,13 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
-	if (!I915_NEED_GFX_HWS(dev))
-		ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
 
 	ring->size = size;
 	ring->effective_size = ring->size;
-	if (IS_I830(ring->dev))
+	if (IS_I830(ring->dev) || IS_845G(ring->dev))
 		ring->effective_size -= 128;
 
 	ring->virtual_start = ioremap_wc(start, size);
@@ -1570,6 +1729,12 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
 		return -ENOMEM;
 	}
 
+	if (!I915_NEED_GFX_HWS(dev)) {
+		ret = init_phys_hws_pga(ring);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -1618,7 +1783,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 	}
 	ring->init = init_ring_common;
 
-
 	return intel_init_ring_buffer(dev, ring);
 }
 
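
The @@ -45,7 and @@ -1231,7 hunks above route the ring's reserved headroom through I915_RING_FREE_SPACE instead of a bare literal. Below is a minimal standalone C sketch (not driver code) of that wrap-around free-space arithmetic; the headroom value of 64 and the names used here are illustrative assumptions, not taken from this diff.

#include <stdint.h>
#include <stdio.h>

#define RING_FREE_SPACE 64	/* assumed headroom; stand-in for I915_RING_FREE_SPACE */

/* Free bytes between tail and head of a circular ring buffer, keeping
 * RING_FREE_SPACE in reserve so the tail never catches up with the head. */
static int ring_space(uint32_t head, uint32_t tail, uint32_t size)
{
	int space = (int)head - (int)(tail + RING_FREE_SPACE);

	if (space < 0)
		space += size;	/* wrap around the end of the ring */
	return space;
}

int main(void)
{
	/* e.g. a 128 KiB ring with head at 4 KiB and tail at 120 KiB */
	printf("%d bytes free\n", ring_space(4096, 122880, 131072));
	return 0;
}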