path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  |  318
1 file changed, 235 insertions(+), 83 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ecbc5c5dbbbc..ae253e04c391 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -45,7 +45,7 @@ struct pipe_control {
45 45
46static inline int ring_space(struct intel_ring_buffer *ring) 46static inline int ring_space(struct intel_ring_buffer *ring)
47{ 47{
48 int space = (ring->head & HEAD_ADDR) - (ring->tail + 8); 48 int space = (ring->head & HEAD_ADDR) - (ring->tail + I915_RING_FREE_SPACE);
49 if (space < 0) 49 if (space < 0)
50 space += ring->size; 50 space += ring->size;
51 return space; 51 return space;
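
For reference, the calculation above reduces to the following standalone sketch. I915_RING_FREE_SPACE is assumed to be 64 here (the real definition lives in intel_ringbuffer.h), and the ring size matches the 32-page default set in intel_init_ring_buffer(); this is illustrative user-space code, not the driver's.

#include <stdio.h>

#define RING_SIZE        (32 * 4096)  /* 32 pages, as in intel_init_ring_buffer() */
#define RING_FREE_SPACE  64           /* assumed value of I915_RING_FREE_SPACE */

/* Free bytes between the CPU write pointer (tail) and the GPU read
 * pointer (head), keeping a small gap so tail never catches head. */
static int ring_space(int head, int tail)
{
        int space = head - (tail + RING_FREE_SPACE);

        if (space < 0)
                space += RING_SIZE;
        return space;
}

int main(void)
{
        /* tail just ahead of head: only 64 bytes outstanding, ring almost empty */
        printf("%d\n", ring_space(128, 192));   /* 130944 */
        /* tail has wrapped while head still sits at 4096: only 3776 bytes free */
        printf("%d\n", ring_space(4096, 256));  /* 3776 */
        return 0;
}
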
@@ -245,7 +245,7 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
245 /* 245 /*
246 * TLB invalidate requires a post-sync write. 246 * TLB invalidate requires a post-sync write.
247 */ 247 */
248 flags |= PIPE_CONTROL_QW_WRITE; 248 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
249 } 249 }
250 250
251 ret = intel_ring_begin(ring, 4); 251 ret = intel_ring_begin(ring, 4);
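
The comment above states the hardware constraint without showing the full flag assembly, so here is a minimal compilable sketch of how the invalidate path might build its flag word. The bit positions are placeholders and the TLB-invalidate bit is named here only as an assumption implied by the comment; the real PIPE_CONTROL_* definitions come from i915_reg.h.

#include <stdio.h>

#define PIPE_CONTROL_TLB_INVALIDATE   (1u << 18)   /* placeholder value */
#define PIPE_CONTROL_QW_WRITE         (1u << 14)   /* placeholder value */
#define PIPE_CONTROL_CS_STALL         (1u << 20)   /* placeholder value */

static unsigned int gen6_flush_flags(int invalidate_domains)
{
        unsigned int flags = 0;

        if (invalidate_domains) {
                flags |= PIPE_CONTROL_TLB_INVALIDATE;
                /* TLB invalidate requires a post-sync write; the patch
                 * additionally ORs in a CS stall at the same point. */
                flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
        }
        return flags;
}

int main(void)
{
        printf("flags = 0x%08x\n", gen6_flush_flags(1));
        return 0;
}
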
@@ -547,23 +547,24 @@ static int init_render_ring(struct intel_ring_buffer *ring)
547 547
548static void render_ring_cleanup(struct intel_ring_buffer *ring) 548static void render_ring_cleanup(struct intel_ring_buffer *ring)
549{ 549{
550 struct drm_device *dev = ring->dev;
551
550 if (!ring->private) 552 if (!ring->private)
551 return; 553 return;
552 554
555 if (HAS_BROKEN_CS_TLB(dev))
556 drm_gem_object_unreference(to_gem_object(ring->private));
557
553 cleanup_pipe_control(ring); 558 cleanup_pipe_control(ring);
554} 559}
555 560
556static void 561static void
557update_mboxes(struct intel_ring_buffer *ring, 562update_mboxes(struct intel_ring_buffer *ring,
558 u32 seqno, 563 u32 mmio_offset)
559 u32 mmio_offset)
560{ 564{
561 intel_ring_emit(ring, MI_SEMAPHORE_MBOX | 565 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
562 MI_SEMAPHORE_GLOBAL_GTT |
563 MI_SEMAPHORE_REGISTER |
564 MI_SEMAPHORE_UPDATE);
565 intel_ring_emit(ring, seqno);
566 intel_ring_emit(ring, mmio_offset); 566 intel_ring_emit(ring, mmio_offset);
567 intel_ring_emit(ring, ring->outstanding_lazy_request);
567} 568}
568 569
569/** 570/**
@@ -576,8 +577,7 @@ update_mboxes(struct intel_ring_buffer *ring,
576 * This acts like a signal in the canonical semaphore. 577 * This acts like a signal in the canonical semaphore.
577 */ 578 */
578static int 579static int
579gen6_add_request(struct intel_ring_buffer *ring, 580gen6_add_request(struct intel_ring_buffer *ring)
580 u32 *seqno)
581{ 581{
582 u32 mbox1_reg; 582 u32 mbox1_reg;
583 u32 mbox2_reg; 583 u32 mbox2_reg;
@@ -590,13 +590,11 @@ gen6_add_request(struct intel_ring_buffer *ring,
590 mbox1_reg = ring->signal_mbox[0]; 590 mbox1_reg = ring->signal_mbox[0];
591 mbox2_reg = ring->signal_mbox[1]; 591 mbox2_reg = ring->signal_mbox[1];
592 592
593 *seqno = i915_gem_next_request_seqno(ring); 593 update_mboxes(ring, mbox1_reg);
594 594 update_mboxes(ring, mbox2_reg);
595 update_mboxes(ring, *seqno, mbox1_reg);
596 update_mboxes(ring, *seqno, mbox2_reg);
597 intel_ring_emit(ring, MI_STORE_DWORD_INDEX); 595 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
598 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 596 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
599 intel_ring_emit(ring, *seqno); 597 intel_ring_emit(ring, ring->outstanding_lazy_request);
600 intel_ring_emit(ring, MI_USER_INTERRUPT); 598 intel_ring_emit(ring, MI_USER_INTERRUPT);
601 intel_ring_advance(ring); 599 intel_ring_advance(ring);
602 600
@@ -653,10 +651,8 @@ do { \
653} while (0) 651} while (0)
654 652
655static int 653static int
656pc_render_add_request(struct intel_ring_buffer *ring, 654pc_render_add_request(struct intel_ring_buffer *ring)
657 u32 *result)
658{ 655{
659 u32 seqno = i915_gem_next_request_seqno(ring);
660 struct pipe_control *pc = ring->private; 656 struct pipe_control *pc = ring->private;
661 u32 scratch_addr = pc->gtt_offset + 128; 657 u32 scratch_addr = pc->gtt_offset + 128;
662 int ret; 658 int ret;
@@ -677,7 +673,7 @@ pc_render_add_request(struct intel_ring_buffer *ring,
677 PIPE_CONTROL_WRITE_FLUSH | 673 PIPE_CONTROL_WRITE_FLUSH |
678 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); 674 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
679 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); 675 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
680 intel_ring_emit(ring, seqno); 676 intel_ring_emit(ring, ring->outstanding_lazy_request);
681 intel_ring_emit(ring, 0); 677 intel_ring_emit(ring, 0);
682 PIPE_CONTROL_FLUSH(ring, scratch_addr); 678 PIPE_CONTROL_FLUSH(ring, scratch_addr);
683 scratch_addr += 128; /* write to separate cachelines */ 679 scratch_addr += 128; /* write to separate cachelines */
@@ -696,11 +692,10 @@ pc_render_add_request(struct intel_ring_buffer *ring,
696 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | 692 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
697 PIPE_CONTROL_NOTIFY); 693 PIPE_CONTROL_NOTIFY);
698 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); 694 intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
699 intel_ring_emit(ring, seqno); 695 intel_ring_emit(ring, ring->outstanding_lazy_request);
700 intel_ring_emit(ring, 0); 696 intel_ring_emit(ring, 0);
701 intel_ring_advance(ring); 697 intel_ring_advance(ring);
702 698
703 *result = seqno;
704 return 0; 699 return 0;
705} 700}
706 701
@@ -888,25 +883,20 @@ bsd_ring_flush(struct intel_ring_buffer *ring,
888} 883}
889 884
890static int 885static int
891i9xx_add_request(struct intel_ring_buffer *ring, 886i9xx_add_request(struct intel_ring_buffer *ring)
892 u32 *result)
893{ 887{
894 u32 seqno;
895 int ret; 888 int ret;
896 889
897 ret = intel_ring_begin(ring, 4); 890 ret = intel_ring_begin(ring, 4);
898 if (ret) 891 if (ret)
899 return ret; 892 return ret;
900 893
901 seqno = i915_gem_next_request_seqno(ring);
902
903 intel_ring_emit(ring, MI_STORE_DWORD_INDEX); 894 intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
904 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); 895 intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
905 intel_ring_emit(ring, seqno); 896 intel_ring_emit(ring, ring->outstanding_lazy_request);
906 intel_ring_emit(ring, MI_USER_INTERRUPT); 897 intel_ring_emit(ring, MI_USER_INTERRUPT);
907 intel_ring_advance(ring); 898 intel_ring_advance(ring);
908 899
909 *result = seqno;
910 return 0; 900 return 0;
911} 901}
912 902
@@ -964,7 +954,9 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
964} 954}
965 955
966static int 956static int
967i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length) 957i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
958 u32 offset, u32 length,
959 unsigned flags)
968{ 960{
969 int ret; 961 int ret;
970 962
@@ -975,35 +967,71 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
975 intel_ring_emit(ring, 967 intel_ring_emit(ring,
976 MI_BATCH_BUFFER_START | 968 MI_BATCH_BUFFER_START |
977 MI_BATCH_GTT | 969 MI_BATCH_GTT |
978 MI_BATCH_NON_SECURE_I965); 970 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
979 intel_ring_emit(ring, offset); 971 intel_ring_emit(ring, offset);
980 intel_ring_advance(ring); 972 intel_ring_advance(ring);
981 973
982 return 0; 974 return 0;
983} 975}
984 976
 977/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
978#define I830_BATCH_LIMIT (256*1024)
985static int 979static int
986i830_dispatch_execbuffer(struct intel_ring_buffer *ring, 980i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
987 u32 offset, u32 len) 981 u32 offset, u32 len,
982 unsigned flags)
988{ 983{
989 int ret; 984 int ret;
990 985
991 ret = intel_ring_begin(ring, 4); 986 if (flags & I915_DISPATCH_PINNED) {
992 if (ret) 987 ret = intel_ring_begin(ring, 4);
993 return ret; 988 if (ret)
989 return ret;
994 990
995 intel_ring_emit(ring, MI_BATCH_BUFFER); 991 intel_ring_emit(ring, MI_BATCH_BUFFER);
996 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE); 992 intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
997 intel_ring_emit(ring, offset + len - 8); 993 intel_ring_emit(ring, offset + len - 8);
998 intel_ring_emit(ring, 0); 994 intel_ring_emit(ring, MI_NOOP);
999 intel_ring_advance(ring); 995 intel_ring_advance(ring);
996 } else {
997 struct drm_i915_gem_object *obj = ring->private;
998 u32 cs_offset = obj->gtt_offset;
999
1000 if (len > I830_BATCH_LIMIT)
1001 return -ENOSPC;
1002
1003 ret = intel_ring_begin(ring, 9+3);
1004 if (ret)
1005 return ret;
1006 /* Blit the batch (which now has all relocs applied) to the stable batch
1007 * scratch bo area (so that the CS never stumbles over its tlb
1008 * invalidation bug) ... */
1009 intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
1010 XY_SRC_COPY_BLT_WRITE_ALPHA |
1011 XY_SRC_COPY_BLT_WRITE_RGB);
1012 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
1013 intel_ring_emit(ring, 0);
1014 intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
1015 intel_ring_emit(ring, cs_offset);
1016 intel_ring_emit(ring, 0);
1017 intel_ring_emit(ring, 4096);
1018 intel_ring_emit(ring, offset);
1019 intel_ring_emit(ring, MI_FLUSH);
1020
1021 /* ... and execute it. */
1022 intel_ring_emit(ring, MI_BATCH_BUFFER);
1023 intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1024 intel_ring_emit(ring, cs_offset + len - 8);
1025 intel_ring_advance(ring);
1026 }
1000 1027
1001 return 0; 1028 return 0;
1002} 1029}
1003 1030
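
The unpinned path above blits the batch into the scratch bo in 4096-byte rows (height DIV_ROUND_UP(len, 4096), width field 1024 at 32bpp, pitch 4096) before executing it from there, so the CS never walks the original object and its TLB bug is avoided. A standalone sketch of that geometry, assuming those row dimensions:

#include <stdio.h>

#define I830_BATCH_LIMIT  (256 * 1024)   /* from the hunk above */
#define ROW_BYTES         4096           /* one blit row: 1024 dwords, 4096-byte pitch */

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

/* Number of rows the XY_SRC_COPY_BLT needs to cover a batch of 'len'
 * bytes, or -1 if it would not fit in the scratch bo (the driver path
 * returns -ENOSPC in that case). */
static int batch_blit_rows(unsigned int len)
{
        if (len > I830_BATCH_LIMIT)
                return -1;
        return DIV_ROUND_UP(len, ROW_BYTES);
}

int main(void)
{
        printf("%d\n", batch_blit_rows(4096));        /* 1 row  */
        printf("%d\n", batch_blit_rows(10 * 1024));   /* 3 rows */
        printf("%d\n", batch_blit_rows(300 * 1024));  /* -1: too big */
        return 0;
}
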
1004static int 1031static int
1005i915_dispatch_execbuffer(struct intel_ring_buffer *ring, 1032i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1006 u32 offset, u32 len) 1033 u32 offset, u32 len,
1034 unsigned flags)
1007{ 1035{
1008 int ret; 1036 int ret;
1009 1037
@@ -1012,7 +1040,7 @@ i915_dispatch_execbuffer(struct intel_ring_buffer *ring,
1012 return ret; 1040 return ret;
1013 1041
1014 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); 1042 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
1015 intel_ring_emit(ring, offset | MI_BATCH_NON_SECURE); 1043 intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1016 intel_ring_advance(ring); 1044 intel_ring_advance(ring);
1017 1045
1018 return 0; 1046 return 0;
@@ -1075,6 +1103,29 @@ err:
1075 return ret; 1103 return ret;
1076} 1104}
1077 1105
1106static int init_phys_hws_pga(struct intel_ring_buffer *ring)
1107{
1108 struct drm_i915_private *dev_priv = ring->dev->dev_private;
1109 u32 addr;
1110
1111 if (!dev_priv->status_page_dmah) {
1112 dev_priv->status_page_dmah =
1113 drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1114 if (!dev_priv->status_page_dmah)
1115 return -ENOMEM;
1116 }
1117
1118 addr = dev_priv->status_page_dmah->busaddr;
1119 if (INTEL_INFO(ring->dev)->gen >= 4)
1120 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
1121 I915_WRITE(HWS_PGA, addr);
1122
1123 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1124 memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1125
1126 return 0;
1127}
1128
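
init_phys_hws_pga() packs a page-aligned bus address into HWS_PGA: the low 32 bits go in as-is, and on gen4+ physical address bits 32-35 are folded into register bits 4-7 (which are otherwise zero for a page-aligned address, so the OR is safe). A standalone sketch of that packing, illustrative only:

#include <stdio.h>
#include <stdint.h>

static uint32_t hws_pga_addr(uint64_t busaddr, int gen)
{
        uint32_t addr = (uint32_t)busaddr;       /* low 32 bits */

        if (gen >= 4)
                addr |= (uint32_t)((busaddr >> 28) & 0xf0);  /* bits 32-35 -> 4-7 */
        return addr;
}

int main(void)
{
        /* a 36-bit, page-aligned example address */
        printf("0x%08x\n", hws_pga_addr(0x234567000ull, 4));  /* 0x34567020 */
        printf("0x%08x\n", hws_pga_addr(0x234567000ull, 3));  /* 0x34567000 */
        return 0;
}
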
1078static int intel_init_ring_buffer(struct drm_device *dev, 1129static int intel_init_ring_buffer(struct drm_device *dev,
1079 struct intel_ring_buffer *ring) 1130 struct intel_ring_buffer *ring)
1080{ 1131{
@@ -1086,6 +1137,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
1086 INIT_LIST_HEAD(&ring->active_list); 1137 INIT_LIST_HEAD(&ring->active_list);
1087 INIT_LIST_HEAD(&ring->request_list); 1138 INIT_LIST_HEAD(&ring->request_list);
1088 ring->size = 32 * PAGE_SIZE; 1139 ring->size = 32 * PAGE_SIZE;
1140 memset(ring->sync_seqno, 0, sizeof(ring->sync_seqno));
1089 1141
1090 init_waitqueue_head(&ring->irq_queue); 1142 init_waitqueue_head(&ring->irq_queue);
1091 1143
@@ -1093,6 +1145,11 @@ static int intel_init_ring_buffer(struct drm_device *dev,
1093 ret = init_status_page(ring); 1145 ret = init_status_page(ring);
1094 if (ret) 1146 if (ret)
1095 return ret; 1147 return ret;
1148 } else {
1149 BUG_ON(ring->id != RCS);
1150 ret = init_phys_hws_pga(ring);
1151 if (ret)
1152 return ret;
1096 } 1153 }
1097 1154
1098 obj = i915_gem_alloc_object(dev, ring->size); 1155 obj = i915_gem_alloc_object(dev, ring->size);
@@ -1157,7 +1214,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1157 1214
1158 /* Disable the ring buffer. The ring must be idle at this point */ 1215 /* Disable the ring buffer. The ring must be idle at this point */
1159 dev_priv = ring->dev->dev_private; 1216 dev_priv = ring->dev->dev_private;
1160 ret = intel_wait_ring_idle(ring); 1217 ret = intel_ring_idle(ring);
1161 if (ret) 1218 if (ret)
1162 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", 1219 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
1163 ring->name, ret); 1220 ring->name, ret);
@@ -1176,28 +1233,6 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
1176 cleanup_status_page(ring); 1233 cleanup_status_page(ring);
1177} 1234}
1178 1235
1179static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1180{
1181 uint32_t __iomem *virt;
1182 int rem = ring->size - ring->tail;
1183
1184 if (ring->space < rem) {
1185 int ret = intel_wait_ring_buffer(ring, rem);
1186 if (ret)
1187 return ret;
1188 }
1189
1190 virt = ring->virtual_start + ring->tail;
1191 rem /= 4;
1192 while (rem--)
1193 iowrite32(MI_NOOP, virt++);
1194
1195 ring->tail = 0;
1196 ring->space = ring_space(ring);
1197
1198 return 0;
1199}
1200
1201static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno) 1236static int intel_ring_wait_seqno(struct intel_ring_buffer *ring, u32 seqno)
1202{ 1237{
1203 int ret; 1238 int ret;
@@ -1231,7 +1266,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1231 if (request->tail == -1) 1266 if (request->tail == -1)
1232 continue; 1267 continue;
1233 1268
1234 space = request->tail - (ring->tail + 8); 1269 space = request->tail - (ring->tail + I915_RING_FREE_SPACE);
1235 if (space < 0) 1270 if (space < 0)
1236 space += ring->size; 1271 space += ring->size;
1237 if (space >= n) { 1272 if (space >= n) {
@@ -1266,7 +1301,7 @@ static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n)
1266 return 0; 1301 return 0;
1267} 1302}
1268 1303
1269int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) 1304static int ring_wait_for_space(struct intel_ring_buffer *ring, int n)
1270{ 1305{
1271 struct drm_device *dev = ring->dev; 1306 struct drm_device *dev = ring->dev;
1272 struct drm_i915_private *dev_priv = dev->dev_private; 1307 struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1309,6 +1344,60 @@ int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n)
1309 return -EBUSY; 1344 return -EBUSY;
1310} 1345}
1311 1346
1347static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring)
1348{
1349 uint32_t __iomem *virt;
1350 int rem = ring->size - ring->tail;
1351
1352 if (ring->space < rem) {
1353 int ret = ring_wait_for_space(ring, rem);
1354 if (ret)
1355 return ret;
1356 }
1357
1358 virt = ring->virtual_start + ring->tail;
1359 rem /= 4;
1360 while (rem--)
1361 iowrite32(MI_NOOP, virt++);
1362
1363 ring->tail = 0;
1364 ring->space = ring_space(ring);
1365
1366 return 0;
1367}
1368
1369int intel_ring_idle(struct intel_ring_buffer *ring)
1370{
1371 u32 seqno;
1372 int ret;
1373
1374 /* We need to add any requests required to flush the objects and ring */
1375 if (ring->outstanding_lazy_request) {
1376 ret = i915_add_request(ring, NULL, NULL);
1377 if (ret)
1378 return ret;
1379 }
1380
1381 /* Wait upon the last request to be completed */
1382 if (list_empty(&ring->request_list))
1383 return 0;
1384
1385 seqno = list_entry(ring->request_list.prev,
1386 struct drm_i915_gem_request,
1387 list)->seqno;
1388
1389 return i915_wait_seqno(ring, seqno);
1390}
1391
1392static int
1393intel_ring_alloc_seqno(struct intel_ring_buffer *ring)
1394{
1395 if (ring->outstanding_lazy_request)
1396 return 0;
1397
1398 return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_request);
1399}
1400
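
intel_ring_alloc_seqno() above implements the outstanding-lazy-request (olr) pattern: a seqno is reserved at most once, the first time the ring is touched, then reused by the add_request paths earlier in this patch and flushed by intel_ring_idle(). A minimal user-space sketch of that lifecycle, with hypothetical names standing in for the i915 helpers:

#include <stdio.h>

/* Hypothetical stand-ins for the i915 state touched by the hunk above. */
struct ring {
        unsigned int outstanding_lazy_request;   /* 0 means "none reserved" */
        unsigned int next_seqno;                 /* backing counter */
};

/* intel_ring_alloc_seqno(): reserve a seqno only if none is outstanding. */
static int ring_alloc_seqno(struct ring *ring)
{
        if (ring->outstanding_lazy_request)
                return 0;
        ring->outstanding_lazy_request = ++ring->next_seqno;
        return 0;
}

/* add_request-style consumer: emits the reserved seqno and retires it. */
static unsigned int ring_add_request(struct ring *ring)
{
        unsigned int seqno = ring->outstanding_lazy_request;

        ring->outstanding_lazy_request = 0;
        return seqno;
}

int main(void)
{
        struct ring ring = { 0, 0 };

        ring_alloc_seqno(&ring);          /* called from intel_ring_begin() */
        ring_alloc_seqno(&ring);          /* second call is a no-op */
        printf("seqno %u\n", ring_add_request(&ring));   /* 1 */

        ring_alloc_seqno(&ring);
        printf("seqno %u\n", ring_add_request(&ring));   /* 2 */
        return 0;
}
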
1312int intel_ring_begin(struct intel_ring_buffer *ring, 1401int intel_ring_begin(struct intel_ring_buffer *ring,
1313 int num_dwords) 1402 int num_dwords)
1314{ 1403{
@@ -1320,6 +1409,11 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
1320 if (ret) 1409 if (ret)
1321 return ret; 1410 return ret;
1322 1411
1412 /* Preallocate the olr before touching the ring */
1413 ret = intel_ring_alloc_seqno(ring);
1414 if (ret)
1415 return ret;
1416
1323 if (unlikely(ring->tail + n > ring->effective_size)) { 1417 if (unlikely(ring->tail + n > ring->effective_size)) {
1324 ret = intel_wrap_ring_buffer(ring); 1418 ret = intel_wrap_ring_buffer(ring);
1325 if (unlikely(ret)) 1419 if (unlikely(ret))
@@ -1327,7 +1421,7 @@ int intel_ring_begin(struct intel_ring_buffer *ring,
1327 } 1421 }
1328 1422
1329 if (unlikely(ring->space < n)) { 1423 if (unlikely(ring->space < n)) {
1330 ret = intel_wait_ring_buffer(ring, n); 1424 ret = ring_wait_for_space(ring, n);
1331 if (unlikely(ret)) 1425 if (unlikely(ret))
1332 return ret; 1426 return ret;
1333 } 1427 }
@@ -1391,10 +1485,17 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
1391 return ret; 1485 return ret;
1392 1486
1393 cmd = MI_FLUSH_DW; 1487 cmd = MI_FLUSH_DW;
1488 /*
1489 * Bspec vol 1c.5 - video engine command streamer:
1490 * "If ENABLED, all TLBs will be invalidated once the flush
1491 * operation is complete. This bit is only valid when the
1492 * Post-Sync Operation field is a value of 1h or 3h."
1493 */
1394 if (invalidate & I915_GEM_GPU_DOMAINS) 1494 if (invalidate & I915_GEM_GPU_DOMAINS)
1395 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; 1495 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1496 MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1396 intel_ring_emit(ring, cmd); 1497 intel_ring_emit(ring, cmd);
1397 intel_ring_emit(ring, 0); 1498 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1398 intel_ring_emit(ring, 0); 1499 intel_ring_emit(ring, 0);
1399 intel_ring_emit(ring, MI_NOOP); 1500 intel_ring_emit(ring, MI_NOOP);
1400 intel_ring_advance(ring); 1501 intel_ring_advance(ring);
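
Per the Bspec note above, the TLB-invalidate bit only takes effect when the flush carries a post-sync operation, which is why the patch adds MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW and points the address dword at the per-ring HWS scratch slot. A compilable sketch of assembling that four-dword packet; the MI_* encodings below are placeholders for the real definitions in the driver headers:

#include <stdio.h>

#define MI_FLUSH_DW               (0x26u << 23)  /* placeholder encodings */
#define MI_INVALIDATE_TLB         (1u << 18)
#define MI_INVALIDATE_BSD         (1u << 7)
#define MI_FLUSH_DW_STORE_INDEX   (1u << 21)
#define MI_FLUSH_DW_OP_STOREDW    (1u << 14)
#define MI_FLUSH_DW_USE_GTT       (1u << 2)
#define MI_NOOP                   0u
#define HWS_SCRATCH_ADDR          (0x30 * 4)     /* placeholder HWS slot offset */

/* Build the flush as the patched gen6_ring_flush() now does: when
 * invalidating, request a post-sync DWORD store into the HWS scratch
 * slot so the TLB invalidation actually takes effect. */
static void gen6_flush_dw(unsigned int dw[4], int invalidate)
{
        unsigned int cmd = MI_FLUSH_DW;

        if (invalidate)
                cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
                       MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

        dw[0] = cmd;
        dw[1] = HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;  /* post-sync address */
        dw[2] = 0;                                       /* post-sync value slot */
        dw[3] = MI_NOOP;
}

int main(void)
{
        unsigned int dw[4];
        int i;

        gen6_flush_dw(dw, 1);
        for (i = 0; i < 4; i++)
                printf("dw[%d] = 0x%08x\n", i, dw[i]);
        return 0;
}
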
@@ -1402,8 +1503,30 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring,
1402} 1503}
1403 1504
1404static int 1505static int
1506hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1507 u32 offset, u32 len,
1508 unsigned flags)
1509{
1510 int ret;
1511
1512 ret = intel_ring_begin(ring, 2);
1513 if (ret)
1514 return ret;
1515
1516 intel_ring_emit(ring,
1517 MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
1518 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW));
1519 /* bit0-7 is the length on GEN6+ */
1520 intel_ring_emit(ring, offset);
1521 intel_ring_advance(ring);
1522
1523 return 0;
1524}
1525
1526static int
1405gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, 1527gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1406 u32 offset, u32 len) 1528 u32 offset, u32 len,
1529 unsigned flags)
1407{ 1530{
1408 int ret; 1531 int ret;
1409 1532
@@ -1411,7 +1534,9 @@ gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
1411 if (ret) 1534 if (ret)
1412 return ret; 1535 return ret;
1413 1536
1414 intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965); 1537 intel_ring_emit(ring,
1538 MI_BATCH_BUFFER_START |
1539 (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1415 /* bit0-7 is the length on GEN6+ */ 1540 /* bit0-7 is the length on GEN6+ */
1416 intel_ring_emit(ring, offset); 1541 intel_ring_emit(ring, offset);
1417 intel_ring_advance(ring); 1542 intel_ring_advance(ring);
@@ -1432,10 +1557,17 @@ static int blt_ring_flush(struct intel_ring_buffer *ring,
1432 return ret; 1557 return ret;
1433 1558
1434 cmd = MI_FLUSH_DW; 1559 cmd = MI_FLUSH_DW;
1560 /*
1561 * Bspec vol 1c.3 - blitter engine command streamer:
1562 * "If ENABLED, all TLBs will be invalidated once the flush
1563 * operation is complete. This bit is only valid when the
1564 * Post-Sync Operation field is a value of 1h or 3h."
1565 */
1435 if (invalidate & I915_GEM_DOMAIN_RENDER) 1566 if (invalidate & I915_GEM_DOMAIN_RENDER)
1436 cmd |= MI_INVALIDATE_TLB; 1567 cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1568 MI_FLUSH_DW_OP_STOREDW;
1437 intel_ring_emit(ring, cmd); 1569 intel_ring_emit(ring, cmd);
1438 intel_ring_emit(ring, 0); 1570 intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
1439 intel_ring_emit(ring, 0); 1571 intel_ring_emit(ring, 0);
1440 intel_ring_emit(ring, MI_NOOP); 1572 intel_ring_emit(ring, MI_NOOP);
1441 intel_ring_advance(ring); 1573 intel_ring_advance(ring);
@@ -1490,7 +1622,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
1490 ring->irq_enable_mask = I915_USER_INTERRUPT; 1622 ring->irq_enable_mask = I915_USER_INTERRUPT;
1491 } 1623 }
1492 ring->write_tail = ring_write_tail; 1624 ring->write_tail = ring_write_tail;
1493 if (INTEL_INFO(dev)->gen >= 6) 1625 if (IS_HASWELL(dev))
1626 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
1627 else if (INTEL_INFO(dev)->gen >= 6)
1494 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; 1628 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
1495 else if (INTEL_INFO(dev)->gen >= 4) 1629 else if (INTEL_INFO(dev)->gen >= 4)
1496 ring->dispatch_execbuffer = i965_dispatch_execbuffer; 1630 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
@@ -1501,10 +1635,25 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
1501 ring->init = init_render_ring; 1635 ring->init = init_render_ring;
1502 ring->cleanup = render_ring_cleanup; 1636 ring->cleanup = render_ring_cleanup;
1503 1637
1638 /* Workaround batchbuffer to combat CS tlb bug. */
1639 if (HAS_BROKEN_CS_TLB(dev)) {
1640 struct drm_i915_gem_object *obj;
1641 int ret;
1504 1642
1505 if (!I915_NEED_GFX_HWS(dev)) { 1643 obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
1506 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr; 1644 if (obj == NULL) {
1507 memset(ring->status_page.page_addr, 0, PAGE_SIZE); 1645 DRM_ERROR("Failed to allocate batch bo\n");
1646 return -ENOMEM;
1647 }
1648
1649 ret = i915_gem_object_pin(obj, 0, true, false);
1650 if (ret != 0) {
1651 drm_gem_object_unreference(&obj->base);
 1652 DRM_ERROR("Failed to pin batch bo\n");
1653 return ret;
1654 }
1655
1656 ring->private = obj;
1508 } 1657 }
1509 1658
1510 return intel_init_ring_buffer(dev, ring); 1659 return intel_init_ring_buffer(dev, ring);
@@ -1514,6 +1663,7 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1514{ 1663{
1515 drm_i915_private_t *dev_priv = dev->dev_private; 1664 drm_i915_private_t *dev_priv = dev->dev_private;
1516 struct intel_ring_buffer *ring = &dev_priv->ring[RCS]; 1665 struct intel_ring_buffer *ring = &dev_priv->ring[RCS];
1666 int ret;
1517 1667
1518 ring->name = "render ring"; 1668 ring->name = "render ring";
1519 ring->id = RCS; 1669 ring->id = RCS;
@@ -1551,16 +1701,13 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1551 ring->init = init_render_ring; 1701 ring->init = init_render_ring;
1552 ring->cleanup = render_ring_cleanup; 1702 ring->cleanup = render_ring_cleanup;
1553 1703
1554 if (!I915_NEED_GFX_HWS(dev))
1555 ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1556
1557 ring->dev = dev; 1704 ring->dev = dev;
1558 INIT_LIST_HEAD(&ring->active_list); 1705 INIT_LIST_HEAD(&ring->active_list);
1559 INIT_LIST_HEAD(&ring->request_list); 1706 INIT_LIST_HEAD(&ring->request_list);
1560 1707
1561 ring->size = size; 1708 ring->size = size;
1562 ring->effective_size = ring->size; 1709 ring->effective_size = ring->size;
1563 if (IS_I830(ring->dev)) 1710 if (IS_I830(ring->dev) || IS_845G(ring->dev))
1564 ring->effective_size -= 128; 1711 ring->effective_size -= 128;
1565 1712
1566 ring->virtual_start = ioremap_wc(start, size); 1713 ring->virtual_start = ioremap_wc(start, size);
@@ -1570,6 +1717,12 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
1570 return -ENOMEM; 1717 return -ENOMEM;
1571 } 1718 }
1572 1719
1720 if (!I915_NEED_GFX_HWS(dev)) {
1721 ret = init_phys_hws_pga(ring);
1722 if (ret)
1723 return ret;
1724 }
1725
1573 return 0; 1726 return 0;
1574} 1727}
1575 1728
@@ -1618,7 +1771,6 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
1618 } 1771 }
1619 ring->init = init_ring_common; 1772 ring->init = init_ring_common;
1620 1773
1621
1622 return intel_init_ring_buffer(dev, ring); 1774 return intel_init_ring_buffer(dev, ring);
1623} 1775}
1624 1776