aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/radeon
diff options
context:
space:
mode:
author Marek Olšák <maraeo@gmail.com> 2012-01-27 12:17:59 -0500
committer Dave Airlie <airlied@redhat.com> 2012-02-13 07:09:11 -0500
commit dd220a00e8bd5ad7f98ecdc3eed699a7cfabdc27 (patch)
tree 8cdedce29665aae1f92ebcccefacda0598d08a1c /drivers/gpu/drm/radeon
parent 51a59ac8739b333eaa43a3102b6acaab5037bfa2 (diff)
drm/radeon/kms: add support for streamout v7
v2: agd5f: add strmout CS checking, copy_dw register checking
v3: agd5f: don't use cs_check_reg() for copy_dw checking as it will incorrectly patch the command stream for certain regs.
v4: agd5f: add warning if safe reg check fails for copy_dw
v5: agd5f: add stricter checking for 6xx/7xx
v6: agd5f: add range checking for copy_dw on eg+, add sx_surface_sync to safe reg list for 7xx.
v7: agd5f: add stricter checking for eg+

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r-- drivers/gpu/drm/radeon/evergreen_cs.c 189
-rw-r--r-- drivers/gpu/drm/radeon/evergreend.h 9
-rw-r--r-- drivers/gpu/drm/radeon/r600_cs.c 179
-rw-r--r-- drivers/gpu/drm/radeon/r600d.h 6
-rw-r--r-- drivers/gpu/drm/radeon/radeon_drv.c 2
-rw-r--r-- drivers/gpu/drm/radeon/reg_srcs/cayman 10
-rw-r--r-- drivers/gpu/drm/radeon/reg_srcs/evergreen 10
-rw-r--r-- drivers/gpu/drm/radeon/reg_srcs/r600 11
8 files changed, 407 insertions, 9 deletions
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 8e8cd85e5c00..4e2cadcdf144 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -60,6 +60,10 @@ struct evergreen_cs_track {
60 u32 cb_shader_mask; 60 u32 cb_shader_mask;
61 u32 vgt_strmout_config; 61 u32 vgt_strmout_config;
62 u32 vgt_strmout_buffer_config; 62 u32 vgt_strmout_buffer_config;
63 struct radeon_bo *vgt_strmout_bo[4];
64 u64 vgt_strmout_bo_mc[4];
65 u32 vgt_strmout_bo_offset[4];
66 u32 vgt_strmout_size[4];
63 u32 db_depth_control; 67 u32 db_depth_control;
64 u32 db_depth_view; 68 u32 db_depth_view;
65 u32 db_depth_size; 69 u32 db_depth_size;
@@ -159,16 +163,41 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
159 track->db_s_write_offset = 0xFFFFFFFF; 163 track->db_s_write_offset = 0xFFFFFFFF;
160 track->db_s_read_bo = NULL; 164 track->db_s_read_bo = NULL;
161 track->db_s_write_bo = NULL; 165 track->db_s_write_bo = NULL;
166
167 for (i = 0; i < 4; i++) {
168 track->vgt_strmout_size[i] = 0;
169 track->vgt_strmout_bo[i] = NULL;
170 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
171 track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
172 }
162} 173}
163 174
164static int evergreen_cs_track_check(struct radeon_cs_parser *p) 175static int evergreen_cs_track_check(struct radeon_cs_parser *p)
165{ 176{
166 struct evergreen_cs_track *track = p->track; 177 struct evergreen_cs_track *track = p->track;
178 int i, j;
167 179
168 /* we don't support stream out buffer yet */ 180 /* check streamout */
169 if (track->vgt_strmout_config || track->vgt_strmout_buffer_config) { 181 for (i = 0; i < 4; i++) {
170 dev_warn(p->dev, "this kernel doesn't support SMX output buffer\n"); 182 if (track->vgt_strmout_config & (1 << i)) {
171 return -EINVAL; 183 for (j = 0; j < 4; j++) {
184 if ((track->vgt_strmout_buffer_config >> (i * 4)) & (1 << j)) {
185 if (track->vgt_strmout_bo[j]) {
186 u64 offset = (u64)track->vgt_strmout_bo_offset[j] +
187 (u64)track->vgt_strmout_size[j];
188 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
189 DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
190 j, offset,
191 radeon_bo_size(track->vgt_strmout_bo[j]));
192 return -EINVAL;
193 }
194 } else {
195 dev_warn(p->dev, "No buffer for streamout %d\n", j);
196 return -EINVAL;
197 }
198 }
199 }
200 }
172 } 201 }
173 202
174 /* XXX fill in */ 203 /* XXX fill in */
@@ -597,6 +626,38 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
597 case VGT_STRMOUT_BUFFER_CONFIG: 626 case VGT_STRMOUT_BUFFER_CONFIG:
598 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx); 627 track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
599 break; 628 break;
629 case VGT_STRMOUT_BUFFER_BASE_0:
630 case VGT_STRMOUT_BUFFER_BASE_1:
631 case VGT_STRMOUT_BUFFER_BASE_2:
632 case VGT_STRMOUT_BUFFER_BASE_3:
633 r = evergreen_cs_packet_next_reloc(p, &reloc);
634 if (r) {
635 dev_warn(p->dev, "bad SET_CONTEXT_REG "
636 "0x%04X\n", reg);
637 return -EINVAL;
638 }
639 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
640 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
641 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
642 track->vgt_strmout_bo[tmp] = reloc->robj;
643 track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
644 break;
645 case VGT_STRMOUT_BUFFER_SIZE_0:
646 case VGT_STRMOUT_BUFFER_SIZE_1:
647 case VGT_STRMOUT_BUFFER_SIZE_2:
648 case VGT_STRMOUT_BUFFER_SIZE_3:
649 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
650 /* size in register is DWs, convert to bytes */
651 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
652 break;
653 case CP_COHER_BASE:
654 r = evergreen_cs_packet_next_reloc(p, &reloc);
655 if (r) {
656 dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
657 "0x%04X\n", reg);
658 return -EINVAL;
659 }
660 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
600 case CB_TARGET_MASK: 661 case CB_TARGET_MASK:
601 track->cb_target_mask = radeon_get_ib_value(p, idx); 662 track->cb_target_mask = radeon_get_ib_value(p, idx);
602 break; 663 break;
@@ -1014,6 +1075,32 @@ static int evergreen_check_texture_resource(struct radeon_cs_parser *p, u32 idx
1014 return 0; 1075 return 0;
1015} 1076}
1016 1077
1078static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1079{
1080 u32 last_reg, m, i;
1081
1082 if (p->rdev->family >= CHIP_CAYMAN)
1083 last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
1084 else
1085 last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);
1086
1087 i = (reg >> 7);
1088 if (i >= last_reg) {
1089 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1090 return false;
1091 }
1092 m = 1 << ((reg >> 2) & 31);
1093 if (p->rdev->family >= CHIP_CAYMAN) {
1094 if (!(cayman_reg_safe_bm[i] & m))
1095 return true;
1096 } else {
1097 if (!(evergreen_reg_safe_bm[i] & m))
1098 return true;
1099 }
1100 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1101 return false;
1102}
1103
1017static int evergreen_packet3_check(struct radeon_cs_parser *p, 1104static int evergreen_packet3_check(struct radeon_cs_parser *p,
1018 struct radeon_cs_packet *pkt) 1105 struct radeon_cs_packet *pkt)
1019{ 1106{
@@ -1451,6 +1538,100 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
1451 return -EINVAL; 1538 return -EINVAL;
1452 } 1539 }
1453 break; 1540 break;
1541 case PACKET3_STRMOUT_BUFFER_UPDATE:
1542 if (pkt->count != 4) {
1543 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
1544 return -EINVAL;
1545 }
1546 /* Updating memory at DST_ADDRESS. */
1547 if (idx_value & 0x1) {
1548 u64 offset;
1549 r = evergreen_cs_packet_next_reloc(p, &reloc);
1550 if (r) {
1551 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
1552 return -EINVAL;
1553 }
1554 offset = radeon_get_ib_value(p, idx+1);
1555 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
1556 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1557 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
1558 offset + 4, radeon_bo_size(reloc->robj));
1559 return -EINVAL;
1560 }
1561 ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1562 ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1563 }
1564 /* Reading data from SRC_ADDRESS. */
1565 if (((idx_value >> 1) & 0x3) == 2) {
1566 u64 offset;
1567 r = evergreen_cs_packet_next_reloc(p, &reloc);
1568 if (r) {
1569 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
1570 return -EINVAL;
1571 }
1572 offset = radeon_get_ib_value(p, idx+3);
1573 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
1574 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1575 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
1576 offset + 4, radeon_bo_size(reloc->robj));
1577 return -EINVAL;
1578 }
1579 ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1580 ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1581 }
1582 break;
1583 case PACKET3_COPY_DW:
1584 if (pkt->count != 4) {
1585 DRM_ERROR("bad COPY_DW (invalid count)\n");
1586 return -EINVAL;
1587 }
1588 if (idx_value & 0x1) {
1589 u64 offset;
1590 /* SRC is memory. */
1591 r = evergreen_cs_packet_next_reloc(p, &reloc);
1592 if (r) {
1593 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
1594 return -EINVAL;
1595 }
1596 offset = radeon_get_ib_value(p, idx+1);
1597 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
1598 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1599 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
1600 offset + 4, radeon_bo_size(reloc->robj));
1601 return -EINVAL;
1602 }
1603 ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1604 ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1605 } else {
1606 /* SRC is a reg. */
1607 reg = radeon_get_ib_value(p, idx+1) << 2;
1608 if (!evergreen_is_safe_reg(p, reg, idx+1))
1609 return -EINVAL;
1610 }
1611 if (idx_value & 0x2) {
1612 u64 offset;
1613 /* DST is memory. */
1614 r = evergreen_cs_packet_next_reloc(p, &reloc);
1615 if (r) {
1616 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
1617 return -EINVAL;
1618 }
1619 offset = radeon_get_ib_value(p, idx+3);
1620 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
1621 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1622 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
1623 offset + 4, radeon_bo_size(reloc->robj));
1624 return -EINVAL;
1625 }
1626 ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1627 ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1628 } else {
1629 /* DST is a reg. */
1630 reg = radeon_get_ib_value(p, idx+3) << 2;
1631 if (!evergreen_is_safe_reg(p, reg, idx+3))
1632 return -EINVAL;
1633 }
1634 break;
1454 case PACKET3_NOP: 1635 case PACKET3_NOP:
1455 break; 1636 break;
1456 default: 1637 default:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 74713d42df29..50d20da5b5f0 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -77,6 +77,7 @@
77 77
78#define CONFIG_MEMSIZE 0x5428 78#define CONFIG_MEMSIZE 0x5428
79 79
80#define CP_COHER_BASE 0x85F8
80#define CP_ME_CNTL 0x86D8 81#define CP_ME_CNTL 0x86D8
81#define CP_ME_HALT (1 << 28) 82#define CP_ME_HALT (1 << 28)
82#define CP_PFP_HALT (1 << 26) 83#define CP_PFP_HALT (1 << 26)
@@ -948,6 +949,14 @@
948#define SQ_PGM_START_HS 0x288b8 949#define SQ_PGM_START_HS 0x288b8
949#define SQ_PGM_START_LS 0x288d0 950#define SQ_PGM_START_LS 0x288d0
950 951
952#define VGT_STRMOUT_BUFFER_BASE_0 0x28AD8
953#define VGT_STRMOUT_BUFFER_BASE_1 0x28AE8
954#define VGT_STRMOUT_BUFFER_BASE_2 0x28AF8
955#define VGT_STRMOUT_BUFFER_BASE_3 0x28B08
956#define VGT_STRMOUT_BUFFER_SIZE_0 0x28AD0
957#define VGT_STRMOUT_BUFFER_SIZE_1 0x28AE0
958#define VGT_STRMOUT_BUFFER_SIZE_2 0x28AF0
959#define VGT_STRMOUT_BUFFER_SIZE_3 0x28B00
951#define VGT_STRMOUT_CONFIG 0x28b94 960#define VGT_STRMOUT_CONFIG 0x28b94
952#define VGT_STRMOUT_BUFFER_CONFIG 0x28b98 961#define VGT_STRMOUT_BUFFER_CONFIG 0x28b98
953 962
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 38ce5d0427e3..9f17571eea62 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -61,6 +61,10 @@ struct r600_cs_track {
61 u32 cb_color_size[8]; 61 u32 cb_color_size[8];
62 u32 vgt_strmout_en; 62 u32 vgt_strmout_en;
63 u32 vgt_strmout_buffer_en; 63 u32 vgt_strmout_buffer_en;
64 struct radeon_bo *vgt_strmout_bo[4];
65 u64 vgt_strmout_bo_mc[4];
66 u32 vgt_strmout_bo_offset[4];
67 u32 vgt_strmout_size[4];
64 u32 db_depth_control; 68 u32 db_depth_control;
65 u32 db_depth_info; 69 u32 db_depth_info;
66 u32 db_depth_size_idx; 70 u32 db_depth_size_idx;
@@ -310,6 +314,13 @@ static void r600_cs_track_init(struct r600_cs_track *track)
310 track->db_depth_size = 0xFFFFFFFF; 314 track->db_depth_size = 0xFFFFFFFF;
311 track->db_depth_size_idx = 0; 315 track->db_depth_size_idx = 0;
312 track->db_depth_control = 0xFFFFFFFF; 316 track->db_depth_control = 0xFFFFFFFF;
317
318 for (i = 0; i < 4; i++) {
319 track->vgt_strmout_size[i] = 0;
320 track->vgt_strmout_bo[i] = NULL;
321 track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
322 track->vgt_strmout_bo_mc[i] = 0xFFFFFFFF;
323 }
313} 324}
314 325
315static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) 326static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
@@ -430,11 +441,28 @@ static int r600_cs_track_check(struct radeon_cs_parser *p)
430 /* on legacy kernel we don't perform advanced check */ 441 /* on legacy kernel we don't perform advanced check */
431 if (p->rdev == NULL) 442 if (p->rdev == NULL)
432 return 0; 443 return 0;
433 /* we don't support out buffer yet */ 444
434 if (track->vgt_strmout_en || track->vgt_strmout_buffer_en) { 445 /* check streamout */
435 dev_warn(p->dev, "this kernel doesn't support SMX output buffer\n"); 446 if (track->vgt_strmout_en) {
436 return -EINVAL; 447 for (i = 0; i < 4; i++) {
448 if (track->vgt_strmout_buffer_en & (1 << i)) {
449 if (track->vgt_strmout_bo[i]) {
450 u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
451 (u64)track->vgt_strmout_size[i];
452 if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
453 DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
454 i, offset,
455 radeon_bo_size(track->vgt_strmout_bo[i]));
456 return -EINVAL;
457 }
458 } else {
459 dev_warn(p->dev, "No buffer for streamout %d\n", i);
460 return -EINVAL;
461 }
462 }
463 }
437 } 464 }
465
438 /* check that we have a cb for each enabled target, we don't check 466 /* check that we have a cb for each enabled target, we don't check
439 * shader_mask because it seems mesa isn't always setting it :( 467 * shader_mask because it seems mesa isn't always setting it :(
440 */ 468 */
@@ -975,6 +1003,39 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
975 case R_028B20_VGT_STRMOUT_BUFFER_EN: 1003 case R_028B20_VGT_STRMOUT_BUFFER_EN:
976 track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx); 1004 track->vgt_strmout_buffer_en = radeon_get_ib_value(p, idx);
977 break; 1005 break;
1006 case VGT_STRMOUT_BUFFER_BASE_0:
1007 case VGT_STRMOUT_BUFFER_BASE_1:
1008 case VGT_STRMOUT_BUFFER_BASE_2:
1009 case VGT_STRMOUT_BUFFER_BASE_3:
1010 r = r600_cs_packet_next_reloc(p, &reloc);
1011 if (r) {
1012 dev_warn(p->dev, "bad SET_CONTEXT_REG "
1013 "0x%04X\n", reg);
1014 return -EINVAL;
1015 }
1016 tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
1017 track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
1018 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1019 track->vgt_strmout_bo[tmp] = reloc->robj;
1020 track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
1021 break;
1022 case VGT_STRMOUT_BUFFER_SIZE_0:
1023 case VGT_STRMOUT_BUFFER_SIZE_1:
1024 case VGT_STRMOUT_BUFFER_SIZE_2:
1025 case VGT_STRMOUT_BUFFER_SIZE_3:
1026 tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
1027 /* size in register is DWs, convert to bytes */
1028 track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
1029 break;
1030 case CP_COHER_BASE:
1031 r = r600_cs_packet_next_reloc(p, &reloc);
1032 if (r) {
1033 dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
1034 "0x%04X\n", reg);
1035 return -EINVAL;
1036 }
1037 ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
1038 break;
978 case R_028238_CB_TARGET_MASK: 1039 case R_028238_CB_TARGET_MASK:
979 track->cb_target_mask = radeon_get_ib_value(p, idx); 1040 track->cb_target_mask = radeon_get_ib_value(p, idx);
980 break; 1041 break;
@@ -1397,6 +1458,22 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx,
1397 return 0; 1458 return 0;
1398} 1459}
1399 1460
1461static bool r600_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1462{
1463 u32 m, i;
1464
1465 i = (reg >> 7);
1466 if (i >= ARRAY_SIZE(r600_reg_safe_bm)) {
1467 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1468 return false;
1469 }
1470 m = 1 << ((reg >> 2) & 31);
1471 if (!(r600_reg_safe_bm[i] & m))
1472 return true;
1473 dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
1474 return false;
1475}
1476
1400static int r600_packet3_check(struct radeon_cs_parser *p, 1477static int r600_packet3_check(struct radeon_cs_parser *p,
1401 struct radeon_cs_packet *pkt) 1478 struct radeon_cs_packet *pkt)
1402{ 1479{
@@ -1742,6 +1819,100 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
1742 return -EINVAL; 1819 return -EINVAL;
1743 } 1820 }
1744 break; 1821 break;
1822 case PACKET3_STRMOUT_BUFFER_UPDATE:
1823 if (pkt->count != 4) {
1824 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
1825 return -EINVAL;
1826 }
1827 /* Updating memory at DST_ADDRESS. */
1828 if (idx_value & 0x1) {
1829 u64 offset;
1830 r = r600_cs_packet_next_reloc(p, &reloc);
1831 if (r) {
1832 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
1833 return -EINVAL;
1834 }
1835 offset = radeon_get_ib_value(p, idx+1);
1836 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
1837 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1838 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
1839 offset + 4, radeon_bo_size(reloc->robj));
1840 return -EINVAL;
1841 }
1842 ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1843 ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1844 }
1845 /* Reading data from SRC_ADDRESS. */
1846 if (((idx_value >> 1) & 0x3) == 2) {
1847 u64 offset;
1848 r = r600_cs_packet_next_reloc(p, &reloc);
1849 if (r) {
1850 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
1851 return -EINVAL;
1852 }
1853 offset = radeon_get_ib_value(p, idx+3);
1854 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
1855 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1856 DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
1857 offset + 4, radeon_bo_size(reloc->robj));
1858 return -EINVAL;
1859 }
1860 ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1861 ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1862 }
1863 break;
1864 case PACKET3_COPY_DW:
1865 if (pkt->count != 4) {
1866 DRM_ERROR("bad COPY_DW (invalid count)\n");
1867 return -EINVAL;
1868 }
1869 if (idx_value & 0x1) {
1870 u64 offset;
1871 /* SRC is memory. */
1872 r = r600_cs_packet_next_reloc(p, &reloc);
1873 if (r) {
1874 DRM_ERROR("bad COPY_DW (missing src reloc)\n");
1875 return -EINVAL;
1876 }
1877 offset = radeon_get_ib_value(p, idx+1);
1878 offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
1879 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1880 DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
1881 offset + 4, radeon_bo_size(reloc->robj));
1882 return -EINVAL;
1883 }
1884 ib[idx+1] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1885 ib[idx+2] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1886 } else {
1887 /* SRC is a reg. */
1888 reg = radeon_get_ib_value(p, idx+1) << 2;
1889 if (!r600_is_safe_reg(p, reg, idx+1))
1890 return -EINVAL;
1891 }
1892 if (idx_value & 0x2) {
1893 u64 offset;
1894 /* DST is memory. */
1895 r = r600_cs_packet_next_reloc(p, &reloc);
1896 if (r) {
1897 DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
1898 return -EINVAL;
1899 }
1900 offset = radeon_get_ib_value(p, idx+3);
1901 offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
1902 if ((offset + 4) > radeon_bo_size(reloc->robj)) {
1903 DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
1904 offset + 4, radeon_bo_size(reloc->robj));
1905 return -EINVAL;
1906 }
1907 ib[idx+3] += (u32)(reloc->lobj.gpu_offset & 0xffffffff);
1908 ib[idx+4] += upper_32_bits(reloc->lobj.gpu_offset) & 0xff;
1909 } else {
1910 /* DST is a reg. */
1911 reg = radeon_get_ib_value(p, idx+3) << 2;
1912 if (!r600_is_safe_reg(p, reg, idx+3))
1913 return -EINVAL;
1914 }
1915 break;
1745 case PACKET3_NOP: 1916 case PACKET3_NOP:
1746 break; 1917 break;
1747 default: 1918 default:
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 3ee1fd7ef394..aa9d7c352da6 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -493,6 +493,11 @@
493#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC 493#define VGT_STRMOUT_BUFFER_OFFSET_1 0x28AEC
494#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC 494#define VGT_STRMOUT_BUFFER_OFFSET_2 0x28AFC
495#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C 495#define VGT_STRMOUT_BUFFER_OFFSET_3 0x28B0C
496#define VGT_STRMOUT_BUFFER_SIZE_0 0x28AD0
497#define VGT_STRMOUT_BUFFER_SIZE_1 0x28AE0
498#define VGT_STRMOUT_BUFFER_SIZE_2 0x28AF0
499#define VGT_STRMOUT_BUFFER_SIZE_3 0x28B00
500
496#define VGT_STRMOUT_EN 0x28AB0 501#define VGT_STRMOUT_EN 0x28AB0
497#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58 502#define VGT_VERTEX_REUSE_BLOCK_CNTL 0x28C58
498#define VTX_REUSE_DEPTH_MASK 0x000000FF 503#define VTX_REUSE_DEPTH_MASK 0x000000FF
@@ -834,6 +839,7 @@
834# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) 839# define PACKET3_SEM_SEL_SIGNAL (0x6 << 29)
835# define PACKET3_SEM_SEL_WAIT (0x7 << 29) 840# define PACKET3_SEM_SEL_WAIT (0x7 << 29)
836#define PACKET3_MPEG_INDEX 0x3A 841#define PACKET3_MPEG_INDEX 0x3A
842#define PACKET3_COPY_DW 0x3B
837#define PACKET3_WAIT_REG_MEM 0x3C 843#define PACKET3_WAIT_REG_MEM 0x3C
838#define PACKET3_MEM_WRITE 0x3D 844#define PACKET3_MEM_WRITE 0x3D
839#define PACKET3_INDIRECT_BUFFER 0x32 845#define PACKET3_INDIRECT_BUFFER 0x32
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8032f1fedb11..713d066e9d41 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -54,7 +54,7 @@
54 * 2.10.0 - fusion 2D tiling 54 * 2.10.0 - fusion 2D tiling
55 * 2.11.0 - backend map, initial compute support for the CS checker 55 * 2.11.0 - backend map, initial compute support for the CS checker
56 * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS 56 * 2.12.0 - RADEON_CS_KEEP_TILING_FLAGS
57 * 2.13.0 - virtual memory support 57 * 2.13.0 - virtual memory support, streamout
58 */ 58 */
59#define KMS_DRIVER_MAJOR 2 59#define KMS_DRIVER_MAJOR 2
60#define KMS_DRIVER_MINOR 13 60#define KMS_DRIVER_MINOR 13
diff --git a/drivers/gpu/drm/radeon/reg_srcs/cayman b/drivers/gpu/drm/radeon/reg_srcs/cayman
index 2316977eb924..0eac19ec595f 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/cayman
+++ b/drivers/gpu/drm/radeon/reg_srcs/cayman
@@ -1,5 +1,8 @@
1cayman 0x9400 1cayman 0x9400
20x0000802C GRBM_GFX_INDEX 20x0000802C GRBM_GFX_INDEX
30x000084FC CP_STRMOUT_CNTL
40x000085F0 CP_COHER_CNTL
50x000085F4 CP_COHER_SIZE
30x000088B0 VGT_VTX_VECT_EJECT_REG 60x000088B0 VGT_VTX_VECT_EJECT_REG
40x000088C4 VGT_CACHE_INVALIDATION 70x000088C4 VGT_CACHE_INVALIDATION
50x000088D4 VGT_GS_VERTEX_REUSE 80x000088D4 VGT_GS_VERTEX_REUSE
@@ -512,6 +515,13 @@ cayman 0x9400
5120x00028AC0 DB_SRESULTS_COMPARE_STATE0 5150x00028AC0 DB_SRESULTS_COMPARE_STATE0
5130x00028AC4 DB_SRESULTS_COMPARE_STATE1 5160x00028AC4 DB_SRESULTS_COMPARE_STATE1
5140x00028AC8 DB_PRELOAD_CONTROL 5170x00028AC8 DB_PRELOAD_CONTROL
5180x00028AD4 VGT_STRMOUT_VTX_STRIDE_0
5190x00028AE4 VGT_STRMOUT_VTX_STRIDE_1
5200x00028AF4 VGT_STRMOUT_VTX_STRIDE_2
5210x00028B04 VGT_STRMOUT_VTX_STRIDE_3
5220x00028B28 VGT_STRMOUT_DRAW_OPAQUE_OFFSET
5230x00028B2C VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
5240x00028B30 VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
5150x00028B38 VGT_GS_MAX_VERT_OUT 5250x00028B38 VGT_GS_MAX_VERT_OUT
5160x00028B54 VGT_SHADER_STAGES_EN 5260x00028B54 VGT_SHADER_STAGES_EN
5170x00028B58 VGT_LS_HS_CONFIG 5270x00028B58 VGT_LS_HS_CONFIG
diff --git a/drivers/gpu/drm/radeon/reg_srcs/evergreen b/drivers/gpu/drm/radeon/reg_srcs/evergreen
index 161737a28c23..4e3f208eef77 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/evergreen
+++ b/drivers/gpu/drm/radeon/reg_srcs/evergreen
@@ -4,6 +4,9 @@ evergreen 0x9400
40x00008044 WAIT_UNTIL_POLL_CNTL 40x00008044 WAIT_UNTIL_POLL_CNTL
50x00008048 WAIT_UNTIL_POLL_MASK 50x00008048 WAIT_UNTIL_POLL_MASK
60x0000804c WAIT_UNTIL_POLL_REFDATA 60x0000804c WAIT_UNTIL_POLL_REFDATA
70x000084FC CP_STRMOUT_CNTL
80x000085F0 CP_COHER_CNTL
90x000085F4 CP_COHER_SIZE
70x000088B0 VGT_VTX_VECT_EJECT_REG 100x000088B0 VGT_VTX_VECT_EJECT_REG
80x000088C4 VGT_CACHE_INVALIDATION 110x000088C4 VGT_CACHE_INVALIDATION
90x000088D4 VGT_GS_VERTEX_REUSE 120x000088D4 VGT_GS_VERTEX_REUSE
@@ -522,6 +525,13 @@ evergreen 0x9400
5220x00028AC0 DB_SRESULTS_COMPARE_STATE0 5250x00028AC0 DB_SRESULTS_COMPARE_STATE0
5230x00028AC4 DB_SRESULTS_COMPARE_STATE1 5260x00028AC4 DB_SRESULTS_COMPARE_STATE1
5240x00028AC8 DB_PRELOAD_CONTROL 5270x00028AC8 DB_PRELOAD_CONTROL
5280x00028AD4 VGT_STRMOUT_VTX_STRIDE_0
5290x00028AE4 VGT_STRMOUT_VTX_STRIDE_1
5300x00028AF4 VGT_STRMOUT_VTX_STRIDE_2
5310x00028B04 VGT_STRMOUT_VTX_STRIDE_3
5320x00028B28 VGT_STRMOUT_DRAW_OPAQUE_OFFSET
5330x00028B2C VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
5340x00028B30 VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
5250x00028B38 VGT_GS_MAX_VERT_OUT 5350x00028B38 VGT_GS_MAX_VERT_OUT
5260x00028B54 VGT_SHADER_STAGES_EN 5360x00028B54 VGT_SHADER_STAGES_EN
5270x00028B58 VGT_LS_HS_CONFIG 5370x00028B58 VGT_LS_HS_CONFIG
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index 0380c5c15f80..a1fc242df5da 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -3,6 +3,9 @@ r600 0x9400
30x00028230 R7xx_PA_SC_EDGERULE 30x00028230 R7xx_PA_SC_EDGERULE
40x000286C8 R7xx_SPI_THREAD_GROUPING 40x000286C8 R7xx_SPI_THREAD_GROUPING
50x00008D8C R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ 50x00008D8C R7xx_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
60x00008490 CP_STRMOUT_CNTL
70x000085F0 CP_COHER_CNTL
80x000085F4 CP_COHER_SIZE
60x000088C4 VGT_CACHE_INVALIDATION 90x000088C4 VGT_CACHE_INVALIDATION
70x00028A50 VGT_ENHANCE 100x00028A50 VGT_ENHANCE
80x000088CC VGT_ES_PER_GS 110x000088CC VGT_ES_PER_GS
@@ -38,6 +41,13 @@ r600 0x9400
380x00028AB4 VGT_REUSE_OFF 410x00028AB4 VGT_REUSE_OFF
390x00028AB8 VGT_VTX_CNT_EN 420x00028AB8 VGT_VTX_CNT_EN
400x000088B0 VGT_VTX_VECT_EJECT_REG 430x000088B0 VGT_VTX_VECT_EJECT_REG
440x00028AD4 VGT_STRMOUT_VTX_STRIDE_0
450x00028AE4 VGT_STRMOUT_VTX_STRIDE_1
460x00028AF4 VGT_STRMOUT_VTX_STRIDE_2
470x00028B04 VGT_STRMOUT_VTX_STRIDE_3
480x00028B28 VGT_STRMOUT_DRAW_OPAQUE_OFFSET
490x00028B2C VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
500x00028B30 VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
410x00028810 PA_CL_CLIP_CNTL 510x00028810 PA_CL_CLIP_CNTL
420x00008A14 PA_CL_ENHANCE 520x00008A14 PA_CL_ENHANCE
430x00028C14 PA_CL_GB_HORZ_CLIP_ADJ 530x00028C14 PA_CL_GB_HORZ_CLIP_ADJ
@@ -429,6 +439,7 @@ r600 0x9400
4290x00028438 SX_ALPHA_REF 4390x00028438 SX_ALPHA_REF
4300x00028410 SX_ALPHA_TEST_CONTROL 4400x00028410 SX_ALPHA_TEST_CONTROL
4310x00028350 SX_MISC 4410x00028350 SX_MISC
4420x00028354 SX_SURFACE_SYNC
4320x00009014 SX_MEMORY_EXPORT_SIZE 4430x00009014 SX_MEMORY_EXPORT_SIZE
4330x00009604 TC_INVALIDATE 4440x00009604 TC_INVALIDATE
4340x00009400 TD_FILTER4 4450x00009400 TD_FILTER4