path: root/drivers/gpu/drm/radeon
author    Linus Torvalds <torvalds@linux-foundation.org>  2012-12-17 11:26:17 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-12-17 11:26:17 -0500
commit    3c2e81ef344a90bb0a39d84af6878b4aeff568a2 (patch)
tree      bd8c8b23466174899d2fe4d35af6e1e838edb068 /drivers/gpu/drm/radeon
parent    221392c3ad0432e39fd74a349364f66cb0ed78f6 (diff)
parent    55bde6b1442fed8af67b92d21acce67db454c9f9 (diff)
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull DRM updates from Dave Airlie:
 "This is the one and only next pull for 3.8, we had a regression we
  found last week, so I was waiting for that to resolve itself, and I
  ended up with some Intel fixes on top as well.

  Highlights:
   - new driver: nvidia tegra 20/30/hdmi support
   - radeon: add support for previously unused DMA engines, more HDMI
     regs, eviction speed ups and fixes
   - i915: HSW support enable, agp removal on GEN6, seqno wrapping
   - exynos: IPP subsystem support (image post proc), HDMI
   - nouveau: display class reworking, nv20->40 z compression
   - ttm: start of locking fixes, rcu usage for lookups
   - core: documentation updates, docbook integration, monotonic clock
     usage, move from connector to object properties"

* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (590 commits)
  drm/exynos: add gsc ipp driver
  drm/exynos: add rotator ipp driver
  drm/exynos: add fimc ipp driver
  drm/exynos: add iommu support for ipp
  drm/exynos: add ipp subsystem
  drm/exynos: support device tree for fimd
  radeon: fix regression with eviction since evict caching changes
  drm/radeon: add more pedantic checks in the CP DMA checker
  drm/radeon: bump version for CS ioctl support for async DMA
  drm/radeon: enable the async DMA rings in the CS ioctl
  drm/radeon: add VM CS parser support for async DMA on cayman/TN/SI
  drm/radeon/kms: add evergreen/cayman CS parser for async DMA (v2)
  drm/radeon/kms: add 6xx/7xx CS parser for async DMA (v2)
  drm/radeon: fix htile buffer size computation for command stream checker
  drm/radeon: fix fence locking in the pageflip callback
  drm/radeon: make indirect register access concurrency-safe
  drm/radeon: add W|RREG32_IDX for MM_INDEX|DATA based mmio accesss
  drm/exynos: support extended screen coordinate of fimd
  drm/exynos: fix x, y coordinates for right bottom pixel
  drm/exynos: fix fb offset calculation for plane
  ...
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r--  drivers/gpu/drm/radeon/atombios_crtc.c      |    2
-rw-r--r--  drivers/gpu/drm/radeon/atombios_dp.c        |  149
-rw-r--r--  drivers/gpu/drm/radeon/atombios_encoders.c  |    2
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c          |  218
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_cs.c       |  739
-rw-r--r--  drivers/gpu/drm/radeon/evergreend.h         |  131
-rw-r--r--  drivers/gpu/drm/radeon/ni.c                 |  357
-rw-r--r--  drivers/gpu/drm/radeon/nid.h                |   86
-rw-r--r--  drivers/gpu/drm/radeon/r100.c               |   23
-rw-r--r--  drivers/gpu/drm/radeon/r600.c               |  480
-rw-r--r--  drivers/gpu/drm/radeon/r600_cp.c            |    7
-rw-r--r--  drivers/gpu/drm/radeon/r600_cs.c            |  357
-rw-r--r--  drivers/gpu/drm/radeon/r600_reg.h           |    9
-rw-r--r--  drivers/gpu/drm/radeon/r600d.h              |   86
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h             |   38
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c        |  198
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h        |   34
-rw-r--r--  drivers/gpu/drm/radeon/radeon_combios.c     |    6
-rw-r--r--  drivers/gpu/drm/radeon/radeon_connectors.c  |   62
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cp.c          |   14
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cs.c          |   13
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cursor.c      |   17
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c      |    1
-rw-r--r--  drivers/gpu/drm/radeon/radeon_display.c     |    4
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c         |   18
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.h         |    1
-rw-r--r--  drivers/gpu/drm/radeon/radeon_fence.c       |    2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c        |    1
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c         |   16
-rw-r--r--  drivers/gpu/drm/radeon/radeon_mode.h        |    2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_object.c      |   52
-rw-r--r--  drivers/gpu/drm/radeon/radeon_object.h      |    2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_test.c        |   37
-rw-r--r--  drivers/gpu/drm/radeon/radeon_ttm.c         |   40
-rw-r--r--  drivers/gpu/drm/radeon/rv515.c              |  122
-rw-r--r--  drivers/gpu/drm/radeon/rv770.c              |   31
-rw-r--r--  drivers/gpu/drm/radeon/rv770d.h             |   71
-rw-r--r--  drivers/gpu/drm/radeon/si.c                 |  355
-rw-r--r--  drivers/gpu/drm/radeon/sid.h                |  119
39 files changed, 3438 insertions(+), 464 deletions(-)
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 24d932f53203..9175615bbd8a 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -561,6 +561,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
561 /* use frac fb div on APUs */ 561 /* use frac fb div on APUs */
562 if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev)) 562 if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev))
563 radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; 563 radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
564 if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
565 radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
564 } else { 566 } else {
565 radeon_crtc->pll_flags |= RADEON_PLL_LEGACY; 567 radeon_crtc->pll_flags |= RADEON_PLL_LEGACY;
566 568
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index d5699fe4f1e8..064023bed480 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -34,8 +34,7 @@
34 34
35/* move these to drm_dp_helper.c/h */ 35/* move these to drm_dp_helper.c/h */
36#define DP_LINK_CONFIGURATION_SIZE 9 36#define DP_LINK_CONFIGURATION_SIZE 9
37#define DP_LINK_STATUS_SIZE 6 37#define DP_DPCD_SIZE DP_RECEIVER_CAP_SIZE
38#define DP_DPCD_SIZE 8
39 38
40static char *voltage_names[] = { 39static char *voltage_names[] = {
41 "0.4V", "0.6V", "0.8V", "1.2V" 40 "0.4V", "0.6V", "0.8V", "1.2V"
@@ -290,78 +289,6 @@ int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
290 289
291/***** general DP utility functions *****/ 290/***** general DP utility functions *****/
292 291
293static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r)
294{
295 return link_status[r - DP_LANE0_1_STATUS];
296}
297
298static u8 dp_get_lane_status(u8 link_status[DP_LINK_STATUS_SIZE],
299 int lane)
300{
301 int i = DP_LANE0_1_STATUS + (lane >> 1);
302 int s = (lane & 1) * 4;
303 u8 l = dp_link_status(link_status, i);
304 return (l >> s) & 0xf;
305}
306
307static bool dp_clock_recovery_ok(u8 link_status[DP_LINK_STATUS_SIZE],
308 int lane_count)
309{
310 int lane;
311 u8 lane_status;
312
313 for (lane = 0; lane < lane_count; lane++) {
314 lane_status = dp_get_lane_status(link_status, lane);
315 if ((lane_status & DP_LANE_CR_DONE) == 0)
316 return false;
317 }
318 return true;
319}
320
321static bool dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
322 int lane_count)
323{
324 u8 lane_align;
325 u8 lane_status;
326 int lane;
327
328 lane_align = dp_link_status(link_status,
329 DP_LANE_ALIGN_STATUS_UPDATED);
330 if ((lane_align & DP_INTERLANE_ALIGN_DONE) == 0)
331 return false;
332 for (lane = 0; lane < lane_count; lane++) {
333 lane_status = dp_get_lane_status(link_status, lane);
334 if ((lane_status & DP_CHANNEL_EQ_BITS) != DP_CHANNEL_EQ_BITS)
335 return false;
336 }
337 return true;
338}
339
340static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
341 int lane)
342
343{
344 int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
345 int s = ((lane & 1) ?
346 DP_ADJUST_VOLTAGE_SWING_LANE1_SHIFT :
347 DP_ADJUST_VOLTAGE_SWING_LANE0_SHIFT);
348 u8 l = dp_link_status(link_status, i);
349
350 return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
351}
352
353static u8 dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
354 int lane)
355{
356 int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
357 int s = ((lane & 1) ?
358 DP_ADJUST_PRE_EMPHASIS_LANE1_SHIFT :
359 DP_ADJUST_PRE_EMPHASIS_LANE0_SHIFT);
360 u8 l = dp_link_status(link_status, i);
361
362 return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
363}
364
365#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_1200 292#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_1200
366#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPHASIS_9_5 293#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPHASIS_9_5
367 294
@@ -374,8 +301,8 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
374 int lane; 301 int lane;
375 302
376 for (lane = 0; lane < lane_count; lane++) { 303 for (lane = 0; lane < lane_count; lane++) {
377 u8 this_v = dp_get_adjust_request_voltage(link_status, lane); 304 u8 this_v = drm_dp_get_adjust_request_voltage(link_status, lane);
378 u8 this_p = dp_get_adjust_request_pre_emphasis(link_status, lane); 305 u8 this_p = drm_dp_get_adjust_request_pre_emphasis(link_status, lane);
379 306
380 DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n", 307 DRM_DEBUG_KMS("requested signal parameters: lane %d voltage %s pre_emph %s\n",
381 lane, 308 lane,
@@ -420,37 +347,6 @@ static int dp_get_max_dp_pix_clock(int link_rate,
420 return (link_rate * lane_num * 8) / bpp; 347 return (link_rate * lane_num * 8) / bpp;
421} 348}
422 349
423static int dp_get_max_link_rate(u8 dpcd[DP_DPCD_SIZE])
424{
425 switch (dpcd[DP_MAX_LINK_RATE]) {
426 case DP_LINK_BW_1_62:
427 default:
428 return 162000;
429 case DP_LINK_BW_2_7:
430 return 270000;
431 case DP_LINK_BW_5_4:
432 return 540000;
433 }
434}
435
436static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE])
437{
438 return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
439}
440
441static u8 dp_get_dp_link_rate_coded(int link_rate)
442{
443 switch (link_rate) {
444 case 162000:
445 default:
446 return DP_LINK_BW_1_62;
447 case 270000:
448 return DP_LINK_BW_2_7;
449 case 540000:
450 return DP_LINK_BW_5_4;
451 }
452}
453
454/***** radeon specific DP functions *****/ 350/***** radeon specific DP functions *****/
455 351
456/* First get the min lane# when low rate is used according to pixel clock 352/* First get the min lane# when low rate is used according to pixel clock
@@ -462,8 +358,8 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
462 int pix_clock) 358 int pix_clock)
463{ 359{
464 int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); 360 int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
465 int max_link_rate = dp_get_max_link_rate(dpcd); 361 int max_link_rate = drm_dp_max_link_rate(dpcd);
466 int max_lane_num = dp_get_max_lane_number(dpcd); 362 int max_lane_num = drm_dp_max_lane_count(dpcd);
467 int lane_num; 363 int lane_num;
468 int max_dp_pix_clock; 364 int max_dp_pix_clock;
469 365
@@ -500,7 +396,7 @@ static int radeon_dp_get_dp_link_clock(struct drm_connector *connector,
500 return 540000; 396 return 540000;
501 } 397 }
502 398
503 return dp_get_max_link_rate(dpcd); 399 return drm_dp_max_link_rate(dpcd);
504} 400}
505 401
506static u8 radeon_dp_encoder_service(struct radeon_device *rdev, 402static u8 radeon_dp_encoder_service(struct radeon_device *rdev,
@@ -551,14 +447,15 @@ static void radeon_dp_probe_oui(struct radeon_connector *radeon_connector)
551bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector) 447bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
552{ 448{
553 struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv; 449 struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
554 u8 msg[25]; 450 u8 msg[DP_DPCD_SIZE];
555 int ret, i; 451 int ret, i;
556 452
557 ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, 8, 0); 453 ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg,
454 DP_DPCD_SIZE, 0);
558 if (ret > 0) { 455 if (ret > 0) {
559 memcpy(dig_connector->dpcd, msg, 8); 456 memcpy(dig_connector->dpcd, msg, DP_DPCD_SIZE);
560 DRM_DEBUG_KMS("DPCD: "); 457 DRM_DEBUG_KMS("DPCD: ");
561 for (i = 0; i < 8; i++) 458 for (i = 0; i < DP_DPCD_SIZE; i++)
562 DRM_DEBUG_KMS("%02x ", msg[i]); 459 DRM_DEBUG_KMS("%02x ", msg[i]);
563 DRM_DEBUG_KMS("\n"); 460 DRM_DEBUG_KMS("\n");
564 461
@@ -664,7 +561,7 @@ bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector)
664 561
665 if (!radeon_dp_get_link_status(radeon_connector, link_status)) 562 if (!radeon_dp_get_link_status(radeon_connector, link_status))
666 return false; 563 return false;
667 if (dp_channel_eq_ok(link_status, dig->dp_lane_count)) 564 if (drm_dp_channel_eq_ok(link_status, dig->dp_lane_count))
668 return false; 565 return false;
669 return true; 566 return true;
670} 567}
@@ -677,9 +574,8 @@ struct radeon_dp_link_train_info {
677 int enc_id; 574 int enc_id;
678 int dp_clock; 575 int dp_clock;
679 int dp_lane_count; 576 int dp_lane_count;
680 int rd_interval;
681 bool tp3_supported; 577 bool tp3_supported;
682 u8 dpcd[8]; 578 u8 dpcd[DP_RECEIVER_CAP_SIZE];
683 u8 train_set[4]; 579 u8 train_set[4];
684 u8 link_status[DP_LINK_STATUS_SIZE]; 580 u8 link_status[DP_LINK_STATUS_SIZE];
685 u8 tries; 581 u8 tries;
@@ -765,7 +661,7 @@ static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info)
765 radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp); 661 radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp);
766 662
767 /* set the link rate on the sink */ 663 /* set the link rate on the sink */
768 tmp = dp_get_dp_link_rate_coded(dp_info->dp_clock); 664 tmp = drm_dp_link_rate_to_bw_code(dp_info->dp_clock);
769 radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp); 665 radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp);
770 666
771 /* start training on the source */ 667 /* start training on the source */
@@ -821,17 +717,14 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info)
821 dp_info->tries = 0; 717 dp_info->tries = 0;
822 voltage = 0xff; 718 voltage = 0xff;
823 while (1) { 719 while (1) {
824 if (dp_info->rd_interval == 0) 720 drm_dp_link_train_clock_recovery_delay(dp_info->dpcd);
825 udelay(100);
826 else
827 mdelay(dp_info->rd_interval * 4);
828 721
829 if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { 722 if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
830 DRM_ERROR("displayport link status failed\n"); 723 DRM_ERROR("displayport link status failed\n");
831 break; 724 break;
832 } 725 }
833 726
834 if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { 727 if (drm_dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
835 clock_recovery = true; 728 clock_recovery = true;
836 break; 729 break;
837 } 730 }
@@ -886,17 +779,14 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info)
886 dp_info->tries = 0; 779 dp_info->tries = 0;
887 channel_eq = false; 780 channel_eq = false;
888 while (1) { 781 while (1) {
889 if (dp_info->rd_interval == 0) 782 drm_dp_link_train_channel_eq_delay(dp_info->dpcd);
890 udelay(400);
891 else
892 mdelay(dp_info->rd_interval * 4);
893 783
894 if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { 784 if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
895 DRM_ERROR("displayport link status failed\n"); 785 DRM_ERROR("displayport link status failed\n");
896 break; 786 break;
897 } 787 }
898 788
899 if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) { 789 if (drm_dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
900 channel_eq = true; 790 channel_eq = true;
901 break; 791 break;
902 } 792 }
@@ -974,14 +864,13 @@ void radeon_dp_link_train(struct drm_encoder *encoder,
974 else 864 else
975 dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A; 865 dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A;
976 866
977 dp_info.rd_interval = radeon_read_dpcd_reg(radeon_connector, DP_TRAINING_AUX_RD_INTERVAL);
978 tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT); 867 tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT);
979 if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED)) 868 if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED))
980 dp_info.tp3_supported = true; 869 dp_info.tp3_supported = true;
981 else 870 else
982 dp_info.tp3_supported = false; 871 dp_info.tp3_supported = false;
983 872
984 memcpy(dp_info.dpcd, dig_connector->dpcd, 8); 873 memcpy(dp_info.dpcd, dig_connector->dpcd, DP_RECEIVER_CAP_SIZE);
985 dp_info.rdev = rdev; 874 dp_info.rdev = rdev;
986 dp_info.encoder = encoder; 875 dp_info.encoder = encoder;
987 dp_info.connector = connector; 876 dp_info.connector = connector;
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 010bae19554a..4552d4aff317 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -340,7 +340,7 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
340 ((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) || 340 ((radeon_encoder->active_device & (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
341 (radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) { 341 (radeon_encoder_get_dp_bridge_encoder_id(encoder) != ENCODER_OBJECT_ID_NONE))) {
342 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); 342 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
343 radeon_dp_set_link_config(connector, mode); 343 radeon_dp_set_link_config(connector, adjusted_mode);
344 } 344 }
345 345
346 return true; 346 return true;
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 5d1d21a6dcdd..f95d7fc1f5e0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1821,7 +1821,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
1821 case CHIP_SUMO: 1821 case CHIP_SUMO:
1822 rdev->config.evergreen.num_ses = 1; 1822 rdev->config.evergreen.num_ses = 1;
1823 rdev->config.evergreen.max_pipes = 4; 1823 rdev->config.evergreen.max_pipes = 4;
1824 rdev->config.evergreen.max_tile_pipes = 2; 1824 rdev->config.evergreen.max_tile_pipes = 4;
1825 if (rdev->pdev->device == 0x9648) 1825 if (rdev->pdev->device == 0x9648)
1826 rdev->config.evergreen.max_simds = 3; 1826 rdev->config.evergreen.max_simds = 3;
1827 else if ((rdev->pdev->device == 0x9647) || 1827 else if ((rdev->pdev->device == 0x9647) ||
@@ -1844,7 +1844,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
1844 rdev->config.evergreen.sc_prim_fifo_size = 0x40; 1844 rdev->config.evergreen.sc_prim_fifo_size = 0x40;
1845 rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30; 1845 rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
1846 rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130; 1846 rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
1847 gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN; 1847 gb_addr_config = SUMO_GB_ADDR_CONFIG_GOLDEN;
1848 break; 1848 break;
1849 case CHIP_SUMO2: 1849 case CHIP_SUMO2:
1850 rdev->config.evergreen.num_ses = 1; 1850 rdev->config.evergreen.num_ses = 1;
@@ -1866,7 +1866,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
1866 rdev->config.evergreen.sc_prim_fifo_size = 0x40; 1866 rdev->config.evergreen.sc_prim_fifo_size = 0x40;
1867 rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30; 1867 rdev->config.evergreen.sc_hiz_tile_fifo_size = 0x30;
1868 rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130; 1868 rdev->config.evergreen.sc_earlyz_tile_fifo_size = 0x130;
1869 gb_addr_config = REDWOOD_GB_ADDR_CONFIG_GOLDEN; 1869 gb_addr_config = SUMO2_GB_ADDR_CONFIG_GOLDEN;
1870 break; 1870 break;
1871 case CHIP_BARTS: 1871 case CHIP_BARTS:
1872 rdev->config.evergreen.num_ses = 2; 1872 rdev->config.evergreen.num_ses = 2;
@@ -1914,7 +1914,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
1914 break; 1914 break;
1915 case CHIP_CAICOS: 1915 case CHIP_CAICOS:
1916 rdev->config.evergreen.num_ses = 1; 1916 rdev->config.evergreen.num_ses = 1;
1917 rdev->config.evergreen.max_pipes = 4; 1917 rdev->config.evergreen.max_pipes = 2;
1918 rdev->config.evergreen.max_tile_pipes = 2; 1918 rdev->config.evergreen.max_tile_pipes = 2;
1919 rdev->config.evergreen.max_simds = 2; 1919 rdev->config.evergreen.max_simds = 2;
1920 rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses; 1920 rdev->config.evergreen.max_backends = 1 * rdev->config.evergreen.num_ses;
@@ -2034,6 +2034,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
2034 WREG32(GB_ADDR_CONFIG, gb_addr_config); 2034 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2035 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 2035 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
2036 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 2036 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2037 WREG32(DMA_TILING_CONFIG, gb_addr_config);
2037 2038
2038 tmp = gb_addr_config & NUM_PIPES_MASK; 2039 tmp = gb_addr_config & NUM_PIPES_MASK;
2039 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends, 2040 tmp = r6xx_remap_render_backend(rdev, tmp, rdev->config.evergreen.max_backends,
@@ -2403,8 +2404,12 @@ void evergreen_disable_interrupt_state(struct radeon_device *rdev)
2403 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 2404 CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2404 cayman_cp_int_cntl_setup(rdev, 1, 0); 2405 cayman_cp_int_cntl_setup(rdev, 1, 0);
2405 cayman_cp_int_cntl_setup(rdev, 2, 0); 2406 cayman_cp_int_cntl_setup(rdev, 2, 0);
2407 tmp = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
2408 WREG32(CAYMAN_DMA1_CNTL, tmp);
2406 } else 2409 } else
2407 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 2410 WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
2411 tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
2412 WREG32(DMA_CNTL, tmp);
2408 WREG32(GRBM_INT_CNTL, 0); 2413 WREG32(GRBM_INT_CNTL, 0);
2409 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 2414 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
2410 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 2415 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -2457,6 +2462,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
2457 u32 grbm_int_cntl = 0; 2462 u32 grbm_int_cntl = 0;
2458 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; 2463 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
2459 u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0; 2464 u32 afmt1 = 0, afmt2 = 0, afmt3 = 0, afmt4 = 0, afmt5 = 0, afmt6 = 0;
2465 u32 dma_cntl, dma_cntl1 = 0;
2460 2466
2461 if (!rdev->irq.installed) { 2467 if (!rdev->irq.installed) {
2462 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 2468 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -2484,6 +2490,8 @@ int evergreen_irq_set(struct radeon_device *rdev)
2484 afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; 2490 afmt5 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
2485 afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK; 2491 afmt6 = RREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET) & ~AFMT_AZ_FORMAT_WTRIG_MASK;
2486 2492
2493 dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
2494
2487 if (rdev->family >= CHIP_CAYMAN) { 2495 if (rdev->family >= CHIP_CAYMAN) {
2488 /* enable CP interrupts on all rings */ 2496 /* enable CP interrupts on all rings */
2489 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 2497 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
@@ -2506,6 +2514,19 @@ int evergreen_irq_set(struct radeon_device *rdev)
2506 } 2514 }
2507 } 2515 }
2508 2516
2517 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
2518 DRM_DEBUG("r600_irq_set: sw int dma\n");
2519 dma_cntl |= TRAP_ENABLE;
2520 }
2521
2522 if (rdev->family >= CHIP_CAYMAN) {
2523 dma_cntl1 = RREG32(CAYMAN_DMA1_CNTL) & ~TRAP_ENABLE;
2524 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
2525 DRM_DEBUG("r600_irq_set: sw int dma1\n");
2526 dma_cntl1 |= TRAP_ENABLE;
2527 }
2528 }
2529
2509 if (rdev->irq.crtc_vblank_int[0] || 2530 if (rdev->irq.crtc_vblank_int[0] ||
2510 atomic_read(&rdev->irq.pflip[0])) { 2531 atomic_read(&rdev->irq.pflip[0])) {
2511 DRM_DEBUG("evergreen_irq_set: vblank 0\n"); 2532 DRM_DEBUG("evergreen_irq_set: vblank 0\n");
@@ -2591,6 +2612,12 @@ int evergreen_irq_set(struct radeon_device *rdev)
2591 cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2); 2612 cayman_cp_int_cntl_setup(rdev, 2, cp_int_cntl2);
2592 } else 2613 } else
2593 WREG32(CP_INT_CNTL, cp_int_cntl); 2614 WREG32(CP_INT_CNTL, cp_int_cntl);
2615
2616 WREG32(DMA_CNTL, dma_cntl);
2617
2618 if (rdev->family >= CHIP_CAYMAN)
2619 WREG32(CAYMAN_DMA1_CNTL, dma_cntl1);
2620
2594 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 2621 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
2595 2622
2596 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 2623 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3093,6 +3120,16 @@ restart_ih:
3093 break; 3120 break;
3094 } 3121 }
3095 break; 3122 break;
3123 case 146:
3124 case 147:
3125 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3126 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3127 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3128 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3129 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3130 /* reset addr and status */
3131 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3132 break;
3096 case 176: /* CP_INT in ring buffer */ 3133 case 176: /* CP_INT in ring buffer */
3097 case 177: /* CP_INT in IB1 */ 3134 case 177: /* CP_INT in IB1 */
3098 case 178: /* CP_INT in IB2 */ 3135 case 178: /* CP_INT in IB2 */
@@ -3116,9 +3153,19 @@ restart_ih:
3116 } else 3153 } else
3117 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 3154 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3118 break; 3155 break;
3156 case 224: /* DMA trap event */
3157 DRM_DEBUG("IH: DMA trap\n");
3158 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3159 break;
3119 case 233: /* GUI IDLE */ 3160 case 233: /* GUI IDLE */
3120 DRM_DEBUG("IH: GUI idle\n"); 3161 DRM_DEBUG("IH: GUI idle\n");
3121 break; 3162 break;
3163 case 244: /* DMA trap event */
3164 if (rdev->family >= CHIP_CAYMAN) {
3165 DRM_DEBUG("IH: DMA1 trap\n");
3166 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3167 }
3168 break;
3122 default: 3169 default:
3123 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 3170 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3124 break; 3171 break;
@@ -3144,6 +3191,143 @@ restart_ih:
3144 return IRQ_HANDLED; 3191 return IRQ_HANDLED;
3145} 3192}
3146 3193
3194/**
3195 * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
3196 *
3197 * @rdev: radeon_device pointer
3198 * @fence: radeon fence object
3199 *
3200 * Add a DMA fence packet to the ring to write
3201 * the fence seq number and DMA trap packet to generate
3202 * an interrupt if needed (evergreen-SI).
3203 */
3204void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
3205 struct radeon_fence *fence)
3206{
3207 struct radeon_ring *ring = &rdev->ring[fence->ring];
3208 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3209 /* write the fence */
3210 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
3211 radeon_ring_write(ring, addr & 0xfffffffc);
3212 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
3213 radeon_ring_write(ring, fence->seq);
3214 /* generate an interrupt */
3215 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
3216 /* flush HDP */
3217 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
3218 radeon_ring_write(ring, (0xf << 16) | HDP_MEM_COHERENCY_FLUSH_CNTL);
3219 radeon_ring_write(ring, 1);
3220}
3221
3222/**
3223 * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
3224 *
3225 * @rdev: radeon_device pointer
3226 * @ib: IB object to schedule
3227 *
3228 * Schedule an IB in the DMA ring (evergreen).
3229 */
3230void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
3231 struct radeon_ib *ib)
3232{
3233 struct radeon_ring *ring = &rdev->ring[ib->ring];
3234
3235 if (rdev->wb.enabled) {
3236 u32 next_rptr = ring->wptr + 4;
3237 while ((next_rptr & 7) != 5)
3238 next_rptr++;
3239 next_rptr += 3;
3240 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
3241 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3242 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
3243 radeon_ring_write(ring, next_rptr);
3244 }
3245
3246 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
3247 * Pad as necessary with NOPs.
3248 */
3249 while ((ring->wptr & 7) != 5)
3250 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3251 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
3252 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
3253 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
3254
3255}
3256
3257/**
3258 * evergreen_copy_dma - copy pages using the DMA engine
3259 *
3260 * @rdev: radeon_device pointer
3261 * @src_offset: src GPU address
3262 * @dst_offset: dst GPU address
3263 * @num_gpu_pages: number of GPU pages to xfer
3264 * @fence: radeon fence object
3265 *
3266 * Copy GPU paging using the DMA engine (evergreen-cayman).
3267 * Used by the radeon ttm implementation to move pages if
3268 * registered as the asic copy callback.
3269 */
3270int evergreen_copy_dma(struct radeon_device *rdev,
3271 uint64_t src_offset, uint64_t dst_offset,
3272 unsigned num_gpu_pages,
3273 struct radeon_fence **fence)
3274{
3275 struct radeon_semaphore *sem = NULL;
3276 int ring_index = rdev->asic->copy.dma_ring_index;
3277 struct radeon_ring *ring = &rdev->ring[ring_index];
3278 u32 size_in_dw, cur_size_in_dw;
3279 int i, num_loops;
3280 int r = 0;
3281
3282 r = radeon_semaphore_create(rdev, &sem);
3283 if (r) {
3284 DRM_ERROR("radeon: moving bo (%d).\n", r);
3285 return r;
3286 }
3287
3288 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
3289 num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
3290 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
3291 if (r) {
3292 DRM_ERROR("radeon: moving bo (%d).\n", r);
3293 radeon_semaphore_free(rdev, &sem, NULL);
3294 return r;
3295 }
3296
3297 if (radeon_fence_need_sync(*fence, ring->idx)) {
3298 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3299 ring->idx);
3300 radeon_fence_note_sync(*fence, ring->idx);
3301 } else {
3302 radeon_semaphore_free(rdev, &sem, NULL);
3303 }
3304
3305 for (i = 0; i < num_loops; i++) {
3306 cur_size_in_dw = size_in_dw;
3307 if (cur_size_in_dw > 0xFFFFF)
3308 cur_size_in_dw = 0xFFFFF;
3309 size_in_dw -= cur_size_in_dw;
3310 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
3311 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3312 radeon_ring_write(ring, src_offset & 0xfffffffc);
3313 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
3314 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
3315 src_offset += cur_size_in_dw * 4;
3316 dst_offset += cur_size_in_dw * 4;
3317 }
3318
3319 r = radeon_fence_emit(rdev, fence, ring->idx);
3320 if (r) {
3321 radeon_ring_unlock_undo(rdev, ring);
3322 return r;
3323 }
3324
3325 radeon_ring_unlock_commit(rdev, ring);
3326 radeon_semaphore_free(rdev, &sem, *fence);
3327
3328 return r;
3329}
3330
3147static int evergreen_startup(struct radeon_device *rdev) 3331static int evergreen_startup(struct radeon_device *rdev)
3148{ 3332{
3149 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 3333 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -3207,6 +3391,12 @@ static int evergreen_startup(struct radeon_device *rdev)
3207 return r; 3391 return r;
3208 } 3392 }
3209 3393
3394 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
3395 if (r) {
3396 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
3397 return r;
3398 }
3399
3210 /* Enable IRQ */ 3400 /* Enable IRQ */
3211 r = r600_irq_init(rdev); 3401 r = r600_irq_init(rdev);
3212 if (r) { 3402 if (r) {
@@ -3221,12 +3411,23 @@ static int evergreen_startup(struct radeon_device *rdev)
3221 0, 0xfffff, RADEON_CP_PACKET2); 3411 0, 0xfffff, RADEON_CP_PACKET2);
3222 if (r) 3412 if (r)
3223 return r; 3413 return r;
3414
3415 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3416 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
3417 DMA_RB_RPTR, DMA_RB_WPTR,
3418 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
3419 if (r)
3420 return r;
3421
3224 r = evergreen_cp_load_microcode(rdev); 3422 r = evergreen_cp_load_microcode(rdev);
3225 if (r) 3423 if (r)
3226 return r; 3424 return r;
3227 r = evergreen_cp_resume(rdev); 3425 r = evergreen_cp_resume(rdev);
3228 if (r) 3426 if (r)
3229 return r; 3427 return r;
3428 r = r600_dma_resume(rdev);
3429 if (r)
3430 return r;
3230 3431
3231 r = radeon_ib_pool_init(rdev); 3432 r = radeon_ib_pool_init(rdev);
3232 if (r) { 3433 if (r) {
@@ -3273,11 +3474,9 @@ int evergreen_resume(struct radeon_device *rdev)
3273 3474
3274int evergreen_suspend(struct radeon_device *rdev) 3475int evergreen_suspend(struct radeon_device *rdev)
3275{ 3476{
3276 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3277
3278 r600_audio_fini(rdev); 3477 r600_audio_fini(rdev);
3279 r700_cp_stop(rdev); 3478 r700_cp_stop(rdev);
3280 ring->ready = false; 3479 r600_dma_stop(rdev);
3281 evergreen_irq_suspend(rdev); 3480 evergreen_irq_suspend(rdev);
3282 radeon_wb_disable(rdev); 3481 radeon_wb_disable(rdev);
3283 evergreen_pcie_gart_disable(rdev); 3482 evergreen_pcie_gart_disable(rdev);
@@ -3354,6 +3553,9 @@ int evergreen_init(struct radeon_device *rdev)
3354 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 3553 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
3355 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 3554 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
3356 3555
3556 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
3557 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
3558
3357 rdev->ih.ring_obj = NULL; 3559 rdev->ih.ring_obj = NULL;
3358 r600_ih_ring_init(rdev, 64 * 1024); 3560 r600_ih_ring_init(rdev, 64 * 1024);
3359 3561
@@ -3366,6 +3568,7 @@ int evergreen_init(struct radeon_device *rdev)
3366 if (r) { 3568 if (r) {
3367 dev_err(rdev->dev, "disabling GPU acceleration\n"); 3569 dev_err(rdev->dev, "disabling GPU acceleration\n");
3368 r700_cp_fini(rdev); 3570 r700_cp_fini(rdev);
3571 r600_dma_fini(rdev);
3369 r600_irq_fini(rdev); 3572 r600_irq_fini(rdev);
3370 radeon_wb_fini(rdev); 3573 radeon_wb_fini(rdev);
3371 radeon_ib_pool_fini(rdev); 3574 radeon_ib_pool_fini(rdev);
@@ -3393,6 +3596,7 @@ void evergreen_fini(struct radeon_device *rdev)
3393 r600_audio_fini(rdev); 3596 r600_audio_fini(rdev);
3394 r600_blit_fini(rdev); 3597 r600_blit_fini(rdev);
3395 r700_cp_fini(rdev); 3598 r700_cp_fini(rdev);
3599 r600_dma_fini(rdev);
3396 r600_irq_fini(rdev); 3600 r600_irq_fini(rdev);
3397 radeon_wb_fini(rdev); 3601 radeon_wb_fini(rdev);
3398 radeon_ib_pool_fini(rdev); 3602 radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index c042e497e450..74c6b42d2597 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -34,6 +34,8 @@
34#define MAX(a,b) (((a)>(b))?(a):(b)) 34#define MAX(a,b) (((a)>(b))?(a):(b))
35#define MIN(a,b) (((a)<(b))?(a):(b)) 35#define MIN(a,b) (((a)<(b))?(a):(b))
36 36
37int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc);
37static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p, 39static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
38 struct radeon_cs_reloc **cs_reloc); 40 struct radeon_cs_reloc **cs_reloc);
39 41
@@ -507,20 +509,28 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
507 /* height is npipes htiles aligned == npipes * 8 pixel aligned */ 509 /* height is npipes htiles aligned == npipes * 8 pixel aligned */
508 nby = round_up(nby, track->npipes * 8); 510 nby = round_up(nby, track->npipes * 8);
509 } else { 511 } else {
512 /* always assume 8x8 htile */
513 /* align is htile align * 8, htile align vary according to
514 * number of pipe and tile width and nby
515 */
510 switch (track->npipes) { 516 switch (track->npipes) {
511 case 8: 517 case 8:
518 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
512 nbx = round_up(nbx, 64 * 8); 519 nbx = round_up(nbx, 64 * 8);
513 nby = round_up(nby, 64 * 8); 520 nby = round_up(nby, 64 * 8);
514 break; 521 break;
515 case 4: 522 case 4:
523 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
516 nbx = round_up(nbx, 64 * 8); 524 nbx = round_up(nbx, 64 * 8);
517 nby = round_up(nby, 32 * 8); 525 nby = round_up(nby, 32 * 8);
518 break; 526 break;
519 case 2: 527 case 2:
528 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
520 nbx = round_up(nbx, 32 * 8); 529 nbx = round_up(nbx, 32 * 8);
521 nby = round_up(nby, 32 * 8); 530 nby = round_up(nby, 32 * 8);
522 break; 531 break;
523 case 1: 532 case 1:
533 /* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
524 nbx = round_up(nbx, 32 * 8); 534 nbx = round_up(nbx, 32 * 8);
525 nby = round_up(nby, 16 * 8); 535 nby = round_up(nby, 16 * 8);
526 break; 536 break;
@@ -531,9 +541,10 @@ static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
531 } 541 }
532 } 542 }
533 /* compute number of htile */ 543 /* compute number of htile */
534 nbx = nbx / 8; 544 nbx = nbx >> 3;
535 nby = nby / 8; 545 nby = nby >> 3;
536 size = nbx * nby * 4; 546 /* size must be aligned on npipes * 2K boundary */
547 size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
537 size += track->htile_offset; 548 size += track->htile_offset;
538 549
539 if (size > radeon_bo_size(track->htile_bo)) { 550 if (size > radeon_bo_size(track->htile_bo)) {
@@ -1790,6 +1801,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
1790 case DB_HTILE_SURFACE: 1801 case DB_HTILE_SURFACE:
1791 /* 8x8 only */ 1802 /* 8x8 only */
1792 track->htile_surface = radeon_get_ib_value(p, idx); 1803 track->htile_surface = radeon_get_ib_value(p, idx);
1804 /* force 8x8 htile width and height */
1805 ib[idx] |= 3;
1793 track->db_dirty = true; 1806 track->db_dirty = true;
1794 break; 1807 break;
1795 case CB_IMMED0_BASE: 1808 case CB_IMMED0_BASE:
@@ -2232,6 +2245,107 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
2232 ib[idx+2] = upper_32_bits(offset) & 0xff; 2245 ib[idx+2] = upper_32_bits(offset) & 0xff;
2233 } 2246 }
2234 break; 2247 break;
2248 case PACKET3_CP_DMA:
2249 {
2250 u32 command, size, info;
2251 u64 offset, tmp;
2252 if (pkt->count != 4) {
2253 DRM_ERROR("bad CP DMA\n");
2254 return -EINVAL;
2255 }
2256 command = radeon_get_ib_value(p, idx+4);
2257 size = command & 0x1fffff;
2258 info = radeon_get_ib_value(p, idx+1);
2259 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
2260 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
2261 ((((info & 0x00300000) >> 20) == 0) &&
2262 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
2263 ((((info & 0x60000000) >> 29) == 0) &&
2264 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
2265 /* non mem to mem copies requires dw aligned count */
2266 if (size % 4) {
2267 DRM_ERROR("CP DMA command requires dw count alignment\n");
2268 return -EINVAL;
2269 }
2270 }
2271 if (command & PACKET3_CP_DMA_CMD_SAS) {
2272 /* src address space is register */
2273 /* GDS is ok */
2274 if (((info & 0x60000000) >> 29) != 1) {
2275 DRM_ERROR("CP DMA SAS not supported\n");
2276 return -EINVAL;
2277 }
2278 } else {
2279 if (command & PACKET3_CP_DMA_CMD_SAIC) {
2280 DRM_ERROR("CP DMA SAIC only supported for registers\n");
2281 return -EINVAL;
2282 }
2283 /* src address space is memory */
2284 if (((info & 0x60000000) >> 29) == 0) {
2285 r = evergreen_cs_packet_next_reloc(p, &reloc);
2286 if (r) {
2287 DRM_ERROR("bad CP DMA SRC\n");
2288 return -EINVAL;
2289 }
2290
2291 tmp = radeon_get_ib_value(p, idx) +
2292 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
2293
2294 offset = reloc->lobj.gpu_offset + tmp;
2295
2296 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2297 dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
2298 tmp + size, radeon_bo_size(reloc->robj));
2299 return -EINVAL;
2300 }
2301
2302 ib[idx] = offset;
2303 ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
2304 } else if (((info & 0x60000000) >> 29) != 2) {
2305 DRM_ERROR("bad CP DMA SRC_SEL\n");
2306 return -EINVAL;
2307 }
2308 }
2309 if (command & PACKET3_CP_DMA_CMD_DAS) {
2310 /* dst address space is register */
2311 /* GDS is ok */
2312 if (((info & 0x00300000) >> 20) != 1) {
2313 DRM_ERROR("CP DMA DAS not supported\n");
2314 return -EINVAL;
2315 }
2316 } else {
2317 /* dst address space is memory */
2318 if (command & PACKET3_CP_DMA_CMD_DAIC) {
2319 DRM_ERROR("CP DMA DAIC only supported for registers\n");
2320 return -EINVAL;
2321 }
2322 if (((info & 0x00300000) >> 20) == 0) {
2323 r = evergreen_cs_packet_next_reloc(p, &reloc);
2324 if (r) {
2325 DRM_ERROR("bad CP DMA DST\n");
2326 return -EINVAL;
2327 }
2328
2329 tmp = radeon_get_ib_value(p, idx+2) +
2330 ((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
2331
2332 offset = reloc->lobj.gpu_offset + tmp;
2333
2334 if ((tmp + size) > radeon_bo_size(reloc->robj)) {
2335 dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
2336 tmp + size, radeon_bo_size(reloc->robj));
2337 return -EINVAL;
2338 }
2339
2340 ib[idx+2] = offset;
2341 ib[idx+3] = upper_32_bits(offset) & 0xff;
2342 } else {
2343 DRM_ERROR("bad CP DMA DST_SEL\n");
2344 return -EINVAL;
2345 }
2346 }
2347 break;
2348 }
2235 case PACKET3_SURFACE_SYNC: 2349 case PACKET3_SURFACE_SYNC:
2236 if (pkt->count != 3) { 2350 if (pkt->count != 3) {
2237 DRM_ERROR("bad SURFACE_SYNC\n"); 2351 DRM_ERROR("bad SURFACE_SYNC\n");
@@ -2715,6 +2829,455 @@ int evergreen_cs_parse(struct radeon_cs_parser *p)
2715 return 0; 2829 return 0;
2716} 2830}
2717 2831
2832/*
2833 * DMA
2834 */
2835
2836#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
2837#define GET_DMA_COUNT(h) ((h) & 0x000fffff)
2838#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
2839#define GET_DMA_NEW(h) (((h) & 0x04000000) >> 26)
2840#define GET_DMA_MISC(h) (((h) & 0x0700000) >> 20)
2841
2842/**
2843 * evergreen_dma_cs_parse() - parse the DMA IB
2844 * @p: parser structure holding parsing context.
2845 *
2846 * Parses the DMA IB from the CS ioctl and updates
2847 * the GPU addresses based on the reloc information and
2848 * checks for errors. (Evergreen-Cayman)
2849 * Returns 0 for success and an error on failure.
2850 **/
2851int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
2852{
2853 struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
2854 struct radeon_cs_reloc *src_reloc, *dst_reloc, *dst2_reloc;
2855 u32 header, cmd, count, tiled, new_cmd, misc;
2856 volatile u32 *ib = p->ib.ptr;
2857 u32 idx, idx_value;
2858 u64 src_offset, dst_offset, dst2_offset;
2859 int r;
2860
2861 do {
2862 if (p->idx >= ib_chunk->length_dw) {
2863 DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
2864 p->idx, ib_chunk->length_dw);
2865 return -EINVAL;
2866 }
2867 idx = p->idx;
2868 header = radeon_get_ib_value(p, idx);
2869 cmd = GET_DMA_CMD(header);
2870 count = GET_DMA_COUNT(header);
2871 tiled = GET_DMA_T(header);
2872 new_cmd = GET_DMA_NEW(header);
2873 misc = GET_DMA_MISC(header);
2874
2875 switch (cmd) {
2876 case DMA_PACKET_WRITE:
2877 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2878 if (r) {
2879 DRM_ERROR("bad DMA_PACKET_WRITE\n");
2880 return -EINVAL;
2881 }
2882 if (tiled) {
2883 dst_offset = ib[idx+1];
2884 dst_offset <<= 8;
2885
2886 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2887 p->idx += count + 7;
2888 } else {
2889 dst_offset = ib[idx+1];
2890 dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
2891
2892 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2893 ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2894 p->idx += count + 3;
2895 }
2896 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2897 dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
2898 dst_offset, radeon_bo_size(dst_reloc->robj));
2899 return -EINVAL;
2900 }
2901 break;
2902 case DMA_PACKET_COPY:
2903 r = r600_dma_cs_next_reloc(p, &src_reloc);
2904 if (r) {
2905 DRM_ERROR("bad DMA_PACKET_COPY\n");
2906 return -EINVAL;
2907 }
2908 r = r600_dma_cs_next_reloc(p, &dst_reloc);
2909 if (r) {
2910 DRM_ERROR("bad DMA_PACKET_COPY\n");
2911 return -EINVAL;
2912 }
2913 if (tiled) {
2914 idx_value = radeon_get_ib_value(p, idx + 2);
2915 if (new_cmd) {
2916 switch (misc) {
2917 case 0:
2918 /* L2T, frame to fields */
2919 if (idx_value & (1 << 31)) {
2920 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2921 return -EINVAL;
2922 }
2923 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2924 if (r) {
2925 DRM_ERROR("bad L2T, frame to fields DMA_PACKET_COPY\n");
2926 return -EINVAL;
2927 }
2928 dst_offset = ib[idx+1];
2929 dst_offset <<= 8;
2930 dst2_offset = ib[idx+2];
2931 dst2_offset <<= 8;
2932 src_offset = ib[idx+8];
2933 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2934 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2935 dev_warn(p->dev, "DMA L2T, frame to fields src buffer too small (%llu %lu)\n",
2936 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2937 return -EINVAL;
2938 }
2939 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
2940 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2941 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
2942 return -EINVAL;
2943 }
2944 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
2945 dev_warn(p->dev, "DMA L2T, frame to fields buffer too small (%llu %lu)\n",
2946 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
2947 return -EINVAL;
2948 }
2949 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2950 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
2951 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2952 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2953 p->idx += 10;
2954 break;
2955 case 1:
2956 /* L2T, T2L partial */
2957 if (p->family < CHIP_CAYMAN) {
2958 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
2959 return -EINVAL;
2960 }
2961 /* detile bit */
2962 if (idx_value & (1 << 31)) {
2963 /* tiled src, linear dst */
2964 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
2965
2966 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
2967 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
2968 } else {
2969 /* linear src, tiled dst */
2970 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
2971 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
2972
2973 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
2974 }
2975 p->idx += 12;
2976 break;
2977 case 3:
2978 /* L2T, broadcast */
2979 if (idx_value & (1 << 31)) {
2980 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2981 return -EINVAL;
2982 }
2983 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
2984 if (r) {
2985 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
2986 return -EINVAL;
2987 }
2988 dst_offset = ib[idx+1];
2989 dst_offset <<= 8;
2990 dst2_offset = ib[idx+2];
2991 dst2_offset <<= 8;
2992 src_offset = ib[idx+8];
2993 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
2994 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
2995 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
2996 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
2997 return -EINVAL;
2998 }
2999 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3000 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3001 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3002 return -EINVAL;
3003 }
3004 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3005 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3006 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3007 return -EINVAL;
3008 }
3009 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3010 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3011 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3012 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3013 p->idx += 10;
3014 break;
3015 case 4:
3016 /* L2T, T2L */
3017 /* detile bit */
3018 if (idx_value & (1 << 31)) {
3019 /* tiled src, linear dst */
3020 src_offset = ib[idx+1];
3021 src_offset <<= 8;
3022 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3023
3024 dst_offset = ib[idx+7];
3025 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3026 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3027 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3028 } else {
3029 /* linear src, tiled dst */
3030 src_offset = ib[idx+7];
3031 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3032 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3033 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3034
3035 dst_offset = ib[idx+1];
3036 dst_offset <<= 8;
3037 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3038 }
3039 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3040 dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
3041 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3042 return -EINVAL;
3043 }
3044 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3045 dev_warn(p->dev, "DMA L2T, T2L dst buffer too small (%llu %lu)\n",
3046 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3047 return -EINVAL;
3048 }
3049 p->idx += 9;
3050 break;
3051 case 5:
3052 /* T2T partial */
3053 if (p->family < CHIP_CAYMAN) {
3054 DRM_ERROR("L2T, T2L Partial is cayman only !\n");
3055 return -EINVAL;
3056 }
3057 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3058 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3059 p->idx += 13;
3060 break;
3061 case 7:
3062 /* L2T, broadcast */
3063 if (idx_value & (1 << 31)) {
3064 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3065 return -EINVAL;
3066 }
3067 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3068 if (r) {
3069 DRM_ERROR("bad L2T, broadcast DMA_PACKET_COPY\n");
3070 return -EINVAL;
3071 }
3072 dst_offset = ib[idx+1];
3073 dst_offset <<= 8;
3074 dst2_offset = ib[idx+2];
3075 dst2_offset <<= 8;
3076 src_offset = ib[idx+8];
3077 src_offset |= ((u64)(ib[idx+9] & 0xff)) << 32;
3078 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3079 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3080 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3081 return -EINVAL;
3082 }
3083 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3084 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3085 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3086 return -EINVAL;
3087 }
3088 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3089 dev_warn(p->dev, "DMA L2T, broadcast dst2 buffer too small (%llu %lu)\n",
3090 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3091 return -EINVAL;
3092 }
3093 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3094 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
3095 ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3096 ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3097 p->idx += 10;
3098 break;
3099 default:
3100 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3101 return -EINVAL;
3102 }
3103 } else {
3104 switch (misc) {
3105 case 0:
3106 /* detile bit */
3107 if (idx_value & (1 << 31)) {
3108 /* tiled src, linear dst */
3109 src_offset = ib[idx+1];
3110 src_offset <<= 8;
3111 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
3112
3113 dst_offset = ib[idx+7];
3114 dst_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3115 ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3116 ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3117 } else {
3118 /* linear src, tiled dst */
3119 src_offset = ib[idx+7];
3120 src_offset |= ((u64)(ib[idx+8] & 0xff)) << 32;
3121 ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3122 ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3123
3124 dst_offset = ib[idx+1];
3125 dst_offset <<= 8;
3126 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
3127 }
3128 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3129 dev_warn(p->dev, "DMA L2T, broadcast src buffer too small (%llu %lu)\n",
3130 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3131 return -EINVAL;
3132 }
3133 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3134 dev_warn(p->dev, "DMA L2T, broadcast dst buffer too small (%llu %lu)\n",
3135 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3136 return -EINVAL;
3137 }
3138 p->idx += 9;
3139 break;
3140 default:
3141 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3142 return -EINVAL;
3143 }
3144 }
3145 } else {
3146 if (new_cmd) {
3147 switch (misc) {
3148 case 0:
3149 /* L2L, byte */
3150 src_offset = ib[idx+2];
3151 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3152 dst_offset = ib[idx+1];
3153 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3154 if ((src_offset + count) > radeon_bo_size(src_reloc->robj)) {
3155 dev_warn(p->dev, "DMA L2L, byte src buffer too small (%llu %lu)\n",
3156 src_offset + count, radeon_bo_size(src_reloc->robj));
3157 return -EINVAL;
3158 }
3159 if ((dst_offset + count) > radeon_bo_size(dst_reloc->robj)) {
3160 dev_warn(p->dev, "DMA L2L, byte dst buffer too small (%llu %lu)\n",
3161 dst_offset + count, radeon_bo_size(dst_reloc->robj));
3162 return -EINVAL;
3163 }
3164 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3165 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3166 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3167 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3168 p->idx += 5;
3169 break;
3170 case 1:
3171 /* L2L, partial */
3172 if (p->family < CHIP_CAYMAN) {
3173 DRM_ERROR("L2L Partial is cayman only !\n");
3174 return -EINVAL;
3175 }
3176 ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
3177 ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3178 ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
3179 ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3180
3181 p->idx += 9;
3182 break;
3183 case 4:
3184 /* L2L, dw, broadcast */
3185 r = r600_dma_cs_next_reloc(p, &dst2_reloc);
3186 if (r) {
3187 DRM_ERROR("bad L2L, dw, broadcast DMA_PACKET_COPY\n");
3188 return -EINVAL;
3189 }
3190 dst_offset = ib[idx+1];
3191 dst_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3192 dst2_offset = ib[idx+2];
3193 dst2_offset |= ((u64)(ib[idx+5] & 0xff)) << 32;
3194 src_offset = ib[idx+3];
3195 src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
3196 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3197 dev_warn(p->dev, "DMA L2L, dw, broadcast src buffer too small (%llu %lu)\n",
3198 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3199 return -EINVAL;
3200 }
3201 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3202 dev_warn(p->dev, "DMA L2L, dw, broadcast dst buffer too small (%llu %lu)\n",
3203 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3204 return -EINVAL;
3205 }
3206 if ((dst2_offset + (count * 4)) > radeon_bo_size(dst2_reloc->robj)) {
3207 dev_warn(p->dev, "DMA L2L, dw, broadcast dst2 buffer too small (%llu %lu)\n",
3208 dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
3209 return -EINVAL;
3210 }
3211 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3212 ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
3213 ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3214 ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3215 ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
3216 ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3217 p->idx += 7;
3218 break;
3219 default:
3220 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3221 return -EINVAL;
3222 }
3223 } else {
3224 /* L2L, dw */
3225 src_offset = ib[idx+2];
3226 src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
3227 dst_offset = ib[idx+1];
3228 dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
3229 if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
3230 dev_warn(p->dev, "DMA L2L, dw src buffer too small (%llu %lu)\n",
3231 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
3232 return -EINVAL;
3233 }
3234 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3235 dev_warn(p->dev, "DMA L2L, dw dst buffer too small (%llu %lu)\n",
3236 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3237 return -EINVAL;
3238 }
3239 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3240 ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
3241 ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
3242 ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
3243 p->idx += 5;
3244 }
3245 }
3246 break;
3247 case DMA_PACKET_CONSTANT_FILL:
3248 r = r600_dma_cs_next_reloc(p, &dst_reloc);
3249 if (r) {
3250 DRM_ERROR("bad DMA_PACKET_CONSTANT_FILL\n");
3251 return -EINVAL;
3252 }
3253 dst_offset = ib[idx+1];
3254 dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
3255 if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
3256 dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
3257 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
3258 return -EINVAL;
3259 }
3260 ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
3261 ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
3262 p->idx += 4;
3263 break;
3264 case DMA_PACKET_NOP:
3265 p->idx += 1;
3266 break;
3267 default:
3268 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3269 return -EINVAL;
3270 }
3271 } while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
3272#if 0
3273 for (r = 0; r < p->ib.length_dw; r++) {
3274 printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
3275 mdelay(1);
3276 }
3277#endif
3278 return 0;
3279}
3280
2718/* vm parser */ 3281/* vm parser */
2719static bool evergreen_vm_reg_valid(u32 reg) 3282static bool evergreen_vm_reg_valid(u32 reg)
2720{ 3283{
@@ -2843,6 +3406,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
2843 u32 idx = pkt->idx + 1; 3406 u32 idx = pkt->idx + 1;
2844 u32 idx_value = ib[idx]; 3407 u32 idx_value = ib[idx];
2845 u32 start_reg, end_reg, reg, i; 3408 u32 start_reg, end_reg, reg, i;
3409 u32 command, info;
2846 3410
2847 switch (pkt->opcode) { 3411 switch (pkt->opcode) {
2848 case PACKET3_NOP: 3412 case PACKET3_NOP:
@@ -2917,6 +3481,64 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
2917 return -EINVAL; 3481 return -EINVAL;
2918 } 3482 }
2919 break; 3483 break;
3484 case PACKET3_CP_DMA:
3485 command = ib[idx + 4];
3486 info = ib[idx + 1];
3487 if ((((info & 0x60000000) >> 29) != 0) || /* src = GDS or DATA */
3488 (((info & 0x00300000) >> 20) != 0) || /* dst = GDS */
3489 ((((info & 0x00300000) >> 20) == 0) &&
3490 (command & PACKET3_CP_DMA_CMD_DAS)) || /* dst = register */
3491 ((((info & 0x60000000) >> 29) == 0) &&
3492 (command & PACKET3_CP_DMA_CMD_SAS))) { /* src = register */
3494 /* non mem to mem copies require dw aligned count */
3494 if ((command & 0x1fffff) % 4) {
3495 DRM_ERROR("CP DMA command requires dw count alignment\n");
3496 return -EINVAL;
3497 }
3498 }
3499 if (command & PACKET3_CP_DMA_CMD_SAS) {
3500 /* src address space is register */
3501 if (((info & 0x60000000) >> 29) == 0) {
3502 start_reg = idx_value << 2;
3503 if (command & PACKET3_CP_DMA_CMD_SAIC) {
3504 reg = start_reg;
3505 if (!evergreen_vm_reg_valid(reg)) {
3506 DRM_ERROR("CP DMA Bad SRC register\n");
3507 return -EINVAL;
3508 }
3509 } else {
3510 for (i = 0; i < (command & 0x1fffff); i++) {
3511 reg = start_reg + (4 * i);
3512 if (!evergreen_vm_reg_valid(reg)) {
3513 DRM_ERROR("CP DMA Bad SRC register\n");
3514 return -EINVAL;
3515 }
3516 }
3517 }
3518 }
3519 }
3520 if (command & PACKET3_CP_DMA_CMD_DAS) {
3521 /* dst address space is register */
3522 if (((info & 0x00300000) >> 20) == 0) {
3523 start_reg = ib[idx + 2];
3524 if (command & PACKET3_CP_DMA_CMD_DAIC) {
3525 reg = start_reg;
3526 if (!evergreen_vm_reg_valid(reg)) {
3527 DRM_ERROR("CP DMA Bad DST register\n");
3528 return -EINVAL;
3529 }
3530 } else {
3531 for (i = 0; i < (command & 0x1fffff); i++) {
3532 reg = start_reg + (4 * i);
3533 if (!evergreen_vm_reg_valid(reg)) {
3534 DRM_ERROR("CP DMA Bad DST register\n");
3535 return -EINVAL;
3536 }
3537 }
3538 }
3539 }
3540 }
3541 break;
2920 default: 3542 default:
2921 return -EINVAL; 3543 return -EINVAL;
2922 } 3544 }
@@ -2958,3 +3580,114 @@ int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
2958 3580
2959 return ret; 3581 return ret;
2960} 3582}
3583
3584/**
3585 * evergreen_dma_ib_parse() - parse the DMA IB for VM
3586 * @rdev: radeon_device pointer
3587 * @ib: radeon_ib pointer
3588 *
 3589 * Parses the DMA IB from the VM CS ioctl and
 3590 * checks for errors. (Cayman-SI)
3591 * Returns 0 for success and an error on failure.
3592 **/
3593int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
3594{
3595 u32 idx = 0;
3596 u32 header, cmd, count, tiled, new_cmd, misc;
3597
3598 do {
3599 header = ib->ptr[idx];
3600 cmd = GET_DMA_CMD(header);
3601 count = GET_DMA_COUNT(header);
3602 tiled = GET_DMA_T(header);
3603 new_cmd = GET_DMA_NEW(header);
3604 misc = GET_DMA_MISC(header);
3605
3606 switch (cmd) {
3607 case DMA_PACKET_WRITE:
3608 if (tiled)
3609 idx += count + 7;
3610 else
3611 idx += count + 3;
3612 break;
3613 case DMA_PACKET_COPY:
3614 if (tiled) {
3615 if (new_cmd) {
3616 switch (misc) {
3617 case 0:
3618 /* L2T, frame to fields */
3619 idx += 10;
3620 break;
3621 case 1:
3622 /* L2T, T2L partial */
3623 idx += 12;
3624 break;
3625 case 3:
3626 /* L2T, broadcast */
3627 idx += 10;
3628 break;
3629 case 4:
3630 /* L2T, T2L */
3631 idx += 9;
3632 break;
3633 case 5:
3634 /* T2T partial */
3635 idx += 13;
3636 break;
3637 case 7:
3638 /* L2T, broadcast */
3639 idx += 10;
3640 break;
3641 default:
3642 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3643 return -EINVAL;
3644 }
3645 } else {
3646 switch (misc) {
3647 case 0:
3648 idx += 9;
3649 break;
3650 default:
3651 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3652 return -EINVAL;
3653 }
3654 }
3655 } else {
3656 if (new_cmd) {
3657 switch (misc) {
3658 case 0:
3659 /* L2L, byte */
3660 idx += 5;
3661 break;
3662 case 1:
3663 /* L2L, partial */
3664 idx += 9;
3665 break;
3666 case 4:
3667 /* L2L, dw, broadcast */
3668 idx += 7;
3669 break;
3670 default:
3671 DRM_ERROR("bad DMA_PACKET_COPY misc %u\n", misc);
3672 return -EINVAL;
3673 }
3674 } else {
3675 /* L2L, dw */
3676 idx += 5;
3677 }
3678 }
3679 break;
3680 case DMA_PACKET_CONSTANT_FILL:
3681 idx += 4;
3682 break;
3683 case DMA_PACKET_NOP:
3684 idx += 1;
3685 break;
3686 default:
3687 DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
3688 return -EINVAL;
3689 }
3690 } while (idx < ib->length_dw);
3691
3692 return 0;
3693}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 2bc0f6a1b428..cb9baaac9e85 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -45,6 +45,8 @@
45#define TURKS_GB_ADDR_CONFIG_GOLDEN 0x02010002 45#define TURKS_GB_ADDR_CONFIG_GOLDEN 0x02010002
46#define CEDAR_GB_ADDR_CONFIG_GOLDEN 0x02010001 46#define CEDAR_GB_ADDR_CONFIG_GOLDEN 0x02010001
47#define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001 47#define CAICOS_GB_ADDR_CONFIG_GOLDEN 0x02010001
48#define SUMO_GB_ADDR_CONFIG_GOLDEN 0x02010002
49#define SUMO2_GB_ADDR_CONFIG_GOLDEN 0x02010002
48 50
49/* Registers */ 51/* Registers */
50 52
@@ -355,6 +357,54 @@
355# define AFMT_MPEG_INFO_UPDATE (1 << 10) 357# define AFMT_MPEG_INFO_UPDATE (1 << 10)
356#define AFMT_GENERIC0_7 0x7138 358#define AFMT_GENERIC0_7 0x7138
357 359
360/* DCE4/5 ELD audio interface */
361#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x5f84 /* LPCM */
362#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x5f88 /* AC3 */
363#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x5f8c /* MPEG1 */
364#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x5f90 /* MP3 */
365#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x5f94 /* MPEG2 */
366#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x5f98 /* AAC */
367#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x5f9c /* DTS */
368#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x5fa0 /* ATRAC */
369#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x5fa4 /* one bit audio - leave at 0 (default) */
370#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x5fa8 /* Dolby Digital */
371#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x5fac /* DTS-HD */
372#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x5fb0 /* MAT-MLP */
373#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x5fb4 /* DTS */
374#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x5fb8 /* WMA Pro */
375# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
376/* max channels minus one. 7 = 8 channels */
377# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
378# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
379# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
380/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
381 * bit0 = 32 kHz
382 * bit1 = 44.1 kHz
383 * bit2 = 48 kHz
384 * bit3 = 88.2 kHz
385 * bit4 = 96 kHz
386 * bit5 = 176.4 kHz
387 * bit6 = 192 kHz
388 */
389
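
As a worked illustration of how these descriptor fields combine, the standalone sketch below packs an LPCM descriptor for 8 channels supporting 32/44.1/48 kHz. The chosen frequency bits are hypothetical; only the MAX_CHANNELS/SUPPORTED_FREQUENCIES macros come from the header above.

#include <stdio.h>

#define MAX_CHANNELS(x)			(((x) & 0x7) << 0)
#define SUPPORTED_FREQUENCIES(x)	(((x) & 0xff) << 8)

int main(void)
{
	/* 7 = 8 channels (field holds channels minus one),
	 * bits 0|1|2 = 32 kHz, 44.1 kHz, 48 kHz
	 */
	unsigned desc = MAX_CHANNELS(7) | SUPPORTED_FREQUENCIES(0x07);

	printf("AUDIO_DESCRIPTOR0 value: 0x%08x\n", desc);	/* 0x00000707 */
	return 0;
}
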
390#define AZ_HOT_PLUG_CONTROL 0x5e78
391# define AZ_FORCE_CODEC_WAKE (1 << 0)
392# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
393# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
394# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
395# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
396# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
397# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
398# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
399# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
400# define CODEC_HOT_PLUG_ENABLE (1 << 12)
401# define PIN0_AUDIO_ENABLED (1 << 24)
402# define PIN1_AUDIO_ENABLED (1 << 25)
403# define PIN2_AUDIO_ENABLED (1 << 26)
404# define PIN3_AUDIO_ENABLED (1 << 27)
405# define AUDIO_ENABLED (1 << 31)
406
407
358#define GC_USER_SHADER_PIPE_CONFIG 0x8954 408#define GC_USER_SHADER_PIPE_CONFIG 0x8954
359#define INACTIVE_QD_PIPES(x) ((x) << 8) 409#define INACTIVE_QD_PIPES(x) ((x) << 8)
360#define INACTIVE_QD_PIPES_MASK 0x0000FF00 410#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -651,6 +701,7 @@
651#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) 701#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
652#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) 702#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
653#define VM_CONTEXT1_CNTL 0x1414 703#define VM_CONTEXT1_CNTL 0x1414
704#define VM_CONTEXT1_CNTL2 0x1434
654#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C 705#define VM_CONTEXT0_PAGE_TABLE_BASE_ADDR 0x153C
655#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C 706#define VM_CONTEXT0_PAGE_TABLE_END_ADDR 0x157C
656#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C 707#define VM_CONTEXT0_PAGE_TABLE_START_ADDR 0x155C
@@ -672,6 +723,8 @@
672#define CACHE_UPDATE_MODE(x) ((x) << 6) 723#define CACHE_UPDATE_MODE(x) ((x) << 6)
673#define VM_L2_STATUS 0x140C 724#define VM_L2_STATUS 0x140C
674#define L2_BUSY (1 << 0) 725#define L2_BUSY (1 << 0)
726#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
727#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
675 728
676#define WAIT_UNTIL 0x8040 729#define WAIT_UNTIL 0x8040
677 730
@@ -854,6 +907,37 @@
854# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16) 907# define DC_HPDx_RX_INT_TIMER(x) ((x) << 16)
855# define DC_HPDx_EN (1 << 28) 908# define DC_HPDx_EN (1 << 28)
856 909
910/* ASYNC DMA */
911#define DMA_RB_RPTR 0xd008
912#define DMA_RB_WPTR 0xd00c
913
914#define DMA_CNTL 0xd02c
915# define TRAP_ENABLE (1 << 0)
916# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
917# define SEM_WAIT_INT_ENABLE (1 << 2)
918# define DATA_SWAP_ENABLE (1 << 3)
919# define FENCE_SWAP_ENABLE (1 << 4)
920# define CTXEMPTY_INT_ENABLE (1 << 28)
921#define DMA_TILING_CONFIG 0xD0B8
922
923#define CAYMAN_DMA1_CNTL 0xd82c
924
925/* async DMA packets */
926#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
927 (((t) & 0x1) << 23) | \
928 (((s) & 0x1) << 22) | \
929 (((n) & 0xFFFFF) << 0))
930/* async DMA Packet types */
931#define DMA_PACKET_WRITE 0x2
932#define DMA_PACKET_COPY 0x3
933#define DMA_PACKET_INDIRECT_BUFFER 0x4
934#define DMA_PACKET_SEMAPHORE 0x5
935#define DMA_PACKET_FENCE 0x6
936#define DMA_PACKET_TRAP 0x7
937#define DMA_PACKET_SRBM_WRITE 0x9
938#define DMA_PACKET_CONSTANT_FILL 0xd
939#define DMA_PACKET_NOP 0xf
940
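
A quick sketch of the header encoding defined above: DMA_PACKET() packs the command opcode, two flag bits (t and s) and the dword count into a single header word, and the CS checkers pull the fields back out with the matching shifts and masks. The program below is an illustration with made-up field values, not driver code.

#include <stdio.h>

#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
					 (((t) & 0x1) << 23) |		\
					 (((s) & 0x1) << 22) |		\
					 (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_WRITE		0x2

int main(void)
{
	/* hypothetical tiled WRITE of 16 dwords */
	unsigned header = DMA_PACKET(DMA_PACKET_WRITE, 1, 0, 16);

	printf("header 0x%08x\n", header);			/* 0x20800010 */
	printf("cmd    0x%x\n", (header >> 28) & 0xf);		/* 0x2 */
	printf("tiled  %u\n", (header >> 23) & 0x1);		/* 1 */
	printf("count  %u\n", header & 0xfffff);		/* 16 */
	return 0;
}
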
857/* PCIE link stuff */ 941/* PCIE link stuff */
858#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */ 942#define PCIE_LC_TRAINING_CNTL 0xa1 /* PCIE_P */
859#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */ 943#define PCIE_LC_LINK_WIDTH_CNTL 0xa2 /* PCIE_P */
@@ -951,6 +1035,53 @@
951#define PACKET3_WAIT_REG_MEM 0x3C 1035#define PACKET3_WAIT_REG_MEM 0x3C
952#define PACKET3_MEM_WRITE 0x3D 1036#define PACKET3_MEM_WRITE 0x3D
953#define PACKET3_INDIRECT_BUFFER 0x32 1037#define PACKET3_INDIRECT_BUFFER 0x32
1038#define PACKET3_CP_DMA 0x41
1039/* 1. header
1040 * 2. SRC_ADDR_LO or DATA [31:0]
1041 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
1042 * SRC_ADDR_HI [7:0]
1043 * 4. DST_ADDR_LO [31:0]
1044 * 5. DST_ADDR_HI [7:0]
1045 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
1046 */
1047# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
1048 /* 0 - SRC_ADDR
1049 * 1 - GDS
1050 */
1051# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
1052 /* 0 - ME
1053 * 1 - PFP
1054 */
1055# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
1056 /* 0 - SRC_ADDR
1057 * 1 - GDS
1058 * 2 - DATA
1059 */
1060# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
1061/* COMMAND */
1062# define PACKET3_CP_DMA_DIS_WC (1 << 21)
1063# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
1064 /* 0 - none
1065 * 1 - 8 in 16
1066 * 2 - 8 in 32
1067 * 3 - 8 in 64
1068 */
1069# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
1070 /* 0 - none
1071 * 1 - 8 in 16
1072 * 2 - 8 in 32
1073 * 3 - 8 in 64
1074 */
1075# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
1076 /* 0 - memory
1077 * 1 - register
1078 */
1079# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
1080 /* 0 - memory
1081 * 1 - register
1082 */
1083# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
1084# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
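
The masks applied by the CP DMA checker, (info & 0x60000000) >> 29 and (info & 0x00300000) >> 20, are simply SRC_SEL and DST_SEL read back out of the packet's third dword as laid out above. A minimal standalone sketch with hypothetical selector values:

#include <stdio.h>

#define PACKET3_CP_DMA_DST_SEL(x)	((x) << 20)
#define PACKET3_CP_DMA_SRC_SEL(x)	((x) << 29)

int main(void)
{
	/* hypothetical dword 3: src = DATA (2), dst = GDS (1) */
	unsigned info = PACKET3_CP_DMA_SRC_SEL(2) | PACKET3_CP_DMA_DST_SEL(1);

	/* the same masks the VM checker applies */
	printf("SRC_SEL = %u\n", (info & 0x60000000) >> 29);	/* 2 */
	printf("DST_SEL = %u\n", (info & 0x00300000) >> 20);	/* 1 */
	return 0;
}
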
954#define PACKET3_SURFACE_SYNC 0x43 1085#define PACKET3_SURFACE_SYNC 0x43
955# define PACKET3_CB0_DEST_BASE_ENA (1 << 6) 1086# define PACKET3_CB0_DEST_BASE_ENA (1 << 6)
956# define PACKET3_CB1_DEST_BASE_ENA (1 << 7) 1087# define PACKET3_CB1_DEST_BASE_ENA (1 << 7)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index cda01f808f12..7bdbcb00aaf2 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -611,6 +611,8 @@ static void cayman_gpu_init(struct radeon_device *rdev)
611 WREG32(GB_ADDR_CONFIG, gb_addr_config); 611 WREG32(GB_ADDR_CONFIG, gb_addr_config);
612 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 612 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
613 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 613 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
614 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
615 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
614 616
615 tmp = gb_addr_config & NUM_PIPES_MASK; 617 tmp = gb_addr_config & NUM_PIPES_MASK;
616 tmp = r6xx_remap_render_backend(rdev, tmp, 618 tmp = r6xx_remap_render_backend(rdev, tmp,
@@ -784,10 +786,20 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
784 /* enable context1-7 */ 786 /* enable context1-7 */
785 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 787 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
786 (u32)(rdev->dummy_page.addr >> 12)); 788 (u32)(rdev->dummy_page.addr >> 12));
787 WREG32(VM_CONTEXT1_CNTL2, 0); 789 WREG32(VM_CONTEXT1_CNTL2, 4);
788 WREG32(VM_CONTEXT1_CNTL, 0);
789 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 790 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
790 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); 791 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
792 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
793 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
794 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
795 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
796 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
797 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
798 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
799 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
800 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
801 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
802 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
791 803
792 cayman_pcie_gart_tlb_flush(rdev); 804 cayman_pcie_gart_tlb_flush(rdev);
793 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 805 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -905,6 +917,7 @@ static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
905 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 917 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
906 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 918 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
907 WREG32(SCRATCH_UMSK, 0); 919 WREG32(SCRATCH_UMSK, 0);
920 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
908 } 921 }
909} 922}
910 923
@@ -1118,6 +1131,181 @@ static int cayman_cp_resume(struct radeon_device *rdev)
1118 return 0; 1131 return 0;
1119} 1132}
1120 1133
1134/*
1135 * DMA
1136 * Starting with R600, the GPU has an asynchronous
1137 * DMA engine. The programming model is very similar
1138 * to the 3D engine (ring buffer, IBs, etc.), but the
1139 * DMA controller has its own packet format that is
1140 * different from the PM4 format used by the 3D engine.
1141 * It supports copying data, writing embedded data,
1142 * solid fills, and a number of other things. It also
1143 * has support for tiling/detiling of buffers.
1144 * Cayman and newer support two asynchronous DMA engines.
1145 */
1146/**
1147 * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
1148 *
1149 * @rdev: radeon_device pointer
1150 * @ib: IB object to schedule
1151 *
1152 * Schedule an IB in the DMA ring (cayman-SI).
1153 */
1154void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
1155 struct radeon_ib *ib)
1156{
1157 struct radeon_ring *ring = &rdev->ring[ib->ring];
1158
1159 if (rdev->wb.enabled) {
1160 u32 next_rptr = ring->wptr + 4;
1161 while ((next_rptr & 7) != 5)
1162 next_rptr++;
1163 next_rptr += 3;
1164 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
1165 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
1166 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
1167 radeon_ring_write(ring, next_rptr);
1168 }
1169
1170 /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
1171 * Pad as necessary with NOPs.
1172 */
1173 while ((ring->wptr & 7) != 5)
1174 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1175 radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
1176 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
1177 radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
1178
1179}
1180
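
The padding loop above deserves a worked example. The trailing INDIRECT_BUFFER packet is 3 dwords and has to end on an 8-dword boundary, so the write pointer is first advanced until it is 5 modulo 8. A standalone sketch of the arithmetic with a made-up starting wptr:

#include <stdio.h>

int main(void)
{
	unsigned wptr = 34;	/* hypothetical write pointer, in dwords */
	unsigned nops = 0;

	/* same condition the driver loops on before the 3-dword IB packet */
	while ((wptr & 7) != 5) {
		wptr++;		/* each pass would emit one NOP */
		nops++;
	}

	printf("padded wptr: %u (%u NOPs)\n", wptr, nops);		/* 37, 3 NOPs */
	printf("IB packet ends at %u, multiple of 8: %s\n",
	       wptr + 3, ((wptr + 3) % 8) ? "no" : "yes");		/* 40, yes */
	return 0;
}
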
1181/**
1182 * cayman_dma_stop - stop the async dma engines
1183 *
1184 * @rdev: radeon_device pointer
1185 *
1186 * Stop the async dma engines (cayman-SI).
1187 */
1188void cayman_dma_stop(struct radeon_device *rdev)
1189{
1190 u32 rb_cntl;
1191
1192 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1193
1194 /* dma0 */
1195 rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
1196 rb_cntl &= ~DMA_RB_ENABLE;
1197 WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
1198
1199 /* dma1 */
1200 rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
1201 rb_cntl &= ~DMA_RB_ENABLE;
1202 WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
1203
1204 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
1205 rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
1206}
1207
1208/**
1209 * cayman_dma_resume - setup and start the async dma engines
1210 *
1211 * @rdev: radeon_device pointer
1212 *
1213 * Set up the DMA ring buffers and enable them. (cayman-SI).
1214 * Returns 0 for success, error for failure.
1215 */
1216int cayman_dma_resume(struct radeon_device *rdev)
1217{
1218 struct radeon_ring *ring;
1219 u32 rb_cntl, dma_cntl;
1220 u32 rb_bufsz;
1221 u32 reg_offset, wb_offset;
1222 int i, r;
1223
1224 /* Reset dma */
1225 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
1226 RREG32(SRBM_SOFT_RESET);
1227 udelay(50);
1228 WREG32(SRBM_SOFT_RESET, 0);
1229
1230 for (i = 0; i < 2; i++) {
1231 if (i == 0) {
1232 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1233 reg_offset = DMA0_REGISTER_OFFSET;
1234 wb_offset = R600_WB_DMA_RPTR_OFFSET;
1235 } else {
1236 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1237 reg_offset = DMA1_REGISTER_OFFSET;
1238 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
1239 }
1240
1241 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
1242 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
1243
1244 /* Set ring buffer size in dwords */
1245 rb_bufsz = drm_order(ring->ring_size / 4);
1246 rb_cntl = rb_bufsz << 1;
1247#ifdef __BIG_ENDIAN
1248 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
1249#endif
1250 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
1251
1252 /* Initialize the ring buffer's read and write pointers */
1253 WREG32(DMA_RB_RPTR + reg_offset, 0);
1254 WREG32(DMA_RB_WPTR + reg_offset, 0);
1255
1256 /* set the wb address whether it's enabled or not */
1257 WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
1258 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
1259 WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
1260 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
1261
1262 if (rdev->wb.enabled)
1263 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
1264
1265 WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
1266
1267 /* enable DMA IBs */
1268 WREG32(DMA_IB_CNTL + reg_offset, DMA_IB_ENABLE | CMD_VMID_FORCE);
1269
1270 dma_cntl = RREG32(DMA_CNTL + reg_offset);
1271 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
1272 WREG32(DMA_CNTL + reg_offset, dma_cntl);
1273
1274 ring->wptr = 0;
1275 WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
1276
1277 ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
1278
1279 WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
1280
1281 ring->ready = true;
1282
1283 r = radeon_ring_test(rdev, ring->idx, ring);
1284 if (r) {
1285 ring->ready = false;
1286 return r;
1287 }
1288 }
1289
1290 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
1291
1292 return 0;
1293}
1294
1295/**
1296 * cayman_dma_fini - tear down the async dma engines
1297 *
1298 * @rdev: radeon_device pointer
1299 *
1300 * Stop the async dma engines and free the rings (cayman-SI).
1301 */
1302void cayman_dma_fini(struct radeon_device *rdev)
1303{
1304 cayman_dma_stop(rdev);
1305 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
1306 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
1307}
1308
1121static int cayman_gpu_soft_reset(struct radeon_device *rdev) 1309static int cayman_gpu_soft_reset(struct radeon_device *rdev)
1122{ 1310{
1123 struct evergreen_mc_save save; 1311 struct evergreen_mc_save save;
@@ -1208,6 +1396,32 @@ int cayman_asic_reset(struct radeon_device *rdev)
1208 return cayman_gpu_soft_reset(rdev); 1396 return cayman_gpu_soft_reset(rdev);
1209} 1397}
1210 1398
1399/**
1400 * cayman_dma_is_lockup - Check if the DMA engine is locked up
1401 *
1402 * @rdev: radeon_device pointer
1403 * @ring: radeon_ring structure holding ring information
1404 *
1405 * Check if the async DMA engine is locked up (cayman-SI).
1406 * Returns true if the engine appears to be locked up, false if not.
1407 */
1408bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1409{
1410 u32 dma_status_reg;
1411
1412 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
1413 dma_status_reg = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
1414 else
1415 dma_status_reg = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
1416 if (dma_status_reg & DMA_IDLE) {
1417 radeon_ring_lockup_update(ring);
1418 return false;
1419 }
1420 /* force ring activities */
1421 radeon_ring_force_activity(rdev, ring);
1422 return radeon_ring_test_lockup(rdev, ring);
1423}
1424
1211static int cayman_startup(struct radeon_device *rdev) 1425static int cayman_startup(struct radeon_device *rdev)
1212{ 1426{
1213 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 1427 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -1289,6 +1503,18 @@ static int cayman_startup(struct radeon_device *rdev)
1289 return r; 1503 return r;
1290 } 1504 }
1291 1505
1506 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
1507 if (r) {
1508 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1509 return r;
1510 }
1511
1512 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
1513 if (r) {
1514 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
1515 return r;
1516 }
1517
1292 /* Enable IRQ */ 1518 /* Enable IRQ */
1293 r = r600_irq_init(rdev); 1519 r = r600_irq_init(rdev);
1294 if (r) { 1520 if (r) {
@@ -1303,6 +1529,23 @@ static int cayman_startup(struct radeon_device *rdev)
1303 0, 0xfffff, RADEON_CP_PACKET2); 1529 0, 0xfffff, RADEON_CP_PACKET2);
1304 if (r) 1530 if (r)
1305 return r; 1531 return r;
1532
1533 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1534 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
1535 DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
1536 DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
1537 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1538 if (r)
1539 return r;
1540
1541 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1542 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
1543 DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
1544 DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
1545 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
1546 if (r)
1547 return r;
1548
1306 r = cayman_cp_load_microcode(rdev); 1549 r = cayman_cp_load_microcode(rdev);
1307 if (r) 1550 if (r)
1308 return r; 1551 return r;
@@ -1310,6 +1553,10 @@ static int cayman_startup(struct radeon_device *rdev)
1310 if (r) 1553 if (r)
1311 return r; 1554 return r;
1312 1555
1556 r = cayman_dma_resume(rdev);
1557 if (r)
1558 return r;
1559
1313 r = radeon_ib_pool_init(rdev); 1560 r = radeon_ib_pool_init(rdev);
1314 if (r) { 1561 if (r) {
1315 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 1562 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1354,7 +1601,7 @@ int cayman_suspend(struct radeon_device *rdev)
1354{ 1601{
1355 r600_audio_fini(rdev); 1602 r600_audio_fini(rdev);
1356 cayman_cp_enable(rdev, false); 1603 cayman_cp_enable(rdev, false);
1357 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1604 cayman_dma_stop(rdev);
1358 evergreen_irq_suspend(rdev); 1605 evergreen_irq_suspend(rdev);
1359 radeon_wb_disable(rdev); 1606 radeon_wb_disable(rdev);
1360 cayman_pcie_gart_disable(rdev); 1607 cayman_pcie_gart_disable(rdev);
@@ -1421,6 +1668,14 @@ int cayman_init(struct radeon_device *rdev)
1421 ring->ring_obj = NULL; 1668 ring->ring_obj = NULL;
1422 r600_ring_init(rdev, ring, 1024 * 1024); 1669 r600_ring_init(rdev, ring, 1024 * 1024);
1423 1670
1671 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
1672 ring->ring_obj = NULL;
1673 r600_ring_init(rdev, ring, 64 * 1024);
1674
1675 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
1676 ring->ring_obj = NULL;
1677 r600_ring_init(rdev, ring, 64 * 1024);
1678
1424 rdev->ih.ring_obj = NULL; 1679 rdev->ih.ring_obj = NULL;
1425 r600_ih_ring_init(rdev, 64 * 1024); 1680 r600_ih_ring_init(rdev, 64 * 1024);
1426 1681
@@ -1433,6 +1688,7 @@ int cayman_init(struct radeon_device *rdev)
1433 if (r) { 1688 if (r) {
1434 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1689 dev_err(rdev->dev, "disabling GPU acceleration\n");
1435 cayman_cp_fini(rdev); 1690 cayman_cp_fini(rdev);
1691 cayman_dma_fini(rdev);
1436 r600_irq_fini(rdev); 1692 r600_irq_fini(rdev);
1437 if (rdev->flags & RADEON_IS_IGP) 1693 if (rdev->flags & RADEON_IS_IGP)
1438 si_rlc_fini(rdev); 1694 si_rlc_fini(rdev);
@@ -1463,6 +1719,7 @@ void cayman_fini(struct radeon_device *rdev)
1463{ 1719{
1464 r600_blit_fini(rdev); 1720 r600_blit_fini(rdev);
1465 cayman_cp_fini(rdev); 1721 cayman_cp_fini(rdev);
1722 cayman_dma_fini(rdev);
1466 r600_irq_fini(rdev); 1723 r600_irq_fini(rdev);
1467 if (rdev->flags & RADEON_IS_IGP) 1724 if (rdev->flags & RADEON_IS_IGP)
1468 si_rlc_fini(rdev); 1725 si_rlc_fini(rdev);
@@ -1538,30 +1795,57 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
1538{ 1795{
1539 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; 1796 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
1540 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 1797 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
1541 1798 uint64_t value;
1542 while (count) { 1799 unsigned ndw;
1543 unsigned ndw = 1 + count * 2; 1800
1544 if (ndw > 0x3FFF) 1801 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
1545 ndw = 0x3FFF; 1802 while (count) {
1546 1803 ndw = 1 + count * 2;
1547 radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); 1804 if (ndw > 0x3FFF)
1548 radeon_ring_write(ring, pe); 1805 ndw = 0x3FFF;
1549 radeon_ring_write(ring, upper_32_bits(pe) & 0xff); 1806
1550 for (; ndw > 1; ndw -= 2, --count, pe += 8) { 1807 radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw));
1551 uint64_t value = 0; 1808 radeon_ring_write(ring, pe);
1552 if (flags & RADEON_VM_PAGE_SYSTEM) { 1809 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
1553 value = radeon_vm_map_gart(rdev, addr); 1810 for (; ndw > 1; ndw -= 2, --count, pe += 8) {
1554 value &= 0xFFFFFFFFFFFFF000ULL; 1811 if (flags & RADEON_VM_PAGE_SYSTEM) {
1812 value = radeon_vm_map_gart(rdev, addr);
1813 value &= 0xFFFFFFFFFFFFF000ULL;
1814 } else if (flags & RADEON_VM_PAGE_VALID) {
1815 value = addr;
1816 } else {
1817 value = 0;
1818 }
1555 addr += incr; 1819 addr += incr;
1556 1820 value |= r600_flags;
1557 } else if (flags & RADEON_VM_PAGE_VALID) { 1821 radeon_ring_write(ring, value);
1558 value = addr; 1822 radeon_ring_write(ring, upper_32_bits(value));
1823 }
1824 }
1825 } else {
1826 while (count) {
1827 ndw = count * 2;
1828 if (ndw > 0xFFFFE)
1829 ndw = 0xFFFFE;
1830
1831 /* for non-physically contiguous pages (system) */
1832 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw));
1833 radeon_ring_write(ring, pe);
1834 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
1835 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
1836 if (flags & RADEON_VM_PAGE_SYSTEM) {
1837 value = radeon_vm_map_gart(rdev, addr);
1838 value &= 0xFFFFFFFFFFFFF000ULL;
1839 } else if (flags & RADEON_VM_PAGE_VALID) {
1840 value = addr;
1841 } else {
1842 value = 0;
1843 }
1559 addr += incr; 1844 addr += incr;
1845 value |= r600_flags;
1846 radeon_ring_write(ring, value);
1847 radeon_ring_write(ring, upper_32_bits(value));
1560 } 1848 }
1561
1562 value |= r600_flags;
1563 radeon_ring_write(ring, value);
1564 radeon_ring_write(ring, upper_32_bits(value));
1565 } 1849 }
1566 } 1850 }
1567} 1851}
@@ -1596,3 +1880,26 @@ void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
1596 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 1880 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
1597 radeon_ring_write(ring, 0x0); 1881 radeon_ring_write(ring, 0x0);
1598} 1882}
1883
1884void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
1885{
1886 struct radeon_ring *ring = &rdev->ring[ridx];
1887
1888 if (vm == NULL)
1889 return;
1890
1891 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1892 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
1893 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
1894
1895 /* flush hdp cache */
1896 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1897 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
1898 radeon_ring_write(ring, 1);
1899
1900 /* bits 0-7 are the VM contexts0-7 */
1901 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
1902 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
1903 radeon_ring_write(ring, 1 << vm->id);
1904}
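
The second dword of each SRBM_WRITE packet above combines what appears to be a byte-enable field in the upper bits (the 0xf << 16) with the target register's dword offset, which is why the register addresses are shifted right by two. A small decode sketch; the VM_INVALIDATE_REQUEST value and the byte-enable reading are assumptions taken from the surrounding driver headers, not something this diff states:

#include <stdio.h>

#define VM_INVALIDATE_REQUEST 0x1478	/* assumed byte offset, illustrative only */

int main(void)
{
	unsigned dw = (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2);

	printf("SRBM_WRITE dword 2: 0x%08x\n", dw);
	printf("register offset:    0x%x\n", (dw & 0xffff) << 2);	/* back to 0x1478 */
	printf("upper field:        0x%x\n", dw >> 16);			/* 0xf, presumed byte enables */
	return 0;
}
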
1905
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index cbef6815907a..b93186b8ee4b 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -50,6 +50,24 @@
50#define VMID(x) (((x) & 0x7) << 0) 50#define VMID(x) (((x) & 0x7) << 0)
51#define SRBM_STATUS 0x0E50 51#define SRBM_STATUS 0x0E50
52 52
53#define SRBM_SOFT_RESET 0x0E60
54#define SOFT_RESET_BIF (1 << 1)
55#define SOFT_RESET_CG (1 << 2)
56#define SOFT_RESET_DC (1 << 5)
57#define SOFT_RESET_DMA1 (1 << 6)
58#define SOFT_RESET_GRBM (1 << 8)
59#define SOFT_RESET_HDP (1 << 9)
60#define SOFT_RESET_IH (1 << 10)
61#define SOFT_RESET_MC (1 << 11)
62#define SOFT_RESET_RLC (1 << 13)
63#define SOFT_RESET_ROM (1 << 14)
64#define SOFT_RESET_SEM (1 << 15)
65#define SOFT_RESET_VMC (1 << 17)
66#define SOFT_RESET_DMA (1 << 20)
67#define SOFT_RESET_TST (1 << 21)
68#define SOFT_RESET_REGBB (1 << 22)
69#define SOFT_RESET_ORB (1 << 23)
70
53#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470 71#define VM_CONTEXT0_REQUEST_RESPONSE 0x1470
54#define REQUEST_TYPE(x) (((x) & 0xf) << 0) 72#define REQUEST_TYPE(x) (((x) & 0xf) << 0)
55#define RESPONSE_TYPE_MASK 0x000000F0 73#define RESPONSE_TYPE_MASK 0x000000F0
@@ -80,7 +98,18 @@
80#define VM_CONTEXT0_CNTL 0x1410 98#define VM_CONTEXT0_CNTL 0x1410
81#define ENABLE_CONTEXT (1 << 0) 99#define ENABLE_CONTEXT (1 << 0)
82#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) 100#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
101#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
83#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) 102#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
103#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
104#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
105#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
106#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
107#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
108#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
109#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
110#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
111#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
112#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
84#define VM_CONTEXT1_CNTL 0x1414 113#define VM_CONTEXT1_CNTL 0x1414
85#define VM_CONTEXT0_CNTL2 0x1430 114#define VM_CONTEXT0_CNTL2 0x1430
86#define VM_CONTEXT1_CNTL2 0x1434 115#define VM_CONTEXT1_CNTL2 0x1434
@@ -588,5 +617,62 @@
588#define PACKET3_SET_APPEND_CNT 0x75 617#define PACKET3_SET_APPEND_CNT 0x75
589#define PACKET3_ME_WRITE 0x7A 618#define PACKET3_ME_WRITE 0x7A
590 619
620/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
621#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
622#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
623
624#define DMA_RB_CNTL 0xd000
625# define DMA_RB_ENABLE (1 << 0)
626# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
627# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
628# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
629# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
630# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
631#define DMA_RB_BASE 0xd004
632#define DMA_RB_RPTR 0xd008
633#define DMA_RB_WPTR 0xd00c
634
635#define DMA_RB_RPTR_ADDR_HI 0xd01c
636#define DMA_RB_RPTR_ADDR_LO 0xd020
637
638#define DMA_IB_CNTL 0xd024
639# define DMA_IB_ENABLE (1 << 0)
640# define DMA_IB_SWAP_ENABLE (1 << 4)
641# define CMD_VMID_FORCE (1 << 31)
642#define DMA_IB_RPTR 0xd028
643#define DMA_CNTL 0xd02c
644# define TRAP_ENABLE (1 << 0)
645# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
646# define SEM_WAIT_INT_ENABLE (1 << 2)
647# define DATA_SWAP_ENABLE (1 << 3)
648# define FENCE_SWAP_ENABLE (1 << 4)
649# define CTXEMPTY_INT_ENABLE (1 << 28)
650#define DMA_STATUS_REG 0xd034
651# define DMA_IDLE (1 << 0)
652#define DMA_SEM_INCOMPLETE_TIMER_CNTL 0xd044
653#define DMA_SEM_WAIT_FAIL_TIMER_CNTL 0xd048
654#define DMA_TILING_CONFIG 0xd0b8
655#define DMA_MODE 0xd0bc
656
657#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
658 (((t) & 0x1) << 23) | \
659 (((s) & 0x1) << 22) | \
660 (((n) & 0xFFFFF) << 0))
661
662#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
663 (((vmid) & 0xF) << 20) | \
664 (((n) & 0xFFFFF) << 0))
665
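
DMA_IB_PACKET() is the same idea as DMA_PACKET() but carries a VM id in bits 23:20, which is how cayman_dma_ring_ib_execute() tags an indirect buffer with ib->vm->id. A one-shot sketch with a hypothetical VM id:

#include <stdio.h>

#define DMA_IB_PACKET(cmd, vmid, n)	((((cmd) & 0xF) << 28) |	\
					 (((vmid) & 0xF) << 20) |	\
					 (((n) & 0xFFFFF) << 0))
#define DMA_PACKET_INDIRECT_BUFFER	0x4

int main(void)
{
	/* hypothetical VM id 3, length field left for the next dwords */
	unsigned header = DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, 3, 0);

	printf("IB header: 0x%08x\n", header);	/* 0x40300000 */
	return 0;
}
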
666/* async DMA Packet types */
667#define DMA_PACKET_WRITE 0x2
668#define DMA_PACKET_COPY 0x3
669#define DMA_PACKET_INDIRECT_BUFFER 0x4
670#define DMA_PACKET_SEMAPHORE 0x5
671#define DMA_PACKET_FENCE 0x6
672#define DMA_PACKET_TRAP 0x7
673#define DMA_PACKET_SRBM_WRITE 0x9
674#define DMA_PACKET_CONSTANT_FILL 0xd
675#define DMA_PACKET_NOP 0xf
676
591#endif 677#endif
592 678
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 376884f1bcd2..8ff7cac222dc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4135,23 +4135,36 @@ int r100_init(struct radeon_device *rdev)
4135 return 0; 4135 return 0;
4136} 4136}
4137 4137
4138uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg) 4138uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
4139 bool always_indirect)
4139{ 4140{
4140 if (reg < rdev->rmmio_size) 4141 if (reg < rdev->rmmio_size && !always_indirect)
4141 return readl(((void __iomem *)rdev->rmmio) + reg); 4142 return readl(((void __iomem *)rdev->rmmio) + reg);
4142 else { 4143 else {
4144 unsigned long flags;
4145 uint32_t ret;
4146
4147 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4143 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 4148 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4144 return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 4149 ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4150 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4151
4152 return ret;
4145 } 4153 }
4146} 4154}
4147 4155
4148void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) 4156void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
4157 bool always_indirect)
4149{ 4158{
4150 if (reg < rdev->rmmio_size) 4159 if (reg < rdev->rmmio_size && !always_indirect)
4151 writel(v, ((void __iomem *)rdev->rmmio) + reg); 4160 writel(v, ((void __iomem *)rdev->rmmio) + reg);
4152 else { 4161 else {
4162 unsigned long flags;
4163
4164 spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
4153 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); 4165 writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
4154 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); 4166 writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
4167 spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
4155 } 4168 }
4156} 4169}
4157 4170
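
The change above protects the classic index/data pair: registers beyond the directly mapped aperture are reached by writing the register number to MM_INDEX and then touching MM_DATA, and the new lock keeps that two-step sequence atomic against concurrent users. The userspace-style sketch below mimics the idea with a fake register file and a pthread mutex standing in for the driver's spinlock; none of these names are the driver's own.

#include <stdio.h>
#include <pthread.h>

#define MM_INDEX 0	/* stand-in offsets into a fake register file */
#define MM_DATA  1

static unsigned fake_mmio[2];
static unsigned fake_backing[0x10000];	/* registers only reachable indirectly */
static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;

/* emulate the hardware: a write to MM_DATA lands at fake_backing[MM_INDEX] */
static void mmio_write(unsigned off, unsigned v)
{
	fake_mmio[off] = v;
	if (off == MM_DATA)
		fake_backing[fake_mmio[MM_INDEX] & 0xffff] = v;
}

static unsigned mmio_read(unsigned off)
{
	return off == MM_DATA ? fake_backing[fake_mmio[MM_INDEX] & 0xffff]
			      : fake_mmio[off];
}

/* indirect read: the index write and data read must not interleave */
static unsigned indirect_rreg(unsigned reg)
{
	unsigned ret;

	pthread_mutex_lock(&idx_lock);
	mmio_write(MM_INDEX, reg);
	ret = mmio_read(MM_DATA);
	pthread_mutex_unlock(&idx_lock);
	return ret;
}

int main(void)
{
	pthread_mutex_lock(&idx_lock);
	mmio_write(MM_INDEX, 0x1234);
	mmio_write(MM_DATA, 0xCAFED00D);
	pthread_mutex_unlock(&idx_lock);

	printf("reg 0x1234 = 0x%08x\n", indirect_rreg(0x1234));
	return 0;
}
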
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index cda280d157da..2aaf147969bd 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1370,6 +1370,29 @@ bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1370 return radeon_ring_test_lockup(rdev, ring); 1370 return radeon_ring_test_lockup(rdev, ring);
1371} 1371}
1372 1372
1373/**
1374 * r600_dma_is_lockup - Check if the DMA engine is locked up
1375 *
1376 * @rdev: radeon_device pointer
1377 * @ring: radeon_ring structure holding ring information
1378 *
1379 * Check if the async DMA engine is locked up (r6xx-evergreen).
1380 * Returns true if the engine appears to be locked up, false if not.
1381 */
1382bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
1383{
1384 u32 dma_status_reg;
1385
1386 dma_status_reg = RREG32(DMA_STATUS_REG);
1387 if (dma_status_reg & DMA_IDLE) {
1388 radeon_ring_lockup_update(ring);
1389 return false;
1390 }
1391 /* force ring activities */
1392 radeon_ring_force_activity(rdev, ring);
1393 return radeon_ring_test_lockup(rdev, ring);
1394}
1395
1373int r600_asic_reset(struct radeon_device *rdev) 1396int r600_asic_reset(struct radeon_device *rdev)
1374{ 1397{
1375 return r600_gpu_soft_reset(rdev); 1398 return r600_gpu_soft_reset(rdev);
@@ -1424,13 +1447,7 @@ u32 r6xx_remap_render_backend(struct radeon_device *rdev,
1424 1447
1425int r600_count_pipe_bits(uint32_t val) 1448int r600_count_pipe_bits(uint32_t val)
1426{ 1449{
1427 int i, ret = 0; 1450 return hweight32(val);
1428
1429 for (i = 0; i < 32; i++) {
1430 ret += val & 1;
1431 val >>= 1;
1432 }
1433 return ret;
1434} 1451}
1435 1452
1436static void r600_gpu_init(struct radeon_device *rdev) 1453static void r600_gpu_init(struct radeon_device *rdev)
@@ -1594,6 +1611,7 @@ static void r600_gpu_init(struct radeon_device *rdev)
1594 WREG32(GB_TILING_CONFIG, tiling_config); 1611 WREG32(GB_TILING_CONFIG, tiling_config);
1595 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff); 1612 WREG32(DCP_TILING_CONFIG, tiling_config & 0xffff);
1596 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff); 1613 WREG32(HDP_TILING_CONFIG, tiling_config & 0xffff);
1614 WREG32(DMA_TILING_CONFIG, tiling_config & 0xffff);
1597 1615
1598 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8); 1616 tmp = R6XX_MAX_PIPES - r600_count_pipe_bits((cc_gc_shader_pipe_config & INACTIVE_QD_PIPES_MASK) >> 8);
1599 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK); 1617 WREG32(VGT_OUT_DEALLOC_CNTL, (tmp * 4) & DEALLOC_DIST_MASK);
@@ -1871,6 +1889,7 @@ void r600_cp_stop(struct radeon_device *rdev)
1871 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1889 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1872 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1)); 1890 WREG32(R_0086D8_CP_ME_CNTL, S_0086D8_CP_ME_HALT(1));
1873 WREG32(SCRATCH_UMSK, 0); 1891 WREG32(SCRATCH_UMSK, 0);
1892 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1874} 1893}
1875 1894
1876int r600_init_microcode(struct radeon_device *rdev) 1895int r600_init_microcode(struct radeon_device *rdev)
@@ -2196,6 +2215,128 @@ void r600_cp_fini(struct radeon_device *rdev)
2196 radeon_scratch_free(rdev, ring->rptr_save_reg); 2215 radeon_scratch_free(rdev, ring->rptr_save_reg);
2197} 2216}
2198 2217
2218/*
2219 * DMA
2220 * Starting with R600, the GPU has an asynchronous
2221 * DMA engine. The programming model is very similar
2222 * to the 3D engine (ring buffer, IBs, etc.), but the
2223 * DMA controller has its own packet format that is
2224 * different from the PM4 format used by the 3D engine.
2225 * It supports copying data, writing embedded data,
2226 * solid fills, and a number of other things. It also
2227 * has support for tiling/detiling of buffers.
2228 */
2229/**
2230 * r600_dma_stop - stop the async dma engine
2231 *
2232 * @rdev: radeon_device pointer
2233 *
2234 * Stop the async dma engine (r6xx-evergreen).
2235 */
2236void r600_dma_stop(struct radeon_device *rdev)
2237{
2238 u32 rb_cntl = RREG32(DMA_RB_CNTL);
2239
2240 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
2241
2242 rb_cntl &= ~DMA_RB_ENABLE;
2243 WREG32(DMA_RB_CNTL, rb_cntl);
2244
2245 rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
2246}
2247
2248/**
2249 * r600_dma_resume - setup and start the async dma engine
2250 *
2251 * @rdev: radeon_device pointer
2252 *
2253 * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
2254 * Returns 0 for success, error for failure.
2255 */
2256int r600_dma_resume(struct radeon_device *rdev)
2257{
2258 struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
2259 u32 rb_cntl, dma_cntl;
2260 u32 rb_bufsz;
2261 int r;
2262
2263 /* Reset dma */
2264 if (rdev->family >= CHIP_RV770)
2265 WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
2266 else
2267 WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
2268 RREG32(SRBM_SOFT_RESET);
2269 udelay(50);
2270 WREG32(SRBM_SOFT_RESET, 0);
2271
2272 WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
2273 WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
2274
2275 /* Set ring buffer size in dwords */
2276 rb_bufsz = drm_order(ring->ring_size / 4);
2277 rb_cntl = rb_bufsz << 1;
2278#ifdef __BIG_ENDIAN
2279 rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
2280#endif
2281 WREG32(DMA_RB_CNTL, rb_cntl);
2282
2283 /* Initialize the ring buffer's read and write pointers */
2284 WREG32(DMA_RB_RPTR, 0);
2285 WREG32(DMA_RB_WPTR, 0);
2286
2287 /* set the wb address whether it's enabled or not */
2288 WREG32(DMA_RB_RPTR_ADDR_HI,
2289 upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
2290 WREG32(DMA_RB_RPTR_ADDR_LO,
2291 ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
2292
2293 if (rdev->wb.enabled)
2294 rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
2295
2296 WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
2297
2298 /* enable DMA IBs */
2299 WREG32(DMA_IB_CNTL, DMA_IB_ENABLE);
2300
2301 dma_cntl = RREG32(DMA_CNTL);
2302 dma_cntl &= ~CTXEMPTY_INT_ENABLE;
2303 WREG32(DMA_CNTL, dma_cntl);
2304
2305 if (rdev->family >= CHIP_RV770)
2306 WREG32(DMA_MODE, 1);
2307
2308 ring->wptr = 0;
2309 WREG32(DMA_RB_WPTR, ring->wptr << 2);
2310
2311 ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
2312
2313 WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
2314
2315 ring->ready = true;
2316
2317 r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
2318 if (r) {
2319 ring->ready = false;
2320 return r;
2321 }
2322
2323 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
2324
2325 return 0;
2326}
2327
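
The ring size programming above is log2 based: DMA_RB_CNTL takes the base-2 log of the ring size in dwords, shifted into the size field. A worked example assuming the 64 KB DMA ring that r600_init() allocates; the helper below reproduces what drm_order() computes for this value.

#include <stdio.h>

/* ceil(log2(size)), matching drm_order() for power-of-two sizes */
static unsigned order_base_2_ceil(unsigned long size)
{
	unsigned order = 0;

	while ((1UL << order) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long ring_size = 64 * 1024;			/* bytes, as in r600_ring_init() */
	unsigned rb_bufsz = order_base_2_ceil(ring_size / 4);	/* size in dwords -> 14 */
	unsigned rb_cntl = rb_bufsz << 1;			/* DMA_RB_SIZE(x) is ((x) << 1) */

	printf("rb_bufsz = %u, DMA_RB_CNTL size bits = 0x%x\n", rb_bufsz, rb_cntl);
	return 0;
}
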
2328/**
2329 * r600_dma_fini - tear down the async dma engine
2330 *
2331 * @rdev: radeon_device pointer
2332 *
2333 * Stop the async dma engine and free the ring (r6xx-evergreen).
2334 */
2335void r600_dma_fini(struct radeon_device *rdev)
2336{
2337 r600_dma_stop(rdev);
2338 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
2339}
2199 2340
2200/* 2341/*
2201 * GPU scratch registers helpers function. 2342 * GPU scratch registers helpers function.
@@ -2252,6 +2393,64 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2252 return r; 2393 return r;
2253} 2394}
2254 2395
2396/**
2397 * r600_dma_ring_test - simple async dma engine test
2398 *
2399 * @rdev: radeon_device pointer
2400 * @ring: radeon_ring structure holding ring information
2401 *
 2402 * Test the DMA engine by using it to write a value
 2403 * to memory. (r6xx-SI).
2404 * Returns 0 for success, error for failure.
2405 */
2406int r600_dma_ring_test(struct radeon_device *rdev,
2407 struct radeon_ring *ring)
2408{
2409 unsigned i;
2410 int r;
2411 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
2412 u32 tmp;
2413
2414 if (!ptr) {
2415 DRM_ERROR("invalid vram scratch pointer\n");
2416 return -EINVAL;
2417 }
2418
2419 tmp = 0xCAFEDEAD;
2420 writel(tmp, ptr);
2421
2422 r = radeon_ring_lock(rdev, ring, 4);
2423 if (r) {
2424 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
2425 return r;
2426 }
2427 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
2428 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
2429 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
2430 radeon_ring_write(ring, 0xDEADBEEF);
2431 radeon_ring_unlock_commit(rdev, ring);
2432
2433 for (i = 0; i < rdev->usec_timeout; i++) {
2434 tmp = readl(ptr);
2435 if (tmp == 0xDEADBEEF)
2436 break;
2437 DRM_UDELAY(1);
2438 }
2439
2440 if (i < rdev->usec_timeout) {
2441 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2442 } else {
2443 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
2444 ring->idx, tmp);
2445 r = -EINVAL;
2446 }
2447 return r;
2448}
2449
2450/*
2451 * CP fences/semaphores
2452 */
2453
2255void r600_fence_ring_emit(struct radeon_device *rdev, 2454void r600_fence_ring_emit(struct radeon_device *rdev,
2256 struct radeon_fence *fence) 2455 struct radeon_fence *fence)
2257{ 2456{
@@ -2315,6 +2514,59 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
2315 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); 2514 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
2316} 2515}
2317 2516
2517/*
2518 * DMA fences/semaphores
2519 */
2520
2521/**
2522 * r600_dma_fence_ring_emit - emit a fence on the DMA ring
2523 *
2524 * @rdev: radeon_device pointer
2525 * @fence: radeon fence object
2526 *
2527 * Add a DMA fence packet to the ring to write
 2528 * the fence seq number and a DMA trap packet to generate
2529 * an interrupt if needed (r6xx-r7xx).
2530 */
2531void r600_dma_fence_ring_emit(struct radeon_device *rdev,
2532 struct radeon_fence *fence)
2533{
2534 struct radeon_ring *ring = &rdev->ring[fence->ring];
2535 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2536
2537 /* write the fence */
2538 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
2539 radeon_ring_write(ring, addr & 0xfffffffc);
2540 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
2541 radeon_ring_write(ring, lower_32_bits(fence->seq));
2542 /* generate an interrupt */
2543 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
2544}
2545
2546/**
2547 * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
2548 *
2549 * @rdev: radeon_device pointer
2550 * @ring: radeon_ring structure holding ring information
2551 * @semaphore: radeon semaphore object
2552 * @emit_wait: wait or signal semaphore
2553 *
 2554 * Add a DMA semaphore packet to the ring to wait on or
 2555 * signal other rings (r6xx-SI).
2556 */
2557void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
2558 struct radeon_ring *ring,
2559 struct radeon_semaphore *semaphore,
2560 bool emit_wait)
2561{
2562 u64 addr = semaphore->gpu_addr;
2563 u32 s = emit_wait ? 0 : 1;
2564
2565 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
2566 radeon_ring_write(ring, addr & 0xfffffffc);
2567 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
2568}
2569
2318int r600_copy_blit(struct radeon_device *rdev, 2570int r600_copy_blit(struct radeon_device *rdev,
2319 uint64_t src_offset, 2571 uint64_t src_offset,
2320 uint64_t dst_offset, 2572 uint64_t dst_offset,
@@ -2334,6 +2586,80 @@ int r600_copy_blit(struct radeon_device *rdev,
2334 return 0; 2586 return 0;
2335} 2587}
2336 2588
2589/**
2590 * r600_copy_dma - copy pages using the DMA engine
2591 *
2592 * @rdev: radeon_device pointer
2593 * @src_offset: src GPU address
2594 * @dst_offset: dst GPU address
2595 * @num_gpu_pages: number of GPU pages to xfer
2596 * @fence: radeon fence object
2597 *
 2598 * Copy GPU pages using the DMA engine (r6xx-r7xx).
2599 * Used by the radeon ttm implementation to move pages if
2600 * registered as the asic copy callback.
2601 */
2602int r600_copy_dma(struct radeon_device *rdev,
2603 uint64_t src_offset, uint64_t dst_offset,
2604 unsigned num_gpu_pages,
2605 struct radeon_fence **fence)
2606{
2607 struct radeon_semaphore *sem = NULL;
2608 int ring_index = rdev->asic->copy.dma_ring_index;
2609 struct radeon_ring *ring = &rdev->ring[ring_index];
2610 u32 size_in_dw, cur_size_in_dw;
2611 int i, num_loops;
2612 int r = 0;
2613
2614 r = radeon_semaphore_create(rdev, &sem);
2615 if (r) {
2616 DRM_ERROR("radeon: moving bo (%d).\n", r);
2617 return r;
2618 }
2619
2620 size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
2621 num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
2622 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
2623 if (r) {
2624 DRM_ERROR("radeon: moving bo (%d).\n", r);
2625 radeon_semaphore_free(rdev, &sem, NULL);
2626 return r;
2627 }
2628
2629 if (radeon_fence_need_sync(*fence, ring->idx)) {
2630 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
2631 ring->idx);
2632 radeon_fence_note_sync(*fence, ring->idx);
2633 } else {
2634 radeon_semaphore_free(rdev, &sem, NULL);
2635 }
2636
2637 for (i = 0; i < num_loops; i++) {
2638 cur_size_in_dw = size_in_dw;
2639 if (cur_size_in_dw > 0xFFFF)
2640 cur_size_in_dw = 0xFFFF;
2641 size_in_dw -= cur_size_in_dw;
2642 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
2643 radeon_ring_write(ring, dst_offset & 0xfffffffc);
2644 radeon_ring_write(ring, src_offset & 0xfffffffc);
2645 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
2646 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
2647 src_offset += cur_size_in_dw * 4;
2648 dst_offset += cur_size_in_dw * 4;
2649 }
2650
2651 r = radeon_fence_emit(rdev, fence, ring->idx);
2652 if (r) {
2653 radeon_ring_unlock_undo(rdev, ring);
2654 return r;
2655 }
2656
2657 radeon_ring_unlock_commit(rdev, ring);
2658 radeon_semaphore_free(rdev, &sem, *fence);
2659
2660 return r;
2661}
2662
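
r600_copy_dma() above caps each COPY packet at 0xFFFF dwords and reserves 5 ring dwords per packet plus a fixed 8 dwords of overhead for the semaphore sync and fence. A worked sizing example with a made-up transfer size:

#include <stdio.h>

#define RADEON_GPU_PAGE_SHIFT	12	/* 4 KB GPU pages, as elsewhere in the driver */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned num_gpu_pages = 2048;				/* hypothetical 8 MB copy */
	unsigned size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
	int num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
	unsigned ring_dw = num_loops * 5 + 8;			/* what radeon_ring_lock() reserves */

	printf("%u dwords total, %d COPY packets, %u ring dwords reserved\n",
	       size_in_dw, num_loops, ring_dw);			/* 2097152, 33, 173 */
	return 0;
}
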
2337int r600_set_surface_reg(struct radeon_device *rdev, int reg, 2663int r600_set_surface_reg(struct radeon_device *rdev, int reg,
2338 uint32_t tiling_flags, uint32_t pitch, 2664 uint32_t tiling_flags, uint32_t pitch,
2339 uint32_t offset, uint32_t obj_size) 2665 uint32_t offset, uint32_t obj_size)
@@ -2349,7 +2675,7 @@ void r600_clear_surface_reg(struct radeon_device *rdev, int reg)
2349 2675
 static int r600_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -2394,6 +2720,12 @@ static int r600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
+	if (r) {
+		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
+		return r;
+	}
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -2403,12 +2735,20 @@ static int r600_startup(struct radeon_device *rdev)
 	}
 	r600_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
+	if (r)
+		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
+			     DMA_RB_RPTR, DMA_RB_WPTR,
+			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
+
 	r = r600_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -2416,6 +2756,10 @@ static int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	r = r600_dma_resume(rdev);
+	if (r)
+		return r;
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2471,7 +2815,7 @@ int r600_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
 	r600_cp_stop(rdev);
-	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
+	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
@@ -2544,6 +2888,9 @@ int r600_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
+	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -2556,6 +2903,7 @@ int r600_init(struct radeon_device *rdev)
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r600_cp_fini(rdev);
+		r600_dma_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -2572,6 +2920,7 @@ void r600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r600_blit_fini(rdev);
 	r600_cp_fini(rdev);
+	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
@@ -2674,6 +3023,104 @@ free_scratch:
 	return r;
 }
 
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_ib ib;
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp = 0;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+	ib.ptr[3] = 0xDEADBEEF;
+	ib.length_dw = 4;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib.fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
 /*
  * Interrupts
  *
@@ -2865,6 +3312,8 @@ static void r600_disable_interrupt_state(struct radeon_device *rdev)
 	u32 tmp;
 
 	WREG32(CP_INT_CNTL, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	tmp = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
+	WREG32(DMA_CNTL, tmp);
 	WREG32(GRBM_INT_CNTL, 0);
 	WREG32(DxMODE_INT_MASK, 0);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, 0);
@@ -3006,6 +3455,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	u32 grbm_int_cntl = 0;
 	u32 hdmi0, hdmi1;
 	u32 d1grph = 0, d2grph = 0;
+	u32 dma_cntl;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3040,12 +3490,19 @@ int r600_irq_set(struct radeon_device *rdev)
 		hdmi0 = RREG32(HDMI0_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 		hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK;
 	}
+	dma_cntl = RREG32(DMA_CNTL) & ~TRAP_ENABLE;
 
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("r600_irq_set: sw int\n");
 		cp_int_cntl |= RB_INT_ENABLE;
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
+
+	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
+		DRM_DEBUG("r600_irq_set: sw int dma\n");
+		dma_cntl |= TRAP_ENABLE;
+	}
+
 	if (rdev->irq.crtc_vblank_int[0] ||
 	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("r600_irq_set: vblank 0\n");
@@ -3090,6 +3547,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	}
 
 	WREG32(CP_INT_CNTL, cp_int_cntl);
+	WREG32(DMA_CNTL, dma_cntl);
 	WREG32(DxMODE_INT_MASK, mode_int);
 	WREG32(D1GRPH_INTERRUPT_CONTROL, d1grph);
 	WREG32(D2GRPH_INTERRUPT_CONTROL, d2grph);
@@ -3469,6 +3927,10 @@ restart_ih:
 			DRM_DEBUG("IH: CP EOP\n");
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
 			break;
+		case 224: /* DMA trap event */
+			DRM_DEBUG("IH: DMA trap\n");
+			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
+			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
 			break;
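The DMA ring has a hard alignment rule that the new r600_dma_ring_ib_execute() above has to respect: an INDIRECT_BUFFER packet is three dwords long and must end exactly on an 8-dword boundary, so the write pointer is first padded with NOPs until (wptr & 7) == 5. A minimal sketch of that rule, using a hypothetical helper name and only the DMA_PACKET()/DMA_PACKET_NOP definitions added to r600d.h later in this merge:

/* Illustrative sketch, not part of the patch: how many NOP dwords the
 * DMA ring needs before an INDIRECT_BUFFER packet so the 3-dword packet
 * ends on an 8-dword boundary.
 */
static unsigned r600_dma_ib_pad_dwords(unsigned wptr)
{
	unsigned pad = 0;

	while (((wptr + pad) & 7) != 5)
		pad++;
	return pad;	/* emit this many DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0) dwords */
}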
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 2514123d2d00..be85f75aedda 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -721,12 +721,7 @@ static u32 r600_get_tile_pipe_to_backend_map(u32 num_tile_pipes,
 
 static int r600_count_pipe_bits(uint32_t val)
 {
-	int i, ret = 0;
-	for (i = 0; i < 32; i++) {
-		ret += val & 1;
-		val >>= 1;
-	}
-	return ret;
+	return hweight32(val);
 }
 
 static void r600_gfx_init(struct drm_device *dev,
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 211c40252fe0..0be768be530c 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -657,87 +657,30 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
 		/* nby is npipes htiles aligned == npipes * 8 pixel aligned */
 		nby = round_up(nby, track->npipes * 8);
 	} else {
-		/* htile widht & nby (8 or 4) make 2 bits number */
-		tmp = track->htile_surface & 3;
+		/* always assume 8x8 htile */
 		/* align is htile align * 8, htile align vary according to
 		 * number of pipe and tile width and nby
 		 */
 		switch (track->npipes) {
 		case 8:
-			switch (tmp) {
-			case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-				nbx = round_up(nbx, 64 * 8);
-				nby = round_up(nby, 64 * 8);
-				break;
-			case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-			case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 64 * 8);
-				nby = round_up(nby, 32 * 8);
-				break;
-			case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 32 * 8);
-				break;
-			default:
-				return -EINVAL;
-			}
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+			nbx = round_up(nbx, 64 * 8);
+			nby = round_up(nby, 64 * 8);
 			break;
 		case 4:
-			switch (tmp) {
-			case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-				nbx = round_up(nbx, 64 * 8);
-				nby = round_up(nby, 32 * 8);
-				break;
-			case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-			case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 32 * 8);
-				break;
-			case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 16 * 8);
-				break;
-			default:
-				return -EINVAL;
-			}
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+			nbx = round_up(nbx, 64 * 8);
+			nby = round_up(nby, 32 * 8);
 			break;
 		case 2:
-			switch (tmp) {
-			case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 32 * 8);
-				break;
-			case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-			case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 16 * 8);
-				break;
-			case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 16 * 8);
-				nby = round_up(nby, 16 * 8);
-				break;
-			default:
-				return -EINVAL;
-			}
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+			nbx = round_up(nbx, 32 * 8);
+			nby = round_up(nby, 32 * 8);
 			break;
 		case 1:
-			switch (tmp) {
-			case 3:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
-				nbx = round_up(nbx, 32 * 8);
-				nby = round_up(nby, 16 * 8);
-				break;
-			case 2:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 8*/
-			case 1:	/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 16 * 8);
-				nby = round_up(nby, 16 * 8);
-				break;
-			case 0:	/* HTILE_WIDTH = 4 & HTILE_HEIGHT = 4*/
-				nbx = round_up(nbx, 16 * 8);
-				nby = round_up(nby, 8 * 8);
-				break;
-			default:
-				return -EINVAL;
-			}
+			/* HTILE_WIDTH = 8 & HTILE_HEIGHT = 8*/
+			nbx = round_up(nbx, 32 * 8);
+			nby = round_up(nby, 16 * 8);
 			break;
 		default:
 			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
@@ -746,9 +689,10 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p)
 		}
 	}
 	/* compute number of htile */
-	nbx = G_028D24_HTILE_WIDTH(track->htile_surface) ? nbx / 8 : nbx / 4;
-	nby = G_028D24_HTILE_HEIGHT(track->htile_surface) ? nby / 8 : nby / 4;
-	size = nbx * nby * 4;
+	nbx = nbx >> 3;
+	nby = nby >> 3;
+	/* size must be aligned on npipes * 2K boundary */
+	size = roundup(nbx * nby * 4, track->npipes * (2 << 10));
 	size += track->htile_offset;
 
 	if (size > radeon_bo_size(track->htile_bo)) {
@@ -1492,6 +1436,8 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		break;
 	case DB_HTILE_SURFACE:
 		track->htile_surface = radeon_get_ib_value(p, idx);
+		/* force 8x8 htile width and height */
+		ib[idx] |= 3;
 		track->db_dirty = true;
 		break;
 	case SQ_PGM_START_FS:
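With HTILE_WIDTH and HTILE_HEIGHT now forced to 8x8 by the checker, the buffer-size rule above reduces to one 32-bit htile word per 8x8 pixel block, rounded up to an npipes * 2KB boundary. A sketch of that computation (hypothetical helper; it assumes nbx/nby have already been aligned by the switch above):

static u64 r600_htile_bytes(u32 nbx, u32 nby, u32 npipes)
{
	/* one 4-byte htile entry per 8x8 block, padded to npipes * 2KB */
	return roundup((u64)(nbx / 8) * (nby / 8) * 4, (u64)npipes * 2048);
}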
@@ -1949,6 +1895,78 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_CP_DMA:
+	{
+		u32 command, size;
+		u64 offset, tmp;
+		if (pkt->count != 4) {
+			DRM_ERROR("bad CP DMA\n");
+			return -EINVAL;
+		}
+		command = radeon_get_ib_value(p, idx+4);
+		size = command & 0x1fffff;
+		if (command & PACKET3_CP_DMA_CMD_SAS) {
+			/* src address space is register */
+			DRM_ERROR("CP DMA SAS not supported\n");
+			return -EINVAL;
+		} else {
+			if (command & PACKET3_CP_DMA_CMD_SAIC) {
+				DRM_ERROR("CP DMA SAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			/* src address space is memory */
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad CP DMA SRC\n");
+				return -EINVAL;
+			}
+
+			tmp = radeon_get_ib_value(p, idx) +
+				((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);
+
+			offset = reloc->lobj.gpu_offset + tmp;
+
+			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+				dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
+					 tmp + size, radeon_bo_size(reloc->robj));
+				return -EINVAL;
+			}
+
+			ib[idx] = offset;
+			ib[idx+1] = (ib[idx+1] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
+		}
+		if (command & PACKET3_CP_DMA_CMD_DAS) {
+			/* dst address space is register */
+			DRM_ERROR("CP DMA DAS not supported\n");
+			return -EINVAL;
+		} else {
+			/* dst address space is memory */
+			if (command & PACKET3_CP_DMA_CMD_DAIC) {
+				DRM_ERROR("CP DMA DAIC only supported for registers\n");
+				return -EINVAL;
+			}
+			r = r600_cs_packet_next_reloc(p, &reloc);
+			if (r) {
+				DRM_ERROR("bad CP DMA DST\n");
+				return -EINVAL;
+			}
+
+			tmp = radeon_get_ib_value(p, idx+2) +
+				((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);
+
+			offset = reloc->lobj.gpu_offset + tmp;
+
+			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
+				dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
+					 tmp + size, radeon_bo_size(reloc->robj));
+				return -EINVAL;
+			}
+
+			ib[idx+2] = offset;
+			ib[idx+3] = upper_32_bits(offset) & 0xff;
+		}
+		break;
+	}
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
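The checker above only accepts memory-to-memory CP DMA: register source (SAS), register destination (DAS) and the no-increment variants (SAIC/DAIC) are refused, and the low 21 bits of the COMMAND dword carry the byte count used for the bounds checks. A small decode sketch, assuming the PACKET3_CP_DMA_CMD_* bit definitions added to r600d.h further down; the helper name is hypothetical:

static bool r600_cp_dma_cmd_allowed(u32 command, u32 *byte_count)
{
	*byte_count = command & 0x1fffff;	/* BYTE_COUNT [20:0] */

	/* register address spaces and address-increment overrides are refused */
	if (command & (PACKET3_CP_DMA_CMD_SAS | PACKET3_CP_DMA_CMD_DAS |
		       PACKET3_CP_DMA_CMD_SAIC | PACKET3_CP_DMA_CMD_DAIC))
		return false;
	return true;
}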
@@ -2496,3 +2514,196 @@ void r600_cs_legacy_init(void)
 {
 	r600_cs_packet_next_reloc = &r600_cs_packet_next_reloc_nomm;
 }
+
+/*
+ * DMA
+ */
+/**
+ * r600_dma_cs_next_reloc() - parse next reloc
+ * @p:		parser structure holding parsing context.
+ * @cs_reloc:		reloc informations
+ *
+ * Return the next reloc, do bo validation and compute
+ * GPU offset using the provided start.
+ **/
+int r600_dma_cs_next_reloc(struct radeon_cs_parser *p,
+			   struct radeon_cs_reloc **cs_reloc)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	unsigned idx;
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+	*cs_reloc = NULL;
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	idx = p->dma_reloc_idx;
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+	*cs_reloc = p->relocs_ptr[idx];
+	p->dma_reloc_idx++;
+	return 0;
+}
+
+#define GET_DMA_CMD(h) (((h) & 0xf0000000) >> 28)
+#define GET_DMA_COUNT(h) ((h) & 0x0000ffff)
+#define GET_DMA_T(h) (((h) & 0x00800000) >> 23)
+
+/**
+ * r600_dma_cs_parse() - parse the DMA IB
+ * @p:		parser structure holding parsing context.
+ *
+ * Parses the DMA IB from the CS ioctl and updates
+ * the GPU addresses based on the reloc information and
+ * checks for errors. (R6xx-R7xx)
+ * Returns 0 for success and an error on failure.
+ **/
+int r600_dma_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_reloc *src_reloc, *dst_reloc;
+	u32 header, cmd, count, tiled;
+	volatile u32 *ib = p->ib.ptr;
+	u32 idx, idx_value;
+	u64 src_offset, dst_offset;
+	int r;
+
+	do {
+		if (p->idx >= ib_chunk->length_dw) {
+			DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
+				  p->idx, ib_chunk->length_dw);
+			return -EINVAL;
+		}
+		idx = p->idx;
+		header = radeon_get_ib_value(p, idx);
+		cmd = GET_DMA_CMD(header);
+		count = GET_DMA_COUNT(header);
+		tiled = GET_DMA_T(header);
+
+		switch (cmd) {
+		case DMA_PACKET_WRITE:
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				dst_offset = ib[idx+1];
+				dst_offset <<= 8;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				p->idx += count + 5;
+			} else {
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+2] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += count + 3;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_COPY:
+			r = r600_dma_cs_next_reloc(p, &src_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_COPY\n");
+				return -EINVAL;
+			}
+			if (tiled) {
+				idx_value = radeon_get_ib_value(p, idx + 2);
+				/* detile bit */
+				if (idx_value & (1 << 31)) {
+					/* tiled src, linear dst */
+					src_offset = ib[idx+1];
+					src_offset <<= 8;
+					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+
+					dst_offset = ib[idx+5];
+					dst_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				} else {
+					/* linear src, tiled dst */
+					src_offset = ib[idx+5];
+					src_offset |= ((u64)(ib[idx+6] & 0xff)) << 32;
+					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+
+					dst_offset = ib[idx+1];
+					dst_offset <<= 8;
+					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				}
+				p->idx += 7;
+			} else {
+				src_offset = ib[idx+2];
+				src_offset |= ((u64)(ib[idx+4] & 0xff)) << 32;
+				dst_offset = ib[idx+1];
+				dst_offset |= ((u64)(ib[idx+3] & 0xff)) << 32;
+
+				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				p->idx += 5;
+			}
+			if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
+				dev_warn(p->dev, "DMA copy src buffer too small (%llu %lu)\n",
+					 src_offset + (count * 4), radeon_bo_size(src_reloc->robj));
+				return -EINVAL;
+			}
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA write dst buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			break;
+		case DMA_PACKET_CONSTANT_FILL:
+			if (p->family < CHIP_RV770) {
+				DRM_ERROR("Constant Fill is 7xx only !\n");
+				return -EINVAL;
+			}
+			r = r600_dma_cs_next_reloc(p, &dst_reloc);
+			if (r) {
+				DRM_ERROR("bad DMA_PACKET_WRITE\n");
+				return -EINVAL;
+			}
+			dst_offset = ib[idx+1];
+			dst_offset |= ((u64)(ib[idx+3] & 0x00ff0000)) << 16;
+			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
+				dev_warn(p->dev, "DMA constant fill buffer too small (%llu %lu)\n",
+					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
+				return -EINVAL;
+			}
+			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			p->idx += 4;
+			break;
+		case DMA_PACKET_NOP:
+			p->idx += 1;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d at %d !\n", cmd, idx);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+#if 0
+	for (r = 0; r < p->ib->length_dw; r++) {
+		printk(KERN_INFO "%05d  0x%08X\n", r, p->ib.ptr[r]);
+		mdelay(1);
+	}
+#endif
+	return 0;
+}
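Each r6xx DMA packet starts with a single header dword; r600_dma_cs_parse() above splits it with GET_DMA_CMD(), GET_DMA_COUNT() and GET_DMA_T() before dispatching on the command. A standalone sketch of that decode (the struct and helper names are hypothetical, the bit positions match the macros above):

struct r600_dma_hdr {
	u32 cmd;	/* DMA_PACKET_WRITE, DMA_PACKET_COPY, ... */
	u32 count;	/* payload dword count, bits [15:0] */
	u32 tiled;	/* tiled addressing variant, bit 23 */
};

static struct r600_dma_hdr r600_dma_decode_header(u32 header)
{
	struct r600_dma_hdr h = {
		.cmd   = (header >> 28) & 0xf,
		.count = header & 0xffff,
		.tiled = (header >> 23) & 0x1,
	};
	return h;
}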
diff --git a/drivers/gpu/drm/radeon/r600_reg.h b/drivers/gpu/drm/radeon/r600_reg.h
index 2b960cb5c18a..909219b1bf80 100644
--- a/drivers/gpu/drm/radeon/r600_reg.h
+++ b/drivers/gpu/drm/radeon/r600_reg.h
@@ -96,6 +96,15 @@
 #define R600_CONFIG_F0_BASE	0x542C
 #define R600_CONFIG_APER_SIZE	0x5430
 
+#define R600_BIF_FB_EN	0x5490
+#define R600_FB_READ_EN	(1 << 0)
+#define R600_FB_WRITE_EN	(1 << 1)
+
+#define R600_CITF_CNTL	0x200c
+#define R600_BLACKOUT_MASK	0x00000003
+
+#define R700_MC_CITF_CNTL	0x25c0
+
 #define R600_ROM_CNTL	0x1600
 #	define R600_SCK_OVERWRITE	(1 << 1)
 #	define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT	28
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index fa6f37099ba9..4a53402b1852 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -590,9 +590,59 @@
 #define WAIT_2D_IDLECLEAN_bit	(1 << 16)
 #define WAIT_3D_IDLECLEAN_bit	(1 << 17)
 
+/* async DMA */
+#define DMA_TILING_CONFIG	0x3ec4
+#define DMA_CONFIG	0x3e4c
+
+#define DMA_RB_CNTL	0xd000
+#       define DMA_RB_ENABLE	(1 << 0)
+#       define DMA_RB_SIZE(x)	((x) << 1) /* log2 */
+#       define DMA_RB_SWAP_ENABLE	(1 << 9) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_ENABLE	(1 << 12)
+#       define DMA_RPTR_WRITEBACK_SWAP_ENABLE	(1 << 13) /* 8IN32 */
+#       define DMA_RPTR_WRITEBACK_TIMER(x)	((x) << 16) /* log2 */
+#define DMA_RB_BASE	0xd004
+#define DMA_RB_RPTR	0xd008
+#define DMA_RB_WPTR	0xd00c
+
+#define DMA_RB_RPTR_ADDR_HI	0xd01c
+#define DMA_RB_RPTR_ADDR_LO	0xd020
+
+#define DMA_IB_CNTL	0xd024
+#       define DMA_IB_ENABLE	(1 << 0)
+#       define DMA_IB_SWAP_ENABLE	(1 << 4)
+#define DMA_IB_RPTR	0xd028
+#define DMA_CNTL	0xd02c
+#       define TRAP_ENABLE	(1 << 0)
+#       define SEM_INCOMPLETE_INT_ENABLE	(1 << 1)
+#       define SEM_WAIT_INT_ENABLE	(1 << 2)
+#       define DATA_SWAP_ENABLE	(1 << 3)
+#       define FENCE_SWAP_ENABLE	(1 << 4)
+#       define CTXEMPTY_INT_ENABLE	(1 << 28)
+#define DMA_STATUS_REG	0xd034
+#       define DMA_IDLE	(1 << 0)
+#define DMA_SEM_INCOMPLETE_TIMER_CNTL	0xd044
+#define DMA_SEM_WAIT_FAIL_TIMER_CNTL	0xd048
+#define DMA_MODE	0xd0bc
+
+/* async DMA packets */
+#define DMA_PACKET(cmd, t, s, n)	((((cmd) & 0xF) << 28) |	\
+					 (((t) & 0x1) << 23) |		\
+					 (((s) & 0x1) << 22) |		\
+					 (((n) & 0xFFFF) << 0))
+/* async DMA Packet types */
+#define DMA_PACKET_WRITE	0x2
+#define DMA_PACKET_COPY	0x3
+#define DMA_PACKET_INDIRECT_BUFFER	0x4
+#define DMA_PACKET_SEMAPHORE	0x5
+#define DMA_PACKET_FENCE	0x6
+#define DMA_PACKET_TRAP	0x7
+#define DMA_PACKET_CONSTANT_FILL	0xd /* 7xx only */
+#define DMA_PACKET_NOP	0xf
+
 #define IH_RB_CNTL	0x3e00
 #       define IH_RB_ENABLE	(1 << 0)
-#       define IH_IB_SIZE(x)	((x) << 1) /* log2 */
+#       define IH_RB_SIZE(x)	((x) << 1) /* log2 */
 #       define IH_RB_FULL_DRAIN_ENABLE	(1 << 6)
 #       define IH_WPTR_WRITEBACK_ENABLE	(1 << 8)
 #       define IH_WPTR_WRITEBACK_TIMER(x)	((x) << 9) /* log2 */
@@ -637,7 +687,9 @@
 #define TN_RLC_CLEAR_STATE_RESTORE_BASE	0x3f20
 
 #define SRBM_SOFT_RESET	0xe60
+#       define SOFT_RESET_DMA	(1 << 12)
 #       define SOFT_RESET_RLC	(1 << 13)
+#       define RV770_SOFT_RESET_DMA	(1 << 20)
 
 #define CP_INT_CNTL	0xc124
 #       define CNTX_BUSY_INT_ENABLE	(1 << 19)
@@ -1134,6 +1186,38 @@
 #define	PACKET3_WAIT_REG_MEM	0x3C
 #define	PACKET3_MEM_WRITE	0x3D
 #define	PACKET3_INDIRECT_BUFFER	0x32
+#define	PACKET3_CP_DMA	0x41
+/* 1. header
+ * 2. SRC_ADDR_LO [31:0]
+ * 3. CP_SYNC [31] | SRC_ADDR_HI [7:0]
+ * 4. DST_ADDR_LO [31:0]
+ * 5. DST_ADDR_HI [7:0]
+ * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
+ */
+#              define PACKET3_CP_DMA_CP_SYNC	(1 << 31)
+/* COMMAND */
+#              define PACKET3_CP_DMA_CMD_SRC_SWAP(x)	((x) << 23)
+		/* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_DST_SWAP(x)	((x) << 24)
+		/* 0 - none
+		 * 1 - 8 in 16
+		 * 2 - 8 in 32
+		 * 3 - 8 in 64
+		 */
+#              define PACKET3_CP_DMA_CMD_SAS	(1 << 26)
+		/* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_DAS	(1 << 27)
+		/* 0 - memory
+		 * 1 - register
+		 */
+#              define PACKET3_CP_DMA_CMD_SAIC	(1 << 28)
+#              define PACKET3_CP_DMA_CMD_DAIC	(1 << 29)
 #define	PACKET3_SURFACE_SYNC	0x43
 #              define PACKET3_CB0_DEST_BASE_ENA	(1 << 6)
 #              define PACKET3_TC_ACTION_ENA	(1 << 23)
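DMA_PACKET() above packs the command, the tiled flag, a sub-field and the dword count into one header dword. As an example, the 4-dword write used by the new DMA ring and IB tests can be encoded like this (illustrative sketch only; the helper name is hypothetical, lower_32_bits()/upper_32_bits() are the usual kernel helpers):

static void r600_dma_encode_write1(u32 *pkt, u64 dst_gpu_addr, u32 value)
{
	pkt[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);	/* one payload dword */
	pkt[1] = lower_32_bits(dst_gpu_addr) & 0xfffffffc;
	pkt[2] = upper_32_bits(dst_gpu_addr) & 0xff;	/* 40-bit addressing */
	pkt[3] = value;
}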
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8c42d54c2e26..5dc744d43d12 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,7 +109,7 @@ extern int radeon_lockup_timeout;
 #define RADEON_BIOS_NUM_SCRATCH	8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS	3
+#define RADEON_NUM_RINGS	5
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ	0LL
@@ -122,6 +122,11 @@ extern int radeon_lockup_timeout;
 #define CAYMAN_RING_TYPE_CP1_INDEX	1
 #define CAYMAN_RING_TYPE_CP2_INDEX	2
 
+/* R600+ has an async dma ring */
+#define R600_RING_TYPE_DMA_INDEX	3
+/* cayman add a second async dma ring */
+#define CAYMAN_RING_TYPE_DMA1_INDEX	4
+
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET	(1 << 20)
 #define RADEON_VA_RESERVED_SIZE	(8 << 20)
@@ -313,6 +318,7 @@ struct radeon_bo {
 	struct list_head	list;
 	/* Protected by tbo.reserved */
 	u32			placements[3];
+	u32			busy_placements[3];
 	struct ttm_placement	placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj	kmap;
@@ -787,6 +793,15 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigne
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
 
 
+/* r600 async dma */
+void r600_dma_stop(struct radeon_device *rdev);
+int r600_dma_resume(struct radeon_device *rdev);
+void r600_dma_fini(struct radeon_device *rdev);
+
+void cayman_dma_stop(struct radeon_device *rdev);
+int cayman_dma_resume(struct radeon_device *rdev);
+void cayman_dma_fini(struct radeon_device *rdev);
+
 /*
  * CS.
  */
@@ -824,6 +839,7 @@ struct radeon_cs_parser {
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
 	struct list_head	validated;
+	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
 	int			chunk_ib_idx;
 	int			chunk_relocs_idx;
@@ -883,7 +899,9 @@ struct radeon_wb {
 #define RADEON_WB_CP_RPTR_OFFSET	1024
 #define RADEON_WB_CP1_RPTR_OFFSET	1280
 #define RADEON_WB_CP2_RPTR_OFFSET	1536
+#define R600_WB_DMA_RPTR_OFFSET	1792
 #define R600_WB_IH_WPTR_OFFSET	2048
+#define CAYMAN_WB_DMA1_RPTR_OFFSET	2304
 #define R600_WB_EVENT_OFFSET	3072
 
 /**
@@ -1539,6 +1557,8 @@ struct radeon_device {
 	/* Register mmio */
 	resource_size_t			rmmio_base;
 	resource_size_t			rmmio_size;
+	/* protects concurrent MM_INDEX/DATA based register access */
+	spinlock_t			mmio_idx_lock;
 	void __iomem			*rmmio;
 	radeon_rreg_t			mc_rreg;
 	radeon_wreg_t			mc_wreg;
@@ -1614,8 +1634,10 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
-void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
+		      bool always_indirect);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
+		  bool always_indirect);
 u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
 void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
@@ -1631,9 +1653,11 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 #define WREG8(reg, v) writeb(v, (rdev->rmmio) + (reg))
 #define RREG16(reg) readw((rdev->rmmio) + (reg))
 #define WREG16(reg, v) writew(v, (rdev->rmmio) + (reg))
-#define RREG32(reg) r100_mm_rreg(rdev, (reg))
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg)))
-#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v))
+#define RREG32(reg) r100_mm_rreg(rdev, (reg), false)
+#define RREG32_IDX(reg) r100_mm_rreg(rdev, (reg), true)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", r100_mm_rreg(rdev, (reg), false))
+#define WREG32(reg, v) r100_mm_wreg(rdev, (reg), (v), false)
+#define WREG32_IDX(reg, v) r100_mm_wreg(rdev, (reg), (v), true)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PLL(reg) rdev->pll_rreg(rdev, (reg))
@@ -1658,7 +1682,7 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 		tmp_ |= ((val) & ~(mask));			\
 		WREG32_PLL(reg, tmp_);				\
 	} while (0)
-#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg)))
+#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
 #define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
 #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
 
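The new always_indirect parameter together with mmio_idx_lock makes MM_INDEX/MM_DATA accesses safe against concurrent users, and RREG32_IDX()/WREG32_IDX() are thin wrappers that force the indirect path. A sketch of what an indirect read looks like under the new lock (hypothetical helper; it assumes the usual RADEON_MM_INDEX/RADEON_MM_DATA register pair from radeon_reg.h):

static u32 radeon_mm_read_indirect(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
	writel(reg, rdev->rmmio + RADEON_MM_INDEX);	/* select the register */
	val = readl(rdev->rmmio + RADEON_MM_DATA);	/* read it back */
	spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
	return val;
}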
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 654520b95ab7..596bcbe80ed0 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -947,6 +947,15 @@ static struct radeon_asic r600_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -963,10 +972,10 @@ static struct radeon_asic r600_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1022,6 +1031,15 @@ static struct radeon_asic rs780_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1038,10 +1056,10 @@ static struct radeon_asic rs780_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1097,6 +1115,15 @@ static struct radeon_asic rv770_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &r600_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &r600_dma_ring_ib_execute,
+			.emit_fence = &r600_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &r600_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1113,10 +1140,10 @@ static struct radeon_asic rv770_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &r600_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &r600_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1172,6 +1199,15 @@ static struct radeon_asic evergreen_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1188,10 +1224,10 @@ static struct radeon_asic evergreen_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1248,6 +1284,15 @@ static struct radeon_asic sumo_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
+		}
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1263,10 +1308,10 @@ static struct radeon_asic sumo_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1322,6 +1367,15 @@ static struct radeon_asic btc_asic = {
 			.ring_test = &r600_ring_test,
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &evergreen_dma_ring_ib_execute,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &r600_dma_is_lockup,
 		}
 	},
 	.irq = {
@@ -1338,10 +1392,10 @@ static struct radeon_asic btc_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1391,7 +1445,7 @@ static struct radeon_asic cayman_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1427,6 +1481,28 @@ static struct radeon_asic cayman_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1443,10 +1519,10 @@ static struct radeon_asic cayman_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1496,7 +1572,7 @@ static struct radeon_asic trinity_asic = {
 	.vm = {
 		.init = &cayman_vm_init,
 		.fini = &cayman_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
@@ -1532,6 +1608,28 @@ static struct radeon_asic trinity_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &evergreen_gpu_is_lockup,
 			.vm_flush = &cayman_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = &evergreen_dma_cs_parse,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &cayman_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1548,10 +1646,10 @@ static struct radeon_asic trinity_asic = {
 	.copy = {
 		.blit = &r600_copy_blit,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = &r600_copy_blit,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &evergreen_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &evergreen_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
@@ -1601,7 +1699,7 @@ static struct radeon_asic si_asic = {
 	.vm = {
 		.init = &si_vm_init,
 		.fini = &si_vm_fini,
-		.pt_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.pt_ring_index = R600_RING_TYPE_DMA_INDEX,
 		.set_page = &si_vm_set_page,
 	},
 	.ring = {
@@ -1637,6 +1735,28 @@ static struct radeon_asic si_asic = {
 			.ib_test = &r600_ib_test,
 			.is_lockup = &si_gpu_is_lockup,
 			.vm_flush = &si_vm_flush,
+		},
+		[R600_RING_TYPE_DMA_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &si_dma_vm_flush,
+		},
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
+			.ib_execute = &cayman_dma_ring_ib_execute,
+			.ib_parse = &evergreen_dma_ib_parse,
+			.emit_fence = &evergreen_dma_fence_ring_emit,
+			.emit_semaphore = &r600_dma_semaphore_ring_emit,
+			.cs_parse = NULL,
+			.ring_test = &r600_dma_ring_test,
+			.ib_test = &r600_dma_ib_test,
+			.is_lockup = &cayman_dma_is_lockup,
+			.vm_flush = &si_dma_vm_flush,
 		}
 	},
 	.irq = {
@@ -1653,10 +1773,10 @@ static struct radeon_asic si_asic = {
 	.copy = {
 		.blit = NULL,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.dma = NULL,
-		.dma_ring_index = RADEON_RING_TYPE_GFX_INDEX,
-		.copy = NULL,
-		.copy_ring_index = RADEON_RING_TYPE_GFX_INDEX,
+		.dma = &si_copy_dma,
+		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
+		.copy = &si_copy_dma,
+		.copy_ring_index = R600_RING_TYPE_DMA_INDEX,
 	},
 	.surface = {
 		.set_reg = r600_set_surface_reg,
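With .copy.dma and .copy.copy now wired to the async DMA engine, buffer moves no longer have to go through the 3D blit path. A hypothetical caller-side sketch of the fallback this enables (the helper name is made up; the callback signature is the one declared for r600_copy_dma() in radeon_asic.h below):

static int radeon_copy_pages(struct radeon_device *rdev,
			     uint64_t src, uint64_t dst,
			     unsigned num_gpu_pages,
			     struct radeon_fence **fence)
{
	/* prefer the async DMA ring when the ASIC provides it */
	if (rdev->asic->copy.dma)
		return rdev->asic->copy.dma(rdev, src, dst, num_gpu_pages, fence);
	return rdev->asic->copy.blit(rdev, src, dst, num_gpu_pages, fence);
}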
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5e3a0e5c6be1..5f4882cc2152 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -263,6 +263,7 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev);
 struct rv515_mc_save {
 	u32 vga_render_control;
 	u32 vga_hdp_control;
+	bool crtc_enabled[2];
 };
 
 int rv515_init(struct radeon_device *rdev);
@@ -303,12 +304,21 @@ void r600_pcie_gart_tlb_flush(struct radeon_device *rdev);
 uint32_t r600_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void r600_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int r600_cs_parse(struct radeon_cs_parser *p);
+int r600_dma_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *cp,
 			      struct radeon_semaphore *semaphore,
 			      bool emit_wait);
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence);
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait);
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 bool r600_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_asic_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -316,11 +326,16 @@ int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t offset, uint32_t obj_size);
 void r600_clear_surface_reg(struct radeon_device *rdev, int reg);
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages, struct radeon_fence **fence);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -416,6 +431,7 @@ u32 evergreen_get_vblank_counter(struct radeon_device *rdev, int crtc);
 int evergreen_irq_set(struct radeon_device *rdev);
 int evergreen_irq_process(struct radeon_device *rdev);
 extern int evergreen_cs_parse(struct radeon_cs_parser *p);
+extern int evergreen_dma_cs_parse(struct radeon_cs_parser *p);
 extern void evergreen_pm_misc(struct radeon_device *rdev);
 extern void evergreen_pm_prepare(struct radeon_device *rdev);
 extern void evergreen_pm_finish(struct radeon_device *rdev);
@@ -428,6 +444,14 @@ extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_blit_init(struct radeon_device *rdev);
 int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+				   struct radeon_fence *fence);
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+				   struct radeon_ib *ib);
+int evergreen_copy_dma(struct radeon_device *rdev,
+		       uint64_t src_offset, uint64_t dst_offset,
+		       unsigned num_gpu_pages,
+		       struct radeon_fence **fence);
 
 /*
  * cayman
@@ -449,6 +473,11 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe,
 			uint64_t addr, unsigned count,
 			uint32_t incr, uint32_t flags);
 int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+int evergreen_dma_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib);
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -476,5 +505,10 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
 uint64_t si_get_gpu_clock(struct radeon_device *rdev);
+int si_copy_dma(struct radeon_device *rdev,
+		uint64_t src_offset, uint64_t dst_offset,
+		unsigned num_gpu_pages,
+		struct radeon_fence **fence);
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 45b660b27cfc..4af89126e223 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -3246,11 +3246,9 @@ static uint32_t combios_detect_ram(struct drm_device *dev, int ram,
3246 while (ram--) { 3246 while (ram--) {
3247 addr = ram * 1024 * 1024; 3247 addr = ram * 1024 * 1024;
3248 /* write to each page */ 3248 /* write to each page */
3249 WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); 3249 WREG32_IDX((addr) | RADEON_MM_APER, 0xdeadbeef);
3250 WREG32(RADEON_MM_DATA, 0xdeadbeef);
3251 /* read back and verify */ 3250 /* read back and verify */
3252 WREG32(RADEON_MM_INDEX, (addr) | RADEON_MM_APER); 3251 if (RREG32_IDX((addr) | RADEON_MM_APER) != 0xdeadbeef)
3253 if (RREG32(RADEON_MM_DATA) != 0xdeadbeef)
3254 return 0; 3252 return 0;
3255 } 3253 }
3256 3254
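
Note: combios_detect_ram() now goes through the WREG32_IDX()/RREG32_IDX() accessors instead of open-coding the RADEON_MM_INDEX/RADEON_MM_DATA pair, so the RAM probe can no longer race with other users of the shared index register. The real accessors are added in radeon.h (outside this hunk); a minimal sketch of the idea, assuming they serialize on the mmio_idx_lock spinlock that radeon_device.c initializes further down (function names here are illustrative):

static u32 mm_rreg_idx(struct radeon_device *rdev, u32 reg)
{
        unsigned long flags;
        u32 val;

        /* MM_INDEX/MM_DATA form one shared indirect window; hold the lock so
         * a concurrent access cannot retarget the window between the two ops */
        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
        writel(reg, rdev->rmmio + RADEON_MM_INDEX);
        val = readl(rdev->rmmio + RADEON_MM_DATA);
        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
        return val;
}

static void mm_wreg_idx(struct radeon_device *rdev, u32 reg, u32 val)
{
        unsigned long flags;

        spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
        writel(reg, rdev->rmmio + RADEON_MM_INDEX);
        writel(val, rdev->rmmio + RADEON_MM_DATA);
        spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
}
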
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index b884c362a8c2..47bf162ab9c6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1599,7 +1599,7 @@ radeon_add_atom_connector(struct drm_device *dev,
1599 connector->interlace_allowed = true; 1599 connector->interlace_allowed = true;
1600 connector->doublescan_allowed = true; 1600 connector->doublescan_allowed = true;
1601 radeon_connector->dac_load_detect = true; 1601 radeon_connector->dac_load_detect = true;
1602 drm_connector_attach_property(&radeon_connector->base, 1602 drm_object_attach_property(&radeon_connector->base.base,
1603 rdev->mode_info.load_detect_property, 1603 rdev->mode_info.load_detect_property,
1604 1); 1604 1);
1605 break; 1605 break;
@@ -1608,13 +1608,13 @@ radeon_add_atom_connector(struct drm_device *dev,
1608 case DRM_MODE_CONNECTOR_HDMIA: 1608 case DRM_MODE_CONNECTOR_HDMIA:
1609 case DRM_MODE_CONNECTOR_HDMIB: 1609 case DRM_MODE_CONNECTOR_HDMIB:
1610 case DRM_MODE_CONNECTOR_DisplayPort: 1610 case DRM_MODE_CONNECTOR_DisplayPort:
1611 drm_connector_attach_property(&radeon_connector->base, 1611 drm_object_attach_property(&radeon_connector->base.base,
1612 rdev->mode_info.underscan_property, 1612 rdev->mode_info.underscan_property,
1613 UNDERSCAN_OFF); 1613 UNDERSCAN_OFF);
1614 drm_connector_attach_property(&radeon_connector->base, 1614 drm_object_attach_property(&radeon_connector->base.base,
1615 rdev->mode_info.underscan_hborder_property, 1615 rdev->mode_info.underscan_hborder_property,
1616 0); 1616 0);
1617 drm_connector_attach_property(&radeon_connector->base, 1617 drm_object_attach_property(&radeon_connector->base.base,
1618 rdev->mode_info.underscan_vborder_property, 1618 rdev->mode_info.underscan_vborder_property,
1619 0); 1619 0);
1620 subpixel_order = SubPixelHorizontalRGB; 1620 subpixel_order = SubPixelHorizontalRGB;
@@ -1625,14 +1625,14 @@ radeon_add_atom_connector(struct drm_device *dev,
1625 connector->doublescan_allowed = false; 1625 connector->doublescan_allowed = false;
1626 if (connector_type == DRM_MODE_CONNECTOR_DVII) { 1626 if (connector_type == DRM_MODE_CONNECTOR_DVII) {
1627 radeon_connector->dac_load_detect = true; 1627 radeon_connector->dac_load_detect = true;
1628 drm_connector_attach_property(&radeon_connector->base, 1628 drm_object_attach_property(&radeon_connector->base.base,
1629 rdev->mode_info.load_detect_property, 1629 rdev->mode_info.load_detect_property,
1630 1); 1630 1);
1631 } 1631 }
1632 break; 1632 break;
1633 case DRM_MODE_CONNECTOR_LVDS: 1633 case DRM_MODE_CONNECTOR_LVDS:
1634 case DRM_MODE_CONNECTOR_eDP: 1634 case DRM_MODE_CONNECTOR_eDP:
1635 drm_connector_attach_property(&radeon_connector->base, 1635 drm_object_attach_property(&radeon_connector->base.base,
1636 dev->mode_config.scaling_mode_property, 1636 dev->mode_config.scaling_mode_property,
1637 DRM_MODE_SCALE_FULLSCREEN); 1637 DRM_MODE_SCALE_FULLSCREEN);
1638 subpixel_order = SubPixelHorizontalRGB; 1638 subpixel_order = SubPixelHorizontalRGB;
@@ -1651,7 +1651,7 @@ radeon_add_atom_connector(struct drm_device *dev,
1651 DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1651 DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1652 } 1652 }
1653 radeon_connector->dac_load_detect = true; 1653 radeon_connector->dac_load_detect = true;
1654 drm_connector_attach_property(&radeon_connector->base, 1654 drm_object_attach_property(&radeon_connector->base.base,
1655 rdev->mode_info.load_detect_property, 1655 rdev->mode_info.load_detect_property,
1656 1); 1656 1);
1657 /* no HPD on analog connectors */ 1657 /* no HPD on analog connectors */
@@ -1669,7 +1669,7 @@ radeon_add_atom_connector(struct drm_device *dev,
1669 DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1669 DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1670 } 1670 }
1671 radeon_connector->dac_load_detect = true; 1671 radeon_connector->dac_load_detect = true;
1672 drm_connector_attach_property(&radeon_connector->base, 1672 drm_object_attach_property(&radeon_connector->base.base,
1673 rdev->mode_info.load_detect_property, 1673 rdev->mode_info.load_detect_property,
1674 1); 1674 1);
1675 /* no HPD on analog connectors */ 1675 /* no HPD on analog connectors */
@@ -1692,23 +1692,23 @@ radeon_add_atom_connector(struct drm_device *dev,
1692 DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1692 DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1693 } 1693 }
1694 subpixel_order = SubPixelHorizontalRGB; 1694 subpixel_order = SubPixelHorizontalRGB;
1695 drm_connector_attach_property(&radeon_connector->base, 1695 drm_object_attach_property(&radeon_connector->base.base,
1696 rdev->mode_info.coherent_mode_property, 1696 rdev->mode_info.coherent_mode_property,
1697 1); 1697 1);
1698 if (ASIC_IS_AVIVO(rdev)) { 1698 if (ASIC_IS_AVIVO(rdev)) {
1699 drm_connector_attach_property(&radeon_connector->base, 1699 drm_object_attach_property(&radeon_connector->base.base,
1700 rdev->mode_info.underscan_property, 1700 rdev->mode_info.underscan_property,
1701 UNDERSCAN_OFF); 1701 UNDERSCAN_OFF);
1702 drm_connector_attach_property(&radeon_connector->base, 1702 drm_object_attach_property(&radeon_connector->base.base,
1703 rdev->mode_info.underscan_hborder_property, 1703 rdev->mode_info.underscan_hborder_property,
1704 0); 1704 0);
1705 drm_connector_attach_property(&radeon_connector->base, 1705 drm_object_attach_property(&radeon_connector->base.base,
1706 rdev->mode_info.underscan_vborder_property, 1706 rdev->mode_info.underscan_vborder_property,
1707 0); 1707 0);
1708 } 1708 }
1709 if (connector_type == DRM_MODE_CONNECTOR_DVII) { 1709 if (connector_type == DRM_MODE_CONNECTOR_DVII) {
1710 radeon_connector->dac_load_detect = true; 1710 radeon_connector->dac_load_detect = true;
1711 drm_connector_attach_property(&radeon_connector->base, 1711 drm_object_attach_property(&radeon_connector->base.base,
1712 rdev->mode_info.load_detect_property, 1712 rdev->mode_info.load_detect_property,
1713 1); 1713 1);
1714 } 1714 }
@@ -1732,17 +1732,17 @@ radeon_add_atom_connector(struct drm_device *dev,
1732 if (!radeon_connector->ddc_bus) 1732 if (!radeon_connector->ddc_bus)
1733 DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1733 DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1734 } 1734 }
1735 drm_connector_attach_property(&radeon_connector->base, 1735 drm_object_attach_property(&radeon_connector->base.base,
1736 rdev->mode_info.coherent_mode_property, 1736 rdev->mode_info.coherent_mode_property,
1737 1); 1737 1);
1738 if (ASIC_IS_AVIVO(rdev)) { 1738 if (ASIC_IS_AVIVO(rdev)) {
1739 drm_connector_attach_property(&radeon_connector->base, 1739 drm_object_attach_property(&radeon_connector->base.base,
1740 rdev->mode_info.underscan_property, 1740 rdev->mode_info.underscan_property,
1741 UNDERSCAN_OFF); 1741 UNDERSCAN_OFF);
1742 drm_connector_attach_property(&radeon_connector->base, 1742 drm_object_attach_property(&radeon_connector->base.base,
1743 rdev->mode_info.underscan_hborder_property, 1743 rdev->mode_info.underscan_hborder_property,
1744 0); 1744 0);
1745 drm_connector_attach_property(&radeon_connector->base, 1745 drm_object_attach_property(&radeon_connector->base.base,
1746 rdev->mode_info.underscan_vborder_property, 1746 rdev->mode_info.underscan_vborder_property,
1747 0); 1747 0);
1748 } 1748 }
@@ -1771,17 +1771,17 @@ radeon_add_atom_connector(struct drm_device *dev,
1771 DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1771 DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1772 } 1772 }
1773 subpixel_order = SubPixelHorizontalRGB; 1773 subpixel_order = SubPixelHorizontalRGB;
1774 drm_connector_attach_property(&radeon_connector->base, 1774 drm_object_attach_property(&radeon_connector->base.base,
1775 rdev->mode_info.coherent_mode_property, 1775 rdev->mode_info.coherent_mode_property,
1776 1); 1776 1);
1777 if (ASIC_IS_AVIVO(rdev)) { 1777 if (ASIC_IS_AVIVO(rdev)) {
1778 drm_connector_attach_property(&radeon_connector->base, 1778 drm_object_attach_property(&radeon_connector->base.base,
1779 rdev->mode_info.underscan_property, 1779 rdev->mode_info.underscan_property,
1780 UNDERSCAN_OFF); 1780 UNDERSCAN_OFF);
1781 drm_connector_attach_property(&radeon_connector->base, 1781 drm_object_attach_property(&radeon_connector->base.base,
1782 rdev->mode_info.underscan_hborder_property, 1782 rdev->mode_info.underscan_hborder_property,
1783 0); 1783 0);
1784 drm_connector_attach_property(&radeon_connector->base, 1784 drm_object_attach_property(&radeon_connector->base.base,
1785 rdev->mode_info.underscan_vborder_property, 1785 rdev->mode_info.underscan_vborder_property,
1786 0); 1786 0);
1787 } 1787 }
@@ -1806,7 +1806,7 @@ radeon_add_atom_connector(struct drm_device *dev,
1806 if (!radeon_connector->ddc_bus) 1806 if (!radeon_connector->ddc_bus)
1807 DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1807 DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1808 } 1808 }
1809 drm_connector_attach_property(&radeon_connector->base, 1809 drm_object_attach_property(&radeon_connector->base.base,
1810 dev->mode_config.scaling_mode_property, 1810 dev->mode_config.scaling_mode_property,
1811 DRM_MODE_SCALE_FULLSCREEN); 1811 DRM_MODE_SCALE_FULLSCREEN);
1812 subpixel_order = SubPixelHorizontalRGB; 1812 subpixel_order = SubPixelHorizontalRGB;
@@ -1819,10 +1819,10 @@ radeon_add_atom_connector(struct drm_device *dev,
1819 drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type); 1819 drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
1820 drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs); 1820 drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
1821 radeon_connector->dac_load_detect = true; 1821 radeon_connector->dac_load_detect = true;
1822 drm_connector_attach_property(&radeon_connector->base, 1822 drm_object_attach_property(&radeon_connector->base.base,
1823 rdev->mode_info.load_detect_property, 1823 rdev->mode_info.load_detect_property,
1824 1); 1824 1);
1825 drm_connector_attach_property(&radeon_connector->base, 1825 drm_object_attach_property(&radeon_connector->base.base,
1826 rdev->mode_info.tv_std_property, 1826 rdev->mode_info.tv_std_property,
1827 radeon_atombios_get_tv_info(rdev)); 1827 radeon_atombios_get_tv_info(rdev));
1828 /* no HPD on analog connectors */ 1828 /* no HPD on analog connectors */
@@ -1843,7 +1843,7 @@ radeon_add_atom_connector(struct drm_device *dev,
1843 if (!radeon_connector->ddc_bus) 1843 if (!radeon_connector->ddc_bus)
1844 DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1844 DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1845 } 1845 }
1846 drm_connector_attach_property(&radeon_connector->base, 1846 drm_object_attach_property(&radeon_connector->base.base,
1847 dev->mode_config.scaling_mode_property, 1847 dev->mode_config.scaling_mode_property,
1848 DRM_MODE_SCALE_FULLSCREEN); 1848 DRM_MODE_SCALE_FULLSCREEN);
1849 subpixel_order = SubPixelHorizontalRGB; 1849 subpixel_order = SubPixelHorizontalRGB;
@@ -1922,7 +1922,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
1922 DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1922 DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1923 } 1923 }
1924 radeon_connector->dac_load_detect = true; 1924 radeon_connector->dac_load_detect = true;
1925 drm_connector_attach_property(&radeon_connector->base, 1925 drm_object_attach_property(&radeon_connector->base.base,
1926 rdev->mode_info.load_detect_property, 1926 rdev->mode_info.load_detect_property,
1927 1); 1927 1);
1928 /* no HPD on analog connectors */ 1928 /* no HPD on analog connectors */
@@ -1940,7 +1940,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
1940 DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 1940 DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
1941 } 1941 }
1942 radeon_connector->dac_load_detect = true; 1942 radeon_connector->dac_load_detect = true;
1943 drm_connector_attach_property(&radeon_connector->base, 1943 drm_object_attach_property(&radeon_connector->base.base,
1944 rdev->mode_info.load_detect_property, 1944 rdev->mode_info.load_detect_property,
1945 1); 1945 1);
1946 /* no HPD on analog connectors */ 1946 /* no HPD on analog connectors */
@@ -1959,7 +1959,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
1959 } 1959 }
1960 if (connector_type == DRM_MODE_CONNECTOR_DVII) { 1960 if (connector_type == DRM_MODE_CONNECTOR_DVII) {
1961 radeon_connector->dac_load_detect = true; 1961 radeon_connector->dac_load_detect = true;
1962 drm_connector_attach_property(&radeon_connector->base, 1962 drm_object_attach_property(&radeon_connector->base.base,
1963 rdev->mode_info.load_detect_property, 1963 rdev->mode_info.load_detect_property,
1964 1); 1964 1);
1965 } 1965 }
@@ -1983,10 +1983,10 @@ radeon_add_legacy_connector(struct drm_device *dev,
1983 */ 1983 */
1984 if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) 1984 if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480)
1985 radeon_connector->dac_load_detect = false; 1985 radeon_connector->dac_load_detect = false;
1986 drm_connector_attach_property(&radeon_connector->base, 1986 drm_object_attach_property(&radeon_connector->base.base,
1987 rdev->mode_info.load_detect_property, 1987 rdev->mode_info.load_detect_property,
1988 radeon_connector->dac_load_detect); 1988 radeon_connector->dac_load_detect);
1989 drm_connector_attach_property(&radeon_connector->base, 1989 drm_object_attach_property(&radeon_connector->base.base,
1990 rdev->mode_info.tv_std_property, 1990 rdev->mode_info.tv_std_property,
1991 radeon_combios_get_tv_info(rdev)); 1991 radeon_combios_get_tv_info(rdev));
1992 /* no HPD on analog connectors */ 1992 /* no HPD on analog connectors */
@@ -2002,7 +2002,7 @@ radeon_add_legacy_connector(struct drm_device *dev,
2002 if (!radeon_connector->ddc_bus) 2002 if (!radeon_connector->ddc_bus)
2003 DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); 2003 DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
2004 } 2004 }
2005 drm_connector_attach_property(&radeon_connector->base, 2005 drm_object_attach_property(&radeon_connector->base.base,
2006 dev->mode_config.scaling_mode_property, 2006 dev->mode_config.scaling_mode_property,
2007 DRM_MODE_SCALE_FULLSCREEN); 2007 DRM_MODE_SCALE_FULLSCREEN);
2008 subpixel_order = SubPixelHorizontalRGB; 2008 subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index 8b2797dc7b64..9143fc45e35b 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -116,20 +116,6 @@ u32 radeon_get_scratch(drm_radeon_private_t *dev_priv, int index)
116 } 116 }
117} 117}
118 118
119u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr)
120{
121 u32 ret;
122
123 if (addr < 0x10000)
124 ret = DRM_READ32(dev_priv->mmio, addr);
125 else {
126 DRM_WRITE32(dev_priv->mmio, RADEON_MM_INDEX, addr);
127 ret = DRM_READ32(dev_priv->mmio, RADEON_MM_DATA);
128 }
129
130 return ret;
131}
132
133static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr) 119static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
134{ 120{
135 u32 ret; 121 u32 ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 41672cc563fb..396baba0141a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -43,6 +43,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
43 return 0; 43 return 0;
44 } 44 }
45 chunk = &p->chunks[p->chunk_relocs_idx]; 45 chunk = &p->chunks[p->chunk_relocs_idx];
46 p->dma_reloc_idx = 0;
46 /* FIXME: we assume that each relocs use 4 dwords */ 47 /* FIXME: we assume that each relocs use 4 dwords */
47 p->nrelocs = chunk->length_dw / 4; 48 p->nrelocs = chunk->length_dw / 4;
48 p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL); 49 p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
@@ -111,6 +112,18 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
111 } else 112 } else
112 p->ring = RADEON_RING_TYPE_GFX_INDEX; 113 p->ring = RADEON_RING_TYPE_GFX_INDEX;
113 break; 114 break;
115 case RADEON_CS_RING_DMA:
116 if (p->rdev->family >= CHIP_CAYMAN) {
117 if (p->priority > 0)
118 p->ring = R600_RING_TYPE_DMA_INDEX;
119 else
120 p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
121 } else if (p->rdev->family >= CHIP_R600) {
122 p->ring = R600_RING_TYPE_DMA_INDEX;
123 } else {
124 return -EINVAL;
125 }
126 break;
114 } 127 }
115 return 0; 128 return 0;
116} 129}
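
Note: the new RADEON_CS_RING_DMA case maps a user-space ring request onto whatever async DMA hardware the chip has: Cayman and newer expose two DMA engines, so submissions with a raised priority land on the first DMA ring and everything else on DMA1; r600 through Evergreen only have the single DMA ring; anything older gets -EINVAL. The same policy restated as a small standalone table (the enum values are stand-ins for the real ring indices in radeon.h):

enum dma_ring_pick { PICK_R600_DMA, PICK_CAYMAN_DMA1, PICK_NO_DMA = -1 };

static enum dma_ring_pick pick_dma_ring(int is_cayman_or_newer,
                                        int is_r600_or_newer, int priority)
{
        if (is_cayman_or_newer)
                return priority > 0 ? PICK_R600_DMA : PICK_CAYMAN_DMA1;
        if (is_r600_or_newer)
                return PICK_R600_DMA;
        return PICK_NO_DMA;     /* pre-r600 parts have no async DMA ring */
}
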
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 0fe56c9f64bd..ad6df625e8b8 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -66,24 +66,25 @@ static void radeon_hide_cursor(struct drm_crtc *crtc)
66 struct radeon_device *rdev = crtc->dev->dev_private; 66 struct radeon_device *rdev = crtc->dev->dev_private;
67 67
68 if (ASIC_IS_DCE4(rdev)) { 68 if (ASIC_IS_DCE4(rdev)) {
69 WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); 69 WREG32_IDX(EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset,
70 WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | 70 EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) |
71 EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); 71 EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2));
72 } else if (ASIC_IS_AVIVO(rdev)) { 72 } else if (ASIC_IS_AVIVO(rdev)) {
73 WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); 73 WREG32_IDX(AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset,
74 WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); 74 (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT));
75 } else { 75 } else {
76 u32 reg;
76 switch (radeon_crtc->crtc_id) { 77 switch (radeon_crtc->crtc_id) {
77 case 0: 78 case 0:
78 WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); 79 reg = RADEON_CRTC_GEN_CNTL;
79 break; 80 break;
80 case 1: 81 case 1:
81 WREG32(RADEON_MM_INDEX, RADEON_CRTC2_GEN_CNTL); 82 reg = RADEON_CRTC2_GEN_CNTL;
82 break; 83 break;
83 default: 84 default:
84 return; 85 return;
85 } 86 }
86 WREG32_P(RADEON_MM_DATA, 0, ~RADEON_CRTC_CUR_EN); 87 WREG32_IDX(reg, RREG32_IDX(reg) & ~RADEON_CRTC_CUR_EN);
87 } 88 }
88} 89}
89 90
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e2f5f888c374..49b06590001e 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1059,6 +1059,7 @@ int radeon_device_init(struct radeon_device *rdev,
1059 1059
1060 /* Registers mapping */ 1060 /* Registers mapping */
1061 /* TODO: block userspace mapping of io register */ 1061 /* TODO: block userspace mapping of io register */
1062 spin_lock_init(&rdev->mmio_idx_lock);
1062 rdev->rmmio_base = pci_resource_start(rdev->pdev, 2); 1063 rdev->rmmio_base = pci_resource_start(rdev->pdev, 2);
1063 rdev->rmmio_size = pci_resource_len(rdev->pdev, 2); 1064 rdev->rmmio_size = pci_resource_len(rdev->pdev, 2);
1064 rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size); 1065 rdev->rmmio = ioremap(rdev->rmmio_base, rdev->rmmio_size);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bfa2a6015727..310c0e5254ba 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -378,8 +378,12 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
378 work->old_rbo = rbo; 378 work->old_rbo = rbo;
379 obj = new_radeon_fb->obj; 379 obj = new_radeon_fb->obj;
380 rbo = gem_to_radeon_bo(obj); 380 rbo = gem_to_radeon_bo(obj);
381
382 spin_lock(&rbo->tbo.bdev->fence_lock);
381 if (rbo->tbo.sync_obj) 383 if (rbo->tbo.sync_obj)
382 work->fence = radeon_fence_ref(rbo->tbo.sync_obj); 384 work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
385 spin_unlock(&rbo->tbo.bdev->fence_lock);
386
383 INIT_WORK(&work->work, radeon_unpin_work_func); 387 INIT_WORK(&work->work, radeon_unpin_work_func);
384 388
385 /* We borrow the event spin lock for protecting unpin_work */ 389 /* We borrow the event spin lock for protecting unpin_work */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 07eb84e8a8a4..9b1a727d3c9e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -65,9 +65,12 @@
65 * 2.22.0 - r600 only: RESOLVE_BOX allowed 65 * 2.22.0 - r600 only: RESOLVE_BOX allowed
66 * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880 66 * 2.23.0 - allow STRMOUT_BASE_UPDATE on RS780 and RS880
67 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures 67 * 2.24.0 - eg only: allow MIP_ADDRESS=0 for MSAA textures
68 * 2.25.0 - eg+: new info request for num SE and num SH
69 * 2.26.0 - r600-eg: fix htile size computation
70 * 2.27.0 - r600-SI: Add CS ioctl support for async DMA
68 */ 71 */
69#define KMS_DRIVER_MAJOR 2 72#define KMS_DRIVER_MAJOR 2
70#define KMS_DRIVER_MINOR 24 73#define KMS_DRIVER_MINOR 27
71#define KMS_DRIVER_PATCHLEVEL 0 74#define KMS_DRIVER_PATCHLEVEL 0
72int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 75int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
73int radeon_driver_unload_kms(struct drm_device *dev); 76int radeon_driver_unload_kms(struct drm_device *dev);
@@ -281,12 +284,15 @@ static struct drm_driver driver_old = {
281 284
282static struct drm_driver kms_driver; 285static struct drm_driver kms_driver;
283 286
284static void radeon_kick_out_firmware_fb(struct pci_dev *pdev) 287static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
285{ 288{
286 struct apertures_struct *ap; 289 struct apertures_struct *ap;
287 bool primary = false; 290 bool primary = false;
288 291
289 ap = alloc_apertures(1); 292 ap = alloc_apertures(1);
293 if (!ap)
294 return -ENOMEM;
295
290 ap->ranges[0].base = pci_resource_start(pdev, 0); 296 ap->ranges[0].base = pci_resource_start(pdev, 0);
291 ap->ranges[0].size = pci_resource_len(pdev, 0); 297 ap->ranges[0].size = pci_resource_len(pdev, 0);
292 298
@@ -295,13 +301,19 @@ static void radeon_kick_out_firmware_fb(struct pci_dev *pdev)
295#endif 301#endif
296 remove_conflicting_framebuffers(ap, "radeondrmfb", primary); 302 remove_conflicting_framebuffers(ap, "radeondrmfb", primary);
297 kfree(ap); 303 kfree(ap);
304
305 return 0;
298} 306}
299 307
300static int __devinit 308static int __devinit
301radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) 309radeon_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
302{ 310{
311 int ret;
312
303 /* Get rid of things like offb */ 313 /* Get rid of things like offb */
304 radeon_kick_out_firmware_fb(pdev); 314 ret = radeon_kick_out_firmware_fb(pdev);
315 if (ret)
316 return ret;
305 317
306 return drm_get_pci_dev(pdev, ent, &kms_driver); 318 return drm_get_pci_dev(pdev, ent, &kms_driver);
307} 319}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index a1b59ca96d01..e7fdf163a8ca 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -366,7 +366,6 @@ extern int radeon_cp_buffers(struct drm_device *dev, void *data, struct drm_file
366extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv); 366extern u32 radeon_read_fb_location(drm_radeon_private_t *dev_priv);
367extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc); 367extern void radeon_write_agp_location(drm_radeon_private_t *dev_priv, u32 agp_loc);
368extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base); 368extern void radeon_write_agp_base(drm_radeon_private_t *dev_priv, u64 agp_base);
369extern u32 RADEON_READ_MM(drm_radeon_private_t *dev_priv, int addr);
370 369
371extern void radeon_freelist_reset(struct drm_device * dev); 370extern void radeon_freelist_reset(struct drm_device * dev);
372extern struct drm_buf *radeon_freelist_get(struct drm_device * dev); 371extern struct drm_buf *radeon_freelist_get(struct drm_device * dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 22bd6c2c2740..410a975a8eec 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -772,7 +772,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
772 int r; 772 int r;
773 773
774 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); 774 radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
775 if (rdev->wb.use_event) { 775 if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
776 rdev->fence_drv[ring].scratch_reg = 0; 776 rdev->fence_drv[ring].scratch_reg = 0;
777 index = R600_WB_EVENT_OFFSET + ring * 4; 777 index = R600_WB_EVENT_OFFSET + ring * 4;
778 } else { 778 } else {
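
Note: the added radeon_ring_supports_scratch_reg() check forces rings that cannot use scratch registers onto the write-back event path for fences, which is what the new async DMA rings need. The helper itself is introduced in radeon_ring.c outside this diff; its presumed shape keys purely off the ring index:

bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
                                      struct radeon_ring *ring)
{
        switch (ring->idx) {
        case RADEON_RING_TYPE_GFX_INDEX:
        case CAYMAN_RING_TYPE_CP1_INDEX:
        case CAYMAN_RING_TYPE_CP2_INDEX:
                /* only the CP rings can write fence values to scratch regs */
                return true;
        default:
                return false;
        }
}
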
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 4debd60e5aa6..6e24f84755b5 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -1237,7 +1237,6 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
1237{ 1237{
1238 struct radeon_bo_va *bo_va; 1238 struct radeon_bo_va *bo_va;
1239 1239
1240 BUG_ON(!atomic_read(&bo->tbo.reserved));
1241 list_for_each_entry(bo_va, &bo->va, bo_list) { 1240 list_for_each_entry(bo_va, &bo->va, bo_list) {
1242 bo_va->valid = false; 1241 bo_va->valid = false;
1243 } 1242 }
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index dc781c49b96b..9c312f9afb68 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -361,6 +361,22 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
361 return -EINVAL; 361 return -EINVAL;
362 } 362 }
363 break; 363 break;
364 case RADEON_INFO_MAX_SE:
365 if (rdev->family >= CHIP_TAHITI)
366 value = rdev->config.si.max_shader_engines;
367 else if (rdev->family >= CHIP_CAYMAN)
368 value = rdev->config.cayman.max_shader_engines;
369 else if (rdev->family >= CHIP_CEDAR)
370 value = rdev->config.evergreen.num_ses;
371 else
372 value = 1;
373 break;
374 case RADEON_INFO_MAX_SH_PER_SE:
375 if (rdev->family >= CHIP_TAHITI)
376 value = rdev->config.si.max_sh_per_se;
377 else
378 return -EINVAL;
379 break;
364 default: 380 default:
365 DRM_DEBUG_KMS("Invalid request %d\n", info->request); 381 DRM_DEBUG_KMS("Invalid request %d\n", info->request);
366 return -EINVAL; 382 return -EINVAL;
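
Note: RADEON_INFO_MAX_SE and RADEON_INFO_MAX_SH_PER_SE are ordinary 32-bit queries through the existing info ioctl (the "new info request for num SE and num SH" interface bump noted in radeon_drv.c above). A hedged user-space sketch of reading one back via libdrm, assuming the matching request defines are exported in radeon_drm.h; error handling trimmed:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

/* Returns 0 on success and stores the queried value in *out. */
static int radeon_query_u32(int fd, uint32_t request, uint32_t *out)
{
        struct drm_radeon_info info;

        memset(&info, 0, sizeof(info));
        info.request = request;
        info.value = (uintptr_t)out;    /* kernel copies the result to this address */
        return drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
}

/* usage: radeon_query_u32(fd, RADEON_INFO_MAX_SE, &num_se); */
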
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 92c5f473cf08..d818b503b42f 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -427,7 +427,7 @@ struct radeon_connector_atom_dig {
427 uint32_t igp_lane_info; 427 uint32_t igp_lane_info;
428 /* displayport */ 428 /* displayport */
429 struct radeon_i2c_chan *dp_i2c_bus; 429 struct radeon_i2c_chan *dp_i2c_bus;
430 u8 dpcd[8]; 430 u8 dpcd[DP_RECEIVER_CAP_SIZE];
431 u8 dp_sink_type; 431 u8 dp_sink_type;
432 int dp_clock; 432 int dp_clock;
433 int dp_lane_count; 433 int dp_lane_count;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index b91118ccef86..883c95d8d90f 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -84,17 +84,34 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
84 rbo->placement.fpfn = 0; 84 rbo->placement.fpfn = 0;
85 rbo->placement.lpfn = 0; 85 rbo->placement.lpfn = 0;
86 rbo->placement.placement = rbo->placements; 86 rbo->placement.placement = rbo->placements;
87 rbo->placement.busy_placement = rbo->placements;
88 if (domain & RADEON_GEM_DOMAIN_VRAM) 87 if (domain & RADEON_GEM_DOMAIN_VRAM)
89 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | 88 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
90 TTM_PL_FLAG_VRAM; 89 TTM_PL_FLAG_VRAM;
91 if (domain & RADEON_GEM_DOMAIN_GTT) 90 if (domain & RADEON_GEM_DOMAIN_GTT) {
92 rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; 91 if (rbo->rdev->flags & RADEON_IS_AGP) {
93 if (domain & RADEON_GEM_DOMAIN_CPU) 92 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
94 rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; 93 } else {
94 rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
95 }
96 }
97 if (domain & RADEON_GEM_DOMAIN_CPU) {
98 if (rbo->rdev->flags & RADEON_IS_AGP) {
99 rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM;
100 } else {
101 rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
102 }
103 }
95 if (!c) 104 if (!c)
96 rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; 105 rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
97 rbo->placement.num_placement = c; 106 rbo->placement.num_placement = c;
107
108 c = 0;
109 rbo->placement.busy_placement = rbo->busy_placements;
110 if (rbo->rdev->flags & RADEON_IS_AGP) {
111 rbo->busy_placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT;
112 } else {
113 rbo->busy_placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
114 }
98 rbo->placement.num_busy_placement = c; 115 rbo->placement.num_busy_placement = c;
99} 116}
100 117
@@ -140,7 +157,7 @@ int radeon_bo_create(struct radeon_device *rdev,
140 /* Kernel allocation are uninterruptible */ 157 /* Kernel allocation are uninterruptible */
141 down_read(&rdev->pm.mclk_lock); 158 down_read(&rdev->pm.mclk_lock);
142 r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, 159 r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
143 &bo->placement, page_align, 0, !kernel, NULL, 160 &bo->placement, page_align, !kernel, NULL,
144 acc_size, sg, &radeon_ttm_bo_destroy); 161 acc_size, sg, &radeon_ttm_bo_destroy);
145 up_read(&rdev->pm.mclk_lock); 162 up_read(&rdev->pm.mclk_lock);
146 if (unlikely(r != 0)) { 163 if (unlikely(r != 0)) {
@@ -240,7 +257,7 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
240 } 257 }
241 for (i = 0; i < bo->placement.num_placement; i++) 258 for (i = 0; i < bo->placement.num_placement; i++)
242 bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; 259 bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
243 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false); 260 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
244 if (likely(r == 0)) { 261 if (likely(r == 0)) {
245 bo->pin_count = 1; 262 bo->pin_count = 1;
246 if (gpu_addr != NULL) 263 if (gpu_addr != NULL)
@@ -269,7 +286,7 @@ int radeon_bo_unpin(struct radeon_bo *bo)
269 return 0; 286 return 0;
270 for (i = 0; i < bo->placement.num_placement; i++) 287 for (i = 0; i < bo->placement.num_placement; i++)
271 bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; 288 bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
272 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false, false); 289 r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
273 if (unlikely(r != 0)) 290 if (unlikely(r != 0))
274 dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo); 291 dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
275 return r; 292 return r;
@@ -340,7 +357,6 @@ int radeon_bo_list_validate(struct list_head *head)
340{ 357{
341 struct radeon_bo_list *lobj; 358 struct radeon_bo_list *lobj;
342 struct radeon_bo *bo; 359 struct radeon_bo *bo;
343 u32 domain;
344 int r; 360 int r;
345 361
346 r = ttm_eu_reserve_buffers(head); 362 r = ttm_eu_reserve_buffers(head);
@@ -350,17 +366,9 @@ int radeon_bo_list_validate(struct list_head *head)
350 list_for_each_entry(lobj, head, tv.head) { 366 list_for_each_entry(lobj, head, tv.head) {
351 bo = lobj->bo; 367 bo = lobj->bo;
352 if (!bo->pin_count) { 368 if (!bo->pin_count) {
353 domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
354
355 retry:
356 radeon_ttm_placement_from_domain(bo, domain);
357 r = ttm_bo_validate(&bo->tbo, &bo->placement, 369 r = ttm_bo_validate(&bo->tbo, &bo->placement,
358 true, false, false); 370 true, false);
359 if (unlikely(r)) { 371 if (unlikely(r)) {
360 if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
361 domain |= RADEON_GEM_DOMAIN_GTT;
362 goto retry;
363 }
364 return r; 372 return r;
365 } 373 }
366 } 374 }
@@ -384,7 +392,7 @@ int radeon_bo_get_surface_reg(struct radeon_bo *bo)
384 int steal; 392 int steal;
385 int i; 393 int i;
386 394
387 BUG_ON(!atomic_read(&bo->tbo.reserved)); 395 BUG_ON(!radeon_bo_is_reserved(bo));
388 396
389 if (!bo->tiling_flags) 397 if (!bo->tiling_flags)
390 return 0; 398 return 0;
@@ -510,7 +518,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
510 uint32_t *tiling_flags, 518 uint32_t *tiling_flags,
511 uint32_t *pitch) 519 uint32_t *pitch)
512{ 520{
513 BUG_ON(!atomic_read(&bo->tbo.reserved)); 521 BUG_ON(!radeon_bo_is_reserved(bo));
514 if (tiling_flags) 522 if (tiling_flags)
515 *tiling_flags = bo->tiling_flags; 523 *tiling_flags = bo->tiling_flags;
516 if (pitch) 524 if (pitch)
@@ -520,7 +528,7 @@ void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
520int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved, 528int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
521 bool force_drop) 529 bool force_drop)
522{ 530{
523 BUG_ON(!atomic_read(&bo->tbo.reserved)); 531 BUG_ON(!radeon_bo_is_reserved(bo) && !force_drop);
524 532
525 if (!(bo->tiling_flags & RADEON_TILING_SURFACE)) 533 if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
526 return 0; 534 return 0;
@@ -575,7 +583,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
575 /* hurrah the memory is not visible ! */ 583 /* hurrah the memory is not visible ! */
576 radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM); 584 radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
577 rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT; 585 rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
578 r = ttm_bo_validate(bo, &rbo->placement, false, true, false); 586 r = ttm_bo_validate(bo, &rbo->placement, false, false);
579 if (unlikely(r != 0)) 587 if (unlikely(r != 0))
580 return r; 588 return r;
581 offset = bo->mem.start << PAGE_SHIFT; 589 offset = bo->mem.start << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 93cd491fff2e..5fc86b03043b 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -80,7 +80,7 @@ static inline unsigned long radeon_bo_size(struct radeon_bo *bo)
80 80
81static inline bool radeon_bo_is_reserved(struct radeon_bo *bo) 81static inline bool radeon_bo_is_reserved(struct radeon_bo *bo)
82{ 82{
83 return !!atomic_read(&bo->tbo.reserved); 83 return ttm_bo_is_reserved(&bo->tbo);
84} 84}
85 85
86static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo) 86static inline unsigned radeon_bo_ngpu_pages(struct radeon_bo *bo)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 587c09a00ba2..fda09c9ea689 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -26,16 +26,31 @@
26#include "radeon_reg.h" 26#include "radeon_reg.h"
27#include "radeon.h" 27#include "radeon.h"
28 28
29#define RADEON_TEST_COPY_BLIT 1
30#define RADEON_TEST_COPY_DMA 0
31
29 32
30/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ 33/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */
31void radeon_test_moves(struct radeon_device *rdev) 34static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
32{ 35{
33 struct radeon_bo *vram_obj = NULL; 36 struct radeon_bo *vram_obj = NULL;
34 struct radeon_bo **gtt_obj = NULL; 37 struct radeon_bo **gtt_obj = NULL;
35 struct radeon_fence *fence = NULL; 38 struct radeon_fence *fence = NULL;
36 uint64_t gtt_addr, vram_addr; 39 uint64_t gtt_addr, vram_addr;
37 unsigned i, n, size; 40 unsigned i, n, size;
38 int r; 41 int r, ring;
42
43 switch (flag) {
44 case RADEON_TEST_COPY_DMA:
45 ring = radeon_copy_dma_ring_index(rdev);
46 break;
47 case RADEON_TEST_COPY_BLIT:
48 ring = radeon_copy_blit_ring_index(rdev);
49 break;
50 default:
51 DRM_ERROR("Unknown copy method\n");
52 return;
53 }
39 54
40 size = 1024 * 1024; 55 size = 1024 * 1024;
41 56
@@ -106,7 +121,10 @@ void radeon_test_moves(struct radeon_device *rdev)
106 121
107 radeon_bo_kunmap(gtt_obj[i]); 122 radeon_bo_kunmap(gtt_obj[i]);
108 123
109 r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); 124 if (ring == R600_RING_TYPE_DMA_INDEX)
125 r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
126 else
127 r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
110 if (r) { 128 if (r) {
111 DRM_ERROR("Failed GTT->VRAM copy %d\n", i); 129 DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
112 goto out_cleanup; 130 goto out_cleanup;
@@ -149,7 +167,10 @@ void radeon_test_moves(struct radeon_device *rdev)
149 167
150 radeon_bo_kunmap(vram_obj); 168 radeon_bo_kunmap(vram_obj);
151 169
152 r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); 170 if (ring == R600_RING_TYPE_DMA_INDEX)
171 r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
172 else
173 r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
153 if (r) { 174 if (r) {
154 DRM_ERROR("Failed VRAM->GTT copy %d\n", i); 175 DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
155 goto out_cleanup; 176 goto out_cleanup;
@@ -223,6 +244,14 @@ out_cleanup:
223 } 244 }
224} 245}
225 246
247void radeon_test_moves(struct radeon_device *rdev)
248{
249 if (rdev->asic->copy.dma)
250 radeon_do_test_moves(rdev, RADEON_TEST_COPY_DMA);
251 if (rdev->asic->copy.blit)
252 radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
253}
254
226void radeon_test_ring_sync(struct radeon_device *rdev, 255void radeon_test_ring_sync(struct radeon_device *rdev,
227 struct radeon_ring *ringA, 256 struct radeon_ring *ringA,
228 struct radeon_ring *ringB) 257 struct radeon_ring *ringB)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 5ebe1b3e5db2..1d8ff2f850ba 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -216,7 +216,7 @@ static void radeon_move_null(struct ttm_buffer_object *bo,
216} 216}
217 217
218static int radeon_move_blit(struct ttm_buffer_object *bo, 218static int radeon_move_blit(struct ttm_buffer_object *bo,
219 bool evict, int no_wait_reserve, bool no_wait_gpu, 219 bool evict, bool no_wait_gpu,
220 struct ttm_mem_reg *new_mem, 220 struct ttm_mem_reg *new_mem,
221 struct ttm_mem_reg *old_mem) 221 struct ttm_mem_reg *old_mem)
222{ 222{
@@ -265,15 +265,15 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
265 new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ 265 new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
266 &fence); 266 &fence);
267 /* FIXME: handle copy error */ 267 /* FIXME: handle copy error */
268 r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, 268 r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
269 evict, no_wait_reserve, no_wait_gpu, new_mem); 269 evict, no_wait_gpu, new_mem);
270 radeon_fence_unref(&fence); 270 radeon_fence_unref(&fence);
271 return r; 271 return r;
272} 272}
273 273
274static int radeon_move_vram_ram(struct ttm_buffer_object *bo, 274static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
275 bool evict, bool interruptible, 275 bool evict, bool interruptible,
276 bool no_wait_reserve, bool no_wait_gpu, 276 bool no_wait_gpu,
277 struct ttm_mem_reg *new_mem) 277 struct ttm_mem_reg *new_mem)
278{ 278{
279 struct radeon_device *rdev; 279 struct radeon_device *rdev;
@@ -294,7 +294,7 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
294 placement.busy_placement = &placements; 294 placement.busy_placement = &placements;
295 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; 295 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
296 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, 296 r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
297 interruptible, no_wait_reserve, no_wait_gpu); 297 interruptible, no_wait_gpu);
298 if (unlikely(r)) { 298 if (unlikely(r)) {
299 return r; 299 return r;
300 } 300 }
@@ -308,11 +308,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
308 if (unlikely(r)) { 308 if (unlikely(r)) {
309 goto out_cleanup; 309 goto out_cleanup;
310 } 310 }
311 r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem, old_mem); 311 r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
312 if (unlikely(r)) { 312 if (unlikely(r)) {
313 goto out_cleanup; 313 goto out_cleanup;
314 } 314 }
315 r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, new_mem); 315 r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
316out_cleanup: 316out_cleanup:
317 ttm_bo_mem_put(bo, &tmp_mem); 317 ttm_bo_mem_put(bo, &tmp_mem);
318 return r; 318 return r;
@@ -320,7 +320,7 @@ out_cleanup:
320 320
321static int radeon_move_ram_vram(struct ttm_buffer_object *bo, 321static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
322 bool evict, bool interruptible, 322 bool evict, bool interruptible,
323 bool no_wait_reserve, bool no_wait_gpu, 323 bool no_wait_gpu,
324 struct ttm_mem_reg *new_mem) 324 struct ttm_mem_reg *new_mem)
325{ 325{
326 struct radeon_device *rdev; 326 struct radeon_device *rdev;
@@ -340,15 +340,16 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
340 placement.num_busy_placement = 1; 340 placement.num_busy_placement = 1;
341 placement.busy_placement = &placements; 341 placement.busy_placement = &placements;
342 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; 342 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
343 r = ttm_bo_mem_space(bo, &placement, &tmp_mem, interruptible, no_wait_reserve, no_wait_gpu); 343 r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
344 interruptible, no_wait_gpu);
344 if (unlikely(r)) { 345 if (unlikely(r)) {
345 return r; 346 return r;
346 } 347 }
347 r = ttm_bo_move_ttm(bo, true, no_wait_reserve, no_wait_gpu, &tmp_mem); 348 r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
348 if (unlikely(r)) { 349 if (unlikely(r)) {
349 goto out_cleanup; 350 goto out_cleanup;
350 } 351 }
351 r = radeon_move_blit(bo, true, no_wait_reserve, no_wait_gpu, new_mem, old_mem); 352 r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);
352 if (unlikely(r)) { 353 if (unlikely(r)) {
353 goto out_cleanup; 354 goto out_cleanup;
354 } 355 }
@@ -359,7 +360,7 @@ out_cleanup:
359 360
360static int radeon_bo_move(struct ttm_buffer_object *bo, 361static int radeon_bo_move(struct ttm_buffer_object *bo,
361 bool evict, bool interruptible, 362 bool evict, bool interruptible,
362 bool no_wait_reserve, bool no_wait_gpu, 363 bool no_wait_gpu,
363 struct ttm_mem_reg *new_mem) 364 struct ttm_mem_reg *new_mem)
364{ 365{
365 struct radeon_device *rdev; 366 struct radeon_device *rdev;
@@ -388,18 +389,18 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
388 if (old_mem->mem_type == TTM_PL_VRAM && 389 if (old_mem->mem_type == TTM_PL_VRAM &&
389 new_mem->mem_type == TTM_PL_SYSTEM) { 390 new_mem->mem_type == TTM_PL_SYSTEM) {
390 r = radeon_move_vram_ram(bo, evict, interruptible, 391 r = radeon_move_vram_ram(bo, evict, interruptible,
391 no_wait_reserve, no_wait_gpu, new_mem); 392 no_wait_gpu, new_mem);
392 } else if (old_mem->mem_type == TTM_PL_SYSTEM && 393 } else if (old_mem->mem_type == TTM_PL_SYSTEM &&
393 new_mem->mem_type == TTM_PL_VRAM) { 394 new_mem->mem_type == TTM_PL_VRAM) {
394 r = radeon_move_ram_vram(bo, evict, interruptible, 395 r = radeon_move_ram_vram(bo, evict, interruptible,
395 no_wait_reserve, no_wait_gpu, new_mem); 396 no_wait_gpu, new_mem);
396 } else { 397 } else {
397 r = radeon_move_blit(bo, evict, no_wait_reserve, no_wait_gpu, new_mem, old_mem); 398 r = radeon_move_blit(bo, evict, no_wait_gpu, new_mem, old_mem);
398 } 399 }
399 400
400 if (r) { 401 if (r) {
401memcpy: 402memcpy:
402 r = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem); 403 r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
403 } 404 }
404 return r; 405 return r;
405} 406}
@@ -471,13 +472,12 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
471{ 472{
472} 473}
473 474
474static int radeon_sync_obj_wait(void *sync_obj, void *sync_arg, 475static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
475 bool lazy, bool interruptible)
476{ 476{
477 return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible); 477 return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
478} 478}
479 479
480static int radeon_sync_obj_flush(void *sync_obj, void *sync_arg) 480static int radeon_sync_obj_flush(void *sync_obj)
481{ 481{
482 return 0; 482 return 0;
483} 483}
@@ -492,7 +492,7 @@ static void *radeon_sync_obj_ref(void *sync_obj)
492 return radeon_fence_ref((struct radeon_fence *)sync_obj); 492 return radeon_fence_ref((struct radeon_fence *)sync_obj);
493} 493}
494 494
495static bool radeon_sync_obj_signaled(void *sync_obj, void *sync_arg) 495static bool radeon_sync_obj_signaled(void *sync_obj)
496{ 496{
497 return radeon_fence_signaled((struct radeon_fence *)sync_obj); 497 return radeon_fence_signaled((struct radeon_fence *)sync_obj);
498} 498}
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 785d09590b24..2bb6d0e84b3d 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -40,6 +40,12 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev);
40static void rv515_gpu_init(struct radeon_device *rdev); 40static void rv515_gpu_init(struct radeon_device *rdev);
41int rv515_mc_wait_for_idle(struct radeon_device *rdev); 41int rv515_mc_wait_for_idle(struct radeon_device *rdev);
42 42
43static const u32 crtc_offsets[2] =
44{
45 0,
46 AVIVO_D2CRTC_H_TOTAL - AVIVO_D1CRTC_H_TOTAL
47};
48
43void rv515_debugfs(struct radeon_device *rdev) 49void rv515_debugfs(struct radeon_device *rdev)
44{ 50{
45 if (r100_debugfs_rbbm_init(rdev)) { 51 if (r100_debugfs_rbbm_init(rdev)) {
@@ -281,30 +287,114 @@ static int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
281 287
282void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save) 288void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
283{ 289{
290 u32 crtc_enabled, tmp, frame_count, blackout;
291 int i, j;
292
284 save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL); 293 save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
285 save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL); 294 save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
286 295
287 /* Stop all video */ 296 /* disable VGA render */
288 WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
289 WREG32(R_000300_VGA_RENDER_CONTROL, 0); 297 WREG32(R_000300_VGA_RENDER_CONTROL, 0);
290 WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1); 298 /* blank the display controllers */
291 WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1); 299 for (i = 0; i < rdev->num_crtc; i++) {
292 WREG32(R_006080_D1CRTC_CONTROL, 0); 300 crtc_enabled = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]) & AVIVO_CRTC_EN;
293 WREG32(R_006880_D2CRTC_CONTROL, 0); 301 if (crtc_enabled) {
294 WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0); 302 save->crtc_enabled[i] = true;
295 WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0); 303 tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
296 WREG32(R_000330_D1VGA_CONTROL, 0); 304 if (!(tmp & AVIVO_CRTC_DISP_READ_REQUEST_DISABLE)) {
297 WREG32(R_000338_D2VGA_CONTROL, 0); 305 radeon_wait_for_vblank(rdev, i);
306 tmp |= AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
307 WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
308 }
309 /* wait for the next frame */
310 frame_count = radeon_get_vblank_counter(rdev, i);
311 for (j = 0; j < rdev->usec_timeout; j++) {
312 if (radeon_get_vblank_counter(rdev, i) != frame_count)
313 break;
314 udelay(1);
315 }
316 } else {
317 save->crtc_enabled[i] = false;
318 }
319 }
320
321 radeon_mc_wait_for_idle(rdev);
322
323 if (rdev->family >= CHIP_R600) {
324 if (rdev->family >= CHIP_RV770)
325 blackout = RREG32(R700_MC_CITF_CNTL);
326 else
327 blackout = RREG32(R600_CITF_CNTL);
328 if ((blackout & R600_BLACKOUT_MASK) != R600_BLACKOUT_MASK) {
329 /* Block CPU access */
330 WREG32(R600_BIF_FB_EN, 0);
331 /* blackout the MC */
332 blackout |= R600_BLACKOUT_MASK;
333 if (rdev->family >= CHIP_RV770)
334 WREG32(R700_MC_CITF_CNTL, blackout);
335 else
336 WREG32(R600_CITF_CNTL, blackout);
337 }
338 }
298} 339}
299 340
300void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save) 341void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
301{ 342{
302 WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start); 343 u32 tmp, frame_count;
303 WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start); 344 int i, j;
304 WREG32(R_006910_D2GRPH_PRIMARY_SURFACE_ADDRESS, rdev->mc.vram_start); 345
305 WREG32(R_006918_D2GRPH_SECONDARY_SURFACE_ADDRESS, rdev->mc.vram_start); 346 /* update crtc base addresses */
306 WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, rdev->mc.vram_start); 347 for (i = 0; i < rdev->num_crtc; i++) {
307 /* Unlock host access */ 348 if (rdev->family >= CHIP_RV770) {
349 if (i == 1) {
350 WREG32(R700_D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
351 upper_32_bits(rdev->mc.vram_start));
352 WREG32(R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
353 upper_32_bits(rdev->mc.vram_start));
354 } else {
355 WREG32(R700_D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH,
356 upper_32_bits(rdev->mc.vram_start));
357 WREG32(R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH,
358 upper_32_bits(rdev->mc.vram_start));
359 }
360 }
361 WREG32(R_006110_D1GRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
362 (u32)rdev->mc.vram_start);
363 WREG32(R_006118_D1GRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
364 (u32)rdev->mc.vram_start);
365 }
366 WREG32(R_000310_VGA_MEMORY_BASE_ADDRESS, (u32)rdev->mc.vram_start);
367
368 if (rdev->family >= CHIP_R600) {
369 /* unblackout the MC */
370 if (rdev->family >= CHIP_RV770)
371 tmp = RREG32(R700_MC_CITF_CNTL);
372 else
373 tmp = RREG32(R600_CITF_CNTL);
374 tmp &= ~R600_BLACKOUT_MASK;
375 if (rdev->family >= CHIP_RV770)
376 WREG32(R700_MC_CITF_CNTL, tmp);
377 else
378 WREG32(R600_CITF_CNTL, tmp);
379 /* allow CPU access */
380 WREG32(R600_BIF_FB_EN, R600_FB_READ_EN | R600_FB_WRITE_EN);
381 }
382
383 for (i = 0; i < rdev->num_crtc; i++) {
384 if (save->crtc_enabled[i]) {
385 tmp = RREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i]);
386 tmp &= ~AVIVO_CRTC_DISP_READ_REQUEST_DISABLE;
387 WREG32(AVIVO_D1CRTC_CONTROL + crtc_offsets[i], tmp);
388 /* wait for the next frame */
389 frame_count = radeon_get_vblank_counter(rdev, i);
390 for (j = 0; j < rdev->usec_timeout; j++) {
391 if (radeon_get_vblank_counter(rdev, i) != frame_count)
392 break;
393 udelay(1);
394 }
395 }
396 }
397 /* Unlock vga access */
308 WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control); 398 WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
309 mdelay(1); 399 mdelay(1);
310 WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control); 400 WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 79814a08c8e5..87c979c4f721 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -316,6 +316,7 @@ void r700_cp_stop(struct radeon_device *rdev)
316 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 316 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
317 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT)); 317 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT));
318 WREG32(SCRATCH_UMSK, 0); 318 WREG32(SCRATCH_UMSK, 0);
319 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
319} 320}
320 321
321static int rv770_cp_load_microcode(struct radeon_device *rdev) 322static int rv770_cp_load_microcode(struct radeon_device *rdev)
@@ -583,6 +584,8 @@ static void rv770_gpu_init(struct radeon_device *rdev)
583 WREG32(GB_TILING_CONFIG, gb_tiling_config); 584 WREG32(GB_TILING_CONFIG, gb_tiling_config);
584 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 585 WREG32(DCP_TILING_CONFIG, (gb_tiling_config & 0xffff));
585 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff)); 586 WREG32(HDP_TILING_CONFIG, (gb_tiling_config & 0xffff));
587 WREG32(DMA_TILING_CONFIG, (gb_tiling_config & 0xffff));
588 WREG32(DMA_TILING_CONFIG2, (gb_tiling_config & 0xffff));
586 589
587 WREG32(CGTS_SYS_TCC_DISABLE, 0); 590 WREG32(CGTS_SYS_TCC_DISABLE, 0);
588 WREG32(CGTS_TCC_DISABLE, 0); 591 WREG32(CGTS_TCC_DISABLE, 0);
@@ -886,7 +889,7 @@ static int rv770_mc_init(struct radeon_device *rdev)
886 889
887static int rv770_startup(struct radeon_device *rdev) 890static int rv770_startup(struct radeon_device *rdev)
888{ 891{
889 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; 892 struct radeon_ring *ring;
890 int r; 893 int r;
891 894
892 /* enable pcie gen2 link */ 895 /* enable pcie gen2 link */
@@ -932,6 +935,12 @@ static int rv770_startup(struct radeon_device *rdev)
932 return r; 935 return r;
933 } 936 }
934 937
938 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
939 if (r) {
940 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
941 return r;
942 }
943
935 /* Enable IRQ */ 944 /* Enable IRQ */
936 r = r600_irq_init(rdev); 945 r = r600_irq_init(rdev);
937 if (r) { 946 if (r) {
@@ -941,11 +950,20 @@ static int rv770_startup(struct radeon_device *rdev)
941 } 950 }
942 r600_irq_set(rdev); 951 r600_irq_set(rdev);
943 952
953 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
944 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET, 954 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
945 R600_CP_RB_RPTR, R600_CP_RB_WPTR, 955 R600_CP_RB_RPTR, R600_CP_RB_WPTR,
946 0, 0xfffff, RADEON_CP_PACKET2); 956 0, 0xfffff, RADEON_CP_PACKET2);
947 if (r) 957 if (r)
948 return r; 958 return r;
959
960 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
961 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
962 DMA_RB_RPTR, DMA_RB_WPTR,
963 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
964 if (r)
965 return r;
966
949 r = rv770_cp_load_microcode(rdev); 967 r = rv770_cp_load_microcode(rdev);
950 if (r) 968 if (r)
951 return r; 969 return r;
@@ -953,6 +971,10 @@ static int rv770_startup(struct radeon_device *rdev)
953 if (r) 971 if (r)
954 return r; 972 return r;
955 973
974 r = r600_dma_resume(rdev);
975 if (r)
976 return r;
977
956 r = radeon_ib_pool_init(rdev); 978 r = radeon_ib_pool_init(rdev);
957 if (r) { 979 if (r) {
958 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 980 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -995,7 +1017,7 @@ int rv770_suspend(struct radeon_device *rdev)
995{ 1017{
996 r600_audio_fini(rdev); 1018 r600_audio_fini(rdev);
997 r700_cp_stop(rdev); 1019 r700_cp_stop(rdev);
998 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 1020 r600_dma_stop(rdev);
999 r600_irq_suspend(rdev); 1021 r600_irq_suspend(rdev);
1000 radeon_wb_disable(rdev); 1022 radeon_wb_disable(rdev);
1001 rv770_pcie_gart_disable(rdev); 1023 rv770_pcie_gart_disable(rdev);
@@ -1066,6 +1088,9 @@ int rv770_init(struct radeon_device *rdev)
1066 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL; 1088 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
1067 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024); 1089 r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
1068 1090
1091 rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
1092 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
1093
1069 rdev->ih.ring_obj = NULL; 1094 rdev->ih.ring_obj = NULL;
1070 r600_ih_ring_init(rdev, 64 * 1024); 1095 r600_ih_ring_init(rdev, 64 * 1024);
1071 1096
@@ -1078,6 +1103,7 @@ int rv770_init(struct radeon_device *rdev)
1078 if (r) { 1103 if (r) {
1079 dev_err(rdev->dev, "disabling GPU acceleration\n"); 1104 dev_err(rdev->dev, "disabling GPU acceleration\n");
1080 r700_cp_fini(rdev); 1105 r700_cp_fini(rdev);
1106 r600_dma_fini(rdev);
1081 r600_irq_fini(rdev); 1107 r600_irq_fini(rdev);
1082 radeon_wb_fini(rdev); 1108 radeon_wb_fini(rdev);
1083 radeon_ib_pool_fini(rdev); 1109 radeon_ib_pool_fini(rdev);
@@ -1093,6 +1119,7 @@ void rv770_fini(struct radeon_device *rdev)
1093{ 1119{
1094 r600_blit_fini(rdev); 1120 r600_blit_fini(rdev);
1095 r700_cp_fini(rdev); 1121 r700_cp_fini(rdev);
1122 r600_dma_fini(rdev);
1096 r600_irq_fini(rdev); 1123 r600_irq_fini(rdev);
1097 radeon_wb_fini(rdev); 1124 radeon_wb_fini(rdev);
1098 radeon_ib_pool_fini(rdev); 1125 radeon_ib_pool_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index b0adfc595d75..20e29d23d348 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -109,6 +109,9 @@
109#define PIPE_TILING__SHIFT 1 109#define PIPE_TILING__SHIFT 1
110#define PIPE_TILING__MASK 0x0000000e 110#define PIPE_TILING__MASK 0x0000000e
111 111
112#define DMA_TILING_CONFIG 0x3ec8
113#define DMA_TILING_CONFIG2 0xd0b8
114
112#define GC_USER_SHADER_PIPE_CONFIG 0x8954 115#define GC_USER_SHADER_PIPE_CONFIG 0x8954
113#define INACTIVE_QD_PIPES(x) ((x) << 8) 116#define INACTIVE_QD_PIPES(x) ((x) << 8)
114#define INACTIVE_QD_PIPES_MASK 0x0000FF00 117#define INACTIVE_QD_PIPES_MASK 0x0000FF00
@@ -358,6 +361,26 @@
358 361
359#define WAIT_UNTIL 0x8040 362#define WAIT_UNTIL 0x8040
360 363
364/* async DMA */
365#define DMA_RB_RPTR 0xd008
366#define DMA_RB_WPTR 0xd00c
367
368/* async DMA packets */
369#define DMA_PACKET(cmd, t, s, n) ((((cmd) & 0xF) << 28) | \
370 (((t) & 0x1) << 23) | \
371 (((s) & 0x1) << 22) | \
372 (((n) & 0xFFFF) << 0))
373/* async DMA Packet types */
374#define DMA_PACKET_WRITE 0x2
375#define DMA_PACKET_COPY 0x3
376#define DMA_PACKET_INDIRECT_BUFFER 0x4
377#define DMA_PACKET_SEMAPHORE 0x5
378#define DMA_PACKET_FENCE 0x6
379#define DMA_PACKET_TRAP 0x7
380#define DMA_PACKET_CONSTANT_FILL 0xd
381#define DMA_PACKET_NOP 0xf
382
383
361#define SRBM_STATUS 0x0E50 384#define SRBM_STATUS 0x0E50
362 385
363/* DCE 3.2 HDMI */ 386/* DCE 3.2 HDMI */
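
The 4-argument DMA_PACKET macro added here packs the r7xx async DMA header as the command in bits 31:28, the t and s flags in bits 23 and 22, and a 16-bit count in bits 15:0. A minimal standalone sketch of that encoding (u32 casts added so it builds cleanly outside the kernel), printing the NOP header that rv770_startup() passes to radeon_ring_init() as the ring pad value:

#include <stdio.h>
#include <stdint.h>

/* mirrors the rv770d.h definitions above, with explicit u32 casts */
#define DMA_PACKET(cmd, t, s, n) (((uint32_t)((cmd) & 0xF) << 28) | \
                                  ((uint32_t)((t) & 0x1) << 23) | \
                                  ((uint32_t)((s) & 0x1) << 22) | \
                                  ((uint32_t)((n) & 0xFFFF) << 0))
#define DMA_PACKET_WRITE 0x2
#define DMA_PACKET_NOP   0xf

int main(void)
{
    /* NOP header used as the ring nop/pad value in rv770_startup() */
    uint32_t nop = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
    /* a hypothetical 16-dword linear write */
    uint32_t wr  = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 16);

    printf("NOP header:   0x%08x\n", nop);  /* 0xf0000000 */
    printf("WRITE header: 0x%08x\n", wr);   /* 0x20000010 */
    return 0;
}
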
@@ -551,6 +574,54 @@
551#define HDMI_OFFSET0 (0x7400 - 0x7400) 574#define HDMI_OFFSET0 (0x7400 - 0x7400)
552#define HDMI_OFFSET1 (0x7800 - 0x7400) 575#define HDMI_OFFSET1 (0x7800 - 0x7400)
553 576
577/* DCE3.2 ELD audio interface */
578#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0 0x71c8 /* LPCM */
579#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1 0x71cc /* AC3 */
580#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2 0x71d0 /* MPEG1 */
581#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3 0x71d4 /* MP3 */
582#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4 0x71d8 /* MPEG2 */
583#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5 0x71dc /* AAC */
584#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6 0x71e0 /* DTS */
585#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7 0x71e4 /* ATRAC */
586#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8 0x71e8 /* one bit audio - leave at 0 (default) */
587#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9 0x71ec /* Dolby Digital */
588#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10 0x71f0 /* DTS-HD */
589#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11 0x71f4 /* MAT-MLP */
590#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12 0x71f8 /* DTS */
591#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13 0x71fc /* WMA Pro */
592# define MAX_CHANNELS(x) (((x) & 0x7) << 0)
593/* max channels minus one. 7 = 8 channels */
594# define SUPPORTED_FREQUENCIES(x) (((x) & 0xff) << 8)
595# define DESCRIPTOR_BYTE_2(x) (((x) & 0xff) << 16)
596# define SUPPORTED_FREQUENCIES_STEREO(x) (((x) & 0xff) << 24) /* LPCM only */
597/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
598 * bit0 = 32 kHz
599 * bit1 = 44.1 kHz
600 * bit2 = 48 kHz
601 * bit3 = 88.2 kHz
602 * bit4 = 96 kHz
603 * bit5 = 176.4 kHz
604 * bit6 = 192 kHz
605 */
606
607#define AZ_HOT_PLUG_CONTROL 0x7300
608# define AZ_FORCE_CODEC_WAKE (1 << 0)
609# define PIN0_JACK_DETECTION_ENABLE (1 << 4)
610# define PIN1_JACK_DETECTION_ENABLE (1 << 5)
611# define PIN2_JACK_DETECTION_ENABLE (1 << 6)
612# define PIN3_JACK_DETECTION_ENABLE (1 << 7)
613# define PIN0_UNSOLICITED_RESPONSE_ENABLE (1 << 8)
614# define PIN1_UNSOLICITED_RESPONSE_ENABLE (1 << 9)
615# define PIN2_UNSOLICITED_RESPONSE_ENABLE (1 << 10)
616# define PIN3_UNSOLICITED_RESPONSE_ENABLE (1 << 11)
617# define CODEC_HOT_PLUG_ENABLE (1 << 12)
618# define PIN0_AUDIO_ENABLED (1 << 24)
619# define PIN1_AUDIO_ENABLED (1 << 25)
620# define PIN2_AUDIO_ENABLED (1 << 26)
621# define PIN3_AUDIO_ENABLED (1 << 27)
622# define AUDIO_ENABLED (1 << 31)
623
624
554#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110 625#define D1GRPH_PRIMARY_SURFACE_ADDRESS 0x6110
555#define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914 626#define D1GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6914
556#define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114 627#define D2GRPH_PRIMARY_SURFACE_ADDRESS_HIGH 0x6114
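
Each AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTORn register holds one short-form audio descriptor; the field macros place the channel count minus one, the supported sample-rate bitmask and the format byte. A hedged sketch, assuming a hypothetical 8-channel LPCM sink that supports 32, 44.1, 48 and 96 kHz:

#include <stdio.h>
#include <stdint.h>

/* mirrors the rv770d.h field macros above (u32 casts added for a standalone build) */
#define MAX_CHANNELS(x)                 (((uint32_t)(x) & 0x7) << 0)
#define SUPPORTED_FREQUENCIES(x)        (((uint32_t)(x) & 0xff) << 8)
#define DESCRIPTOR_BYTE_2(x)            (((uint32_t)(x) & 0xff) << 16)
#define SUPPORTED_FREQUENCIES_STEREO(x) (((uint32_t)(x) & 0xff) << 24)

int main(void)
{
    /* bit0 = 32 kHz, bit1 = 44.1 kHz, bit2 = 48 kHz, bit4 = 96 kHz */
    uint32_t freqs = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 4);

    uint32_t desc0 = MAX_CHANNELS(7) |                    /* 7 = 8 channels */
                     SUPPORTED_FREQUENCIES(freqs) |
                     SUPPORTED_FREQUENCIES_STEREO(freqs); /* LPCM only */

    printf("DESCRIPTOR0 (LPCM): 0x%08x\n", desc0);
    return 0;
}
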
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 010156dd949f..ef683653f0b7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1660,6 +1660,8 @@ static void si_gpu_init(struct radeon_device *rdev)
1660 WREG32(GB_ADDR_CONFIG, gb_addr_config); 1660 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1661 WREG32(DMIF_ADDR_CONFIG, gb_addr_config); 1661 WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
1662 WREG32(HDP_ADDR_CONFIG, gb_addr_config); 1662 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1663 WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
1664 WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
1663 1665
1664 si_tiling_mode_table_init(rdev); 1666 si_tiling_mode_table_init(rdev);
1665 1667
@@ -1836,6 +1838,9 @@ static void si_cp_enable(struct radeon_device *rdev, bool enable)
1836 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 1838 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
1837 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT)); 1839 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
1838 WREG32(SCRATCH_UMSK, 0); 1840 WREG32(SCRATCH_UMSK, 0);
1841 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
1842 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
1843 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
1839 } 1844 }
1840 udelay(50); 1845 udelay(50);
1841} 1846}
@@ -2426,9 +2431,20 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
2426 /* enable context1-15 */ 2431 /* enable context1-15 */
2427 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR, 2432 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
2428 (u32)(rdev->dummy_page.addr >> 12)); 2433 (u32)(rdev->dummy_page.addr >> 12));
2429 WREG32(VM_CONTEXT1_CNTL2, 0); 2434 WREG32(VM_CONTEXT1_CNTL2, 4);
2430 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) | 2435 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
2431 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT); 2436 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2437 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2438 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2439 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
2440 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
2441 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
2442 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
2443 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
2444 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
2445 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
2446 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
2447 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
2432 2448
2433 si_pcie_gart_tlb_flush(rdev); 2449 si_pcie_gart_tlb_flush(rdev);
2434 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", 2450 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -2534,6 +2550,7 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2534 u32 idx = pkt->idx + 1; 2550 u32 idx = pkt->idx + 1;
2535 u32 idx_value = ib[idx]; 2551 u32 idx_value = ib[idx];
2536 u32 start_reg, end_reg, reg, i; 2552 u32 start_reg, end_reg, reg, i;
2553 u32 command, info;
2537 2554
2538 switch (pkt->opcode) { 2555 switch (pkt->opcode) {
2539 case PACKET3_NOP: 2556 case PACKET3_NOP:
@@ -2633,6 +2650,52 @@ static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
2633 return -EINVAL; 2650 return -EINVAL;
2634 } 2651 }
2635 break; 2652 break;
2653 case PACKET3_CP_DMA:
2654 command = ib[idx + 4];
2655 info = ib[idx + 1];
2656 if (command & PACKET3_CP_DMA_CMD_SAS) {
2657 /* src address space is register */
2658 if (((info & 0x60000000) >> 29) == 0) {
2659 start_reg = idx_value << 2;
2660 if (command & PACKET3_CP_DMA_CMD_SAIC) {
2661 reg = start_reg;
2662 if (!si_vm_reg_valid(reg)) {
2663 DRM_ERROR("CP DMA Bad SRC register\n");
2664 return -EINVAL;
2665 }
2666 } else {
2667 for (i = 0; i < (command & 0x1fffff); i++) {
2668 reg = start_reg + (4 * i);
2669 if (!si_vm_reg_valid(reg)) {
2670 DRM_ERROR("CP DMA Bad SRC register\n");
2671 return -EINVAL;
2672 }
2673 }
2674 }
2675 }
2676 }
2677 if (command & PACKET3_CP_DMA_CMD_DAS) {
2678 /* dst address space is register */
2679 if (((info & 0x00300000) >> 20) == 0) {
2680 start_reg = ib[idx + 2];
2681 if (command & PACKET3_CP_DMA_CMD_DAIC) {
2682 reg = start_reg;
2683 if (!si_vm_reg_valid(reg)) {
2684 DRM_ERROR("CP DMA Bad DST register\n");
2685 return -EINVAL;
2686 }
2687 } else {
2688 for (i = 0; i < (command & 0x1fffff); i++) {
2689 reg = start_reg + (4 * i);
2690 if (!si_vm_reg_valid(reg)) {
2691 DRM_ERROR("CP DMA Bad DST register\n");
2692 return -EINVAL;
2693 }
2694 }
2695 }
2696 }
2697 }
2698 break;
2636 default: 2699 default:
2637 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode); 2700 DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
2638 return -EINVAL; 2701 return -EINVAL;
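
The PACKET3_CP_DMA case above walks a range of registers only when the address-space bit (SAS/DAS) selects a register and the corresponding no-increment bit (SAIC/DAIC) is clear; otherwise a single register is checked. A simplified standalone sketch of that walk, with reg_valid() as an illustrative stand-in for si_vm_reg_valid() and a made-up whitelist range:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* stub: accept a hypothetical whitelisted range, reject everything else */
static bool reg_valid(uint32_t reg)
{
    return reg >= 0x28000 && reg < 0x29000;
}

/* walk 'count' consecutive registers starting at start_reg, as the
 * checker does when the SAIC/DAIC "no increment" bit is not set */
static int check_reg_range(uint32_t start_reg, uint32_t count)
{
    for (uint32_t i = 0; i < count; i++) {
        uint32_t reg = start_reg + 4 * i;
        if (!reg_valid(reg)) {
            fprintf(stderr, "bad register 0x%05x\n", reg);
            return -1;
        }
    }
    return 0;
}

int main(void)
{
    printf("in range:  %d\n", check_reg_range(0x28000, 8));  /* 0 */
    printf("out range: %d\n", check_reg_range(0x29000, 1));  /* -1 */
    return 0;
}
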
@@ -2809,30 +2872,86 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe,
2809{ 2872{
2810 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; 2873 struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index];
2811 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); 2874 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
2812 2875 uint64_t value;
2813 while (count) { 2876 unsigned ndw;
2814 unsigned ndw = 2 + count * 2; 2877
2815 if (ndw > 0x3FFE) 2878 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
2816 ndw = 0x3FFE; 2879 while (count) {
2817 2880 ndw = 2 + count * 2;
2818 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); 2881 if (ndw > 0x3FFE)
2819 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 2882 ndw = 0x3FFE;
2820 WRITE_DATA_DST_SEL(1))); 2883
2821 radeon_ring_write(ring, pe); 2884 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw));
2822 radeon_ring_write(ring, upper_32_bits(pe)); 2885 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2823 for (; ndw > 2; ndw -= 2, --count, pe += 8) { 2886 WRITE_DATA_DST_SEL(1)));
2824 uint64_t value; 2887 radeon_ring_write(ring, pe);
2825 if (flags & RADEON_VM_PAGE_SYSTEM) { 2888 radeon_ring_write(ring, upper_32_bits(pe));
2826 value = radeon_vm_map_gart(rdev, addr); 2889 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
2827 value &= 0xFFFFFFFFFFFFF000ULL; 2890 if (flags & RADEON_VM_PAGE_SYSTEM) {
2828 } else if (flags & RADEON_VM_PAGE_VALID) 2891 value = radeon_vm_map_gart(rdev, addr);
2829 value = addr; 2892 value &= 0xFFFFFFFFFFFFF000ULL;
2830 else 2893 } else if (flags & RADEON_VM_PAGE_VALID) {
2831 value = 0; 2894 value = addr;
2832 addr += incr; 2895 } else {
2833 value |= r600_flags; 2896 value = 0;
2834 radeon_ring_write(ring, value); 2897 }
2835 radeon_ring_write(ring, upper_32_bits(value)); 2898 addr += incr;
2899 value |= r600_flags;
2900 radeon_ring_write(ring, value);
2901 radeon_ring_write(ring, upper_32_bits(value));
2902 }
2903 }
2904 } else {
2905 /* DMA */
2906 if (flags & RADEON_VM_PAGE_SYSTEM) {
2907 while (count) {
2908 ndw = count * 2;
2909 if (ndw > 0xFFFFE)
2910 ndw = 0xFFFFE;
2911
2912 /* for non-physically contiguous pages (system) */
2913 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw));
2914 radeon_ring_write(ring, pe);
2915 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
2916 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
2917 if (flags & RADEON_VM_PAGE_SYSTEM) {
2918 value = radeon_vm_map_gart(rdev, addr);
2919 value &= 0xFFFFFFFFFFFFF000ULL;
2920 } else if (flags & RADEON_VM_PAGE_VALID) {
2921 value = addr;
2922 } else {
2923 value = 0;
2924 }
2925 addr += incr;
2926 value |= r600_flags;
2927 radeon_ring_write(ring, value);
2928 radeon_ring_write(ring, upper_32_bits(value));
2929 }
2930 }
2931 } else {
2932 while (count) {
2933 ndw = count * 2;
2934 if (ndw > 0xFFFFE)
2935 ndw = 0xFFFFE;
2936
2937 if (flags & RADEON_VM_PAGE_VALID)
2938 value = addr;
2939 else
2940 value = 0;
2941 /* for physically contiguous pages (vram) */
2942 radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw));
2943 radeon_ring_write(ring, pe); /* dst addr */
2944 radeon_ring_write(ring, upper_32_bits(pe) & 0xff);
2945 radeon_ring_write(ring, r600_flags); /* mask */
2946 radeon_ring_write(ring, 0);
2947 radeon_ring_write(ring, value); /* value */
2948 radeon_ring_write(ring, upper_32_bits(value));
2949 radeon_ring_write(ring, incr); /* increment size */
2950 radeon_ring_write(ring, 0);
2951 pe += ndw * 4;
2952 addr += (ndw / 2) * incr;
2953 count -= ndw / 2;
2954 }
2836 } 2955 }
2837 } 2956 }
2838} 2957}
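
In the DMA branch above, each WRITE packet carries at most 0xFFFFE dwords, two dwords per 64-bit page-table entry, so a large update is split across several packets. A small sketch of that split using the same clamping, for an illustrative entry count:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t count = 1u << 20;          /* page table entries to write (illustrative) */
    unsigned packets = 0;
    uint64_t dwords = 0;

    while (count) {
        uint64_t ndw = count * 2;       /* two dwords per 64-bit entry */
        if (ndw > 0xFFFFE)
            ndw = 0xFFFFE;
        dwords += ndw + 3;              /* header + two destination-address dwords */
        count -= ndw / 2;
        packets++;
    }
    printf("packets: %u, ring dwords: %llu\n",
           packets, (unsigned long long)dwords);
    return 0;
}
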
@@ -2880,6 +2999,32 @@ void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
2880 radeon_ring_write(ring, 0x0); 2999 radeon_ring_write(ring, 0x0);
2881} 3000}
2882 3001
3002void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
3003{
3004 struct radeon_ring *ring = &rdev->ring[ridx];
3005
3006 if (vm == NULL)
3007 return;
3008
3009 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3010 if (vm->id < 8) {
3011 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
3012 } else {
3013 radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
3014 }
3015 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
3016
3017 /* flush hdp cache */
3018 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3019 radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
3020 radeon_ring_write(ring, 1);
3021
3022 /* bits 0-7 are the VM contexts0-7 */
3023 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
3024 radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
3025 radeon_ring_write(ring, 1 << vm->id);
3026}
3027
2883/* 3028/*
2884 * RLC 3029 * RLC
2885 */ 3030 */
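
si_dma_vm_flush() programs SRBM registers from the DMA ring: after a DMA_PACKET_SRBM_WRITE header, the next dword combines a mask in the high half (0xf << 16 here) with the dword-aligned register offset in the low bits, followed by the value to write. A hedged sketch of that three-dword sequence; emit_srbm_write() and the local ring buffer are illustrative, not driver API:

#include <stdint.h>
#include <stdio.h>

#define DMA_PACKET_SRBM_WRITE 0x9
#define DMA_PACKET(cmd, b, t, s, n) (((uint32_t)((cmd) & 0xF) << 28) | \
                                     ((uint32_t)((b) & 0x1) << 26) | \
                                     ((uint32_t)((t) & 0x1) << 23) | \
                                     ((uint32_t)((s) & 0x1) << 22) | \
                                     ((uint32_t)((n) & 0xFFFFF) << 0))

/* emit the three dwords an SRBM register write takes on the DMA ring */
static void emit_srbm_write(uint32_t *ring, unsigned *wptr,
                            uint32_t reg, uint32_t val)
{
    ring[(*wptr)++] = DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0);
    ring[(*wptr)++] = (0xf << 16) | (reg >> 2);   /* mask | register dword offset */
    ring[(*wptr)++] = val;
}

int main(void)
{
    uint32_t ring[16];
    unsigned wptr = 0;
    const uint32_t vm_invalidate_request = 0x1478;   /* VM_INVALIDATE_REQUEST in sid.h */

    emit_srbm_write(ring, &wptr, vm_invalidate_request, 1 << 0); /* flush VM context 0 */
    for (unsigned i = 0; i < wptr; i++)
        printf("ring[%u] = 0x%08x\n", i, ring[i]);
    return 0;
}
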
@@ -3048,6 +3193,10 @@ static void si_disable_interrupt_state(struct radeon_device *rdev)
3048 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE); 3193 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
3049 WREG32(CP_INT_CNTL_RING1, 0); 3194 WREG32(CP_INT_CNTL_RING1, 0);
3050 WREG32(CP_INT_CNTL_RING2, 0); 3195 WREG32(CP_INT_CNTL_RING2, 0);
3196 tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3197 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
3198 tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3199 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
3051 WREG32(GRBM_INT_CNTL, 0); 3200 WREG32(GRBM_INT_CNTL, 0);
3052 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0); 3201 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
3053 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); 3202 WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
@@ -3167,6 +3316,7 @@ int si_irq_set(struct radeon_device *rdev)
3167 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; 3316 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
3168 u32 grbm_int_cntl = 0; 3317 u32 grbm_int_cntl = 0;
3169 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0; 3318 u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
3319 u32 dma_cntl, dma_cntl1;
3170 3320
3171 if (!rdev->irq.installed) { 3321 if (!rdev->irq.installed) {
3172 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n"); 3322 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -3187,6 +3337,9 @@ int si_irq_set(struct radeon_device *rdev)
3187 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN; 3337 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
3188 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN; 3338 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
3189 3339
3340 dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
3341 dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
3342
3190 /* enable CP interrupts on all rings */ 3343 /* enable CP interrupts on all rings */
3191 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { 3344 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
3192 DRM_DEBUG("si_irq_set: sw int gfx\n"); 3345 DRM_DEBUG("si_irq_set: sw int gfx\n");
@@ -3200,6 +3353,15 @@ int si_irq_set(struct radeon_device *rdev)
3200 DRM_DEBUG("si_irq_set: sw int cp2\n"); 3353 DRM_DEBUG("si_irq_set: sw int cp2\n");
3201 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; 3354 cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
3202 } 3355 }
3356 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
3357 DRM_DEBUG("si_irq_set: sw int dma\n");
3358 dma_cntl |= TRAP_ENABLE;
3359 }
3360
3361 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
3362 DRM_DEBUG("si_irq_set: sw int dma1\n");
3363 dma_cntl1 |= TRAP_ENABLE;
3364 }
3203 if (rdev->irq.crtc_vblank_int[0] || 3365 if (rdev->irq.crtc_vblank_int[0] ||
3204 atomic_read(&rdev->irq.pflip[0])) { 3366 atomic_read(&rdev->irq.pflip[0])) {
3205 DRM_DEBUG("si_irq_set: vblank 0\n"); 3367 DRM_DEBUG("si_irq_set: vblank 0\n");
@@ -3259,6 +3421,9 @@ int si_irq_set(struct radeon_device *rdev)
3259 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1); 3421 WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
3260 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2); 3422 WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
3261 3423
3424 WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
3425 WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
3426
3262 WREG32(GRBM_INT_CNTL, grbm_int_cntl); 3427 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
3263 3428
3264 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1); 3429 WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
@@ -3684,6 +3849,16 @@ restart_ih:
3684 break; 3849 break;
3685 } 3850 }
3686 break; 3851 break;
3852 case 146:
3853 case 147:
3854 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
3855 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3856 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3857 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3858 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3859 /* reset addr and status */
3860 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
3861 break;
3687 case 176: /* RINGID0 CP_INT */ 3862 case 176: /* RINGID0 CP_INT */
3688 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX); 3863 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
3689 break; 3864 break;
@@ -3707,9 +3882,17 @@ restart_ih:
3707 break; 3882 break;
3708 } 3883 }
3709 break; 3884 break;
3885 case 224: /* DMA trap event */
3886 DRM_DEBUG("IH: DMA trap\n");
3887 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
3888 break;
3710 case 233: /* GUI IDLE */ 3889 case 233: /* GUI IDLE */
3711 DRM_DEBUG("IH: GUI idle\n"); 3890 DRM_DEBUG("IH: GUI idle\n");
3712 break; 3891 break;
3892 case 244: /* DMA trap event */
3893 DRM_DEBUG("IH: DMA1 trap\n");
3894 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
3895 break;
3713 default: 3896 default:
3714 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); 3897 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
3715 break; 3898 break;
@@ -3733,6 +3916,80 @@ restart_ih:
3733 return IRQ_HANDLED; 3916 return IRQ_HANDLED;
3734} 3917}
3735 3918
3919/**
3920 * si_copy_dma - copy pages using the DMA engine
3921 *
3922 * @rdev: radeon_device pointer
3923 * @src_offset: src GPU address
3924 * @dst_offset: dst GPU address
3925 * @num_gpu_pages: number of GPU pages to xfer
3926 * @fence: radeon fence object
3927 *
3928 * Copy GPU paging using the DMA engine (SI).
3929 * Used by the radeon ttm implementation to move pages if
3930 * registered as the asic copy callback.
3931 */
3932int si_copy_dma(struct radeon_device *rdev,
3933 uint64_t src_offset, uint64_t dst_offset,
3934 unsigned num_gpu_pages,
3935 struct radeon_fence **fence)
3936{
3937 struct radeon_semaphore *sem = NULL;
3938 int ring_index = rdev->asic->copy.dma_ring_index;
3939 struct radeon_ring *ring = &rdev->ring[ring_index];
3940 u32 size_in_bytes, cur_size_in_bytes;
3941 int i, num_loops;
3942 int r = 0;
3943
3944 r = radeon_semaphore_create(rdev, &sem);
3945 if (r) {
3946 DRM_ERROR("radeon: moving bo (%d).\n", r);
3947 return r;
3948 }
3949
3950 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3951 num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
3952 r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
3953 if (r) {
3954 DRM_ERROR("radeon: moving bo (%d).\n", r);
3955 radeon_semaphore_free(rdev, &sem, NULL);
3956 return r;
3957 }
3958
3959 if (radeon_fence_need_sync(*fence, ring->idx)) {
3960 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3961 ring->idx);
3962 radeon_fence_note_sync(*fence, ring->idx);
3963 } else {
3964 radeon_semaphore_free(rdev, &sem, NULL);
3965 }
3966
3967 for (i = 0; i < num_loops; i++) {
3968 cur_size_in_bytes = size_in_bytes;
3969 if (cur_size_in_bytes > 0xFFFFF)
3970 cur_size_in_bytes = 0xFFFFF;
3971 size_in_bytes -= cur_size_in_bytes;
3972 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
3973 radeon_ring_write(ring, dst_offset & 0xffffffff);
3974 radeon_ring_write(ring, src_offset & 0xffffffff);
3975 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
3976 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
3977 src_offset += cur_size_in_bytes;
3978 dst_offset += cur_size_in_bytes;
3979 }
3980
3981 r = radeon_fence_emit(rdev, fence, ring->idx);
3982 if (r) {
3983 radeon_ring_unlock_undo(rdev, ring);
3984 return r;
3985 }
3986
3987 radeon_ring_unlock_commit(rdev, ring);
3988 radeon_semaphore_free(rdev, &sem, *fence);
3989
3990 return r;
3991}
3992
3736/* 3993/*
3737 * startup/shutdown callbacks 3994 * startup/shutdown callbacks
3738 */ 3995 */
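
si_copy_dma() splits the transfer into chunks of at most 0xFFFFF bytes and reserves num_loops * 5 + 11 ring dwords up front (five dwords per COPY packet plus headroom for the semaphore sync and the fence). A quick sketch of that accounting for an illustrative 16 MiB transfer, assuming 4 KiB GPU pages:

#include <stdio.h>
#include <stdint.h>

#define RADEON_GPU_PAGE_SHIFT 12           /* assumed 4 KiB GPU pages */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    unsigned num_gpu_pages = 4096;                              /* 16 MiB */
    uint32_t size_in_bytes = num_gpu_pages << RADEON_GPU_PAGE_SHIFT;
    unsigned num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);  /* ~1 MiB per COPY */
    unsigned ring_dw = num_loops * 5 + 11;                      /* as reserved in si_copy_dma() */

    printf("bytes=%u loops=%u ring dwords=%u\n",
           size_in_bytes, num_loops, ring_dw);
    return 0;
}
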
@@ -3804,6 +4061,18 @@ static int si_startup(struct radeon_device *rdev)
3804 return r; 4061 return r;
3805 } 4062 }
3806 4063
4064 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
4065 if (r) {
4066 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4067 return r;
4068 }
4069
4070 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
4071 if (r) {
4072 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
4073 return r;
4074 }
4075
3807 /* Enable IRQ */ 4076 /* Enable IRQ */
3808 r = si_irq_init(rdev); 4077 r = si_irq_init(rdev);
3809 if (r) { 4078 if (r) {
@@ -3834,6 +4103,22 @@ static int si_startup(struct radeon_device *rdev)
3834 if (r) 4103 if (r)
3835 return r; 4104 return r;
3836 4105
4106 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4107 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
4108 DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
4109 DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
4110 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4111 if (r)
4112 return r;
4113
4114 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4115 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
4116 DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
4117 DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
4118 2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
4119 if (r)
4120 return r;
4121
3837 r = si_cp_load_microcode(rdev); 4122 r = si_cp_load_microcode(rdev);
3838 if (r) 4123 if (r)
3839 return r; 4124 return r;
@@ -3841,6 +4126,10 @@ static int si_startup(struct radeon_device *rdev)
3841 if (r) 4126 if (r)
3842 return r; 4127 return r;
3843 4128
4129 r = cayman_dma_resume(rdev);
4130 if (r)
4131 return r;
4132
3844 r = radeon_ib_pool_init(rdev); 4133 r = radeon_ib_pool_init(rdev);
3845 if (r) { 4134 if (r) {
3846 dev_err(rdev->dev, "IB initialization failed (%d).\n", r); 4135 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3882,9 +4171,7 @@ int si_resume(struct radeon_device *rdev)
3882int si_suspend(struct radeon_device *rdev) 4171int si_suspend(struct radeon_device *rdev)
3883{ 4172{
3884 si_cp_enable(rdev, false); 4173 si_cp_enable(rdev, false);
3885 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; 4174 cayman_dma_stop(rdev);
3886 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3887 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3888 si_irq_suspend(rdev); 4175 si_irq_suspend(rdev);
3889 radeon_wb_disable(rdev); 4176 radeon_wb_disable(rdev);
3890 si_pcie_gart_disable(rdev); 4177 si_pcie_gart_disable(rdev);
@@ -3962,6 +4249,14 @@ int si_init(struct radeon_device *rdev)
3962 ring->ring_obj = NULL; 4249 ring->ring_obj = NULL;
3963 r600_ring_init(rdev, ring, 1024 * 1024); 4250 r600_ring_init(rdev, ring, 1024 * 1024);
3964 4251
4252 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4253 ring->ring_obj = NULL;
4254 r600_ring_init(rdev, ring, 64 * 1024);
4255
4256 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4257 ring->ring_obj = NULL;
4258 r600_ring_init(rdev, ring, 64 * 1024);
4259
3965 rdev->ih.ring_obj = NULL; 4260 rdev->ih.ring_obj = NULL;
3966 r600_ih_ring_init(rdev, 64 * 1024); 4261 r600_ih_ring_init(rdev, 64 * 1024);
3967 4262
@@ -3974,6 +4269,7 @@ int si_init(struct radeon_device *rdev)
3974 if (r) { 4269 if (r) {
3975 dev_err(rdev->dev, "disabling GPU acceleration\n"); 4270 dev_err(rdev->dev, "disabling GPU acceleration\n");
3976 si_cp_fini(rdev); 4271 si_cp_fini(rdev);
4272 cayman_dma_fini(rdev);
3977 si_irq_fini(rdev); 4273 si_irq_fini(rdev);
3978 si_rlc_fini(rdev); 4274 si_rlc_fini(rdev);
3979 radeon_wb_fini(rdev); 4275 radeon_wb_fini(rdev);
@@ -4002,6 +4298,7 @@ void si_fini(struct radeon_device *rdev)
4002 r600_blit_fini(rdev); 4298 r600_blit_fini(rdev);
4003#endif 4299#endif
4004 si_cp_fini(rdev); 4300 si_cp_fini(rdev);
4301 cayman_dma_fini(rdev);
4005 si_irq_fini(rdev); 4302 si_irq_fini(rdev);
4006 si_rlc_fini(rdev); 4303 si_rlc_fini(rdev);
4007 radeon_wb_fini(rdev); 4304 radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index a8871afc5b4e..62b46215d423 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -91,7 +91,18 @@
91#define VM_CONTEXT0_CNTL 0x1410 91#define VM_CONTEXT0_CNTL 0x1410
92#define ENABLE_CONTEXT (1 << 0) 92#define ENABLE_CONTEXT (1 << 0)
93#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1) 93#define PAGE_TABLE_DEPTH(x) (((x) & 3) << 1)
94#define RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 3)
94#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4) 95#define RANGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 4)
96#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 6)
97#define DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 7)
98#define PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 9)
99#define PDE0_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 10)
100#define VALID_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 12)
101#define VALID_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 13)
102#define READ_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 15)
103#define READ_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 16)
104#define WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT (1 << 18)
105#define WRITE_PROTECTION_FAULT_ENABLE_DEFAULT (1 << 19)
95#define VM_CONTEXT1_CNTL 0x1414 106#define VM_CONTEXT1_CNTL 0x1414
96#define VM_CONTEXT0_CNTL2 0x1430 107#define VM_CONTEXT0_CNTL2 0x1430
97#define VM_CONTEXT1_CNTL2 0x1434 108#define VM_CONTEXT1_CNTL2 0x1434
@@ -104,6 +115,9 @@
104#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450 115#define VM_CONTEXT14_PAGE_TABLE_BASE_ADDR 0x1450
105#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454 116#define VM_CONTEXT15_PAGE_TABLE_BASE_ADDR 0x1454
106 117
118#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
119#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
120
107#define VM_INVALIDATE_REQUEST 0x1478 121#define VM_INVALIDATE_REQUEST 0x1478
108#define VM_INVALIDATE_RESPONSE 0x147c 122#define VM_INVALIDATE_RESPONSE 0x147c
109 123
@@ -835,6 +849,54 @@
835#define PACKET3_WAIT_REG_MEM 0x3C 849#define PACKET3_WAIT_REG_MEM 0x3C
836#define PACKET3_MEM_WRITE 0x3D 850#define PACKET3_MEM_WRITE 0x3D
837#define PACKET3_COPY_DATA 0x40 851#define PACKET3_COPY_DATA 0x40
852#define PACKET3_CP_DMA 0x41
853/* 1. header
854 * 2. SRC_ADDR_LO or DATA [31:0]
855 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] |
856 * SRC_ADDR_HI [7:0]
857 * 4. DST_ADDR_LO [31:0]
858 * 5. DST_ADDR_HI [7:0]
859 * 6. COMMAND [30:21] | BYTE_COUNT [20:0]
860 */
861# define PACKET3_CP_DMA_DST_SEL(x) ((x) << 20)
862 /* 0 - SRC_ADDR
863 * 1 - GDS
864 */
865# define PACKET3_CP_DMA_ENGINE(x) ((x) << 27)
866 /* 0 - ME
867 * 1 - PFP
868 */
869# define PACKET3_CP_DMA_SRC_SEL(x) ((x) << 29)
870 /* 0 - SRC_ADDR
871 * 1 - GDS
872 * 2 - DATA
873 */
874# define PACKET3_CP_DMA_CP_SYNC (1 << 31)
875/* COMMAND */
876# define PACKET3_CP_DMA_DIS_WC (1 << 21)
877# define PACKET3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23)
878 /* 0 - none
879 * 1 - 8 in 16
880 * 2 - 8 in 32
881 * 3 - 8 in 64
882 */
883# define PACKET3_CP_DMA_CMD_DST_SWAP(x) ((x) << 24)
884 /* 0 - none
885 * 1 - 8 in 16
886 * 2 - 8 in 32
887 * 3 - 8 in 64
888 */
889# define PACKET3_CP_DMA_CMD_SAS (1 << 26)
890 /* 0 - memory
891 * 1 - register
892 */
893# define PACKET3_CP_DMA_CMD_DAS (1 << 27)
894 /* 0 - memory
895 * 1 - register
896 */
897# define PACKET3_CP_DMA_CMD_SAIC (1 << 28)
898# define PACKET3_CP_DMA_CMD_DAIC (1 << 29)
899# define PACKET3_CP_DMA_CMD_RAW_WAIT (1 << 30)
838#define PACKET3_PFP_SYNC_ME 0x42 900#define PACKET3_PFP_SYNC_ME 0x42
839#define PACKET3_SURFACE_SYNC 0x43 901#define PACKET3_SURFACE_SYNC 0x43
840# define PACKET3_DEST_BASE_0_ENA (1 << 0) 902# define PACKET3_DEST_BASE_0_ENA (1 << 0)
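
Combining the layout comment with the field macros: a plain memory-to-memory CP_DMA leaves SAS/DAS clear and puts the byte count in COMMAND bits 20:0, while dword 3 carries CP_SYNC, the source/destination selects and the high source address bits. A minimal sketch of those two dwords (the addresses and sizes are illustrative):

#include <stdio.h>
#include <stdint.h>

/* mirrors the sid.h macros above */
#define PACKET3_CP_DMA_DST_SEL(x)  ((uint32_t)(x) << 20)
#define PACKET3_CP_DMA_ENGINE(x)   ((uint32_t)(x) << 27)
#define PACKET3_CP_DMA_SRC_SEL(x)  ((uint32_t)(x) << 29)
#define PACKET3_CP_DMA_CP_SYNC     (1u << 31)
#define PACKET3_CP_DMA_CMD_SAS     (1u << 26)   /* src is a register */
#define PACKET3_CP_DMA_CMD_DAS     (1u << 27)   /* dst is a register */

int main(void)
{
    uint64_t src = 0x100000000ULL;     /* illustrative GPU address */
    uint32_t byte_count = 0x1000;

    /* dword 3: CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | SRC_ADDR_HI [7:0] */
    uint32_t info = PACKET3_CP_DMA_CP_SYNC |
                    PACKET3_CP_DMA_SRC_SEL(0) |
                    PACKET3_CP_DMA_DST_SEL(0) |
                    (uint32_t)((src >> 32) & 0xff);
    /* dword 6: COMMAND flags | BYTE_COUNT [20:0]; memory-to-memory, so SAS/DAS clear */
    uint32_t command = byte_count & 0x1fffff;

    printf("info    = 0x%08x\n", info);
    printf("command = 0x%08x\n", command);
    return 0;
}
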
@@ -922,4 +984,61 @@
922#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A 984#define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A
923#define PACKET3_SWITCH_BUFFER 0x8B 985#define PACKET3_SWITCH_BUFFER 0x8B
924 986
987/* ASYNC DMA - first instance at 0xd000, second at 0xd800 */
988#define DMA0_REGISTER_OFFSET 0x0 /* not a register */
989#define DMA1_REGISTER_OFFSET 0x800 /* not a register */
990
991#define DMA_RB_CNTL 0xd000
992# define DMA_RB_ENABLE (1 << 0)
993# define DMA_RB_SIZE(x) ((x) << 1) /* log2 */
994# define DMA_RB_SWAP_ENABLE (1 << 9) /* 8IN32 */
995# define DMA_RPTR_WRITEBACK_ENABLE (1 << 12)
996# define DMA_RPTR_WRITEBACK_SWAP_ENABLE (1 << 13) /* 8IN32 */
997# define DMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
998#define DMA_RB_BASE 0xd004
999#define DMA_RB_RPTR 0xd008
1000#define DMA_RB_WPTR 0xd00c
1001
1002#define DMA_RB_RPTR_ADDR_HI 0xd01c
1003#define DMA_RB_RPTR_ADDR_LO 0xd020
1004
1005#define DMA_IB_CNTL 0xd024
1006# define DMA_IB_ENABLE (1 << 0)
1007# define DMA_IB_SWAP_ENABLE (1 << 4)
1008#define DMA_IB_RPTR 0xd028
1009#define DMA_CNTL 0xd02c
1010# define TRAP_ENABLE (1 << 0)
1011# define SEM_INCOMPLETE_INT_ENABLE (1 << 1)
1012# define SEM_WAIT_INT_ENABLE (1 << 2)
1013# define DATA_SWAP_ENABLE (1 << 3)
1014# define FENCE_SWAP_ENABLE (1 << 4)
1015# define CTXEMPTY_INT_ENABLE (1 << 28)
1016#define DMA_TILING_CONFIG 0xd0b8
1017
1018#define DMA_PACKET(cmd, b, t, s, n) ((((cmd) & 0xF) << 28) | \
1019 (((b) & 0x1) << 26) | \
1020 (((t) & 0x1) << 23) | \
1021 (((s) & 0x1) << 22) | \
1022 (((n) & 0xFFFFF) << 0))
1023
1024#define DMA_IB_PACKET(cmd, vmid, n) ((((cmd) & 0xF) << 28) | \
1025 (((vmid) & 0xF) << 20) | \
1026 (((n) & 0xFFFFF) << 0))
1027
1028#define DMA_PTE_PDE_PACKET(n) ((2 << 28) | \
1029 (1 << 26) | \
1030 (1 << 21) | \
1031 (((n) & 0xFFFFF) << 0))
1032
1033/* async DMA Packet types */
1034#define DMA_PACKET_WRITE 0x2
1035#define DMA_PACKET_COPY 0x3
1036#define DMA_PACKET_INDIRECT_BUFFER 0x4
1037#define DMA_PACKET_SEMAPHORE 0x5
1038#define DMA_PACKET_FENCE 0x6
1039#define DMA_PACKET_TRAP 0x7
1040#define DMA_PACKET_SRBM_WRITE 0x9
1041#define DMA_PACKET_CONSTANT_FILL 0xd
1042#define DMA_PACKET_NOP 0xf
1043
925#endif 1044#endif
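
DMA_RB_SIZE takes the ring size as a log2 dword count and the read-pointer writeback timer is also a log2 value. A hedged sketch of composing DMA_RB_CNTL for the 64 KiB DMA rings allocated in si_init() above; the ilog2 helper and the timer value are illustrative, and real setup code programs the base/rptr/wptr registers before setting DMA_RB_ENABLE:

#include <stdio.h>
#include <stdint.h>

/* mirrors the sid.h bit definitions above */
#define DMA_RB_ENABLE                   (1u << 0)
#define DMA_RB_SIZE(x)                  ((uint32_t)(x) << 1)   /* log2 of size in dwords */
#define DMA_RPTR_WRITEBACK_ENABLE       (1u << 12)
#define DMA_RPTR_WRITEBACK_TIMER(x)     ((uint32_t)(x) << 16)  /* log2 */

static unsigned ilog2_u32(uint32_t v)
{
    unsigned r = 0;
    while (v >>= 1)
        r++;
    return r;
}

int main(void)
{
    uint32_t ring_size = 64 * 1024;                    /* bytes, as in si_init() above */
    unsigned rb_bufsz = ilog2_u32(ring_size / 4);      /* 16384 dwords -> 14 */

    uint32_t rb_cntl = DMA_RB_SIZE(rb_bufsz) |
                       DMA_RPTR_WRITEBACK_ENABLE |
                       DMA_RPTR_WRITEBACK_TIMER(6);    /* illustrative timer value */

    /* DMA_RB_ENABLE is set last, once the ring base and pointers are programmed */
    printf("DMA_RB_CNTL = 0x%08x\n", rb_cntl | DMA_RB_ENABLE);
    return 0;
}
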