aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2013-11-21 03:42:19 -0500
committer: Dave Airlie <airlied@redhat.com> 2013-11-21 03:42:19 -0500
commit: ded5107e2a347657cdd00abb9354b62d5ab8f770 (patch)
tree: c75df42579031ed756dc25f8969aff5ee5b94cda
parent: 527d1511310a89650000081869260394e20c7013 (diff)
parent: 7272c9d2286525d4c6bce788243cf2b6f306d15c (diff)
Merge branch 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux into drm-fixes
More fixes for radeon. This adds new queries for tiling on CIK, and fixes a crash in handling acpi atif backlight events on CIK.

Some fixes for radeon for 3.13. Mostly CI stability fixes. I think I've tracked down the stability problems with dpm on Trinity/Richland, so I'm going to enable that by default now.

* 'drm-next-3.13' of git://people.freedesktop.org/~agd5f/linux:
  drm/radeon: hook up backlight functions for CI and KV family.
  drm/radeon/cik: Add macrotile mode array query
  drm/radeon/cik: Return backend map information to userspace
  drm/radeon: enable DPM by default in TN asics
  drm/radeon: adjust TN dpm parameters for stability (v2)
  drm/radeon: use a single doorbell for cik kms compute
  drm/radeon/vm: don't attempt to update ptes if ib allocation fails
  drm/radeon: disable CIK CP semaphores for now
  drm/radeon: allow semaphore emission to fail
  drm/radeon: add semaphore trace point
  radeon: workaround pinning failure on low ram gpu
  radeon/i2c: do not count reg index in number of i2c byte we are writing.
  drm/radeon: cypress_dpm: Fix unused variable warning when CONFIG_ACPI=n
  drm: radeon: ni_dpm: Fix unused variable warning when CONFIG_ACPI=n
-rw-r--r--  drivers/gpu/drm/radeon/atombios_i2c.c  6
-rw-r--r--  drivers/gpu/drm/radeon/cik.c  57
-rw-r--r--  drivers/gpu/drm/radeon/cik_sdma.c  13
-rw-r--r--  drivers/gpu/drm/radeon/cypress_dpm.c  2
-rw-r--r--  drivers/gpu/drm/radeon/evergreen_dma.c  9
-rw-r--r--  drivers/gpu/drm/radeon/ni_dpm.c  2
-rw-r--r--  drivers/gpu/drm/radeon/r100.c  3
-rw-r--r--  drivers/gpu/drm/radeon/r600.c  13
-rw-r--r--  drivers/gpu/drm/radeon/r600_dma.c  13
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h  38
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c  4
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h  18
-rw-r--r--  drivers/gpu/drm/radeon/radeon_cs.c  9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c  47
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c  3
-rw-r--r--  drivers/gpu/drm/radeon/radeon_fence.c  30
-rw-r--r--  drivers/gpu/drm/radeon/radeon_gart.c  6
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c  11
-rw-r--r--  drivers/gpu/drm/radeon/radeon_legacy_crtc.c  28
-rw-r--r--  drivers/gpu/drm/radeon/radeon_pm.c  2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_ring.c  46
-rw-r--r--  drivers/gpu/drm/radeon/radeon_semaphore.c  129
-rw-r--r--  drivers/gpu/drm/radeon/radeon_trace.h  36
-rw-r--r--  drivers/gpu/drm/radeon/rv770_dma.c  9
-rw-r--r--  drivers/gpu/drm/radeon/si_dma.c  9
-rw-r--r--  drivers/gpu/drm/radeon/trinity_dpm.c  6
-rw-r--r--  drivers/gpu/drm/radeon/uvd_v1_0.c  4
-rw-r--r--  drivers/gpu/drm/radeon/uvd_v3_1.c  4
-rw-r--r--  include/uapi/drm/radeon_drm.h  2
29 files changed, 346 insertions, 213 deletions
diff --git a/drivers/gpu/drm/radeon/atombios_i2c.c b/drivers/gpu/drm/radeon/atombios_i2c.c
index deaf98cdca3a..0652ee0a2098 100644
--- a/drivers/gpu/drm/radeon/atombios_i2c.c
+++ b/drivers/gpu/drm/radeon/atombios_i2c.c
@@ -56,8 +56,10 @@ static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan,
56 return -EINVAL; 56 return -EINVAL;
57 } 57 }
58 args.ucRegIndex = buf[0]; 58 args.ucRegIndex = buf[0];
59 if (num > 1) 59 if (num > 1) {
60 memcpy(&out, &buf[1], num - 1); 60 num--;
61 memcpy(&out, &buf[1], num);
62 }
61 args.lpI2CDataOut = cpu_to_le16(out); 63 args.lpI2CDataOut = cpu_to_le16(out);
62 } else { 64 } else {
63 if (num > ATOM_MAX_HW_I2C_READ) { 65 if (num > ATOM_MAX_HW_I2C_READ) {
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ae92aa041c6a..b43a3a3c9067 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -1560,17 +1560,17 @@ u32 cik_get_xclk(struct radeon_device *rdev)
1560 * cik_mm_rdoorbell - read a doorbell dword 1560 * cik_mm_rdoorbell - read a doorbell dword
1561 * 1561 *
1562 * @rdev: radeon_device pointer 1562 * @rdev: radeon_device pointer
1563 * @offset: byte offset into the aperture 1563 * @index: doorbell index
1564 * 1564 *
1565 * Returns the value in the doorbell aperture at the 1565 * Returns the value in the doorbell aperture at the
1566 * requested offset (CIK). 1566 * requested doorbell index (CIK).
1567 */ 1567 */
1568u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset) 1568u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569{ 1569{
1570 if (offset < rdev->doorbell.size) { 1570 if (index < rdev->doorbell.num_doorbells) {
1571 return readl(((void __iomem *)rdev->doorbell.ptr) + offset); 1571 return readl(rdev->doorbell.ptr + index);
1572 } else { 1572 } else {
1573 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset); 1573 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574 return 0; 1574 return 0;
1575 } 1575 }
1576} 1576}
@@ -1579,18 +1579,18 @@ u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1579 * cik_mm_wdoorbell - write a doorbell dword 1579 * cik_mm_wdoorbell - write a doorbell dword
1580 * 1580 *
1581 * @rdev: radeon_device pointer 1581 * @rdev: radeon_device pointer
1582 * @offset: byte offset into the aperture 1582 * @index: doorbell index
1583 * @v: value to write 1583 * @v: value to write
1584 * 1584 *
1585 * Writes @v to the doorbell aperture at the 1585 * Writes @v to the doorbell aperture at the
1586 * requested offset (CIK). 1586 * requested doorbell index (CIK).
1587 */ 1587 */
1588void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v) 1588void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589{ 1589{
1590 if (offset < rdev->doorbell.size) { 1590 if (index < rdev->doorbell.num_doorbells) {
1591 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset); 1591 writel(v, rdev->doorbell.ptr + index);
1592 } else { 1592 } else {
1593 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset); 1593 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594 } 1594 }
1595} 1595}
1596 1596
@@ -2427,6 +2427,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2427 gb_tile_moden = 0; 2427 gb_tile_moden = 0;
2428 break; 2428 break;
2429 } 2429 }
2430 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2430 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2431 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2431 } 2432 }
2432 } else if (num_pipe_configs == 4) { 2433 } else if (num_pipe_configs == 4) {
@@ -2773,6 +2774,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2773 gb_tile_moden = 0; 2774 gb_tile_moden = 0;
2774 break; 2775 break;
2775 } 2776 }
2777 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2776 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2778 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2777 } 2779 }
2778 } else if (num_pipe_configs == 2) { 2780 } else if (num_pipe_configs == 2) {
@@ -2990,6 +2992,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2990 gb_tile_moden = 0; 2992 gb_tile_moden = 0;
2991 break; 2993 break;
2992 } 2994 }
2995 rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2993 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); 2996 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2994 } 2997 }
2995 } else 2998 } else
@@ -3556,17 +3559,24 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3556 radeon_ring_write(ring, 0); 3559 radeon_ring_write(ring, 0);
3557} 3560}
3558 3561
3559void cik_semaphore_ring_emit(struct radeon_device *rdev, 3562bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3560 struct radeon_ring *ring, 3563 struct radeon_ring *ring,
3561 struct radeon_semaphore *semaphore, 3564 struct radeon_semaphore *semaphore,
3562 bool emit_wait) 3565 bool emit_wait)
3563{ 3566{
3567/* TODO: figure out why semaphore cause lockups */
3568#if 0
3564 uint64_t addr = semaphore->gpu_addr; 3569 uint64_t addr = semaphore->gpu_addr;
3565 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; 3570 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3566 3571
3567 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 3572 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3568 radeon_ring_write(ring, addr & 0xffffffff); 3573 radeon_ring_write(ring, addr & 0xffffffff);
3569 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); 3574 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3575
3576 return true;
3577#else
3578 return false;
3579#endif
3570} 3580}
3571 3581
3572/** 3582/**
@@ -3609,13 +3619,8 @@ int cik_copy_cpdma(struct radeon_device *rdev,
3609 return r; 3619 return r;
3610 } 3620 }
3611 3621
3612 if (radeon_fence_need_sync(*fence, ring->idx)) { 3622 radeon_semaphore_sync_to(sem, *fence);
3613 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 3623 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3614 ring->idx);
3615 radeon_fence_note_sync(*fence, ring->idx);
3616 } else {
3617 radeon_semaphore_free(rdev, &sem, NULL);
3618 }
3619 3624
3620 for (i = 0; i < num_loops; i++) { 3625 for (i = 0; i < num_loops; i++) {
3621 cur_size_in_bytes = size_in_bytes; 3626 cur_size_in_bytes = size_in_bytes;
@@ -4052,7 +4057,7 @@ void cik_compute_ring_set_wptr(struct radeon_device *rdev,
4052 struct radeon_ring *ring) 4057 struct radeon_ring *ring)
4053{ 4058{
4054 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr); 4059 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
4055 WDOORBELL32(ring->doorbell_offset, ring->wptr); 4060 WDOORBELL32(ring->doorbell_index, ring->wptr);
4056} 4061}
4057 4062
4058/** 4063/**
@@ -4393,10 +4398,6 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
4393 return r; 4398 return r;
4394 } 4399 }
4395 4400
4396 /* doorbell offset */
4397 rdev->ring[idx].doorbell_offset =
4398 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
4399
4400 /* init the mqd struct */ 4401 /* init the mqd struct */
4401 memset(buf, 0, sizeof(struct bonaire_mqd)); 4402 memset(buf, 0, sizeof(struct bonaire_mqd));
4402 4403
@@ -4508,7 +4509,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
4508 RREG32(CP_HQD_PQ_DOORBELL_CONTROL); 4509 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4509 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK; 4510 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4510 mqd->queue_state.cp_hqd_pq_doorbell_control |= 4511 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4511 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4); 4512 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4512 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN; 4513 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4513 mqd->queue_state.cp_hqd_pq_doorbell_control &= 4514 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4514 ~(DOORBELL_SOURCE | DOORBELL_HIT); 4515 ~(DOORBELL_SOURCE | DOORBELL_HIT);
@@ -7839,14 +7840,14 @@ int cik_init(struct radeon_device *rdev)
7839 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; 7840 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7840 ring->ring_obj = NULL; 7841 ring->ring_obj = NULL;
7841 r600_ring_init(rdev, ring, 1024 * 1024); 7842 r600_ring_init(rdev, ring, 1024 * 1024);
7842 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 7843 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7843 if (r) 7844 if (r)
7844 return r; 7845 return r;
7845 7846
7846 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; 7847 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7847 ring->ring_obj = NULL; 7848 ring->ring_obj = NULL;
7848 r600_ring_init(rdev, ring, 1024 * 1024); 7849 r600_ring_init(rdev, ring, 1024 * 1024);
7849 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num); 7850 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
7850 if (r) 7851 if (r)
7851 return r; 7852 return r;
7852 7853
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 9c9529de20ee..0300727a4f70 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -130,7 +130,7 @@ void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
130 * Add a DMA semaphore packet to the ring wait on or signal 130 * Add a DMA semaphore packet to the ring wait on or signal
131 * other rings (CIK). 131 * other rings (CIK).
132 */ 132 */
133void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, 133bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
134 struct radeon_ring *ring, 134 struct radeon_ring *ring,
135 struct radeon_semaphore *semaphore, 135 struct radeon_semaphore *semaphore,
136 bool emit_wait) 136 bool emit_wait)
@@ -141,6 +141,8 @@ void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
141 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); 141 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
142 radeon_ring_write(ring, addr & 0xfffffff8); 142 radeon_ring_write(ring, addr & 0xfffffff8);
143 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 143 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
144
145 return true;
144} 146}
145 147
146/** 148/**
@@ -443,13 +445,8 @@ int cik_copy_dma(struct radeon_device *rdev,
443 return r; 445 return r;
444 } 446 }
445 447
446 if (radeon_fence_need_sync(*fence, ring->idx)) { 448 radeon_semaphore_sync_to(sem, *fence);
447 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 449 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
448 ring->idx);
449 radeon_fence_note_sync(*fence, ring->idx);
450 } else {
451 radeon_semaphore_free(rdev, &sem, NULL);
452 }
453 450
454 for (i = 0; i < num_loops; i++) { 451 for (i = 0; i < num_loops; i++) {
455 cur_size_in_bytes = size_in_bytes; 452 cur_size_in_bytes = size_in_bytes;
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index 91bb470de0a3..920e1e4a52c5 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -299,7 +299,9 @@ void cypress_program_response_times(struct radeon_device *rdev)
299static int cypress_pcie_performance_request(struct radeon_device *rdev, 299static int cypress_pcie_performance_request(struct radeon_device *rdev,
300 u8 perf_req, bool advertise) 300 u8 perf_req, bool advertise)
301{ 301{
302#if defined(CONFIG_ACPI)
302 struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); 303 struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
304#endif
303 u32 tmp; 305 u32 tmp;
304 306
305 udelay(10); 307 udelay(10);
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 6a0656d00ed0..a37b54436382 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -131,13 +131,8 @@ int evergreen_copy_dma(struct radeon_device *rdev,
131 return r; 131 return r;
132 } 132 }
133 133
134 if (radeon_fence_need_sync(*fence, ring->idx)) { 134 radeon_semaphore_sync_to(sem, *fence);
135 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 135 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
136 ring->idx);
137 radeon_fence_note_sync(*fence, ring->idx);
138 } else {
139 radeon_semaphore_free(rdev, &sem, NULL);
140 }
141 136
142 for (i = 0; i < num_loops; i++) { 137 for (i = 0; i < num_loops; i++) {
143 cur_size_in_dw = size_in_dw; 138 cur_size_in_dw = size_in_dw;
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index f26339028154..cdc003085a76 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -3445,9 +3445,9 @@ static int ni_enable_smc_cac(struct radeon_device *rdev,
3445static int ni_pcie_performance_request(struct radeon_device *rdev, 3445static int ni_pcie_performance_request(struct radeon_device *rdev,
3446 u8 perf_req, bool advertise) 3446 u8 perf_req, bool advertise)
3447{ 3447{
3448#if defined(CONFIG_ACPI)
3448 struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev); 3449 struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
3449 3450
3450#if defined(CONFIG_ACPI)
3451 if ((perf_req == PCIE_PERF_REQ_PECI_GEN1) || 3451 if ((perf_req == PCIE_PERF_REQ_PECI_GEN1) ||
3452 (perf_req == PCIE_PERF_REQ_PECI_GEN2)) { 3452 (perf_req == PCIE_PERF_REQ_PECI_GEN2)) {
3453 if (eg_pi->pcie_performance_request_registered == false) 3453 if (eg_pi->pcie_performance_request_registered == false)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 784983d78158..10abc4d5a6cc 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -869,13 +869,14 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
869 radeon_ring_write(ring, RADEON_SW_INT_FIRE); 869 radeon_ring_write(ring, RADEON_SW_INT_FIRE);
870} 870}
871 871
872void r100_semaphore_ring_emit(struct radeon_device *rdev, 872bool r100_semaphore_ring_emit(struct radeon_device *rdev,
873 struct radeon_ring *ring, 873 struct radeon_ring *ring,
874 struct radeon_semaphore *semaphore, 874 struct radeon_semaphore *semaphore,
875 bool emit_wait) 875 bool emit_wait)
876{ 876{
877 /* Unused on older asics, since we don't have semaphores or multiple rings */ 877 /* Unused on older asics, since we don't have semaphores or multiple rings */
878 BUG(); 878 BUG();
879 return false;
879} 880}
880 881
881int r100_copy_blit(struct radeon_device *rdev, 882int r100_copy_blit(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 4e609e8a8d2b..9ad06732a78b 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2650,7 +2650,7 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
2650 } 2650 }
2651} 2651}
2652 2652
2653void r600_semaphore_ring_emit(struct radeon_device *rdev, 2653bool r600_semaphore_ring_emit(struct radeon_device *rdev,
2654 struct radeon_ring *ring, 2654 struct radeon_ring *ring,
2655 struct radeon_semaphore *semaphore, 2655 struct radeon_semaphore *semaphore,
2656 bool emit_wait) 2656 bool emit_wait)
@@ -2664,6 +2664,8 @@ void r600_semaphore_ring_emit(struct radeon_device *rdev,
2664 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); 2664 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2665 radeon_ring_write(ring, addr & 0xffffffff); 2665 radeon_ring_write(ring, addr & 0xffffffff);
2666 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); 2666 radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
2667
2668 return true;
2667} 2669}
2668 2670
2669/** 2671/**
@@ -2706,13 +2708,8 @@ int r600_copy_cpdma(struct radeon_device *rdev,
2706 return r; 2708 return r;
2707 } 2709 }
2708 2710
2709 if (radeon_fence_need_sync(*fence, ring->idx)) { 2711 radeon_semaphore_sync_to(sem, *fence);
2710 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 2712 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
2711 ring->idx);
2712 radeon_fence_note_sync(*fence, ring->idx);
2713 } else {
2714 radeon_semaphore_free(rdev, &sem, NULL);
2715 }
2716 2713
2717 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 2714 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
2718 radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); 2715 radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 3b317456512a..7844d15c139f 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -311,7 +311,7 @@ void r600_dma_fence_ring_emit(struct radeon_device *rdev,
311 * Add a DMA semaphore packet to the ring wait on or signal 311 * Add a DMA semaphore packet to the ring wait on or signal
312 * other rings (r6xx-SI). 312 * other rings (r6xx-SI).
313 */ 313 */
314void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, 314bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
315 struct radeon_ring *ring, 315 struct radeon_ring *ring,
316 struct radeon_semaphore *semaphore, 316 struct radeon_semaphore *semaphore,
317 bool emit_wait) 317 bool emit_wait)
@@ -322,6 +322,8 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
322 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0)); 322 radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
323 radeon_ring_write(ring, addr & 0xfffffffc); 323 radeon_ring_write(ring, addr & 0xfffffffc);
324 radeon_ring_write(ring, upper_32_bits(addr) & 0xff); 324 radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
325
326 return true;
325} 327}
326 328
327/** 329/**
@@ -462,13 +464,8 @@ int r600_copy_dma(struct radeon_device *rdev,
462 return r; 464 return r;
463 } 465 }
464 466
465 if (radeon_fence_need_sync(*fence, ring->idx)) { 467 radeon_semaphore_sync_to(sem, *fence);
466 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 468 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
467 ring->idx);
468 radeon_fence_note_sync(*fence, ring->idx);
469 } else {
470 radeon_semaphore_free(rdev, &sem, NULL);
471 }
472 469
473 for (i = 0; i < num_loops; i++) { 470 for (i = 0; i < num_loops; i++) {
474 cur_size_in_dw = size_in_dw; 471 cur_size_in_dw = size_in_dw;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b9ee99258602..ecf2a3960c07 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -348,6 +348,7 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i
348void radeon_fence_process(struct radeon_device *rdev, int ring); 348void radeon_fence_process(struct radeon_device *rdev, int ring);
349bool radeon_fence_signaled(struct radeon_fence *fence); 349bool radeon_fence_signaled(struct radeon_fence *fence);
350int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); 350int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
351int radeon_fence_wait_locked(struct radeon_fence *fence);
351int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); 352int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
352int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); 353int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
353int radeon_fence_wait_any(struct radeon_device *rdev, 354int radeon_fence_wait_any(struct radeon_device *rdev,
@@ -548,17 +549,20 @@ struct radeon_semaphore {
548 struct radeon_sa_bo *sa_bo; 549 struct radeon_sa_bo *sa_bo;
549 signed waiters; 550 signed waiters;
550 uint64_t gpu_addr; 551 uint64_t gpu_addr;
552 struct radeon_fence *sync_to[RADEON_NUM_RINGS];
551}; 553};
552 554
553int radeon_semaphore_create(struct radeon_device *rdev, 555int radeon_semaphore_create(struct radeon_device *rdev,
554 struct radeon_semaphore **semaphore); 556 struct radeon_semaphore **semaphore);
555void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, 557bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
556 struct radeon_semaphore *semaphore); 558 struct radeon_semaphore *semaphore);
557void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, 559bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
558 struct radeon_semaphore *semaphore); 560 struct radeon_semaphore *semaphore);
561void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
562 struct radeon_fence *fence);
559int radeon_semaphore_sync_rings(struct radeon_device *rdev, 563int radeon_semaphore_sync_rings(struct radeon_device *rdev,
560 struct radeon_semaphore *semaphore, 564 struct radeon_semaphore *semaphore,
561 int signaler, int waiter); 565 int waiting_ring);
562void radeon_semaphore_free(struct radeon_device *rdev, 566void radeon_semaphore_free(struct radeon_device *rdev,
563 struct radeon_semaphore **semaphore, 567 struct radeon_semaphore **semaphore,
564 struct radeon_fence *fence); 568 struct radeon_fence *fence);
@@ -645,13 +649,15 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg);
645/* 649/*
646 * GPU doorbell structures, functions & helpers 650 * GPU doorbell structures, functions & helpers
647 */ 651 */
652#define RADEON_MAX_DOORBELLS 1024 /* Reserve at most 1024 doorbell slots for radeon-owned rings. */
653
648struct radeon_doorbell { 654struct radeon_doorbell {
649 u32 num_pages;
650 bool free[1024];
651 /* doorbell mmio */ 655 /* doorbell mmio */
652 resource_size_t base; 656 resource_size_t base;
653 resource_size_t size; 657 resource_size_t size;
654 void __iomem *ptr; 658 u32 __iomem *ptr;
659 u32 num_doorbells; /* Number of doorbells actually reserved for radeon. */
660 unsigned long used[DIV_ROUND_UP(RADEON_MAX_DOORBELLS, BITS_PER_LONG)];
655}; 661};
656 662
657int radeon_doorbell_get(struct radeon_device *rdev, u32 *page); 663int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
@@ -765,7 +771,6 @@ struct radeon_ib {
765 struct radeon_fence *fence; 771 struct radeon_fence *fence;
766 struct radeon_vm *vm; 772 struct radeon_vm *vm;
767 bool is_const_ib; 773 bool is_const_ib;
768 struct radeon_fence *sync_to[RADEON_NUM_RINGS];
769 struct radeon_semaphore *semaphore; 774 struct radeon_semaphore *semaphore;
770}; 775};
771 776
@@ -799,8 +804,7 @@ struct radeon_ring {
799 u32 pipe; 804 u32 pipe;
800 u32 queue; 805 u32 queue;
801 struct radeon_bo *mqd_obj; 806 struct radeon_bo *mqd_obj;
802 u32 doorbell_page_num; 807 u32 doorbell_index;
803 u32 doorbell_offset;
804 unsigned wptr_offs; 808 unsigned wptr_offs;
805}; 809};
806 810
@@ -921,7 +925,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
921 struct radeon_ib *ib, struct radeon_vm *vm, 925 struct radeon_ib *ib, struct radeon_vm *vm,
922 unsigned size); 926 unsigned size);
923void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); 927void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
924void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence);
925int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, 928int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
926 struct radeon_ib *const_ib); 929 struct radeon_ib *const_ib);
927int radeon_ib_pool_init(struct radeon_device *rdev); 930int radeon_ib_pool_init(struct radeon_device *rdev);
@@ -1638,7 +1641,7 @@ struct radeon_asic_ring {
1638 /* command emmit functions */ 1641 /* command emmit functions */
1639 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib); 1642 void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
1640 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence); 1643 void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
1641 void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp, 1644 bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
1642 struct radeon_semaphore *semaphore, bool emit_wait); 1645 struct radeon_semaphore *semaphore, bool emit_wait);
1643 void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); 1646 void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
1644 1647
@@ -1979,6 +1982,7 @@ struct cik_asic {
1979 1982
1980 unsigned tile_config; 1983 unsigned tile_config;
1981 uint32_t tile_mode_array[32]; 1984 uint32_t tile_mode_array[32];
1985 uint32_t macrotile_mode_array[16];
1982}; 1986};
1983 1987
1984union radeon_asic_config { 1988union radeon_asic_config {
@@ -2239,8 +2243,8 @@ void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
2239u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); 2243u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
2240void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v); 2244void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
2241 2245
2242u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset); 2246u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index);
2243void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v); 2247void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
2244 2248
2245/* 2249/*
2246 * Cast helper 2250 * Cast helper
@@ -2303,8 +2307,8 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v);
2303#define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) 2307#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
2304#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) 2308#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
2305 2309
2306#define RDOORBELL32(offset) cik_mm_rdoorbell(rdev, (offset)) 2310#define RDOORBELL32(index) cik_mm_rdoorbell(rdev, (index))
2307#define WDOORBELL32(offset, v) cik_mm_wdoorbell(rdev, (offset), (v)) 2311#define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
2308 2312
2309/* 2313/*
2310 * Indirect registers accessor 2314 * Indirect registers accessor
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 50853c0cb49d..e354ce94cdd1 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -2015,6 +2015,8 @@ static struct radeon_asic ci_asic = {
2015 .bandwidth_update = &dce8_bandwidth_update, 2015 .bandwidth_update = &dce8_bandwidth_update,
2016 .get_vblank_counter = &evergreen_get_vblank_counter, 2016 .get_vblank_counter = &evergreen_get_vblank_counter,
2017 .wait_for_vblank = &dce4_wait_for_vblank, 2017 .wait_for_vblank = &dce4_wait_for_vblank,
2018 .set_backlight_level = &atombios_set_backlight_level,
2019 .get_backlight_level = &atombios_get_backlight_level,
2018 .hdmi_enable = &evergreen_hdmi_enable, 2020 .hdmi_enable = &evergreen_hdmi_enable,
2019 .hdmi_setmode = &evergreen_hdmi_setmode, 2021 .hdmi_setmode = &evergreen_hdmi_setmode,
2020 }, 2022 },
@@ -2114,6 +2116,8 @@ static struct radeon_asic kv_asic = {
2114 .bandwidth_update = &dce8_bandwidth_update, 2116 .bandwidth_update = &dce8_bandwidth_update,
2115 .get_vblank_counter = &evergreen_get_vblank_counter, 2117 .get_vblank_counter = &evergreen_get_vblank_counter,
2116 .wait_for_vblank = &dce4_wait_for_vblank, 2118 .wait_for_vblank = &dce4_wait_for_vblank,
2119 .set_backlight_level = &atombios_set_backlight_level,
2120 .get_backlight_level = &atombios_get_backlight_level,
2117 .hdmi_enable = &evergreen_hdmi_enable, 2121 .hdmi_enable = &evergreen_hdmi_enable,
2118 .hdmi_setmode = &evergreen_hdmi_setmode, 2122 .hdmi_setmode = &evergreen_hdmi_setmode,
2119 }, 2123 },
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index f2833ee3a613..c9fd97b58076 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -80,7 +80,7 @@ int r100_irq_set(struct radeon_device *rdev);
80int r100_irq_process(struct radeon_device *rdev); 80int r100_irq_process(struct radeon_device *rdev);
81void r100_fence_ring_emit(struct radeon_device *rdev, 81void r100_fence_ring_emit(struct radeon_device *rdev,
82 struct radeon_fence *fence); 82 struct radeon_fence *fence);
83void r100_semaphore_ring_emit(struct radeon_device *rdev, 83bool r100_semaphore_ring_emit(struct radeon_device *rdev,
84 struct radeon_ring *cp, 84 struct radeon_ring *cp,
85 struct radeon_semaphore *semaphore, 85 struct radeon_semaphore *semaphore,
86 bool emit_wait); 86 bool emit_wait);
@@ -313,13 +313,13 @@ int r600_cs_parse(struct radeon_cs_parser *p);
313int r600_dma_cs_parse(struct radeon_cs_parser *p); 313int r600_dma_cs_parse(struct radeon_cs_parser *p);
314void r600_fence_ring_emit(struct radeon_device *rdev, 314void r600_fence_ring_emit(struct radeon_device *rdev,
315 struct radeon_fence *fence); 315 struct radeon_fence *fence);
316void r600_semaphore_ring_emit(struct radeon_device *rdev, 316bool r600_semaphore_ring_emit(struct radeon_device *rdev,
317 struct radeon_ring *cp, 317 struct radeon_ring *cp,
318 struct radeon_semaphore *semaphore, 318 struct radeon_semaphore *semaphore,
319 bool emit_wait); 319 bool emit_wait);
320void r600_dma_fence_ring_emit(struct radeon_device *rdev, 320void r600_dma_fence_ring_emit(struct radeon_device *rdev,
321 struct radeon_fence *fence); 321 struct radeon_fence *fence);
322void r600_dma_semaphore_ring_emit(struct radeon_device *rdev, 322bool r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
323 struct radeon_ring *ring, 323 struct radeon_ring *ring,
324 struct radeon_semaphore *semaphore, 324 struct radeon_semaphore *semaphore,
325 bool emit_wait); 325 bool emit_wait);
@@ -566,10 +566,6 @@ int sumo_dpm_force_performance_level(struct radeon_device *rdev,
566 */ 566 */
567void cayman_fence_ring_emit(struct radeon_device *rdev, 567void cayman_fence_ring_emit(struct radeon_device *rdev,
568 struct radeon_fence *fence); 568 struct radeon_fence *fence);
569void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
570 struct radeon_ring *ring,
571 struct radeon_semaphore *semaphore,
572 bool emit_wait);
573void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev); 569void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
574int cayman_init(struct radeon_device *rdev); 570int cayman_init(struct radeon_device *rdev);
575void cayman_fini(struct radeon_device *rdev); 571void cayman_fini(struct radeon_device *rdev);
@@ -697,7 +693,7 @@ void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
697int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); 693int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
698void cik_sdma_fence_ring_emit(struct radeon_device *rdev, 694void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
699 struct radeon_fence *fence); 695 struct radeon_fence *fence);
700void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev, 696bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
701 struct radeon_ring *ring, 697 struct radeon_ring *ring,
702 struct radeon_semaphore *semaphore, 698 struct radeon_semaphore *semaphore,
703 bool emit_wait); 699 bool emit_wait);
@@ -717,7 +713,7 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
717 struct radeon_fence *fence); 713 struct radeon_fence *fence);
718void cik_fence_compute_ring_emit(struct radeon_device *rdev, 714void cik_fence_compute_ring_emit(struct radeon_device *rdev,
719 struct radeon_fence *fence); 715 struct radeon_fence *fence);
720void cik_semaphore_ring_emit(struct radeon_device *rdev, 716bool cik_semaphore_ring_emit(struct radeon_device *rdev,
721 struct radeon_ring *cp, 717 struct radeon_ring *cp,
722 struct radeon_semaphore *semaphore, 718 struct radeon_semaphore *semaphore,
723 bool emit_wait); 719 bool emit_wait);
@@ -807,7 +803,7 @@ void uvd_v1_0_stop(struct radeon_device *rdev);
807 803
808int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring); 804int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
809int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); 805int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
810void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, 806bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
811 struct radeon_ring *ring, 807 struct radeon_ring *ring,
812 struct radeon_semaphore *semaphore, 808 struct radeon_semaphore *semaphore,
813 bool emit_wait); 809 bool emit_wait);
@@ -819,7 +815,7 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev,
819 struct radeon_fence *fence); 815 struct radeon_fence *fence);
820 816
821/* uvd v3.1 */ 817/* uvd v3.1 */
822void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, 818bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
823 struct radeon_ring *ring, 819 struct radeon_ring *ring,
824 struct radeon_semaphore *semaphore, 820 struct radeon_semaphore *semaphore,
825 bool emit_wait); 821 bool emit_wait);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 26ca223d12d6..f41594b2eeac 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -159,7 +159,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
159 if (!p->relocs[i].robj) 159 if (!p->relocs[i].robj)
160 continue; 160 continue;
161 161
162 radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj); 162 radeon_semaphore_sync_to(p->ib.semaphore,
163 p->relocs[i].robj->tbo.sync_obj);
163 } 164 }
164} 165}
165 166
@@ -411,9 +412,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
411 goto out; 412 goto out;
412 } 413 }
413 radeon_cs_sync_rings(parser); 414 radeon_cs_sync_rings(parser);
414 radeon_ib_sync_to(&parser->ib, vm->fence); 415 radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
415 radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id( 416 radeon_semaphore_sync_to(parser->ib.semaphore,
416 rdev, vm, parser->ring)); 417 radeon_vm_grab_id(rdev, vm, parser->ring));
417 418
418 if ((rdev->family >= CHIP_TAHITI) && 419 if ((rdev->family >= CHIP_TAHITI) &&
419 (parser->chunk_const_ib_idx != -1)) { 420 (parser->chunk_const_ib_idx != -1)) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b9234c43f43d..39b033b441d2 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -251,28 +251,23 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg)
251 */ 251 */
252int radeon_doorbell_init(struct radeon_device *rdev) 252int radeon_doorbell_init(struct radeon_device *rdev)
253{ 253{
254 int i;
255
256 /* doorbell bar mapping */ 254 /* doorbell bar mapping */
257 rdev->doorbell.base = pci_resource_start(rdev->pdev, 2); 255 rdev->doorbell.base = pci_resource_start(rdev->pdev, 2);
258 rdev->doorbell.size = pci_resource_len(rdev->pdev, 2); 256 rdev->doorbell.size = pci_resource_len(rdev->pdev, 2);
259 257
260 /* limit to 4 MB for now */ 258 rdev->doorbell.num_doorbells = min_t(u32, rdev->doorbell.size / sizeof(u32), RADEON_MAX_DOORBELLS);
261 if (rdev->doorbell.size > (4 * 1024 * 1024)) 259 if (rdev->doorbell.num_doorbells == 0)
262 rdev->doorbell.size = 4 * 1024 * 1024; 260 return -EINVAL;
263 261
264 rdev->doorbell.ptr = ioremap(rdev->doorbell.base, rdev->doorbell.size); 262 rdev->doorbell.ptr = ioremap(rdev->doorbell.base, rdev->doorbell.num_doorbells * sizeof(u32));
265 if (rdev->doorbell.ptr == NULL) { 263 if (rdev->doorbell.ptr == NULL) {
266 return -ENOMEM; 264 return -ENOMEM;
267 } 265 }
268 DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)rdev->doorbell.base); 266 DRM_INFO("doorbell mmio base: 0x%08X\n", (uint32_t)rdev->doorbell.base);
269 DRM_INFO("doorbell mmio size: %u\n", (unsigned)rdev->doorbell.size); 267 DRM_INFO("doorbell mmio size: %u\n", (unsigned)rdev->doorbell.size);
270 268
271 rdev->doorbell.num_pages = rdev->doorbell.size / PAGE_SIZE; 269 memset(&rdev->doorbell.used, 0, sizeof(rdev->doorbell.used));
272 270
273 for (i = 0; i < rdev->doorbell.num_pages; i++) {
274 rdev->doorbell.free[i] = true;
275 }
276 return 0; 271 return 0;
277} 272}
278 273
@@ -290,40 +285,38 @@ void radeon_doorbell_fini(struct radeon_device *rdev)
290} 285}
291 286
292/** 287/**
293 * radeon_doorbell_get - Allocate a doorbell page 288 * radeon_doorbell_get - Allocate a doorbell entry
294 * 289 *
295 * @rdev: radeon_device pointer 290 * @rdev: radeon_device pointer
296 * @doorbell: doorbell page number 291 * @doorbell: doorbell index
297 * 292 *
298 * Allocate a doorbell page for use by the driver (all asics). 293 * Allocate a doorbell for use by the driver (all asics).
299 * Returns 0 on success or -EINVAL on failure. 294 * Returns 0 on success or -EINVAL on failure.
300 */ 295 */
301int radeon_doorbell_get(struct radeon_device *rdev, u32 *doorbell) 296int radeon_doorbell_get(struct radeon_device *rdev, u32 *doorbell)
302{ 297{
303 int i; 298 unsigned long offset = find_first_zero_bit(rdev->doorbell.used, rdev->doorbell.num_doorbells);
304 299 if (offset < rdev->doorbell.num_doorbells) {
305 for (i = 0; i < rdev->doorbell.num_pages; i++) { 300 __set_bit(offset, rdev->doorbell.used);
306 if (rdev->doorbell.free[i]) { 301 *doorbell = offset;
307 rdev->doorbell.free[i] = false; 302 return 0;
308 *doorbell = i; 303 } else {
309 return 0; 304 return -EINVAL;
310 }
311 } 305 }
312 return -EINVAL;
313} 306}
314 307
315/** 308/**
316 * radeon_doorbell_free - Free a doorbell page 309 * radeon_doorbell_free - Free a doorbell entry
317 * 310 *
318 * @rdev: radeon_device pointer 311 * @rdev: radeon_device pointer
319 * @doorbell: doorbell page number 312 * @doorbell: doorbell index
320 * 313 *
321 * Free a doorbell page allocated for use by the driver (all asics) 314 * Free a doorbell allocated for use by the driver (all asics)
322 */ 315 */
323void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell) 316void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell)
324{ 317{
325 if (doorbell < rdev->doorbell.num_pages) 318 if (doorbell < rdev->doorbell.num_doorbells)
326 rdev->doorbell.free[doorbell] = true; 319 __clear_bit(doorbell, rdev->doorbell.used);
327} 320}
328 321
329/* 322/*
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 1aee32213f66..9f5ff28864f6 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -76,9 +76,10 @@
76 * 2.32.0 - new info request for rings working 76 * 2.32.0 - new info request for rings working
77 * 2.33.0 - Add SI tiling mode array query 77 * 2.33.0 - Add SI tiling mode array query
78 * 2.34.0 - Add CIK tiling mode array query 78 * 2.34.0 - Add CIK tiling mode array query
79 * 2.35.0 - Add CIK macrotile mode array query
79 */ 80 */
80#define KMS_DRIVER_MAJOR 2 81#define KMS_DRIVER_MAJOR 2
81#define KMS_DRIVER_MINOR 34 82#define KMS_DRIVER_MINOR 35
82#define KMS_DRIVER_PATCHLEVEL 0 83#define KMS_DRIVER_PATCHLEVEL 0
83int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); 84int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
84int radeon_driver_unload_kms(struct drm_device *dev); 85int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 281d14c22a47..d3a86e43c012 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -472,6 +472,36 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
472} 472}
473 473
474/** 474/**
475 * radeon_fence_wait_locked - wait for a fence to signal
476 *
477 * @fence: radeon fence object
478 *
479 * Wait for the requested fence to signal (all asics).
480 * Returns 0 if the fence has passed, error for all other cases.
481 */
482int radeon_fence_wait_locked(struct radeon_fence *fence)
483{
484 uint64_t seq[RADEON_NUM_RINGS] = {};
485 int r;
486
487 if (fence == NULL) {
488 WARN(1, "Querying an invalid fence : %p !\n", fence);
489 return -EINVAL;
490 }
491
492 seq[fence->ring] = fence->seq;
493 if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
494 return 0;
495
496 r = radeon_fence_wait_seq(fence->rdev, seq, false, false);
497 if (r)
498 return r;
499
500 fence->seq = RADEON_FENCE_SIGNALED_SEQ;
501 return 0;
502}
503
504/**
475 * radeon_fence_wait_next_locked - wait for the next fence to signal 505 * radeon_fence_wait_next_locked - wait for the next fence to signal
476 * 506 *
477 * @rdev: radeon device pointer 507 * @rdev: radeon device pointer
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 8a83b89d4709..3044e504f4ec 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -651,7 +651,7 @@ retry:
651 radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr, 651 radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
652 0, pd_entries, 0, 0); 652 0, pd_entries, 0, 0);
653 653
654 radeon_ib_sync_to(&ib, vm->fence); 654 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
655 r = radeon_ib_schedule(rdev, &ib, NULL); 655 r = radeon_ib_schedule(rdev, &ib, NULL);
656 if (r) { 656 if (r) {
657 radeon_ib_free(rdev, &ib); 657 radeon_ib_free(rdev, &ib);
@@ -1209,6 +1209,8 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
1209 return -ENOMEM; 1209 return -ENOMEM;
1210 1210
1211 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); 1211 r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
1212 if (r)
1213 return r;
1212 ib.length_dw = 0; 1214 ib.length_dw = 0;
1213 1215
1214 r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); 1216 r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
@@ -1220,7 +1222,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev,
1220 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, 1222 radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
1221 addr, radeon_vm_page_flags(bo_va->flags)); 1223 addr, radeon_vm_page_flags(bo_va->flags));
1222 1224
1223 radeon_ib_sync_to(&ib, vm->fence); 1225 radeon_semaphore_sync_to(ib.semaphore, vm->fence);
1224 r = radeon_ib_schedule(rdev, &ib, NULL); 1226 r = radeon_ib_schedule(rdev, &ib, NULL);
1225 if (r) { 1227 if (r) {
1226 radeon_ib_free(rdev, &ib); 1228 radeon_ib_free(rdev, &ib);
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index bb8710531a1b..55d0b474bd37 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -340,7 +340,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
340 break; 340 break;
341 case RADEON_INFO_BACKEND_MAP: 341 case RADEON_INFO_BACKEND_MAP:
342 if (rdev->family >= CHIP_BONAIRE) 342 if (rdev->family >= CHIP_BONAIRE)
343 return -EINVAL; 343 *value = rdev->config.cik.backend_map;
344 else if (rdev->family >= CHIP_TAHITI) 344 else if (rdev->family >= CHIP_TAHITI)
345 *value = rdev->config.si.backend_map; 345 *value = rdev->config.si.backend_map;
346 else if (rdev->family >= CHIP_CAYMAN) 346 else if (rdev->family >= CHIP_CAYMAN)
@@ -449,6 +449,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
449 return -EINVAL; 449 return -EINVAL;
450 } 450 }
451 break; 451 break;
452 case RADEON_INFO_CIK_MACROTILE_MODE_ARRAY:
453 if (rdev->family >= CHIP_BONAIRE) {
454 value = rdev->config.cik.macrotile_mode_array;
455 value_size = sizeof(uint32_t)*16;
456 } else {
457 DRM_DEBUG_KMS("macrotile mode array is cik+ only!\n");
458 return -EINVAL;
459 }
460 break;
452 case RADEON_INFO_SI_CP_DMA_COMPUTE: 461 case RADEON_INFO_SI_CP_DMA_COMPUTE:
453 *value = 1; 462 *value = 1;
454 break; 463 break;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 0c7b8c66301b..0b158f98d287 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -422,6 +422,7 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
422 /* Pin framebuffer & get tilling informations */ 422 /* Pin framebuffer & get tilling informations */
423 obj = radeon_fb->obj; 423 obj = radeon_fb->obj;
424 rbo = gem_to_radeon_bo(obj); 424 rbo = gem_to_radeon_bo(obj);
425retry:
425 r = radeon_bo_reserve(rbo, false); 426 r = radeon_bo_reserve(rbo, false);
426 if (unlikely(r != 0)) 427 if (unlikely(r != 0))
427 return r; 428 return r;
@@ -430,6 +431,33 @@ int radeon_crtc_do_set_base(struct drm_crtc *crtc,
430 &base); 431 &base);
431 if (unlikely(r != 0)) { 432 if (unlikely(r != 0)) {
432 radeon_bo_unreserve(rbo); 433 radeon_bo_unreserve(rbo);
434
435 /* On old GPU like RN50 with little vram pining can fails because
436 * current fb is taking all space needed. So instead of unpining
437 * the old buffer after pining the new one, first unpin old one
438 * and then retry pining new one.
439 *
440 * As only master can set mode only master can pin and it is
441 * unlikely the master client will race with itself especialy
442 * on those old gpu with single crtc.
443 *
444 * We don't shutdown the display controller because new buffer
445 * will end up in same spot.
446 */
447 if (!atomic && fb && fb != crtc->fb) {
448 struct radeon_bo *old_rbo;
449 unsigned long nsize, osize;
450
451 old_rbo = gem_to_radeon_bo(to_radeon_framebuffer(fb)->obj);
452 osize = radeon_bo_size(old_rbo);
453 nsize = radeon_bo_size(rbo);
454 if (nsize <= osize && !radeon_bo_reserve(old_rbo, false)) {
455 radeon_bo_unpin(old_rbo);
456 radeon_bo_unreserve(old_rbo);
457 fb = NULL;
458 goto retry;
459 }
460 }
433 return -EINVAL; 461 return -EINVAL;
434 } 462 }
435 radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL); 463 radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 866ace070b91..d1385ccc672c 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1252,7 +1252,6 @@ int radeon_pm_init(struct radeon_device *rdev)
1252 case CHIP_RS780: 1252 case CHIP_RS780:
1253 case CHIP_RS880: 1253 case CHIP_RS880:
1254 case CHIP_CAYMAN: 1254 case CHIP_CAYMAN:
1255 case CHIP_ARUBA:
1256 case CHIP_BONAIRE: 1255 case CHIP_BONAIRE:
1257 case CHIP_KABINI: 1256 case CHIP_KABINI:
1258 case CHIP_KAVERI: 1257 case CHIP_KAVERI:
@@ -1284,6 +1283,7 @@ int radeon_pm_init(struct radeon_device *rdev)
1284 case CHIP_BARTS: 1283 case CHIP_BARTS:
1285 case CHIP_TURKS: 1284 case CHIP_TURKS:
1286 case CHIP_CAICOS: 1285 case CHIP_CAICOS:
1286 case CHIP_ARUBA:
1287 case CHIP_TAHITI: 1287 case CHIP_TAHITI:
1288 case CHIP_PITCAIRN: 1288 case CHIP_PITCAIRN:
1289 case CHIP_VERDE: 1289 case CHIP_VERDE:
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 18254e1c3e71..9214403ae173 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -61,7 +61,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
61 struct radeon_ib *ib, struct radeon_vm *vm, 61 struct radeon_ib *ib, struct radeon_vm *vm,
62 unsigned size) 62 unsigned size)
63{ 63{
64 int i, r; 64 int r;
65 65
66 r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); 66 r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
67 if (r) { 67 if (r) {
@@ -87,8 +87,6 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
87 ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); 87 ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
88 } 88 }
89 ib->is_const_ib = false; 89 ib->is_const_ib = false;
90 for (i = 0; i < RADEON_NUM_RINGS; ++i)
91 ib->sync_to[i] = NULL;
92 90
93 return 0; 91 return 0;
94} 92}
@@ -109,25 +107,6 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
109} 107}
110 108
111/** 109/**
112 * radeon_ib_sync_to - sync to fence before executing the IB
113 *
114 * @ib: IB object to add fence to
115 * @fence: fence to sync to
116 *
117 * Sync to the fence before executing the IB
118 */
119void radeon_ib_sync_to(struct radeon_ib *ib, struct radeon_fence *fence)
120{
121 struct radeon_fence *other;
122
123 if (!fence)
124 return;
125
126 other = ib->sync_to[fence->ring];
127 ib->sync_to[fence->ring] = radeon_fence_later(fence, other);
128}
129
130/**
131 * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring 110 * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring
132 * 111 *
133 * @rdev: radeon_device pointer 112 * @rdev: radeon_device pointer
@@ -151,8 +130,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
151 struct radeon_ib *const_ib) 130 struct radeon_ib *const_ib)
152{ 131{
153 struct radeon_ring *ring = &rdev->ring[ib->ring]; 132 struct radeon_ring *ring = &rdev->ring[ib->ring];
154 bool need_sync = false; 133 int r = 0;
155 int i, r = 0;
156 134
157 if (!ib->length_dw || !ring->ready) { 135 if (!ib->length_dw || !ring->ready) {
158 /* TODO: Nothings in the ib we should report. */ 136 /* TODO: Nothings in the ib we should report. */
@@ -166,19 +144,15 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
166 dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); 144 dev_err(rdev->dev, "scheduling IB failed (%d).\n", r);
167 return r; 145 return r;
168 } 146 }
169 for (i = 0; i < RADEON_NUM_RINGS; ++i) { 147
170 struct radeon_fence *fence = ib->sync_to[i]; 148 /* sync with other rings */
171 if (radeon_fence_need_sync(fence, ib->ring)) { 149 r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
172 need_sync = true; 150 if (r) {
173 radeon_semaphore_sync_rings(rdev, ib->semaphore, 151 dev_err(rdev->dev, "failed to sync rings (%d)\n", r);
174 fence->ring, ib->ring); 152 radeon_ring_unlock_undo(rdev, ring);
175 radeon_fence_note_sync(fence, ib->ring); 153 return r;
176 }
177 }
178 /* immediately free semaphore when we don't need to sync */
179 if (!need_sync) {
180 radeon_semaphore_free(rdev, &ib->semaphore, NULL);
181 } 154 }
155
182 /* if we can't remember our last VM flush then flush now! */ 156 /* if we can't remember our last VM flush then flush now! */
183 /* XXX figure out why we have to flush for every IB */ 157 /* XXX figure out why we have to flush for every IB */
184 if (ib->vm /*&& !ib->vm->last_flush*/) { 158 if (ib->vm /*&& !ib->vm->last_flush*/) {
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 8dcc20f53d73..2b42aa1914f2 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -29,12 +29,12 @@
29 */ 29 */
30#include <drm/drmP.h> 30#include <drm/drmP.h>
31#include "radeon.h" 31#include "radeon.h"
32 32#include "radeon_trace.h"
33 33
34int radeon_semaphore_create(struct radeon_device *rdev, 34int radeon_semaphore_create(struct radeon_device *rdev,
35 struct radeon_semaphore **semaphore) 35 struct radeon_semaphore **semaphore)
36{ 36{
37 int r; 37 int i, r;
38 38
39 *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL); 39 *semaphore = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
40 if (*semaphore == NULL) { 40 if (*semaphore == NULL) {
@@ -50,54 +50,121 @@ int radeon_semaphore_create(struct radeon_device *rdev,
50 (*semaphore)->waiters = 0; 50 (*semaphore)->waiters = 0;
51 (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); 51 (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo);
52 *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; 52 *((uint64_t*)radeon_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
53
54 for (i = 0; i < RADEON_NUM_RINGS; ++i)
55 (*semaphore)->sync_to[i] = NULL;
56
53 return 0; 57 return 0;
54} 58}
55 59
56void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring, 60bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ridx,
57 struct radeon_semaphore *semaphore) 61 struct radeon_semaphore *semaphore)
58{ 62{
59 --semaphore->waiters; 63 struct radeon_ring *ring = &rdev->ring[ridx];
60 radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, false); 64
65 trace_radeon_semaphore_signale(ridx, semaphore);
66
67 if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, false)) {
68 --semaphore->waiters;
69
70 /* for debugging lockup only, used by sysfs debug files */
71 ring->last_semaphore_signal_addr = semaphore->gpu_addr;
72 return true;
73 }
74 return false;
61} 75}
62 76
63void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, 77bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
64 struct radeon_semaphore *semaphore) 78 struct radeon_semaphore *semaphore)
65{ 79{
66 ++semaphore->waiters; 80 struct radeon_ring *ring = &rdev->ring[ridx];
67 radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); 81
82 trace_radeon_semaphore_wait(ridx, semaphore);
83
84 if (radeon_semaphore_ring_emit(rdev, ridx, ring, semaphore, true)) {
85 ++semaphore->waiters;
86
87 /* for debugging lockup only, used by sysfs debug files */
88 ring->last_semaphore_wait_addr = semaphore->gpu_addr;
89 return true;
90 }
91 return false;
92}
93
94/**
95 * radeon_semaphore_sync_to - use the semaphore to sync to a fence
96 *
97 * @semaphore: semaphore object to add fence to
98 * @fence: fence to sync to
99 *
100 * Sync to the fence using this semaphore object
101 */
102void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
103 struct radeon_fence *fence)
104{
105 struct radeon_fence *other;
106
107 if (!fence)
108 return;
109
110 other = semaphore->sync_to[fence->ring];
111 semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
68} 112}
69 113
70/* caller must hold ring lock */ 114/**
115 * radeon_semaphore_sync_rings - sync ring to all registered fences
116 *
117 * @rdev: radeon_device pointer
118 * @semaphore: semaphore object to use for sync
119 * @ring: ring that needs sync
120 *
121 * Ensure that all registered fences are signaled before letting
122 * the ring continue. The caller must hold the ring lock.
123 */
71int radeon_semaphore_sync_rings(struct radeon_device *rdev, 124int radeon_semaphore_sync_rings(struct radeon_device *rdev,
72 struct radeon_semaphore *semaphore, 125 struct radeon_semaphore *semaphore,
73 int signaler, int waiter) 126 int ring)
74{ 127{
75 int r; 128 int i, r;
76 129
77 /* no need to signal and wait on the same ring */ 130 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
78 if (signaler == waiter) { 131 struct radeon_fence *fence = semaphore->sync_to[i];
79 return 0;
80 }
81 132
82 /* prevent GPU deadlocks */ 133 /* check if we really need to sync */
83 if (!rdev->ring[signaler].ready) { 134 if (!radeon_fence_need_sync(fence, ring))
84 dev_err(rdev->dev, "Trying to sync to a disabled ring!"); 135 continue;
85 return -EINVAL;
86 }
87 136
88 r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); 137 /* prevent GPU deadlocks */
89 if (r) { 138 if (!rdev->ring[i].ready) {
90 return r; 139 dev_err(rdev->dev, "Syncing to a disabled ring!");
91 } 140 return -EINVAL;
92 radeon_semaphore_emit_signal(rdev, signaler, semaphore); 141 }
93 radeon_ring_commit(rdev, &rdev->ring[signaler]);
94 142
95 /* we assume caller has already allocated space on waiters ring */ 143 /* allocate enough space for sync command */
96 radeon_semaphore_emit_wait(rdev, waiter, semaphore); 144 r = radeon_ring_alloc(rdev, &rdev->ring[i], 16);
145 if (r) {
146 return r;
147 }
97 148
98 /* for debugging lockup only, used by sysfs debug files */ 149 /* emit the signal semaphore */
99 rdev->ring[signaler].last_semaphore_signal_addr = semaphore->gpu_addr; 150 if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
100 rdev->ring[waiter].last_semaphore_wait_addr = semaphore->gpu_addr; 151 /* signaling wasn't successful wait manually */
152 radeon_ring_undo(&rdev->ring[i]);
153 radeon_fence_wait_locked(fence);
154 continue;
155 }
156
157 /* we assume caller has already allocated space on waiters ring */
158 if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
159 /* waiting wasn't successful wait manually */
160 radeon_ring_undo(&rdev->ring[i]);
161 radeon_fence_wait_locked(fence);
162 continue;
163 }
164
165 radeon_ring_commit(rdev, &rdev->ring[i]);
166 radeon_fence_note_sync(fence, ring);
167 }
101 168
102 return 0; 169 return 0;
103} 170}
diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h
index 811bca691b36..9f0e18172b6e 100644
--- a/drivers/gpu/drm/radeon/radeon_trace.h
+++ b/drivers/gpu/drm/radeon/radeon_trace.h
@@ -111,6 +111,42 @@ DEFINE_EVENT(radeon_fence_request, radeon_fence_wait_end,
111 TP_ARGS(dev, seqno) 111 TP_ARGS(dev, seqno)
112); 112);
113 113
114DECLARE_EVENT_CLASS(radeon_semaphore_request,
115
116 TP_PROTO(int ring, struct radeon_semaphore *sem),
117
118 TP_ARGS(ring, sem),
119
120 TP_STRUCT__entry(
121 __field(int, ring)
122 __field(signed, waiters)
123 __field(uint64_t, gpu_addr)
124 ),
125
126 TP_fast_assign(
127 __entry->ring = ring;
128 __entry->waiters = sem->waiters;
129 __entry->gpu_addr = sem->gpu_addr;
130 ),
131
132 TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
133 __entry->waiters, __entry->gpu_addr)
134);
135
136DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_signale,
137
138 TP_PROTO(int ring, struct radeon_semaphore *sem),
139
140 TP_ARGS(ring, sem)
141);
142
143DEFINE_EVENT(radeon_semaphore_request, radeon_semaphore_wait,
144
145 TP_PROTO(int ring, struct radeon_semaphore *sem),
146
147 TP_ARGS(ring, sem)
148);
149
114#endif 150#endif
115 151
116/* This part must be outside protection */ 152/* This part must be outside protection */
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index f9b02e3d6830..aca8cbe8a335 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -66,13 +66,8 @@ int rv770_copy_dma(struct radeon_device *rdev,
66 return r; 66 return r;
67 } 67 }
68 68
69 if (radeon_fence_need_sync(*fence, ring->idx)) { 69 radeon_semaphore_sync_to(sem, *fence);
70 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 70 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
71 ring->idx);
72 radeon_fence_note_sync(*fence, ring->idx);
73 } else {
74 radeon_semaphore_free(rdev, &sem, NULL);
75 }
76 71
77 for (i = 0; i < num_loops; i++) { 72 for (i = 0; i < num_loops; i++) {
78 cur_size_in_dw = size_in_dw; 73 cur_size_in_dw = size_in_dw;
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index 8e8f46133532..59be2cfcbb47 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -195,13 +195,8 @@ int si_copy_dma(struct radeon_device *rdev,
195 return r; 195 return r;
196 } 196 }
197 197
198 if (radeon_fence_need_sync(*fence, ring->idx)) { 198 radeon_semaphore_sync_to(sem, *fence);
199 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring, 199 radeon_semaphore_sync_rings(rdev, sem, ring->idx);
200 ring->idx);
201 radeon_fence_note_sync(*fence, ring->idx);
202 } else {
203 radeon_semaphore_free(rdev, &sem, NULL);
204 }
205 200
206 for (i = 0; i < num_loops; i++) { 201 for (i = 0; i < num_loops; i++) {
207 cur_size_in_bytes = size_in_bytes; 202 cur_size_in_bytes = size_in_bytes;
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 9364129ba292..d700698a1f22 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1873,9 +1873,9 @@ int trinity_dpm_init(struct radeon_device *rdev)
1873 pi->enable_sclk_ds = true; 1873 pi->enable_sclk_ds = true;
1874 pi->enable_gfx_power_gating = true; 1874 pi->enable_gfx_power_gating = true;
1875 pi->enable_gfx_clock_gating = true; 1875 pi->enable_gfx_clock_gating = true;
1876 pi->enable_mg_clock_gating = true; 1876 pi->enable_mg_clock_gating = false;
1877 pi->enable_gfx_dynamic_mgpg = true; /* ??? */ 1877 pi->enable_gfx_dynamic_mgpg = false;
1878 pi->override_dynamic_mgpg = true; 1878 pi->override_dynamic_mgpg = false;
1879 pi->enable_auto_thermal_throttling = true; 1879 pi->enable_auto_thermal_throttling = true;
1880 pi->voltage_drop_in_dce = false; /* need to restructure dpm/modeset interaction */ 1880 pi->voltage_drop_in_dce = false; /* need to restructure dpm/modeset interaction */
1881 pi->uvd_dpm = true; /* ??? */ 1881 pi->uvd_dpm = true; /* ??? */
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index 7266805d9786..d4a68af1a279 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -357,7 +357,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
357 * 357 *
358 * Emit a semaphore command (either wait or signal) to the UVD ring. 358 * Emit a semaphore command (either wait or signal) to the UVD ring.
359 */ 359 */
360void uvd_v1_0_semaphore_emit(struct radeon_device *rdev, 360bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
361 struct radeon_ring *ring, 361 struct radeon_ring *ring,
362 struct radeon_semaphore *semaphore, 362 struct radeon_semaphore *semaphore,
363 bool emit_wait) 363 bool emit_wait)
@@ -372,6 +372,8 @@ void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
372 372
373 radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); 373 radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
374 radeon_ring_write(ring, emit_wait ? 1 : 0); 374 radeon_ring_write(ring, emit_wait ? 1 : 0);
375
376 return true;
375} 377}
376 378
377/** 379/**
diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c
index 5b6fa1f62d4e..d722db2cf340 100644
--- a/drivers/gpu/drm/radeon/uvd_v3_1.c
+++ b/drivers/gpu/drm/radeon/uvd_v3_1.c
@@ -37,7 +37,7 @@
37 * 37 *
38 * Emit a semaphore command (either wait or signal) to the UVD ring. 38 * Emit a semaphore command (either wait or signal) to the UVD ring.
39 */ 39 */
40void uvd_v3_1_semaphore_emit(struct radeon_device *rdev, 40bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
41 struct radeon_ring *ring, 41 struct radeon_ring *ring,
42 struct radeon_semaphore *semaphore, 42 struct radeon_semaphore *semaphore,
43 bool emit_wait) 43 bool emit_wait)
@@ -52,4 +52,6 @@ void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
52 52
53 radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0)); 53 radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
54 radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); 54 radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
55
56 return true;
55} 57}
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 46d41e8b0dcc..2f3f7ea8c77b 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -981,6 +981,8 @@ struct drm_radeon_cs {
981#define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16 981#define RADEON_INFO_SI_TILE_MODE_ARRAY 0x16
982/* query if CP DMA is supported on the compute ring */ 982/* query if CP DMA is supported on the compute ring */
983#define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17 983#define RADEON_INFO_SI_CP_DMA_COMPUTE 0x17
984/* CIK macrotile mode array */
985#define RADEON_INFO_CIK_MACROTILE_MODE_ARRAY 0x18
984 986
985 987
986struct drm_radeon_info { 988struct drm_radeon_info {