Diffstat (limited to 'drivers/gpu/drm/radeon/r100.c')
-rw-r--r--	drivers/gpu/drm/radeon/r100.c	| 1191
1 file changed, 647 insertions(+), 544 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index fb44e7e49083..8acb34fd3fd5 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -65,6 +65,19 @@ MODULE_FIRMWARE(FIRMWARE_R520);
 
 #include "r100_track.h"
 
+/* This file gathers functions specific to:
+ * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+ * and others in some cases.
+ */
+
+/**
+ * r100_wait_for_vblank - vblank wait asic callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to wait for vblank on
+ *
+ * Wait for vblank on the requested crtc (r1xx-r4xx).
+ */
 void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc];
@@ -99,128 +112,49 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc)
 	}
 }
 
-/* This files gather functions specifics to:
- * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
+/**
+ * r100_pre_page_flip - pre-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to prepare for pageflip on
+ *
+ * Pre-pageflip callback (r1xx-r4xx).
+ * Enables the pageflip irq (vblank irq).
  */
-
-int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
-			    struct radeon_cs_packet *pkt,
-			    unsigned idx,
-			    unsigned reg)
-{
-	int r;
-	u32 tile_flags = 0;
-	u32 tmp;
-	struct radeon_cs_reloc *reloc;
-	u32 value;
-
-	r = r100_cs_packet_next_reloc(p, &reloc);
-	if (r) {
-		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-			  idx, reg);
-		r100_cs_dump_packet(p, pkt);
-		return r;
-	}
-
-	value = radeon_get_ib_value(p, idx);
-	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
-	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-			tile_flags |= RADEON_DST_TILE_MACRO;
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-			if (reg == RADEON_SRC_PITCH_OFFSET) {
-				DRM_ERROR("Cannot src blit from microtiled surface\n");
-				r100_cs_dump_packet(p, pkt);
-				return -EINVAL;
-			}
-			tile_flags |= RADEON_DST_TILE_MICRO;
-		}
-
-		tmp |= tile_flags;
-		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
-	} else
-		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
-	return 0;
-}
-
-int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
-			     struct radeon_cs_packet *pkt,
-			     int idx)
-{
-	unsigned c, i;
-	struct radeon_cs_reloc *reloc;
-	struct r100_cs_track *track;
-	int r = 0;
-	volatile uint32_t *ib;
-	u32 idx_value;
-
-	ib = p->ib.ptr;
-	track = (struct r100_cs_track *)p->track;
-	c = radeon_get_ib_value(p, idx++) & 0x1F;
-	if (c > 16) {
-		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
-			  pkt->opcode);
-		r100_cs_dump_packet(p, pkt);
-		return -EINVAL;
-	}
-	track->num_arrays = c;
-	for (i = 0; i < (c - 1); i+=2, idx+=3) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize &= 0x7F;
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 1].robj = reloc->robj;
-		track->arrays[i + 1].esize = idx_value >> 24;
-		track->arrays[i + 1].esize &= 0x7F;
-	}
-	if (c & 1) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].esize &= 0x7F;
-	}
-	return r;
-}
-
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
 	radeon_irq_kms_pflip_irq_get(rdev, crtc);
 }
 
+/**
+ * r100_post_page_flip - post-pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc: crtc to cleanup pageflip on
+ *
+ * Post-pageflip callback (r1xx-r4xx).
+ * Disables the pageflip irq (vblank irq).
+ */
 void r100_post_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* disable the pflip int */
 	radeon_irq_kms_pflip_irq_put(rdev, crtc);
 }
 
+/**
+ * r100_page_flip - pageflip callback.
+ *
+ * @rdev: radeon_device pointer
+ * @crtc_id: crtc to cleanup pageflip on
+ * @crtc_base: new address of the crtc (GPU MC address)
+ *
+ * Does the actual pageflip (r1xx-r4xx).
+ * During vblank we take the crtc lock and wait for the update_pending
+ * bit to go high; when it does, we release the lock and allow the
+ * double buffered update to take place.
+ * Returns the current update pending status.
+ */
 u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
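The three callbacks above are wired into the KMS page-flip path. A minimal sketch of the sequence they implement; example_flip and its polling loop are purely illustrative (the real consumer is the radeon KMS flip code around radeon_crtc_handle_flip()):

	/* Illustrative only, not part of this diff. */
	static void example_flip(struct radeon_device *rdev, int crtc, u64 new_base)
	{
		r100_pre_page_flip(rdev, crtc);		/* take a pageflip (vblank) irq reference */
		while (r100_page_flip(rdev, crtc, new_base))
			cpu_relax();			/* returns nonzero while the update is pending */
		r100_post_page_flip(rdev, crtc);	/* drop the irq reference */
	}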
@@ -247,6 +181,15 @@ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 	return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET;
 }
 
+/**
+ * r100_pm_get_dynpm_state - look up dynpm power state callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Look up the optimal power state based on the
+ * current state of the GPU (r1xx-r5xx).
+ * Used for dynpm only.
+ */
 void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 {
 	int i;
@@ -329,6 +272,15 @@ void r100_pm_get_dynpm_state(struct radeon_device *rdev)
 		  pcie_lanes);
 }
 
+/**
+ * r100_pm_init_profile - Initialize power profiles callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initialize the power states used in profile mode
+ * (r1xx-r3xx).
+ * Used for profile mode only.
+ */
 void r100_pm_init_profile(struct radeon_device *rdev)
 {
 	/* default */
@@ -368,6 +320,14 @@ void r100_pm_init_profile(struct radeon_device *rdev)
 	rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0;
 }
 
+/**
+ * r100_pm_misc - set additional pm hw parameters callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set non-clock parameters associated with a power state
+ * (voltage, pcie lanes, etc.) (r1xx-r4xx).
+ */
 void r100_pm_misc(struct radeon_device *rdev)
 {
 	int requested_index = rdev->pm.requested_power_state_index;
@@ -459,6 +419,13 @@ void r100_pm_misc(struct radeon_device *rdev)
 	}
 }
 
+/**
+ * r100_pm_prepare - pre-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Prepare for a power state change (r1xx-r4xx).
+ */
 void r100_pm_prepare(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -483,6 +450,13 @@ void r100_pm_prepare(struct radeon_device *rdev)
 	}
 }
 
+/**
+ * r100_pm_finish - post-power state change callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Clean up after a power state change (r1xx-r4xx).
+ */
 void r100_pm_finish(struct radeon_device *rdev)
 {
 	struct drm_device *ddev = rdev->ddev;
@@ -507,6 +481,14 @@ void r100_pm_finish(struct radeon_device *rdev)
 	}
 }
 
+/**
+ * r100_gui_idle - gui idle callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Check if the GUI (2D/3D engines) is idle (r1xx-r5xx).
+ * Returns true if idle, false if not.
+ */
 bool r100_gui_idle(struct radeon_device *rdev)
 {
 	if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)
@@ -516,6 +498,15 @@ bool r100_gui_idle(struct radeon_device *rdev)
 }
 
 /* hpd for digital panel detect/disconnect */
+/**
+ * r100_hpd_sense - hpd sense callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Checks if a digital monitor is connected (r1xx-r4xx).
+ * Returns true if connected, false if not connected.
+ */
 bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 {
 	bool connected = false;
@@ -535,6 +526,14 @@ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd)
 	return connected;
 }
 
+/**
+ * r100_hpd_set_polarity - hpd set polarity callback.
+ *
+ * @rdev: radeon_device pointer
+ * @hpd: hpd (hotplug detect) pin
+ *
+ * Set the polarity of the hpd pin (r1xx-r4xx).
+ */
 void r100_hpd_set_polarity(struct radeon_device *rdev,
 			   enum radeon_hpd_id hpd)
 {
@@ -563,47 +562,47 @@ void r100_hpd_set_polarity(struct radeon_device *rdev,
 	}
 }
 
+/**
+ * r100_hpd_init - hpd setup callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Setup the hpd pins used by the card (r1xx-r4xx).
+ * Set the polarity, and enable the hpd interrupts.
+ */
 void r100_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned enable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-		switch (radeon_connector->hpd.hpd) {
-		case RADEON_HPD_1:
-			rdev->irq.hpd[0] = true;
-			break;
-		case RADEON_HPD_2:
-			rdev->irq.hpd[1] = true;
-			break;
-		default:
-			break;
-		}
+		enable |= 1 << radeon_connector->hpd.hpd;
 		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 	}
-	if (rdev->irq.installed)
-		r100_irq_set(rdev);
+	radeon_irq_kms_enable_hpd(rdev, enable);
 }
 
+/**
+ * r100_hpd_fini - hpd tear down callback.
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the hpd pins used by the card (r1xx-r4xx).
+ * Disable the hpd interrupts.
+ */
 void r100_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned disable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
-		switch (radeon_connector->hpd.hpd) {
-		case RADEON_HPD_1:
-			rdev->irq.hpd[0] = false;
-			break;
-		case RADEON_HPD_2:
-			rdev->irq.hpd[1] = false;
-			break;
-		default:
-			break;
-		}
+		disable |= 1 << radeon_connector->hpd.hpd;
 	}
+	radeon_irq_kms_disable_hpd(rdev, disable);
 }
 
 /*
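The hpd rework above replaces the per-pin boolean switch with a pin bitmask handed to the new kms helpers in a single call. A small self-contained sketch of the same mask-building pattern (the enum values and pin list here are invented for illustration):

	/* Illustrative only: collapse a list of hpd pins into one enable mask. */
	enum hpd_pin { HPD_1 = 0, HPD_2 = 1 };

	static unsigned build_hpd_mask(const enum hpd_pin *pins, int n)
	{
		unsigned mask = 0;
		int i;

		for (i = 0; i < n; i++)
			mask |= 1u << pins[i];	/* one bit per pin, as in r100_hpd_init() */
		return mask;
	}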
@@ -635,15 +634,6 @@ int r100_pci_gart_init(struct radeon_device *rdev)
 	return radeon_gart_table_ram_alloc(rdev);
 }
 
-/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
-void r100_enable_bm(struct radeon_device *rdev)
-{
-	uint32_t tmp;
-	/* Enable bus mastering */
-	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
-	WREG32(RADEON_BUS_CNTL, tmp);
-}
-
 int r100_pci_gart_enable(struct radeon_device *rdev)
 {
 	uint32_t tmp;
@@ -705,18 +695,18 @@ int r100_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		tmp |= RADEON_SW_INT_ENABLE;
 	}
 	if (rdev->irq.gui_idle) {
 		tmp |= RADEON_GUI_IDLE_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    rdev->irq.pflip[0]) {
+	    atomic_read(&rdev->irq.pflip[0])) {
 		tmp |= RADEON_CRTC_VBLANK_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    rdev->irq.pflip[1]) {
+	    atomic_read(&rdev->irq.pflip[1])) {
 		tmp |= RADEON_CRTC2_VBLANK_MASK;
 	}
 	if (rdev->irq.hpd[0]) {
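The sw_int and pflip flags become atomic_t reference counts rather than booleans, so r100_irq_set() now samples them with atomic_read(). A sketch of the get/put pattern this enables, modeled on the radeon_irq_kms_pflip_irq_get()/put() helpers (the helper bodies are assumed here, not shown in this diff):

	/* Sketch: the first user unmasks the interrupt, the last user masks it again. */
	static void example_pflip_irq_get(struct radeon_device *rdev, int crtc)
	{
		if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1)
			radeon_irq_set(rdev);	/* reprogram GEN_INT_CNTL */
	}

	static void example_pflip_irq_put(struct radeon_device *rdev, int crtc)
	{
		if (atomic_dec_return(&rdev->irq.pflip[crtc]) == 0)
			radeon_irq_set(rdev);
	}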
@@ -782,7 +772,6 @@ int r100_irq_process(struct radeon_device *rdev)
 	/* gui idle interrupt */
 	if (status & RADEON_GUI_IDLE_STAT) {
 		rdev->irq.gui_idle_acked = true;
-		rdev->pm.gui_idle = true;
 		wake_up(&rdev->irq.idle_queue);
 	}
 	/* Vertical blank interrupts */
@@ -792,7 +781,7 @@ int r100_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[0])
+		if (atomic_read(&rdev->irq.pflip[0]))
 			radeon_crtc_handle_flip(rdev, 0);
 	}
 	if (status & RADEON_CRTC2_VBLANK_STAT) {
@@ -801,7 +790,7 @@ int r100_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[1])
+		if (atomic_read(&rdev->irq.pflip[1]))
 			radeon_crtc_handle_flip(rdev, 1);
 	}
 	if (status & RADEON_FP_DETECT_STAT) {
@@ -883,7 +872,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
 		   unsigned num_gpu_pages,
-		   struct radeon_fence *fence)
+		   struct radeon_fence **fence)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	uint32_t cur_pages;
@@ -947,7 +936,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 		  RADEON_WAIT_HOST_IDLECLEAN |
 		  RADEON_WAIT_DMA_GUI_IDLE);
 	if (fence) {
-		r = radeon_fence_emit(rdev, fence);
+		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
 	}
 	radeon_ring_unlock_commit(rdev, ring);
 	return r;
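With the signature change above, r100_copy_blit() takes the address of a fence pointer and radeon_fence_emit() fills it in on the named ring. A sketch of what a caller might now look like (the surrounding error handling is illustrative, not taken from this diff):

	/* Illustrative caller of the reworked fence API. */
	struct radeon_fence *fence = NULL;
	int r;

	r = r100_copy_blit(rdev, src_offset, dst_offset, num_gpu_pages, &fence);
	if (!r)
		r = radeon_fence_wait(fence, false);	/* block until the blit retires */
	radeon_fence_unref(&fence);			/* drop our reference either way */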
@@ -1192,6 +1181,14 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)
 	}
 	ring->ready = true;
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	if (radeon_ring_supports_scratch_reg(rdev, ring)) {
+		r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
+		if (r) {
+			DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+			ring->rptr_save_reg = 0;
+		}
+	}
 	return 0;
 }
 
@@ -1202,6 +1199,7 @@ void r100_cp_fini(struct radeon_device *rdev)
 	}
 	/* Disable ring */
 	r100_cp_disable(rdev);
+	radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg);
 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
 	DRM_INFO("radeon: cp finalized\n");
 }
@@ -1223,6 +1221,112 @@ void r100_cp_disable(struct radeon_device *rdev)
 /*
  * CS functions
  */
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg)
+{
+	int r;
+	u32 tile_flags = 0;
+	u32 tmp;
+	struct radeon_cs_reloc *reloc;
+	u32 value;
+
+	r = r100_cs_packet_next_reloc(p, &reloc);
+	if (r) {
+		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+			  idx, reg);
+		r100_cs_dump_packet(p, pkt);
+		return r;
+	}
+
+	value = radeon_get_ib_value(p, idx);
+	tmp = value & 0x003fffff;
+	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			tile_flags |= RADEON_DST_TILE_MACRO;
+		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+			if (reg == RADEON_SRC_PITCH_OFFSET) {
+				DRM_ERROR("Cannot src blit from microtiled surface\n");
+				r100_cs_dump_packet(p, pkt);
+				return -EINVAL;
+			}
+			tile_flags |= RADEON_DST_TILE_MICRO;
+		}
+
+		tmp |= tile_flags;
+		p->ib.ptr[idx] = (value & 0x3fc00000) | tmp;
+	} else
+		p->ib.ptr[idx] = (value & 0xffc00000) | tmp;
+	return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx)
+{
+	unsigned c, i;
+	struct radeon_cs_reloc *reloc;
+	struct r100_cs_track *track;
+	int r = 0;
+	volatile uint32_t *ib;
+	u32 idx_value;
+
+	ib = p->ib.ptr;
+	track = (struct r100_cs_track *)p->track;
+	c = radeon_get_ib_value(p, idx++) & 0x1F;
+	if (c > 16) {
+		DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+			  pkt->opcode);
+		r100_cs_dump_packet(p, pkt);
+		return -EINVAL;
+	}
+	track->num_arrays = c;
+	for (i = 0; i < (c - 1); i+=2, idx+=3) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize &= 0x7F;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 1].robj = reloc->robj;
+		track->arrays[i + 1].esize = idx_value >> 24;
+		track->arrays[i + 1].esize &= 0x7F;
+	}
+	if (c & 1) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].esize &= 0x7F;
+	}
+	return r;
+}
+
 int r100_cs_parse_packet0(struct radeon_cs_parser *p,
 			  struct radeon_cs_packet *pkt,
 			  const unsigned *auth, unsigned n,
@@ -2048,6 +2152,379 @@ int r100_cs_parse(struct radeon_cs_parser *p)
 	return 0;
 }
 
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+{
+	DRM_ERROR("pitch %d\n", t->pitch);
+	DRM_ERROR("use_pitch %d\n", t->use_pitch);
+	DRM_ERROR("width %d\n", t->width);
+	DRM_ERROR("width_11 %d\n", t->width_11);
+	DRM_ERROR("height %d\n", t->height);
+	DRM_ERROR("height_11 %d\n", t->height_11);
+	DRM_ERROR("num levels %d\n", t->num_levels);
+	DRM_ERROR("depth %d\n", t->txdepth);
+	DRM_ERROR("bpp %d\n", t->cpp);
+	DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
+	DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
+	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
+	DRM_ERROR("compress format %d\n", t->compress_format);
+}
+
+static int r100_track_compress_size(int compress_format, int w, int h)
+{
+	int block_width, block_height, block_bytes;
+	int wblocks, hblocks;
+	int min_wblocks;
+	int sz;
+
+	block_width = 4;
+	block_height = 4;
+
+	switch (compress_format) {
+	case R100_TRACK_COMP_DXT1:
+		block_bytes = 8;
+		min_wblocks = 4;
+		break;
+	default:
+	case R100_TRACK_COMP_DXT35:
+		block_bytes = 16;
+		min_wblocks = 2;
+		break;
+	}
+
+	hblocks = (h + block_height - 1) / block_height;
+	wblocks = (w + block_width - 1) / block_width;
+	if (wblocks < min_wblocks)
+		wblocks = min_wblocks;
+	sz = wblocks * hblocks * block_bytes;
+	return sz;
+}
+
+static int r100_cs_track_cube(struct radeon_device *rdev,
+			      struct r100_cs_track *track, unsigned idx)
+{
+	unsigned face, w, h;
+	struct radeon_bo *cube_robj;
+	unsigned long size;
+	unsigned compress_format = track->textures[idx].compress_format;
+
+	for (face = 0; face < 5; face++) {
+		cube_robj = track->textures[idx].cube_info[face].robj;
+		w = track->textures[idx].cube_info[face].width;
+		h = track->textures[idx].cube_info[face].height;
+
+		if (compress_format) {
+			size = r100_track_compress_size(compress_format, w, h);
+		} else
+			size = w * h;
+		size *= track->textures[idx].cpp;
+
+		size += track->textures[idx].cube_info[face].offset;
+
+		if (size > radeon_bo_size(cube_robj)) {
+			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
+				  size, radeon_bo_size(cube_robj));
+			r100_cs_track_texture_print(&track->textures[idx]);
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int r100_cs_track_texture_check(struct radeon_device *rdev,
+				       struct r100_cs_track *track)
+{
+	struct radeon_bo *robj;
+	unsigned long size;
+	unsigned u, i, w, h, d;
+	int ret;
+
+	for (u = 0; u < track->num_texture; u++) {
+		if (!track->textures[u].enabled)
+			continue;
+		if (track->textures[u].lookup_disable)
+			continue;
+		robj = track->textures[u].robj;
+		if (robj == NULL) {
+			DRM_ERROR("No texture bound to unit %u\n", u);
+			return -EINVAL;
+		}
+		size = 0;
+		for (i = 0; i <= track->textures[u].num_levels; i++) {
+			if (track->textures[u].use_pitch) {
+				if (rdev->family < CHIP_R300)
+					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
+				else
+					w = track->textures[u].pitch / (1 << i);
+			} else {
+				w = track->textures[u].width;
+				if (rdev->family >= CHIP_RV515)
+					w |= track->textures[u].width_11;
+				w = w / (1 << i);
+				if (track->textures[u].roundup_w)
+					w = roundup_pow_of_two(w);
+			}
+			h = track->textures[u].height;
+			if (rdev->family >= CHIP_RV515)
+				h |= track->textures[u].height_11;
+			h = h / (1 << i);
+			if (track->textures[u].roundup_h)
+				h = roundup_pow_of_two(h);
+			if (track->textures[u].tex_coord_type == 1) {
+				d = (1 << track->textures[u].txdepth) / (1 << i);
+				if (!d)
+					d = 1;
+			} else {
+				d = 1;
+			}
+			if (track->textures[u].compress_format) {
+
+				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
+				/* compressed textures are block based */
+			} else
+				size += w * h * d;
+		}
+		size *= track->textures[u].cpp;
+
+		switch (track->textures[u].tex_coord_type) {
+		case 0:
+		case 1:
+			break;
+		case 2:
+			if (track->separate_cube) {
+				ret = r100_cs_track_cube(rdev, track, u);
+				if (ret)
+					return ret;
+			} else
+				size *= 6;
+			break;
+		default:
+			DRM_ERROR("Invalid texture coordinate type %u for unit "
+				  "%u\n", track->textures[u].tex_coord_type, u);
+			return -EINVAL;
+		}
+		if (size > radeon_bo_size(robj)) {
+			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
+				  "%lu\n", u, size, radeon_bo_size(robj));
+			r100_cs_track_texture_print(&track->textures[u]);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
+{
+	unsigned i;
+	unsigned long size;
+	unsigned prim_walk;
+	unsigned nverts;
+	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
+
+	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
+	    !track->blend_read_enable)
+		num_cb = 0;
+
+	for (i = 0; i < num_cb; i++) {
+		if (track->cb[i].robj == NULL) {
+			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
+			return -EINVAL;
+		}
+		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
+		size += track->cb[i].offset;
+		if (size > radeon_bo_size(track->cb[i].robj)) {
+			DRM_ERROR("[drm] Buffer too small for color buffer %d "
+				  "(need %lu have %lu) !\n", i, size,
+				  radeon_bo_size(track->cb[i].robj));
+			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
+				  i, track->cb[i].pitch, track->cb[i].cpp,
+				  track->cb[i].offset, track->maxy);
+			return -EINVAL;
+		}
+	}
+	track->cb_dirty = false;
+
+	if (track->zb_dirty && track->z_enabled) {
+		if (track->zb.robj == NULL) {
+			DRM_ERROR("[drm] No buffer for z buffer !\n");
+			return -EINVAL;
+		}
+		size = track->zb.pitch * track->zb.cpp * track->maxy;
+		size += track->zb.offset;
+		if (size > radeon_bo_size(track->zb.robj)) {
+			DRM_ERROR("[drm] Buffer too small for z buffer "
+				  "(need %lu have %lu) !\n", size,
+				  radeon_bo_size(track->zb.robj));
+			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
+				  track->zb.pitch, track->zb.cpp,
+				  track->zb.offset, track->maxy);
+			return -EINVAL;
+		}
+	}
+	track->zb_dirty = false;
+
+	if (track->aa_dirty && track->aaresolve) {
+		if (track->aa.robj == NULL) {
+			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
+			return -EINVAL;
+		}
+		/* I believe the format comes from colorbuffer0. */
+		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
+		size += track->aa.offset;
+		if (size > radeon_bo_size(track->aa.robj)) {
+			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
+				  "(need %lu have %lu) !\n", i, size,
+				  radeon_bo_size(track->aa.robj));
+			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
+				  i, track->aa.pitch, track->cb[0].cpp,
+				  track->aa.offset, track->maxy);
+			return -EINVAL;
+		}
+	}
+	track->aa_dirty = false;
+
+	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
+	if (track->vap_vf_cntl & (1 << 14)) {
+		nverts = track->vap_alt_nverts;
+	} else {
+		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
+	}
+	switch (prim_walk) {
+	case 1:
+		for (i = 0; i < track->num_arrays; i++) {
+			size = track->arrays[i].esize * track->max_indx * 4;
+			if (track->arrays[i].robj == NULL) {
+				DRM_ERROR("(PW %u) Vertex array %u no buffer "
+					  "bound\n", prim_walk, i);
+				return -EINVAL;
+			}
+			if (size > radeon_bo_size(track->arrays[i].robj)) {
+				dev_err(rdev->dev, "(PW %u) Vertex array %u "
+					"need %lu dwords have %lu dwords\n",
+					prim_walk, i, size >> 2,
+					radeon_bo_size(track->arrays[i].robj)
+					>> 2);
+				DRM_ERROR("Max indices %u\n", track->max_indx);
+				return -EINVAL;
+			}
+		}
+		break;
+	case 2:
+		for (i = 0; i < track->num_arrays; i++) {
+			size = track->arrays[i].esize * (nverts - 1) * 4;
+			if (track->arrays[i].robj == NULL) {
+				DRM_ERROR("(PW %u) Vertex array %u no buffer "
+					  "bound\n", prim_walk, i);
+				return -EINVAL;
+			}
+			if (size > radeon_bo_size(track->arrays[i].robj)) {
+				dev_err(rdev->dev, "(PW %u) Vertex array %u "
+					"need %lu dwords have %lu dwords\n",
+					prim_walk, i, size >> 2,
+					radeon_bo_size(track->arrays[i].robj)
+					>> 2);
+				return -EINVAL;
+			}
+		}
+		break;
+	case 3:
+		size = track->vtx_size * nverts;
+		if (size != track->immd_dwords) {
+			DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n",
+				  track->immd_dwords, size);
+			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
+				  nverts, track->vtx_size);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
+			  prim_walk);
+		return -EINVAL;
+	}
+
+	if (track->tex_dirty) {
+		track->tex_dirty = false;
+		return r100_cs_track_texture_check(rdev, track);
+	}
+	return 0;
+}
+
+void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
+{
+	unsigned i, face;
+
+	track->cb_dirty = true;
+	track->zb_dirty = true;
+	track->tex_dirty = true;
+	track->aa_dirty = true;
+
+	if (rdev->family < CHIP_R300) {
+		track->num_cb = 1;
+		if (rdev->family <= CHIP_RS200)
+			track->num_texture = 3;
+		else
+			track->num_texture = 6;
+		track->maxy = 2048;
+		track->separate_cube = 1;
+	} else {
+		track->num_cb = 4;
+		track->num_texture = 16;
+		track->maxy = 4096;
+		track->separate_cube = 0;
+		track->aaresolve = false;
+		track->aa.robj = NULL;
+	}
+
+	for (i = 0; i < track->num_cb; i++) {
+		track->cb[i].robj = NULL;
+		track->cb[i].pitch = 8192;
+		track->cb[i].cpp = 16;
+		track->cb[i].offset = 0;
+	}
+	track->z_enabled = true;
+	track->zb.robj = NULL;
+	track->zb.pitch = 8192;
+	track->zb.cpp = 4;
+	track->zb.offset = 0;
+	track->vtx_size = 0x7F;
+	track->immd_dwords = 0xFFFFFFFFUL;
+	track->num_arrays = 11;
+	track->max_indx = 0x00FFFFFFUL;
+	for (i = 0; i < track->num_arrays; i++) {
+		track->arrays[i].robj = NULL;
+		track->arrays[i].esize = 0x7F;
+	}
+	for (i = 0; i < track->num_texture; i++) {
+		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
+		track->textures[i].pitch = 16536;
+		track->textures[i].width = 16536;
+		track->textures[i].height = 16536;
+		track->textures[i].width_11 = 1 << 11;
+		track->textures[i].height_11 = 1 << 11;
+		track->textures[i].num_levels = 12;
+		if (rdev->family <= CHIP_RS200) {
+			track->textures[i].tex_coord_type = 0;
+			track->textures[i].txdepth = 0;
+		} else {
+			track->textures[i].txdepth = 16;
+			track->textures[i].tex_coord_type = 1;
+		}
+		track->textures[i].cpp = 64;
+		track->textures[i].robj = NULL;
+		/* CS IB emission code makes sure texture unit are disabled */
+		track->textures[i].enabled = false;
+		track->textures[i].lookup_disable = false;
+		track->textures[i].roundup_w = true;
+		track->textures[i].roundup_h = true;
+		if (track->separate_cube)
+			for (face = 0; face < 5; face++) {
+				track->textures[i].cube_info[face].robj = NULL;
+				track->textures[i].cube_info[face].width = 16536;
+				track->textures[i].cube_info[face].height = 16536;
+				track->textures[i].cube_info[face].offset = 0;
+			}
+	}
+}
 
 /*
  * Global GPU functions
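A quick worked example of the r100_track_compress_size() math above: a 16x16 DXT1 mip level is 4x4 blocks of 8 bytes (128 bytes), and the min_wblocks clamp means even a 1-texel-wide DXT1 level still pays for 4 block columns. A standalone arithmetic check (plain userspace C, not driver code):

	#include <stdio.h>

	/* Same math as r100_track_compress_size(), with the format switch inlined. */
	static int compress_size(int block_bytes, int min_wblocks, int w, int h)
	{
		int wblocks = (w + 3) / 4;
		int hblocks = (h + 3) / 4;

		if (wblocks < min_wblocks)
			wblocks = min_wblocks;
		return wblocks * hblocks * block_bytes;
	}

	int main(void)
	{
		printf("%d\n", compress_size(8, 4, 16, 16));	/* DXT1 16x16: 128 */
		printf("%d\n", compress_size(8, 4, 1, 8));	/* DXT1 1x8: 64, width clamped */
		printf("%d\n", compress_size(16, 2, 16, 16));	/* DXT3/5 16x16: 256 */
		return 0;
	}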
@@ -2175,6 +2652,15 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
+/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */
+void r100_enable_bm(struct radeon_device *rdev)
+{
+	uint32_t tmp;
+	/* Enable bus mastering */
+	tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS;
+	WREG32(RADEON_BUS_CNTL, tmp);
+}
+
 void r100_bm_disable(struct radeon_device *rdev)
 {
 	u32 tmp;
@@ -3261,380 +3747,6 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 	}
 }
 
-static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
-{
-	DRM_ERROR("pitch %d\n", t->pitch);
-	DRM_ERROR("use_pitch %d\n", t->use_pitch);
-	DRM_ERROR("width %d\n", t->width);
-	DRM_ERROR("width_11 %d\n", t->width_11);
-	DRM_ERROR("height %d\n", t->height);
-	DRM_ERROR("height_11 %d\n", t->height_11);
-	DRM_ERROR("num levels %d\n", t->num_levels);
-	DRM_ERROR("depth %d\n", t->txdepth);
-	DRM_ERROR("bpp %d\n", t->cpp);
-	DRM_ERROR("coordinate type %d\n", t->tex_coord_type);
-	DRM_ERROR("width round to power of 2 %d\n", t->roundup_w);
-	DRM_ERROR("height round to power of 2 %d\n", t->roundup_h);
-	DRM_ERROR("compress format %d\n", t->compress_format);
-}
-
-static int r100_track_compress_size(int compress_format, int w, int h)
-{
-	int block_width, block_height, block_bytes;
-	int wblocks, hblocks;
-	int min_wblocks;
-	int sz;
-
-	block_width = 4;
-	block_height = 4;
-
-	switch (compress_format) {
-	case R100_TRACK_COMP_DXT1:
-		block_bytes = 8;
-		min_wblocks = 4;
-		break;
-	default:
-	case R100_TRACK_COMP_DXT35:
-		block_bytes = 16;
-		min_wblocks = 2;
-		break;
-	}
-
-	hblocks = (h + block_height - 1) / block_height;
-	wblocks = (w + block_width - 1) / block_width;
-	if (wblocks < min_wblocks)
-		wblocks = min_wblocks;
-	sz = wblocks * hblocks * block_bytes;
-	return sz;
-}
-
-static int r100_cs_track_cube(struct radeon_device *rdev,
-			      struct r100_cs_track *track, unsigned idx)
-{
-	unsigned face, w, h;
-	struct radeon_bo *cube_robj;
-	unsigned long size;
-	unsigned compress_format = track->textures[idx].compress_format;
-
-	for (face = 0; face < 5; face++) {
-		cube_robj = track->textures[idx].cube_info[face].robj;
-		w = track->textures[idx].cube_info[face].width;
-		h = track->textures[idx].cube_info[face].height;
-
-		if (compress_format) {
-			size = r100_track_compress_size(compress_format, w, h);
-		} else
-			size = w * h;
-		size *= track->textures[idx].cpp;
-
-		size += track->textures[idx].cube_info[face].offset;
-
-		if (size > radeon_bo_size(cube_robj)) {
-			DRM_ERROR("Cube texture offset greater than object size %lu %lu\n",
-				  size, radeon_bo_size(cube_robj));
-			r100_cs_track_texture_print(&track->textures[idx]);
-			return -1;
-		}
-	}
-	return 0;
-}
-
-static int r100_cs_track_texture_check(struct radeon_device *rdev,
-				       struct r100_cs_track *track)
-{
-	struct radeon_bo *robj;
-	unsigned long size;
-	unsigned u, i, w, h, d;
-	int ret;
-
-	for (u = 0; u < track->num_texture; u++) {
-		if (!track->textures[u].enabled)
-			continue;
-		if (track->textures[u].lookup_disable)
-			continue;
-		robj = track->textures[u].robj;
-		if (robj == NULL) {
-			DRM_ERROR("No texture bound to unit %u\n", u);
-			return -EINVAL;
-		}
-		size = 0;
-		for (i = 0; i <= track->textures[u].num_levels; i++) {
-			if (track->textures[u].use_pitch) {
-				if (rdev->family < CHIP_R300)
-					w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i);
-				else
-					w = track->textures[u].pitch / (1 << i);
-			} else {
-				w = track->textures[u].width;
-				if (rdev->family >= CHIP_RV515)
-					w |= track->textures[u].width_11;
-				w = w / (1 << i);
-				if (track->textures[u].roundup_w)
-					w = roundup_pow_of_two(w);
-			}
-			h = track->textures[u].height;
-			if (rdev->family >= CHIP_RV515)
-				h |= track->textures[u].height_11;
-			h = h / (1 << i);
-			if (track->textures[u].roundup_h)
-				h = roundup_pow_of_two(h);
-			if (track->textures[u].tex_coord_type == 1) {
-				d = (1 << track->textures[u].txdepth) / (1 << i);
-				if (!d)
-					d = 1;
-			} else {
-				d = 1;
-			}
-			if (track->textures[u].compress_format) {
-
-				size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d;
-				/* compressed textures are block based */
-			} else
-				size += w * h * d;
-		}
-		size *= track->textures[u].cpp;
-
-		switch (track->textures[u].tex_coord_type) {
-		case 0:
-		case 1:
-			break;
-		case 2:
-			if (track->separate_cube) {
-				ret = r100_cs_track_cube(rdev, track, u);
-				if (ret)
-					return ret;
-			} else
-				size *= 6;
-			break;
-		default:
-			DRM_ERROR("Invalid texture coordinate type %u for unit "
-				  "%u\n", track->textures[u].tex_coord_type, u);
-			return -EINVAL;
-		}
-		if (size > radeon_bo_size(robj)) {
-			DRM_ERROR("Texture of unit %u needs %lu bytes but is "
-				  "%lu\n", u, size, radeon_bo_size(robj));
-			r100_cs_track_texture_print(&track->textures[u]);
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
-int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
-{
-	unsigned i;
-	unsigned long size;
-	unsigned prim_walk;
-	unsigned nverts;
-	unsigned num_cb = track->cb_dirty ? track->num_cb : 0;
-
-	if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
-	    !track->blend_read_enable)
-		num_cb = 0;
-
-	for (i = 0; i < num_cb; i++) {
-		if (track->cb[i].robj == NULL) {
-			DRM_ERROR("[drm] No buffer for color buffer %d !\n", i);
-			return -EINVAL;
-		}
-		size = track->cb[i].pitch * track->cb[i].cpp * track->maxy;
-		size += track->cb[i].offset;
-		if (size > radeon_bo_size(track->cb[i].robj)) {
-			DRM_ERROR("[drm] Buffer too small for color buffer %d "
-				  "(need %lu have %lu) !\n", i, size,
-				  radeon_bo_size(track->cb[i].robj));
-			DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n",
-				  i, track->cb[i].pitch, track->cb[i].cpp,
-				  track->cb[i].offset, track->maxy);
-			return -EINVAL;
-		}
-	}
-	track->cb_dirty = false;
-
-	if (track->zb_dirty && track->z_enabled) {
-		if (track->zb.robj == NULL) {
-			DRM_ERROR("[drm] No buffer for z buffer !\n");
-			return -EINVAL;
-		}
-		size = track->zb.pitch * track->zb.cpp * track->maxy;
-		size += track->zb.offset;
-		if (size > radeon_bo_size(track->zb.robj)) {
-			DRM_ERROR("[drm] Buffer too small for z buffer "
-				  "(need %lu have %lu) !\n", size,
-				  radeon_bo_size(track->zb.robj));
-			DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n",
-				  track->zb.pitch, track->zb.cpp,
-				  track->zb.offset, track->maxy);
-			return -EINVAL;
-		}
-	}
-	track->zb_dirty = false;
-
-	if (track->aa_dirty && track->aaresolve) {
-		if (track->aa.robj == NULL) {
-			DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i);
-			return -EINVAL;
-		}
-		/* I believe the format comes from colorbuffer0. */
-		size = track->aa.pitch * track->cb[0].cpp * track->maxy;
-		size += track->aa.offset;
-		if (size > radeon_bo_size(track->aa.robj)) {
-			DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d "
-				  "(need %lu have %lu) !\n", i, size,
-				  radeon_bo_size(track->aa.robj));
-			DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n",
-				  i, track->aa.pitch, track->cb[0].cpp,
-				  track->aa.offset, track->maxy);
-			return -EINVAL;
-		}
-	}
-	track->aa_dirty = false;
-
-	prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
-	if (track->vap_vf_cntl & (1 << 14)) {
-		nverts = track->vap_alt_nverts;
-	} else {
-		nverts = (track->vap_vf_cntl >> 16) & 0xFFFF;
-	}
-	switch (prim_walk) {
-	case 1:
-		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * track->max_indx * 4;
-			if (track->arrays[i].robj == NULL) {
-				DRM_ERROR("(PW %u) Vertex array %u no buffer "
-					  "bound\n", prim_walk, i);
-				return -EINVAL;
-			}
-			if (size > radeon_bo_size(track->arrays[i].robj)) {
-				dev_err(rdev->dev, "(PW %u) Vertex array %u "
-					"need %lu dwords have %lu dwords\n",
-					prim_walk, i, size >> 2,
-					radeon_bo_size(track->arrays[i].robj)
-					>> 2);
-				DRM_ERROR("Max indices %u\n", track->max_indx);
-				return -EINVAL;
-			}
-		}
-		break;
-	case 2:
-		for (i = 0; i < track->num_arrays; i++) {
-			size = track->arrays[i].esize * (nverts - 1) * 4;
-			if (track->arrays[i].robj == NULL) {
-				DRM_ERROR("(PW %u) Vertex array %u no buffer "
-					  "bound\n", prim_walk, i);
-				return -EINVAL;
-			}
-			if (size > radeon_bo_size(track->arrays[i].robj)) {
-				dev_err(rdev->dev, "(PW %u) Vertex array %u "
-					"need %lu dwords have %lu dwords\n",
-					prim_walk, i, size >> 2,
-					radeon_bo_size(track->arrays[i].robj)
-					>> 2);
-				return -EINVAL;
-			}
-		}
-		break;
-	case 3:
-		size = track->vtx_size * nverts;
-		if (size != track->immd_dwords) {
-			DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n",
-				  track->immd_dwords, size);
-			DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n",
-				  nverts, track->vtx_size);
-			return -EINVAL;
-		}
-		break;
-	default:
-		DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n",
-			  prim_walk);
-		return -EINVAL;
-	}
-
-	if (track->tex_dirty) {
-		track->tex_dirty = false;
-		return r100_cs_track_texture_check(rdev, track);
-	}
-	return 0;
-}
-
-void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
-{
-	unsigned i, face;
-
-	track->cb_dirty = true;
-	track->zb_dirty = true;
-	track->tex_dirty = true;
-	track->aa_dirty = true;
-
-	if (rdev->family < CHIP_R300) {
-		track->num_cb = 1;
-		if (rdev->family <= CHIP_RS200)
-			track->num_texture = 3;
-		else
-			track->num_texture = 6;
-		track->maxy = 2048;
-		track->separate_cube = 1;
-	} else {
-		track->num_cb = 4;
-		track->num_texture = 16;
-		track->maxy = 4096;
-		track->separate_cube = 0;
-		track->aaresolve = false;
-		track->aa.robj = NULL;
-	}
-
-	for (i = 0; i < track->num_cb; i++) {
-		track->cb[i].robj = NULL;
-		track->cb[i].pitch = 8192;
-		track->cb[i].cpp = 16;
-		track->cb[i].offset = 0;
-	}
-	track->z_enabled = true;
-	track->zb.robj = NULL;
-	track->zb.pitch = 8192;
-	track->zb.cpp = 4;
-	track->zb.offset = 0;
-	track->vtx_size = 0x7F;
-	track->immd_dwords = 0xFFFFFFFFUL;
-	track->num_arrays = 11;
-	track->max_indx = 0x00FFFFFFUL;
-	for (i = 0; i < track->num_arrays; i++) {
-		track->arrays[i].robj = NULL;
-		track->arrays[i].esize = 0x7F;
-	}
-	for (i = 0; i < track->num_texture; i++) {
-		track->textures[i].compress_format = R100_TRACK_COMP_NONE;
-		track->textures[i].pitch = 16536;
-		track->textures[i].width = 16536;
-		track->textures[i].height = 16536;
-		track->textures[i].width_11 = 1 << 11;
-		track->textures[i].height_11 = 1 << 11;
-		track->textures[i].num_levels = 12;
-		if (rdev->family <= CHIP_RS200) {
-			track->textures[i].tex_coord_type = 0;
-			track->textures[i].txdepth = 0;
-		} else {
-			track->textures[i].txdepth = 16;
-			track->textures[i].tex_coord_type = 1;
-		}
-		track->textures[i].cpp = 64;
-		track->textures[i].robj = NULL;
-		/* CS IB emission code makes sure texture unit are disabled */
-		track->textures[i].enabled = false;
-		track->textures[i].lookup_disable = false;
-		track->textures[i].roundup_w = true;
-		track->textures[i].roundup_h = true;
-		if (track->separate_cube)
-			for (face = 0; face < 5; face++) {
-				track->textures[i].cube_info[face].robj = NULL;
-				track->textures[i].cube_info[face].width = 16536;
-				track->textures[i].cube_info[face].height = 16536;
-				track->textures[i].cube_info[face].offset = 0;
-			}
-	}
-}
-
 int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	uint32_t scratch;
@@ -3679,6 +3791,12 @@ void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 
+	if (ring->rptr_save_reg) {
+		u32 next_rptr = ring->wptr + 2 + 3;
+		radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0));
+		radeon_ring_write(ring, next_rptr);
+	}
+
 	radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1));
 	radeon_ring_write(ring, ib->gpu_addr);
 	radeon_ring_write(ring, ib->length_dw);
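The "+ 2 + 3" in next_rptr is plain dword accounting for the two packets this function emits; a sketch of the arithmetic, assuming one dword per PACKET0 header (which matches the write counts above):

	/*
	 * PACKET0(rptr_save_reg, 0) + next_rptr value    -> 2 dwords
	 * PACKET0(RADEON_CP_IB_BASE, 1) + base + length  -> 3 dwords
	 *
	 * So wptr + 2 + 3 is where the read pointer will stand once the CP
	 * has consumed this IB dispatch; the scratch register lets the
	 * driver read back how far the CP actually got.
	 */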
@@ -3711,7 +3829,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	ib.ptr[6] = PACKET2(0);
 	ib.ptr[7] = PACKET2(0);
 	ib.length_dw = 8;
-	r = radeon_ib_schedule(rdev, &ib);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
 	if (r) {
 		radeon_scratch_free(rdev, scratch);
 		radeon_ib_free(rdev, &ib);
@@ -3740,12 +3858,6 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	return r;
 }
 
-void r100_ib_fini(struct radeon_device *rdev)
-{
-	radeon_ib_pool_suspend(rdev);
-	radeon_ib_pool_fini(rdev);
-}
-
 void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save)
 {
 	/* Shutdown CP we shouldn't need to do that but better be safe than
@@ -3905,13 +4017,11 @@ static int r100_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	return 0;
 }
@@ -3948,7 +4058,6 @@ int r100_resume(struct radeon_device *rdev)
 
 int r100_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -3961,7 +4070,7 @@ void r100_fini(struct radeon_device *rdev)
 {
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	if (rdev->flags & RADEON_IS_PCI)
 		r100_pci_gart_fini(rdev);
@@ -4068,20 +4177,14 @@ int r100_init(struct radeon_device *rdev)
 	}
 	r100_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = r100_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		if (rdev->flags & RADEON_IS_PCI)
 			r100_pci_gart_fini(rdev);