diff options
Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_benchmark.c')
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_benchmark.c | 247 |
1 files changed, 171 insertions, 76 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 10191d9372d8..5cafc90de7f8 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c | |||
@@ -26,21 +26,81 @@ | |||
26 | #include "radeon_reg.h" | 26 | #include "radeon_reg.h" |
27 | #include "radeon.h" | 27 | #include "radeon.h" |
28 | 28 | ||
29 | void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, | 29 | #define RADEON_BENCHMARK_COPY_BLIT 1 |
30 | unsigned sdomain, unsigned ddomain) | 30 | #define RADEON_BENCHMARK_COPY_DMA 0 |
31 | |||
32 | #define RADEON_BENCHMARK_ITERATIONS 1024 | ||
33 | #define RADEON_BENCHMARK_COMMON_MODES_N 17 | ||
34 | |||
35 | static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, | ||
36 | uint64_t saddr, uint64_t daddr, | ||
37 | int flag, int n) | ||
38 | { | ||
39 | unsigned long start_jiffies; | ||
40 | unsigned long end_jiffies; | ||
41 | struct radeon_fence *fence = NULL; | ||
42 | int i, r; | ||
43 | |||
44 | start_jiffies = jiffies; | ||
45 | for (i = 0; i < n; i++) { | ||
46 | r = radeon_fence_create(rdev, &fence); | ||
47 | if (r) | ||
48 | return r; | ||
49 | |||
50 | switch (flag) { | ||
51 | case RADEON_BENCHMARK_COPY_DMA: | ||
52 | r = radeon_copy_dma(rdev, saddr, daddr, | ||
53 | size / RADEON_GPU_PAGE_SIZE, | ||
54 | fence); | ||
55 | break; | ||
56 | case RADEON_BENCHMARK_COPY_BLIT: | ||
57 | r = radeon_copy_blit(rdev, saddr, daddr, | ||
58 | size / RADEON_GPU_PAGE_SIZE, | ||
59 | fence); | ||
60 | break; | ||
61 | default: | ||
62 | DRM_ERROR("Unknown copy method\n"); | ||
63 | r = -EINVAL; | ||
64 | } | ||
65 | if (r) | ||
66 | goto exit_do_move; | ||
67 | r = radeon_fence_wait(fence, false); | ||
68 | if (r) | ||
69 | goto exit_do_move; | ||
70 | radeon_fence_unref(&fence); | ||
71 | } | ||
72 | end_jiffies = jiffies; | ||
73 | r = jiffies_to_msecs(end_jiffies - start_jiffies); | ||
74 | |||
75 | exit_do_move: | ||
76 | if (fence) | ||
77 | radeon_fence_unref(&fence); | ||
78 | return r; | ||
79 | } | ||
80 | |||
81 | |||
/*
 * Log the outcome of one benchmark run.
 *
 * @n:       number of buffer moves performed
 * @size:    size of each move in bytes (reported in kB)
 * @time:    elapsed time in milliseconds
 * @sdomain: source domain, printed as a number
 * @ddomain: destination domain, printed as a number
 * @kind:    label for the copy method, printed as the first word
 *
 * Throughput is computed as kB per millisecond and printed as MB/s,
 * and multiplied by 8 for the Mb/s figure.
 *
 * Nothing is logged when @time is 0: the throughput would require a
 * division by zero, and a run too short to measure has no meaningful rate.
 */
static void radeon_benchmark_log_results(int n, unsigned size,
					 unsigned int time,
					 unsigned sdomain, unsigned ddomain,
					 char *kind)
{
	unsigned int throughput;

	/* Guard the division below; callers may pass an unmeasurable run. */
	if (time == 0)
		return;

	throughput = (n * (size >> 10)) / time;
	DRM_INFO("radeon: %s %u bo moves of %u kB from"
		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
		 kind, n, size >> 10, sdomain, ddomain, time,
		 throughput * 8, throughput);
}
93 | |||
94 | static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, | ||
95 | unsigned sdomain, unsigned ddomain) | ||
31 | { | 96 | { |
32 | struct radeon_bo *dobj = NULL; | 97 | struct radeon_bo *dobj = NULL; |
33 | struct radeon_bo *sobj = NULL; | 98 | struct radeon_bo *sobj = NULL; |
34 | struct radeon_fence *fence = NULL; | ||
35 | uint64_t saddr, daddr; | 99 | uint64_t saddr, daddr; |
36 | unsigned long start_jiffies; | 100 | int r, n; |
37 | unsigned long end_jiffies; | 101 | unsigned int time; |
38 | unsigned long time; | ||
39 | unsigned i, n, size; | ||
40 | int r; | ||
41 | 102 | ||
42 | size = bsize; | 103 | n = RADEON_BENCHMARK_ITERATIONS; |
43 | n = 1024; | ||
44 | r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj); | 104 | r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj); |
45 | if (r) { | 105 | if (r) { |
46 | goto out_cleanup; | 106 | goto out_cleanup; |
@@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize, | |||
67 | } | 127 | } |
68 | 128 | ||
69 | /* r100 doesn't have dma engine so skip the test */ | 129 | /* r100 doesn't have dma engine so skip the test */ |
70 | if (rdev->asic->copy_dma) { | 130 | /* also, VRAM-to-VRAM test doesn't make much sense for DMA */ |
71 | 131 | /* skip it as well if domains are the same */ | |
72 | start_jiffies = jiffies; | 132 | if ((rdev->asic->copy_dma) && (sdomain != ddomain)) { |
73 | for (i = 0; i < n; i++) { | 133 | time = radeon_benchmark_do_move(rdev, size, saddr, daddr, |
74 | r = radeon_fence_create(rdev, &fence); | 134 | RADEON_BENCHMARK_COPY_DMA, n); |
75 | if (r) { | 135 | if (time < 0) |
76 | goto out_cleanup; | ||
77 | } | ||
78 | |||
79 | r = radeon_copy_dma(rdev, saddr, daddr, | ||
80 | size / RADEON_GPU_PAGE_SIZE, fence); | ||
81 | |||
82 | if (r) { | ||
83 | goto out_cleanup; | ||
84 | } | ||
85 | r = radeon_fence_wait(fence, false); | ||
86 | if (r) { | ||
87 | goto out_cleanup; | ||
88 | } | ||
89 | radeon_fence_unref(&fence); | ||
90 | } | ||
91 | end_jiffies = jiffies; | ||
92 | time = end_jiffies - start_jiffies; | ||
93 | time = jiffies_to_msecs(time); | ||
94 | if (time > 0) { | ||
95 | i = ((n * size) >> 10) / time; | ||
96 | printk(KERN_INFO "radeon: dma %u bo moves of %ukb from" | ||
97 | " %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n", | ||
98 | n, size >> 10, | ||
99 | sdomain, ddomain, time, | ||
100 | i, i * 1000, (i * 1000) / 1024); | ||
101 | } | ||
102 | } | ||
103 | |||
104 | start_jiffies = jiffies; | ||
105 | for (i = 0; i < n; i++) { | ||
106 | r = radeon_fence_create(rdev, &fence); | ||
107 | if (r) { | ||
108 | goto out_cleanup; | ||
109 | } | ||
110 | r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence); | ||
111 | if (r) { | ||
112 | goto out_cleanup; | ||
113 | } | ||
114 | r = radeon_fence_wait(fence, false); | ||
115 | if (r) { | ||
116 | goto out_cleanup; | 136 | goto out_cleanup; |
117 | } | 137 | if (time > 0) |
118 | radeon_fence_unref(&fence); | 138 | radeon_benchmark_log_results(n, size, time, |
119 | } | 139 | sdomain, ddomain, "dma"); |
120 | end_jiffies = jiffies; | ||
121 | time = end_jiffies - start_jiffies; | ||
122 | time = jiffies_to_msecs(time); | ||
123 | if (time > 0) { | ||
124 | i = ((n * size) >> 10) / time; | ||
125 | printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d" | ||
126 | " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10, | ||
127 | sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024); | ||
128 | } | 140 | } |
141 | |||
142 | time = radeon_benchmark_do_move(rdev, size, saddr, daddr, | ||
143 | RADEON_BENCHMARK_COPY_BLIT, n); | ||
144 | if (time < 0) | ||
145 | goto out_cleanup; | ||
146 | if (time > 0) | ||
147 | radeon_benchmark_log_results(n, size, time, | ||
148 | sdomain, ddomain, "blit"); | ||
149 | |||
129 | out_cleanup: | 150 | out_cleanup: |
130 | if (sobj) { | 151 | if (sobj) { |
131 | r = radeon_bo_reserve(sobj, false); | 152 | r = radeon_bo_reserve(sobj, false); |
@@ -143,18 +164,92 @@ out_cleanup: | |||
143 | } | 164 | } |
144 | radeon_bo_unref(&dobj); | 165 | radeon_bo_unref(&dobj); |
145 | } | 166 | } |
146 | if (fence) { | 167 | |
147 | radeon_fence_unref(&fence); | ||
148 | } | ||
149 | if (r) { | 168 | if (r) { |
150 | printk(KERN_WARNING "Error while benchmarking BO move.\n"); | 169 | DRM_ERROR("Error while benchmarking BO move.\n"); |
151 | } | 170 | } |
152 | } | 171 | } |
153 | 172 | ||
154 | void radeon_benchmark(struct radeon_device *rdev) | 173 | void radeon_benchmark(struct radeon_device *rdev, int test_number) |
155 | { | 174 | { |
156 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, | 175 | int i; |
157 | RADEON_GEM_DOMAIN_VRAM); | 176 | int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = { |
158 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, | 177 | 640 * 480 * 4, |
159 | RADEON_GEM_DOMAIN_GTT); | 178 | 720 * 480 * 4, |
179 | 800 * 600 * 4, | ||
180 | 848 * 480 * 4, | ||
181 | 1024 * 768 * 4, | ||
182 | 1152 * 768 * 4, | ||
183 | 1280 * 720 * 4, | ||
184 | 1280 * 800 * 4, | ||
185 | 1280 * 854 * 4, | ||
186 | 1280 * 960 * 4, | ||
187 | 1280 * 1024 * 4, | ||
188 | 1440 * 900 * 4, | ||
189 | 1400 * 1050 * 4, | ||
190 | 1680 * 1050 * 4, | ||
191 | 1600 * 1200 * 4, | ||
192 | 1920 * 1080 * 4, | ||
193 | 1920 * 1200 * 4 | ||
194 | }; | ||
195 | |||
196 | switch (test_number) { | ||
197 | case 1: | ||
198 | /* simple test, VRAM to GTT and GTT to VRAM */ | ||
199 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT, | ||
200 | RADEON_GEM_DOMAIN_VRAM); | ||
201 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, | ||
202 | RADEON_GEM_DOMAIN_GTT); | ||
203 | break; | ||
204 | case 2: | ||
205 | /* simple test, VRAM to VRAM */ | ||
206 | radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM, | ||
207 | RADEON_GEM_DOMAIN_VRAM); | ||
208 | break; | ||
209 | case 3: | ||
210 | /* GTT to VRAM, buffer size sweep, powers of 2 */ | ||
211 | for (i = 1; i <= 65536; i <<= 1) | ||
212 | radeon_benchmark_move(rdev, i*1024, | ||
213 | RADEON_GEM_DOMAIN_GTT, | ||
214 | RADEON_GEM_DOMAIN_VRAM); | ||
215 | break; | ||
216 | case 4: | ||
217 | /* VRAM to GTT, buffer size sweep, powers of 2 */ | ||
218 | for (i = 1; i <= 65536; i <<= 1) | ||
219 | radeon_benchmark_move(rdev, i*1024, | ||
220 | RADEON_GEM_DOMAIN_VRAM, | ||
221 | RADEON_GEM_DOMAIN_GTT); | ||
222 | break; | ||
223 | case 5: | ||
224 | /* VRAM to VRAM, buffer size sweep, powers of 2 */ | ||
225 | for (i = 1; i <= 65536; i <<= 1) | ||
226 | radeon_benchmark_move(rdev, i*1024, | ||
227 | RADEON_GEM_DOMAIN_VRAM, | ||
228 | RADEON_GEM_DOMAIN_VRAM); | ||
229 | break; | ||
230 | case 6: | ||
231 | /* GTT to VRAM, buffer size sweep, common modes */ | ||
232 | for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) | ||
233 | radeon_benchmark_move(rdev, common_modes[i], | ||
234 | RADEON_GEM_DOMAIN_GTT, | ||
235 | RADEON_GEM_DOMAIN_VRAM); | ||
236 | break; | ||
237 | case 7: | ||
238 | /* VRAM to GTT, buffer size sweep, common modes */ | ||
239 | for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) | ||
240 | radeon_benchmark_move(rdev, common_modes[i], | ||
241 | RADEON_GEM_DOMAIN_VRAM, | ||
242 | RADEON_GEM_DOMAIN_GTT); | ||
243 | break; | ||
244 | case 8: | ||
245 | /* VRAM to VRAM, buffer size sweep, common modes */ | ||
246 | for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++) | ||
247 | radeon_benchmark_move(rdev, common_modes[i], | ||
248 | RADEON_GEM_DOMAIN_VRAM, | ||
249 | RADEON_GEM_DOMAIN_VRAM); | ||
250 | break; | ||
251 | |||
252 | default: | ||
253 | DRM_ERROR("Unknown benchmark\n"); | ||
254 | } | ||
160 | } | 255 | } |