aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-09-20 06:58:09 -0400
committerChris Wilson <chris@chris-wilson.co.uk>2018-09-20 07:49:25 -0400
commita47cd45a37c94bb31ccf9bf6ec3d94cbcd57d6ee (patch)
treee0d9687e62bd9b725e2931a09581f601a378f114
parent82c7c4fcbf84a0943b92050e08daec85f1d9670f (diff)
drm/i915/selftests: Basic stress test for rapid context switching
We need to exercise the HW and submission paths for switching contexts rapidly to check that features such as execlists' wa_tail are adequate. Plus it's an interesting baseline latency metric. v2: Check the initial request for allocation errors v3: Use finite waits for more robust handling of broken code Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180920105809.1872-1-chris@chris-wilson.co.uk
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_context.c195
1 files changed, 195 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 07d700b7cb94..441a51d4aa54 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -22,6 +22,8 @@
22 * 22 *
23 */ 23 */
24 24
25#include <linux/prime_numbers.h>
26
25#include "../i915_selftest.h" 27#include "../i915_selftest.h"
26#include "i915_random.h" 28#include "i915_random.h"
27#include "igt_flush_test.h" 29#include "igt_flush_test.h"
@@ -32,6 +34,198 @@
32 34
33#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 35#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
34 36
/*
 * Bookkeeping for a single live test run: filled in by begin_live_test()
 * and examined again by end_live_test().
 */
struct live_test {
	/* Device the test is running against. */
	struct drm_i915_private *i915;
	/* Name of the calling test; first part of the "%s(%s)" log prefix. */
	const char *func;
	/* Label for this run; second part of the "%s(%s)" log prefix. */
	const char *name;

	/* Value of i915_reset_count() sampled when the test began. */
	unsigned int reset_count;
};
44
45static int begin_live_test(struct live_test *t,
46 struct drm_i915_private *i915,
47 const char *func,
48 const char *name)
49{
50 int err;
51
52 t->i915 = i915;
53 t->func = func;
54 t->name = name;
55
56 err = i915_gem_wait_for_idle(i915,
57 I915_WAIT_LOCKED,
58 MAX_SCHEDULE_TIMEOUT);
59 if (err) {
60 pr_err("%s(%s): failed to idle before, with err=%d!",
61 func, name, err);
62 return err;
63 }
64
65 i915->gpu_error.missed_irq_rings = 0;
66 t->reset_count = i915_reset_count(&i915->gpu_error);
67
68 return 0;
69}
70
71static int end_live_test(struct live_test *t)
72{
73 struct drm_i915_private *i915 = t->i915;
74
75 if (igt_flush_test(i915, I915_WAIT_LOCKED))
76 return -EIO;
77
78 if (t->reset_count != i915_reset_count(&i915->gpu_error)) {
79 pr_err("%s(%s): GPU was reset %d times!\n",
80 t->func, t->name,
81 i915_reset_count(&i915->gpu_error) - t->reset_count);
82 return -EIO;
83 }
84
85 if (i915->gpu_error.missed_irq_rings) {
86 pr_err("%s(%s): Missed interrupts on engines %lx\n",
87 t->func, t->name, i915->gpu_error.missed_irq_rings);
88 return -EIO;
89 }
90
91 return 0;
92}
93
94static int live_nop_switch(void *arg)
95{
96 const unsigned int nctx = 1024;
97 struct drm_i915_private *i915 = arg;
98 struct intel_engine_cs *engine;
99 struct i915_gem_context **ctx;
100 enum intel_engine_id id;
101 struct drm_file *file;
102 struct live_test t;
103 unsigned long n;
104 int err = -ENODEV;
105
106 /*
107 * Create as many contexts as we can feasibly get away with
108 * and check we can switch between them rapidly.
109 *
110 * Serves as very simple stress test for submission and HW switching
111 * between contexts.
112 */
113
114 if (!DRIVER_CAPS(i915)->has_logical_contexts)
115 return 0;
116
117 file = mock_file(i915);
118 if (IS_ERR(file))
119 return PTR_ERR(file);
120
121 mutex_lock(&i915->drm.struct_mutex);
122
123 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
124 if (!ctx) {
125 err = -ENOMEM;
126 goto out_unlock;
127 }
128
129 for (n = 0; n < nctx; n++) {
130 ctx[n] = i915_gem_create_context(i915, file->driver_priv);
131 if (IS_ERR(ctx[n])) {
132 err = PTR_ERR(ctx[n]);
133 goto out_unlock;
134 }
135 }
136
137 for_each_engine(engine, i915, id) {
138 struct i915_request *rq;
139 unsigned long end_time, prime;
140 ktime_t times[2] = {};
141
142 times[0] = ktime_get_raw();
143 for (n = 0; n < nctx; n++) {
144 rq = i915_request_alloc(engine, ctx[n]);
145 if (IS_ERR(rq)) {
146 err = PTR_ERR(rq);
147 goto out_unlock;
148 }
149 i915_request_add(rq);
150 }
151 if (i915_request_wait(rq,
152 I915_WAIT_LOCKED,
153 HZ / 5) < 0) {
154 pr_err("Failed to populated %d contexts\n", nctx);
155 i915_gem_set_wedged(i915);
156 err = -EIO;
157 goto out_unlock;
158 }
159
160 times[1] = ktime_get_raw();
161
162 pr_info("Populated %d contexts on %s in %lluns\n",
163 nctx, engine->name, ktime_to_ns(times[1] - times[0]));
164
165 err = begin_live_test(&t, i915, __func__, engine->name);
166 if (err)
167 goto out_unlock;
168
169 end_time = jiffies + i915_selftest.timeout_jiffies;
170 for_each_prime_number_from(prime, 2, 8192) {
171 times[1] = ktime_get_raw();
172
173 for (n = 0; n < prime; n++) {
174 rq = i915_request_alloc(engine, ctx[n % nctx]);
175 if (IS_ERR(rq)) {
176 err = PTR_ERR(rq);
177 goto out_unlock;
178 }
179
180 /*
181 * This space is left intentionally blank.
182 *
183 * We do not actually want to perform any
184 * action with this request, we just want
185 * to measure the latency in allocation
186 * and submission of our breadcrumbs -
187 * ensuring that the bare request is sufficient
188 * for the system to work (i.e. proper HEAD
189 * tracking of the rings, interrupt handling,
190 * etc). It also gives us the lowest bounds
191 * for latency.
192 */
193
194 i915_request_add(rq);
195 }
196 if (i915_request_wait(rq,
197 I915_WAIT_LOCKED,
198 HZ / 5) < 0) {
199 pr_err("Switching between %ld contexts timed out\n",
200 prime);
201 i915_gem_set_wedged(i915);
202 break;
203 }
204
205 times[1] = ktime_sub(ktime_get_raw(), times[1]);
206 if (prime == 2)
207 times[0] = times[1];
208
209 if (__igt_timeout(end_time, NULL))
210 break;
211 }
212
213 err = end_live_test(&t);
214 if (err)
215 goto out_unlock;
216
217 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
218 engine->name,
219 ktime_to_ns(times[0]),
220 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
221 }
222
223out_unlock:
224 mutex_unlock(&i915->drm.struct_mutex);
225 mock_file_free(i915, file);
226 return err;
227}
228
35static struct i915_vma * 229static struct i915_vma *
36gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) 230gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value)
37{ 231{
@@ -714,6 +908,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
714{ 908{
715 static const struct i915_subtest tests[] = { 909 static const struct i915_subtest tests[] = {
716 SUBTEST(igt_switch_to_kernel_context), 910 SUBTEST(igt_switch_to_kernel_context),
911 SUBTEST(live_nop_switch),
717 SUBTEST(igt_ctx_exec), 912 SUBTEST(igt_ctx_exec),
718 SUBTEST(igt_ctx_readonly), 913 SUBTEST(igt_ctx_readonly),
719 }; 914 };