diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2018-09-20 06:58:09 -0400 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2018-09-20 07:49:25 -0400 |
commit | a47cd45a37c94bb31ccf9bf6ec3d94cbcd57d6ee (patch) | |
tree | e0d9687e62bd9b725e2931a09581f601a378f114 | |
parent | 82c7c4fcbf84a0943b92050e08daec85f1d9670f (diff) |
drm/i915/selftests: Basic stress test for rapid context switching
We need to exercise the HW and submission paths for switching contexts
rapidly to check that features such as execlists' wa_tail are adequate.
Plus it's an interesting baseline latency metric.
v2: Check the initial request for allocation errors
v3: Use finite waits for more robust handling of broken code
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920105809.1872-1-chris@chris-wilson.co.uk
-rw-r--r-- | drivers/gpu/drm/i915/selftests/i915_gem_context.c | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 07d700b7cb94..441a51d4aa54 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c | |||
@@ -22,6 +22,8 @@ | |||
22 | * | 22 | * |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/prime_numbers.h> | ||
26 | |||
25 | #include "../i915_selftest.h" | 27 | #include "../i915_selftest.h" |
26 | #include "i915_random.h" | 28 | #include "i915_random.h" |
27 | #include "igt_flush_test.h" | 29 | #include "igt_flush_test.h" |
@@ -32,6 +34,198 @@ | |||
32 | 34 | ||
33 | #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) | 35 | #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) |
34 | 36 | ||
/*
 * Bookkeeping for a single live test run.
 *
 * Filled in by begin_live_test() and consumed by end_live_test(): it
 * remembers which device is under test, the two strings used to prefix
 * log messages, and the reset count sampled when the test began.
 */
struct live_test {
	struct drm_i915_private *i915;	/* device under test */
	const char *func;		/* printed first in "%s(%s)" messages */
	const char *name;		/* printed in parentheses after func */

	unsigned int reset_count;	/* i915_reset_count() at begin */
};
44 | |||
45 | static int begin_live_test(struct live_test *t, | ||
46 | struct drm_i915_private *i915, | ||
47 | const char *func, | ||
48 | const char *name) | ||
49 | { | ||
50 | int err; | ||
51 | |||
52 | t->i915 = i915; | ||
53 | t->func = func; | ||
54 | t->name = name; | ||
55 | |||
56 | err = i915_gem_wait_for_idle(i915, | ||
57 | I915_WAIT_LOCKED, | ||
58 | MAX_SCHEDULE_TIMEOUT); | ||
59 | if (err) { | ||
60 | pr_err("%s(%s): failed to idle before, with err=%d!", | ||
61 | func, name, err); | ||
62 | return err; | ||
63 | } | ||
64 | |||
65 | i915->gpu_error.missed_irq_rings = 0; | ||
66 | t->reset_count = i915_reset_count(&i915->gpu_error); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int end_live_test(struct live_test *t) | ||
72 | { | ||
73 | struct drm_i915_private *i915 = t->i915; | ||
74 | |||
75 | if (igt_flush_test(i915, I915_WAIT_LOCKED)) | ||
76 | return -EIO; | ||
77 | |||
78 | if (t->reset_count != i915_reset_count(&i915->gpu_error)) { | ||
79 | pr_err("%s(%s): GPU was reset %d times!\n", | ||
80 | t->func, t->name, | ||
81 | i915_reset_count(&i915->gpu_error) - t->reset_count); | ||
82 | return -EIO; | ||
83 | } | ||
84 | |||
85 | if (i915->gpu_error.missed_irq_rings) { | ||
86 | pr_err("%s(%s): Missed interrupts on engines %lx\n", | ||
87 | t->func, t->name, i915->gpu_error.missed_irq_rings); | ||
88 | return -EIO; | ||
89 | } | ||
90 | |||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static int live_nop_switch(void *arg) | ||
95 | { | ||
96 | const unsigned int nctx = 1024; | ||
97 | struct drm_i915_private *i915 = arg; | ||
98 | struct intel_engine_cs *engine; | ||
99 | struct i915_gem_context **ctx; | ||
100 | enum intel_engine_id id; | ||
101 | struct drm_file *file; | ||
102 | struct live_test t; | ||
103 | unsigned long n; | ||
104 | int err = -ENODEV; | ||
105 | |||
106 | /* | ||
107 | * Create as many contexts as we can feasibly get away with | ||
108 | * and check we can switch between them rapidly. | ||
109 | * | ||
110 | * Serves as very simple stress test for submission and HW switching | ||
111 | * between contexts. | ||
112 | */ | ||
113 | |||
114 | if (!DRIVER_CAPS(i915)->has_logical_contexts) | ||
115 | return 0; | ||
116 | |||
117 | file = mock_file(i915); | ||
118 | if (IS_ERR(file)) | ||
119 | return PTR_ERR(file); | ||
120 | |||
121 | mutex_lock(&i915->drm.struct_mutex); | ||
122 | |||
123 | ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); | ||
124 | if (!ctx) { | ||
125 | err = -ENOMEM; | ||
126 | goto out_unlock; | ||
127 | } | ||
128 | |||
129 | for (n = 0; n < nctx; n++) { | ||
130 | ctx[n] = i915_gem_create_context(i915, file->driver_priv); | ||
131 | if (IS_ERR(ctx[n])) { | ||
132 | err = PTR_ERR(ctx[n]); | ||
133 | goto out_unlock; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | for_each_engine(engine, i915, id) { | ||
138 | struct i915_request *rq; | ||
139 | unsigned long end_time, prime; | ||
140 | ktime_t times[2] = {}; | ||
141 | |||
142 | times[0] = ktime_get_raw(); | ||
143 | for (n = 0; n < nctx; n++) { | ||
144 | rq = i915_request_alloc(engine, ctx[n]); | ||
145 | if (IS_ERR(rq)) { | ||
146 | err = PTR_ERR(rq); | ||
147 | goto out_unlock; | ||
148 | } | ||
149 | i915_request_add(rq); | ||
150 | } | ||
151 | if (i915_request_wait(rq, | ||
152 | I915_WAIT_LOCKED, | ||
153 | HZ / 5) < 0) { | ||
154 | pr_err("Failed to populated %d contexts\n", nctx); | ||
155 | i915_gem_set_wedged(i915); | ||
156 | err = -EIO; | ||
157 | goto out_unlock; | ||
158 | } | ||
159 | |||
160 | times[1] = ktime_get_raw(); | ||
161 | |||
162 | pr_info("Populated %d contexts on %s in %lluns\n", | ||
163 | nctx, engine->name, ktime_to_ns(times[1] - times[0])); | ||
164 | |||
165 | err = begin_live_test(&t, i915, __func__, engine->name); | ||
166 | if (err) | ||
167 | goto out_unlock; | ||
168 | |||
169 | end_time = jiffies + i915_selftest.timeout_jiffies; | ||
170 | for_each_prime_number_from(prime, 2, 8192) { | ||
171 | times[1] = ktime_get_raw(); | ||
172 | |||
173 | for (n = 0; n < prime; n++) { | ||
174 | rq = i915_request_alloc(engine, ctx[n % nctx]); | ||
175 | if (IS_ERR(rq)) { | ||
176 | err = PTR_ERR(rq); | ||
177 | goto out_unlock; | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * This space is left intentionally blank. | ||
182 | * | ||
183 | * We do not actually want to perform any | ||
184 | * action with this request, we just want | ||
185 | * to measure the latency in allocation | ||
186 | * and submission of our breadcrumbs - | ||
187 | * ensuring that the bare request is sufficient | ||
188 | * for the system to work (i.e. proper HEAD | ||
189 | * tracking of the rings, interrupt handling, | ||
190 | * etc). It also gives us the lowest bounds | ||
191 | * for latency. | ||
192 | */ | ||
193 | |||
194 | i915_request_add(rq); | ||
195 | } | ||
196 | if (i915_request_wait(rq, | ||
197 | I915_WAIT_LOCKED, | ||
198 | HZ / 5) < 0) { | ||
199 | pr_err("Switching between %ld contexts timed out\n", | ||
200 | prime); | ||
201 | i915_gem_set_wedged(i915); | ||
202 | break; | ||
203 | } | ||
204 | |||
205 | times[1] = ktime_sub(ktime_get_raw(), times[1]); | ||
206 | if (prime == 2) | ||
207 | times[0] = times[1]; | ||
208 | |||
209 | if (__igt_timeout(end_time, NULL)) | ||
210 | break; | ||
211 | } | ||
212 | |||
213 | err = end_live_test(&t); | ||
214 | if (err) | ||
215 | goto out_unlock; | ||
216 | |||
217 | pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", | ||
218 | engine->name, | ||
219 | ktime_to_ns(times[0]), | ||
220 | prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); | ||
221 | } | ||
222 | |||
223 | out_unlock: | ||
224 | mutex_unlock(&i915->drm.struct_mutex); | ||
225 | mock_file_free(i915, file); | ||
226 | return err; | ||
227 | } | ||
228 | |||
35 | static struct i915_vma * | 229 | static struct i915_vma * |
36 | gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) | 230 | gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) |
37 | { | 231 | { |
@@ -714,6 +908,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) | |||
714 | { | 908 | { |
715 | static const struct i915_subtest tests[] = { | 909 | static const struct i915_subtest tests[] = { |
716 | SUBTEST(igt_switch_to_kernel_context), | 910 | SUBTEST(igt_switch_to_kernel_context), |
911 | SUBTEST(live_nop_switch), | ||
717 | SUBTEST(igt_ctx_exec), | 912 | SUBTEST(igt_ctx_exec), |
718 | SUBTEST(igt_ctx_readonly), | 913 | SUBTEST(igt_ctx_readonly), |
719 | }; | 914 | }; |