aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Mario Kleiner <mario.kleiner.de@gmail.com> 2016-06-23 02:17:50 -0400
committer Eric Anholt <eric@anholt.net> 2016-07-11 20:17:34 -0400
commit 1bf59f1dcbe25272f6b5d870054647e58a8a9c55 (patch)
tree 74c872364b3b2abbf2b0a94ff90f846672969a52
parent 7a100969f30b77761901c05a5c810bcaea65df44 (diff)
drm/vc4: Implement precise vblank timestamping.
Precise vblank timestamping is implemented via the usual scanout position based method. On VC4 the pixelvalves (PV) do not have a scanout position register. Only the hardware video scaler (HVS) has a similar register which describes which scanline for the output is currently composited and stored in the HVS fifo for later consumption by the PV. This causes a problem in that the HVS runs at a much faster clock (system clock / audio gate) than the PV which runs at video mode dot clock, so unless the fifo between HVS and PV is full, the HVS will progress faster in its observable read line position than video scan rate, so the HVS position reading can't be directly translated into a scanout position for timestamp correction. Additionally when the PV is in vblank, it doesn't consume from the fifo, so the fifo gets full very quickly and then the HVS stops compositing until the PV enters active scanout and starts consuming scanlines from the fifo again, making new space for the HVS to composite. Therefore a simple translation of HVS read position into elapsed time since (or to) start of active scanout does not work, but for the most interesting cases we can still get useful and sufficiently accurate results: 1. The PV enters active scanout of a new frame with the fifo of the HVS completely full, and the HVS can refill any fifo line which gets consumed and thereby freed up by the PV during active scanout very quickly. Therefore the PV and HVS work effectively in lock-step during active scanout with the fifo never having more than 1 scanline freed up by the PV before it gets refilled. The PV's real scanout position is therefore trailing the HVS compositing position as scanoutpos = hvspos - fifosize and we can get the true scanoutpos as HVS readpos minus fifo size, so precise timestamping works while in active scanout, except for the last few scanlines of the frame, when the HVS reaches end of frame, stops compositing and the PV catches up and drains the fifo. 
This special case would only introduce minor errors though. 2. If we are in vblank, then we can only guess something reasonable. If called from vblank irq, we assume the irq is usually dispatched with minimum delay, so we can take a timestamp taken at entry into the vblank irq handler as a baseline and then add a full vblank duration until the guessed start of active scanout. As irq dispatch is usually pretty low latency this works with relatively low jitter and good results. If we aren't called from vblank then we could be anywhere within the vblank interval, so we return a neutral result, simply the current system timestamp, and hope for the best. Measurement shows the generated timestamps to be rather precise, and at least never off more than 1 vblank duration worst-case. Limitations: Doesn't work well yet for interlaced video modes, therefore disabled in interlaced mode for now. v2: Use the DISPBASE registers to determine the FIFO size (changes by anholt) Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> Signed-off-by: Eric Anholt <eric@anholt.net> Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
-rw-r--r-- drivers/gpu/drm/vc4/vc4_crtc.c 162
-rw-r--r-- drivers/gpu/drm/vc4/vc4_drv.c 2
-rw-r--r-- drivers/gpu/drm/vc4/vc4_drv.h 7
-rw-r--r-- drivers/gpu/drm/vc4/vc4_regs.h 22
4 files changed, 192 insertions, 1 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 0f18b76c7906..3b7db17c356d 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -46,12 +46,17 @@ struct vc4_crtc {
46 const struct vc4_crtc_data *data; 46 const struct vc4_crtc_data *data;
47 void __iomem *regs; 47 void __iomem *regs;
48 48
49 /* Timestamp at start of vblank irq - unaffected by lock delays. */
50 ktime_t t_vblank;
51
49 /* Which HVS channel we're using for our CRTC. */ 52 /* Which HVS channel we're using for our CRTC. */
50 int channel; 53 int channel;
51 54
52 u8 lut_r[256]; 55 u8 lut_r[256];
53 u8 lut_g[256]; 56 u8 lut_g[256];
54 u8 lut_b[256]; 57 u8 lut_b[256];
58 /* Size in pixels of the COB memory allocated to this CRTC. */
59 u32 cob_size;
55 60
56 struct drm_pending_vblank_event *event; 61 struct drm_pending_vblank_event *event;
57}; 62};
@@ -146,6 +151,144 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused)
146} 151}
147#endif 152#endif
148 153
154int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
155 unsigned int flags, int *vpos, int *hpos,
156 ktime_t *stime, ktime_t *etime,
157 const struct drm_display_mode *mode)
158{
159 struct vc4_dev *vc4 = to_vc4_dev(dev);
160 struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
161 u32 val;
162 int fifo_lines;
163 int vblank_lines;
164 int ret = 0;
165
166 /*
167 * XXX Doesn't work well in interlaced mode yet, partially due
168 * to problems in vc4 kms or drm core interlaced mode handling,
169 * so disable for now in interlaced mode.
170 */
171 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
172 return ret;
173
174 /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
175
176 /* Get optional system timestamp before query. */
177 if (stime)
178 *stime = ktime_get();
179
180 /*
181 * Read vertical scanline which is currently composed for our
182 * pixelvalve by the HVS, and also the scaler status.
183 */
184 val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
185
186 /* Get optional system timestamp after query. */
187 if (etime)
188 *etime = ktime_get();
189
190 /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
191
192 /* Vertical position of hvs composed scanline. */
193 *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
194
195 /* No hpos info available. */
196 if (hpos)
197 *hpos = 0;
198
199 /* This is the offset we need for translating hvs -> pv scanout pos. */
200 fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
201
202 if (fifo_lines > 0)
203 ret |= DRM_SCANOUTPOS_VALID;
204
205 /* HVS more than fifo_lines into frame for compositing? */
206 if (*vpos > fifo_lines) {
207 /*
208 * We are in active scanout and can get some meaningful results
209 * from HVS. The actual PV scanout can not trail behind more
210 * than fifo_lines as that is the fifo's capacity. Assume that
211 * in active scanout the HVS and PV work in lockstep wrt. HVS
212 * refilling the fifo and PV consuming from the fifo, ie.
213 * whenever the PV consumes and frees up a scanline in the
214 * fifo, the HVS will immediately refill it, therefore
215 * incrementing vpos. Therefore we choose HVS read position -
216 * fifo size in scanlines as a estimate of the real scanout
217 * position of the PV.
218 */
219 *vpos -= fifo_lines + 1;
220 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
221 *vpos /= 2;
222
223 ret |= DRM_SCANOUTPOS_ACCURATE;
224 return ret;
225 }
226
227 /*
228 * Less: This happens when we are in vblank and the HVS, after getting
229 * the VSTART restart signal from the PV, just started refilling its
230 * fifo with new lines from the top-most lines of the new framebuffers.
231 * The PV does not scan out in vblank, so does not remove lines from
232 * the fifo, so the fifo will be full quickly and the HVS has to pause.
233 * We can't get meaningful readings wrt. scanline position of the PV
234 * and need to make things up in a approximative but consistent way.
235 */
236 ret |= DRM_SCANOUTPOS_IN_VBLANK;
237 vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
238
239 if (flags & DRM_CALLED_FROM_VBLIRQ) {
240 /*
241 * Assume the irq handler got called close to first
242 * line of vblank, so PV has about a full vblank
243 * scanlines to go, and as a base timestamp use the
244 * one taken at entry into vblank irq handler, so it
245 * is not affected by random delays due to lock
246 * contention on event_lock or vblank_time lock in
247 * the core.
248 */
249 *vpos = -vblank_lines;
250
251 if (stime)
252 *stime = vc4_crtc->t_vblank;
253 if (etime)
254 *etime = vc4_crtc->t_vblank;
255
256 /*
257 * If the HVS fifo is not yet full then we know for certain
258 * we are at the very beginning of vblank, as the hvs just
259 * started refilling, and the stime and etime timestamps
260 * truly correspond to start of vblank.
261 */
262 if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
263 ret |= DRM_SCANOUTPOS_ACCURATE;
264 } else {
265 /*
266 * No clue where we are inside vblank. Return a vpos of zero,
267 * which will cause calling code to just return the etime
268 * timestamp uncorrected. At least this is no worse than the
269 * standard fallback.
270 */
271 *vpos = 0;
272 }
273
274 return ret;
275}
276
277int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
278 int *max_error, struct timeval *vblank_time,
279 unsigned flags)
280{
281 struct vc4_dev *vc4 = to_vc4_dev(dev);
282 struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
283 struct drm_crtc *crtc = &vc4_crtc->base;
284 struct drm_crtc_state *state = crtc->state;
285
286 /* Helper routine in DRM core does all the work: */
287 return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
288 vblank_time, flags,
289 &state->adjusted_mode);
290}
291
149static void vc4_crtc_destroy(struct drm_crtc *crtc) 292static void vc4_crtc_destroy(struct drm_crtc *crtc)
150{ 293{
151 drm_crtc_cleanup(crtc); 294 drm_crtc_cleanup(crtc);
@@ -526,6 +669,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
526 irqreturn_t ret = IRQ_NONE; 669 irqreturn_t ret = IRQ_NONE;
527 670
528 if (stat & PV_INT_VFP_START) { 671 if (stat & PV_INT_VFP_START) {
672 vc4_crtc->t_vblank = ktime_get();
529 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); 673 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
530 drm_crtc_handle_vblank(&vc4_crtc->base); 674 drm_crtc_handle_vblank(&vc4_crtc->base);
531 vc4_crtc_handle_page_flip(vc4_crtc); 675 vc4_crtc_handle_page_flip(vc4_crtc);
@@ -730,6 +874,22 @@ static void vc4_set_crtc_possible_masks(struct drm_device *drm,
730 } 874 }
731} 875}
732 876
877static void
878vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
879{
880 struct drm_device *drm = vc4_crtc->base.dev;
881 struct vc4_dev *vc4 = to_vc4_dev(drm);
882 u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
883 /* Top/base are supposed to be 4-pixel aligned, but the
884 * Raspberry Pi firmware fills the low bits (which are
885 * presumably ignored).
886 */
887 u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
888 u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
889
890 vc4_crtc->cob_size = top - base + 4;
891}
892
733static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) 893static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
734{ 894{
735 struct platform_device *pdev = to_platform_device(dev); 895 struct platform_device *pdev = to_platform_device(dev);
@@ -806,6 +966,8 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
806 crtc->cursor = cursor_plane; 966 crtc->cursor = cursor_plane;
807 } 967 }
808 968
969 vc4_crtc_get_cob_allocation(vc4_crtc);
970
809 CRTC_WRITE(PV_INTEN, 0); 971 CRTC_WRITE(PV_INTEN, 0);
810 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); 972 CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
811 ret = devm_request_irq(dev, platform_get_irq(pdev, 0), 973 ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 6d97d3edb2d2..65f77cc243a6 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -92,6 +92,8 @@ static struct drm_driver vc4_drm_driver = {
92 .enable_vblank = vc4_enable_vblank, 92 .enable_vblank = vc4_enable_vblank,
93 .disable_vblank = vc4_disable_vblank, 93 .disable_vblank = vc4_disable_vblank,
94 .get_vblank_counter = drm_vblank_no_hw_counter, 94 .get_vblank_counter = drm_vblank_no_hw_counter,
95 .get_scanout_position = vc4_crtc_get_scanoutpos,
96 .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
95 97
96#if defined(CONFIG_DEBUG_FS) 98#if defined(CONFIG_DEBUG_FS)
97 .debugfs_init = vc4_debugfs_init, 99 .debugfs_init = vc4_debugfs_init,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 37cac59401d7..1b5dc6074244 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -415,6 +415,13 @@ extern struct platform_driver vc4_crtc_driver;
415int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); 415int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id);
416void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); 416void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
417int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); 417int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
418int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
419 unsigned int flags, int *vpos, int *hpos,
420 ktime_t *stime, ktime_t *etime,
421 const struct drm_display_mode *mode);
422int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
423 int *max_error, struct timeval *vblank_time,
424 unsigned flags);
418 425
419/* vc4_debugfs.c */ 426/* vc4_debugfs.c */
420int vc4_debugfs_init(struct drm_minor *minor); 427int vc4_debugfs_init(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index f99eece4cc97..160942a9180e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -366,7 +366,6 @@
366# define SCALER_DISPBKGND_FILL BIT(24) 366# define SCALER_DISPBKGND_FILL BIT(24)
367 367
368#define SCALER_DISPSTAT0 0x00000048 368#define SCALER_DISPSTAT0 0x00000048
369#define SCALER_DISPBASE0 0x0000004c
370# define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30) 369# define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
371# define SCALER_DISPSTATX_MODE_SHIFT 30 370# define SCALER_DISPSTATX_MODE_SHIFT 30
372# define SCALER_DISPSTATX_MODE_DISABLED 0 371# define SCALER_DISPSTATX_MODE_DISABLED 0
@@ -375,6 +374,24 @@
375# define SCALER_DISPSTATX_MODE_EOF 3 374# define SCALER_DISPSTATX_MODE_EOF 3
376# define SCALER_DISPSTATX_FULL BIT(29) 375# define SCALER_DISPSTATX_FULL BIT(29)
377# define SCALER_DISPSTATX_EMPTY BIT(28) 376# define SCALER_DISPSTATX_EMPTY BIT(28)
377# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
378# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
379# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
380# define SCALER_DISPSTATX_LINE_SHIFT 0
381
382#define SCALER_DISPBASE0 0x0000004c
383/* Last pixel in the COB (display FIFO memory) allocated to this HVS
384 * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
385 * next COB base).
386 */
387# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
388# define SCALER_DISPBASEX_TOP_SHIFT 16
389/* First pixel in the COB (display FIFO memory) allocated to this HVS
390 * channel. Must be 4-pixel aligned.
391 */
392# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
393# define SCALER_DISPBASEX_BASE_SHIFT 0
394
378#define SCALER_DISPCTRL1 0x00000050 395#define SCALER_DISPCTRL1 0x00000050
379#define SCALER_DISPBKGND1 0x00000054 396#define SCALER_DISPBKGND1 0x00000054
380#define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \ 397#define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
@@ -385,6 +402,9 @@
385 (x) * (SCALER_DISPSTAT1 - \ 402 (x) * (SCALER_DISPSTAT1 - \
386 SCALER_DISPSTAT0)) 403 SCALER_DISPSTAT0))
387#define SCALER_DISPBASE1 0x0000005c 404#define SCALER_DISPBASE1 0x0000005c
405#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
406 (x) * (SCALER_DISPBASE1 - \
407 SCALER_DISPBASE0))
388#define SCALER_DISPCTRL2 0x00000060 408#define SCALER_DISPCTRL2 0x00000060
389#define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \ 409#define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
390 (x) * (SCALER_DISPCTRL1 - \ 410 (x) * (SCALER_DISPCTRL1 - \