summaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch')
-rw-r--r-- target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch 358
1 files changed, 358 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch b/target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch
new file mode 100644
index 0000000000..8ae2819502
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.4/0456-drm-vc4-Implement-precise-vblank-timestamping.patch
@@ -0,0 +1,358 @@
+From d1a9a03cbe1110756a63d4a3747e22eb8417f75e Mon Sep 17 00:00:00 2001
+From: Mario Kleiner <mario.kleiner.de@gmail.com>
+Date: Thu, 23 Jun 2016 08:17:50 +0200
+Subject: [PATCH] drm/vc4: Implement precise vblank timestamping.
+
+Precise vblank timestamping is implemented via the
+usual scanout position based method. On VC4 the
+pixelvalves PV do not have a scanout position
+register. Only the hardware video scaler HVS has a
+similar register which describes which scanline for
+the output is currently composited and stored in the
+HVS fifo for later consumption by the PV.
+
+This causes a problem in that the HVS runs at a much
+faster clock (system clock / audio gate) than the PV
+which runs at video mode dot clock, so unless the
+fifo between HVS and PV is full, the HVS will progress
+faster in its observable read line position than video
+scan rate, so the HVS position reading can't be directly
+translated into a scanout position for timestamp correction.
+
+Additionally when the PV is in vblank, it doesn't consume
+from the fifo, so the fifo gets full very quickly and then
+the HVS stops compositing until the PV enters active scanout
+and starts consuming scanlines from the fifo again, making
+new space for the HVS to composite.
+
+Therefore a simple translation of HVS read position into
+elapsed time since (or to) start of active scanout does
+not work, but for the most interesting cases we can still
+get useful and sufficiently accurate results:
+
+1. The PV enters active scanout of a new frame with the
+ fifo of the HVS completely full, and the HVS can refill
+ any fifo line which gets consumed and thereby freed up by
+ the PV during active scanout very quickly. Therefore the
+ PV and HVS work effectively in lock-step during active
+ scanout with the fifo never having more than 1 scanline
+ freed up by the PV before it gets refilled. The PV's
+ real scanout position is therefore trailing the HVS
+ compositing position as scanoutpos = hvspos - fifosize
+ and we can get the true scanoutpos as HVS readpos minus
+ fifo size, so precise timestamping works while in active
+ scanout, except for the last few scanlines of the frame,
+ when the HVS reaches end of frame, stops compositing and
+ the PV catches up and drains the fifo. This special case
+ would only introduce minor errors though.
+
+2. If we are in vblank, then we can only guess something
+ reasonable. If called from vblank irq, we assume the irq is
+ usually dispatched with minimum delay, so we can take a
+ timestamp taken at entry into the vblank irq handler as a
+ baseline and then add a full vblank duration until the
+ guessed start of active scanout. As irq dispatch is usually
+ pretty low latency this works with relatively low jitter and
+ good results.
+
+ If we aren't called from vblank then we could be anywhere
+ within the vblank interval, so we return a neutral result,
+ simply the current system timestamp, and hope for the best.
+
+Measurement shows the generated timestamps to be rather precise,
+and at least never off by more than 1 vblank duration worst-case.
+
+Limitations: Doesn't work well yet for interlaced video modes,
+ therefore disabled in interlaced mode for now.
+
+v2: Use the DISPBASE registers to determine the FIFO size (changes
+ by anholt)
+
+Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
+Signed-off-by: Eric Anholt <eric@anholt.net>
+Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> (v2)
+(cherry picked from commit 1bf59f1dcbe25272f6b5d870054647e58a8a9c55)
+---
+ drivers/gpu/drm/vc4/vc4_crtc.c | 162 +++++++++++++++++++++++++++++++++++++++++
+ drivers/gpu/drm/vc4/vc4_drv.c | 2 +
+ drivers/gpu/drm/vc4/vc4_drv.h | 7 ++
+ drivers/gpu/drm/vc4/vc4_regs.h | 22 +++++-
+ 4 files changed, 192 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -47,12 +47,17 @@ struct vc4_crtc {
+ const struct vc4_crtc_data *data;
+ void __iomem *regs;
+
++ /* Timestamp at start of vblank irq - unaffected by lock delays. */
++ ktime_t t_vblank;
++
+ /* Which HVS channel we're using for our CRTC. */
+ int channel;
+
+ u8 lut_r[256];
+ u8 lut_g[256];
+ u8 lut_b[256];
++ /* Size in pixels of the COB memory allocated to this CRTC. */
++ u32 cob_size;
+
+ struct drm_pending_vblank_event *event;
+ };
+@@ -134,6 +139,144 @@ int vc4_crtc_debugfs_regs(struct seq_fil
+ }
+ #endif
+
++int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
++ unsigned int flags, int *vpos, int *hpos,
++ ktime_t *stime, ktime_t *etime,
++ const struct drm_display_mode *mode)
++{
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
++ u32 val;
++ int fifo_lines;
++ int vblank_lines;
++ int ret = 0;
++
++ /*
++ * XXX Doesn't work well in interlaced mode yet, partially due
++ * to problems in vc4 kms or drm core interlaced mode handling,
++ * so return 0 for now (the interlace vpos halving below is dead).
++ */
++ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
++ return ret;
++
++ /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
++
++ /* Get optional system timestamp before query. */
++ if (stime)
++ *stime = ktime_get();
++
++ /*
++ * Read vertical scanline which is currently composed for our
++ * pixelvalve by the HVS, and also the scaler status.
++ */
++ val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
++
++ /* Get optional system timestamp after query. */
++ if (etime)
++ *etime = ktime_get();
++
++ /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
++
++ /* Vertical position of hvs composed scanline. */
++ *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
++
++ /* No hpos info available. */
++ if (hpos)
++ *hpos = 0;
++
++ /* This is the offset we need for translating hvs -> pv scanout pos. */
++ fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
++
++ if (fifo_lines > 0)
++ ret |= DRM_SCANOUTPOS_VALID;
++
++ /* HVS more than fifo_lines into frame for compositing? */
++ if (*vpos > fifo_lines) {
++ /*
++ * We are in active scanout and can get some meaningful results
++ * from HVS. The actual PV scanout can not trail behind more
++ * than fifo_lines as that is the fifo's capacity. Assume that
++ * in active scanout the HVS and PV work in lockstep wrt. HVS
++ * refilling the fifo and PV consuming from the fifo, ie.
++ * whenever the PV consumes and frees up a scanline in the
++ * fifo, the HVS will immediately refill it, therefore
++ * incrementing vpos. Therefore we choose HVS read position -
++ * fifo size in scanlines as an estimate of the real scanout
++ * position of the PV.
++ */
++ *vpos -= fifo_lines + 1;
++ if (mode->flags & DRM_MODE_FLAG_INTERLACE)
++ *vpos /= 2;
++
++ ret |= DRM_SCANOUTPOS_ACCURATE;
++ return ret;
++ }
++
++ /*
++ * Less: This happens when we are in vblank and the HVS, after getting
++ * the VSTART restart signal from the PV, just started refilling its
++ * fifo with new lines from the top-most lines of the new framebuffers.
++ * The PV does not scan out in vblank, so does not remove lines from
++ * the fifo, so the fifo will be full quickly and the HVS has to pause.
++ * We can't get meaningful readings wrt. scanline position of the PV
++ * and need to make things up in an approximate but consistent way.
++ */
++ ret |= DRM_SCANOUTPOS_IN_VBLANK;
++ vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
++
++ if (flags & DRM_CALLED_FROM_VBLIRQ) {
++ /*
++ * Assume the irq handler got called close to first
++ * line of vblank, so PV has about a full vblank's worth
++ * of scanlines to go, and as a base timestamp use the
++ * one taken at entry into vblank irq handler, so it
++ * is not affected by random delays due to lock
++ * contention on event_lock or vblank_time lock in
++ * the core.
++ */
++ *vpos = -vblank_lines;
++
++ if (stime)
++ *stime = vc4_crtc->t_vblank;
++ if (etime)
++ *etime = vc4_crtc->t_vblank;
++
++ /*
++ * If the HVS fifo is not yet full then we know for certain
++ * we are at the very beginning of vblank, as the hvs just
++ * started refilling, and the stime and etime timestamps
++ * truly correspond to start of vblank.
++ */
++ if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
++ ret |= DRM_SCANOUTPOS_ACCURATE;
++ } else {
++ /*
++ * No clue where we are inside vblank. Return a vpos of zero,
++ * which will cause calling code to just return the etime
++ * timestamp uncorrected. At least this is no worse than the
++ * standard fallback.
++ */
++ *vpos = 0;
++ }
++
++ return ret;
++}
++
++int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
++ int *max_error, struct timeval *vblank_time,
++ unsigned flags)
++{
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
++ struct drm_crtc *crtc = &vc4_crtc->base;
++ struct drm_crtc_state *state = crtc->state;
++
++ /* DRM core helper does all the work, given the CRTC's adjusted mode: */
++ return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
++ vblank_time, flags,
++ &state->adjusted_mode);
++}
++
+ static void vc4_crtc_destroy(struct drm_crtc *crtc)
+ {
+ drm_crtc_cleanup(crtc);
+@@ -535,6 +678,7 @@ static irqreturn_t vc4_crtc_irq_handler(
+ irqreturn_t ret = IRQ_NONE;
+
+ if (stat & PV_INT_VFP_START) {
++ vc4_crtc->t_vblank = ktime_get();
+ CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
+ drm_crtc_handle_vblank(&vc4_crtc->base);
+ vc4_crtc_handle_page_flip(vc4_crtc);
+@@ -759,6 +903,22 @@ static void vc4_set_crtc_possible_masks(
+ }
+ }
+
++static void
++vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
++{
++ struct drm_device *drm = vc4_crtc->base.dev;
++ struct vc4_dev *vc4 = to_vc4_dev(drm);
++ u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
++ /* Top/base are supposed to be 4-pixel aligned, but the
++ * Raspberry Pi firmware fills the low bits (which are
++ * presumably ignored), so mask them off before use.
++ */
++ u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
++ u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
++
++ vc4_crtc->cob_size = top - base + 4;
++}
++
+ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
+ {
+ struct platform_device *pdev = to_platform_device(dev);
+@@ -835,6 +995,8 @@ static int vc4_crtc_bind(struct device *
+ crtc->cursor = cursor_plane;
+ }
+
++ vc4_crtc_get_cob_allocation(vc4_crtc);
++
+ CRTC_WRITE(PV_INTEN, 0);
+ CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
+ ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -116,6 +116,8 @@ static struct drm_driver vc4_drm_driver
+ .enable_vblank = vc4_enable_vblank,
+ .disable_vblank = vc4_disable_vblank,
+ .get_vblank_counter = drm_vblank_no_hw_counter,
++ .get_scanout_position = vc4_crtc_get_scanoutpos,
++ .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
+
+ #if defined(CONFIG_DEBUG_FS)
+ .debugfs_init = vc4_debugfs_init,
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -419,6 +419,13 @@ int vc4_enable_vblank(struct drm_device
+ void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
+ void vc4_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file);
+ int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
++int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
++ unsigned int flags, int *vpos, int *hpos,
++ ktime_t *stime, ktime_t *etime,
++ const struct drm_display_mode *mode);
++int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
++ int *max_error, struct timeval *vblank_time,
++ unsigned flags);
+
+ /* vc4_debugfs.c */
+ int vc4_debugfs_init(struct drm_minor *minor);
+--- a/drivers/gpu/drm/vc4/vc4_regs.h
++++ b/drivers/gpu/drm/vc4/vc4_regs.h
+@@ -368,7 +368,6 @@
+ # define SCALER_DISPBKGND_FILL BIT(24)
+
+ #define SCALER_DISPSTAT0 0x00000048
+-#define SCALER_DISPBASE0 0x0000004c
+ # define SCALER_DISPSTATX_MODE_MASK VC4_MASK(31, 30)
+ # define SCALER_DISPSTATX_MODE_SHIFT 30
+ # define SCALER_DISPSTATX_MODE_DISABLED 0
+@@ -377,6 +376,24 @@
+ # define SCALER_DISPSTATX_MODE_EOF 3
+ # define SCALER_DISPSTATX_FULL BIT(29)
+ # define SCALER_DISPSTATX_EMPTY BIT(28)
++# define SCALER_DISPSTATX_FRAME_COUNT_MASK VC4_MASK(17, 12)
++# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT 12
++# define SCALER_DISPSTATX_LINE_MASK VC4_MASK(11, 0)
++# define SCALER_DISPSTATX_LINE_SHIFT 0
++
++#define SCALER_DISPBASE0 0x0000004c
++/* Last pixel in the COB (display FIFO memory) allocated to this HVS
++ * channel. Must be 4-pixel aligned (and thus 4 pixels less than the
++ * next COB base).
++ */
++# define SCALER_DISPBASEX_TOP_MASK VC4_MASK(31, 16)
++# define SCALER_DISPBASEX_TOP_SHIFT 16
++/* First pixel in the COB (display FIFO memory) allocated to this HVS
++ * channel. Must be 4-pixel aligned.
++ */
++# define SCALER_DISPBASEX_BASE_MASK VC4_MASK(15, 0)
++# define SCALER_DISPBASEX_BASE_SHIFT 0
++
+ #define SCALER_DISPCTRL1 0x00000050
+ #define SCALER_DISPBKGND1 0x00000054
+ #define SCALER_DISPBKGNDX(x) (SCALER_DISPBKGND0 + \
+@@ -387,6 +404,9 @@
+ (x) * (SCALER_DISPSTAT1 - \
+ SCALER_DISPSTAT0))
+ #define SCALER_DISPBASE1 0x0000005c
++#define SCALER_DISPBASEX(x) (SCALER_DISPBASE0 + \
++ (x) * (SCALER_DISPBASE1 - \
++ SCALER_DISPBASE0))
+ #define SCALER_DISPCTRL2 0x00000060
+ #define SCALER_DISPCTRLX(x) (SCALER_DISPCTRL0 + \
+ (x) * (SCALER_DISPCTRL1 - \