aboutsummaryrefslogtreecommitdiffstats
path: root/target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch')
-rw-r--r-- target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch | 279
1 files changed, 279 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch b/target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch
new file mode 100644
index 0000000000..b24c3a5c4d
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.19/950-0586-drm-v3d-Add-missing-implicit-synchronization.patch
@@ -0,0 +1,279 @@
+From 587258630d44a120953189c581de1bb6f9f53c76 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Wed, 27 Mar 2019 17:44:40 -0700
+Subject: [PATCH 586/782] drm/v3d: Add missing implicit synchronization.
+
+It is the expectation of existing userspace (X11 + Mesa, in
+particular) that jobs submitted to the kernel against a shared BO will
+get implicitly synchronized by their submission order. If we want to
+allow clever userspace to disable implicit synchronization, we should
+do that under its own submit flag (as amdgpu and lima do).
+
+Note that we currently only implicitly sync for the rendering pass,
+not binning -- if you texture-from-pixmap in the binning vertex shader
+(vertex coordinate generation), you'll miss out on synchronization.
+
+Fixes flickering when multiple clients are running in parallel,
+particularly GL apps and compositors.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+---
+ drivers/gpu/drm/v3d/v3d_drv.h | 10 +---
+ drivers/gpu/drm/v3d/v3d_gem.c | 98 ++++++++++++++++++++++++++++++---
+ drivers/gpu/drm/v3d/v3d_sched.c | 45 ++-------------
+ 3 files changed, 96 insertions(+), 57 deletions(-)
+
+--- a/drivers/gpu/drm/v3d/v3d_drv.h
++++ b/drivers/gpu/drm/v3d/v3d_drv.h
+@@ -186,8 +186,9 @@ struct v3d_job {
+ struct v3d_bo **bo;
+ u32 bo_count;
+
+- /* An optional fence userspace can pass in for the job to depend on. */
+- struct dma_fence *in_fence;
++ struct dma_fence **deps;
++ int deps_count;
++ int deps_size;
+
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *irq_fence;
+@@ -219,11 +220,6 @@ struct v3d_bin_job {
+ struct v3d_render_job {
+ struct v3d_job base;
+
+- /* Optional fence for the binner, to depend on before starting
+- * our job.
+- */
+- struct dma_fence *bin_done_fence;
+-
+ /* GPU virtual addresses of the start/end of the CL job. */
+ u32 start, end;
+
+--- a/drivers/gpu/drm/v3d/v3d_gem.c
++++ b/drivers/gpu/drm/v3d/v3d_gem.c
+@@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo
+ ww_acquire_fini(acquire_ctx);
+ }
+
++static int
++v3d_add_dep(struct v3d_job *job, struct dma_fence *fence)
++{
++ if (!fence)
++ return 0;
++
++ if (job->deps_size == job->deps_count) {
++ int new_deps_size = max(job->deps_size * 2, 4);
++ struct dma_fence **new_deps =
++ krealloc(job->deps, new_deps_size * sizeof(*new_deps),
++ GFP_KERNEL);
++ if (!new_deps) {
++ dma_fence_put(fence);
++ return -ENOMEM;
++ }
++
++ job->deps = new_deps;
++ job->deps_size = new_deps_size;
++ }
++
++ job->deps[job->deps_count++] = fence;
++
++ return 0;
++}
++
++/**
++ * Adds the required implicit fences before executing the job
++ *
++ * Userspace (X11 + Mesa) requires that a job submitted against a shared BO
++ * from one fd will implicitly synchronize against previous jobs submitted
++ * against that BO from other fds.
++ *
++ * Currently we don't bother trying to track the shared BOs, and instead just
++ * sync everything. However, our synchronization is only for the render pass
++ * -- the binning stage (VS coordinate calculations) ignores implicit sync,
++ * since using shared buffers for texture coordinates seems unlikely, and
++ * implicitly syncing them would break bin/render parallelism. If we want to
++ * fix that, we should introduce a flag when VS texturing has been used in the
++ * binning stage, or a set of flags for which BOs are sampled during binning.
++ */
++static int
++v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo)
++{
++ int i, ret, nr_fences;
++ struct dma_fence **fences;
++
++ ret = reservation_object_get_fences_rcu(bo->resv, NULL,
++ &nr_fences, &fences);
++ if (ret || !nr_fences)
++ return ret;
++
++ for (i = 0; i < nr_fences; i++) {
++ ret = v3d_add_dep(job, fences[i]);
++ if (ret)
++ break;
++ }
++
++ /* Free any remaining fences after error. */
++ for (; i < nr_fences; i++)
++ dma_fence_put(fences[i]);
++ kfree(fences);
++
++ return ret;
++}
++
+ /* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+@@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo
+ * to v3d, so we don't attach dma-buf fences to them.
+ */
+ static int
+-v3d_lock_bo_reservations(struct v3d_bo **bos,
+- int bo_count,
++v3d_lock_bo_reservations(struct v3d_job *job,
+ struct ww_acquire_ctx *acquire_ctx)
+ {
++ struct v3d_bo **bos = job->bo;
++ int bo_count = job->bo_count;
+ int contended_lock = -1;
+ int i, ret;
+
+@@ -281,6 +347,13 @@ retry:
+ * before we commit the CL to the hardware.
+ */
+ for (i = 0; i < bo_count; i++) {
++ ret = v3d_add_implicit_fences(job, bos[i]);
++ if (ret) {
++ v3d_unlock_bo_reservations(bos, bo_count,
++ acquire_ctx);
++ return ret;
++ }
++
+ ret = reservation_object_reserve_shared(bos[i]->resv);
+ if (ret) {
+ v3d_unlock_bo_reservations(bos, bo_count,
+@@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref)
+ }
+ kvfree(job->bo);
+
+- dma_fence_put(job->in_fence);
++ for (i = 0; i < job->deps_count; i++)
++ dma_fence_put(job->deps[i]);
++ kfree(job->deps);
++
+ dma_fence_put(job->irq_fence);
+ dma_fence_put(job->done_fence);
+
+@@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct
+ struct v3d_job *job, void (*free)(struct kref *ref),
+ u32 in_sync)
+ {
++ struct dma_fence *in_fence = NULL;
+ int ret;
+
+ job->v3d = v3d;
+ job->free = free;
+
+- ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
++ ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence);
+ if (ret == -EINVAL)
+ return ret;
+
++ ret = v3d_add_dep(job, in_fence);
++ if (ret)
++ return ret;
++
+ kref_init(&job->refcount);
+
+ return 0;
+@@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ if (ret)
+ goto fail;
+
+- ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
+- &acquire_ctx);
++ ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+@@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
+ if (ret)
+ goto fail_unreserve;
+
+- render->bin_done_fence = dma_fence_get(bin->base.done_fence);
++ ret = v3d_add_dep(&render->base,
++ dma_fence_get(bin->base.done_fence));
+ }
+
+ ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
+@@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
+ }
+ spin_unlock(&file_priv->table_lock);
+
+- ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
+- &acquire_ctx);
++ ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+--- a/drivers/gpu/drm/v3d/v3d_sched.c
++++ b/drivers/gpu/drm/v3d/v3d_sched.c
+@@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job
+ struct drm_sched_entity *s_entity)
+ {
+ struct v3d_job *job = to_v3d_job(sched_job);
+- struct dma_fence *fence;
+-
+- fence = job->in_fence;
+- if (fence) {
+- job->in_fence = NULL;
+- return fence;
+- }
+-
+- return NULL;
+-}
+
+-/**
+- * Returns the fences that the render job depends on, one by one.
+- * v3d_job_run() won't be called until all of them have been signaled.
+- */
+-static struct dma_fence *
+-v3d_render_job_dependency(struct drm_sched_job *sched_job,
+- struct drm_sched_entity *s_entity)
+-{
+- struct v3d_render_job *job = to_render_job(sched_job);
+- struct dma_fence *fence;
+-
+- fence = v3d_job_dependency(sched_job, s_entity);
+- if (fence)
+- return fence;
+-
+- /* If we had a bin job, the render job definitely depends on
+- * it. We first have to wait for bin to be scheduled, so that
+- * its done_fence is created.
+- */
+- fence = job->bin_done_fence;
+- if (fence) {
+- job->bin_done_fence = NULL;
+- return fence;
+- }
+-
+- /* XXX: Wait on a fence for switching the GMP if necessary,
+- * and then do so.
+- */
+-
+- return fence;
++ if (!job->deps_count)
++ return NULL;
++ return job->deps[--job->deps_count];
+ }
+
+ static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
+@@ -329,7 +292,7 @@ static const struct drm_sched_backend_op
+ };
+
+ static const struct drm_sched_backend_ops v3d_render_sched_ops = {
+- .dependency = v3d_render_job_dependency,
++ .dependency = v3d_job_dependency,
+ .run_job = v3d_render_job_run,
+ .timedout_job = v3d_render_job_timedout,
+ .free_job = v3d_job_free,