Diffstat (limited to 'target/linux/brcm2708/patches-4.19/950-0589-drm-v3d-Add-missing-implicit-synchronization.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.19/950-0589-drm-v3d-Add-missing-implicit-synchronization.patch | 279 |
1 files changed, 0 insertions, 279 deletions
diff --git a/target/linux/brcm2708/patches-4.19/950-0589-drm-v3d-Add-missing-implicit-synchronization.patch b/target/linux/brcm2708/patches-4.19/950-0589-drm-v3d-Add-missing-implicit-synchronization.patch
deleted file mode 100644
index a465130c26..0000000000
--- a/target/linux/brcm2708/patches-4.19/950-0589-drm-v3d-Add-missing-implicit-synchronization.patch
+++ /dev/null
@@ -1,279 +0,0 @@
-From 55757fd208de69d0701ac9d6e368d9647549d74f Mon Sep 17 00:00:00 2001
-From: Eric Anholt <eric@anholt.net>
-Date: Wed, 27 Mar 2019 17:44:40 -0700
-Subject: [PATCH 589/725] drm/v3d: Add missing implicit synchronization.
-
-It is the expectation of existing userspace (X11 + Mesa, in
-particular) that jobs submitted to the kernel against a shared BO will
-get implicitly synchronized by their submission order. If we want to
-allow clever userspace to disable implicit synchronization, we should
-do that under its own submit flag (as amdgpu and lima do).
-
-Note that we currently only implicitly sync for the rendering pass,
-not binning -- if you texture-from-pixmap in the binning vertex shader
-(vertex coordinate generation), you'll miss out on synchronization.
-
-Fixes flickering when multiple clients are running in parallel,
-particularly GL apps and compositors.
-
-Signed-off-by: Eric Anholt <eric@anholt.net>
----
- drivers/gpu/drm/v3d/v3d_drv.h   | 10 +---
- drivers/gpu/drm/v3d/v3d_gem.c   | 98 ++++++++++++++++++++++++++++++---
- drivers/gpu/drm/v3d/v3d_sched.c | 45 ++-------------
- 3 files changed, 96 insertions(+), 57 deletions(-)
-
---- a/drivers/gpu/drm/v3d/v3d_drv.h
-+++ b/drivers/gpu/drm/v3d/v3d_drv.h
-@@ -186,8 +186,9 @@ struct v3d_job {
- 	struct v3d_bo **bo;
- 	u32 bo_count;
- 
--	/* An optional fence userspace can pass in for the job to depend on. */
--	struct dma_fence *in_fence;
-+	struct dma_fence **deps;
-+	int deps_count;
-+	int deps_size;
- 
- 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
- 	struct dma_fence *irq_fence;
-@@ -219,11 +220,6 @@ struct v3d_bin_job {
- struct v3d_render_job {
- 	struct v3d_job base;
- 
--	/* Optional fence for the binner, to depend on before starting
--	 * our job.
--	 */
--	struct dma_fence *bin_done_fence;
--
- 	/* GPU virtual addresses of the start/end of the CL job. */
- 	u32 start, end;
- 
---- a/drivers/gpu/drm/v3d/v3d_gem.c
-+++ b/drivers/gpu/drm/v3d/v3d_gem.c
-@@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo
- 	ww_acquire_fini(acquire_ctx);
- }
- 
-+static int
-+v3d_add_dep(struct v3d_job *job, struct dma_fence *fence)
-+{
-+	if (!fence)
-+		return 0;
-+
-+	if (job->deps_size == job->deps_count) {
-+		int new_deps_size = max(job->deps_size * 2, 4);
-+		struct dma_fence **new_deps =
-+			krealloc(job->deps, new_deps_size * sizeof(*new_deps),
-+				 GFP_KERNEL);
-+		if (!new_deps) {
-+			dma_fence_put(fence);
-+			return -ENOMEM;
-+		}
-+
-+		job->deps = new_deps;
-+		job->deps_size = new_deps_size;
-+	}
-+
-+	job->deps[job->deps_count++] = fence;
-+
-+	return 0;
-+}
-+
-+/**
-+ * Adds the required implicit fences before executing the job
-+ *
-+ * Userspace (X11 + Mesa) requires that a job submitted against a shared BO
-+ * from one fd will implicitly synchronize against previous jobs submitted
-+ * against that BO from other fds.
-+ *
-+ * Currently we don't bother trying to track the shared BOs, and instead just
-+ * sync everything. However, our synchronization is only for the render pass
-+ * -- the binning stage (VS coordinate calculations) ignores implicit sync,
-+ * since using shared buffers for texture coordinates seems unlikely, and
-+ * implicitly syncing them would break bin/render parallelism. If we want to
-+ * fix that, we should introduce a flag when VS texturing has been used in the
-+ * binning stage, or a set of flags for which BOs are sampled during binning.
-+ */
-+static int
-+v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo)
-+{
-+	int i, ret, nr_fences;
-+	struct dma_fence **fences;
-+
-+	ret = reservation_object_get_fences_rcu(bo->resv, NULL,
-+						&nr_fences, &fences);
-+	if (ret || !nr_fences)
-+		return ret;
-+
-+	for (i = 0; i < nr_fences; i++) {
-+		ret = v3d_add_dep(job, fences[i]);
-+		if (ret)
-+			break;
-+	}
-+
-+	/* Free any remaining fences after error. */
-+	for (; i < nr_fences; i++)
-+		dma_fence_put(fences[i]);
-+	kfree(fences);
-+
-+	return ret;
-+}
-+
- /* Takes the reservation lock on all the BOs being referenced, so that
-  * at queue submit time we can update the reservations.
-  *
-@@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo
-  * to v3d, so we don't attach dma-buf fences to them.
-  */
- static int
--v3d_lock_bo_reservations(struct v3d_bo **bos,
--			 int bo_count,
-+v3d_lock_bo_reservations(struct v3d_job *job,
- 			 struct ww_acquire_ctx *acquire_ctx)
- {
-+	struct v3d_bo **bos = job->bo;
-+	int bo_count = job->bo_count;
- 	int contended_lock = -1;
- 	int i, ret;
- 
-@@ -281,6 +347,13 @@ retry:
- 	 * before we commit the CL to the hardware.
- 	 */
- 	for (i = 0; i < bo_count; i++) {
-+		ret = v3d_add_implicit_fences(job, bos[i]);
-+		if (ret) {
-+			v3d_unlock_bo_reservations(bos, bo_count,
-+						   acquire_ctx);
-+			return ret;
-+		}
-+
- 		ret = reservation_object_reserve_shared(bos[i]->resv);
- 		if (ret) {
- 			v3d_unlock_bo_reservations(bos, bo_count,
-@@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref)
- 	}
- 	kvfree(job->bo);
- 
--	dma_fence_put(job->in_fence);
-+	for (i = 0; i < job->deps_count; i++)
-+		dma_fence_put(job->deps[i]);
-+	kfree(job->deps);
-+
- 	dma_fence_put(job->irq_fence);
- 	dma_fence_put(job->done_fence);
- 
-@@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct
- 	     struct v3d_job *job, void (*free)(struct kref *ref),
- 	     u32 in_sync)
- {
-+	struct dma_fence *in_fence = NULL;
- 	int ret;
- 
- 	job->v3d = v3d;
- 	job->free = free;
- 
--	ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence);
-+	ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence);
- 	if (ret == -EINVAL)
- 		return ret;
- 
-+	ret = v3d_add_dep(job, in_fence);
-+	if (ret)
-+		return ret;
-+
- 	kref_init(&job->refcount);
- 
- 	return 0;
-@@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d
- 	if (ret)
- 		goto fail;
- 
--	ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count,
--				       &acquire_ctx);
-+	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
- 	if (ret)
- 		goto fail;
- 
-@@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d
- 		if (ret)
- 			goto fail_unreserve;
- 
--		render->bin_done_fence = dma_fence_get(bin->base.done_fence);
-+		ret = v3d_add_dep(&render->base,
-+				  dma_fence_get(bin->base.done_fence));
- 	}
- 
- 	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
-@@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device *
- 	}
- 	spin_unlock(&file_priv->table_lock);
- 
--	ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count,
--				       &acquire_ctx);
-+	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
- 	if (ret)
- 		goto fail;
- 
---- a/drivers/gpu/drm/v3d/v3d_sched.c
-+++ b/drivers/gpu/drm/v3d/v3d_sched.c
-@@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job
- 			   struct drm_sched_entity *s_entity)
- {
- 	struct v3d_job *job = to_v3d_job(sched_job);
--	struct dma_fence *fence;
--
--	fence = job->in_fence;
--	if (fence) {
--		job->in_fence = NULL;
--		return fence;
--	}
--
--	return NULL;
--}
- 
--/**
-- * Returns the fences that the render job depends on, one by one.
-- * v3d_job_run() won't be called until all of them have been signaled.
-- */
--static struct dma_fence *
--v3d_render_job_dependency(struct drm_sched_job *sched_job,
--			  struct drm_sched_entity *s_entity)
--{
--	struct v3d_render_job *job = to_render_job(sched_job);
--	struct dma_fence *fence;
--
--	fence = v3d_job_dependency(sched_job, s_entity);
--	if (fence)
--		return fence;
--
--	/* If we had a bin job, the render job definitely depends on
--	 * it. We first have to wait for bin to be scheduled, so that
--	 * its done_fence is created.
--	 */
--	fence = job->bin_done_fence;
--	if (fence) {
--		job->bin_done_fence = NULL;
--		return fence;
--	}
--
--	/* XXX: Wait on a fence for switching the GMP if necessary,
--	 * and then do so.
--	 */
--
--	return fence;
-+	if (!job->deps_count)
-+		return NULL;
-+	return job->deps[--job->deps_count];
- }
- 
- static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
-@@ -329,7 +292,7 @@ static const struct drm_sched_backend_op
- };
- 
- static const struct drm_sched_backend_ops v3d_render_sched_ops = {
--	.dependency = v3d_render_job_dependency,
-+	.dependency = v3d_job_dependency,
- 	.run_job = v3d_render_job_run,
- 	.timedout_job = v3d_render_job_timedout,
- 	.free_job = v3d_job_free,
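Note on the removed patch: its core change is replacing the single per-job in_fence with a growable deps array that the GPU scheduler drains one fence per .dependency callback before run_job(). The user-space C sketch below is only an illustration of that array pattern, not the kernel code: struct job, job_add_dep() and job_next_dep() are hypothetical stand-ins for v3d_job, v3d_add_dep() and the reworked v3d_job_dependency(), plain ints stand in for struct dma_fence pointers, and realloc() stands in for krealloc()/dma_fence_put() handling.

/* Standalone model of the deps-array pattern, compilable with any C99 compiler. */
#include <stdio.h>
#include <stdlib.h>

struct job {
	int *deps;       /* stand-in for struct dma_fence **deps */
	int deps_count;
	int deps_size;
};

/* Modeled on v3d_add_dep(): grow the array on demand (4, 8, 16, ...). */
static int job_add_dep(struct job *job, int fence)
{
	if (job->deps_size == job->deps_count) {
		int new_size = job->deps_size ? job->deps_size * 2 : 4;
		int *new_deps = realloc(job->deps, new_size * sizeof(*new_deps));

		if (!new_deps)
			return -1;      /* the kernel code returns -ENOMEM here */
		job->deps = new_deps;
		job->deps_size = new_size;
	}
	job->deps[job->deps_count++] = fence;
	return 0;
}

/* Modeled on the reworked v3d_job_dependency(): hand out one dependency
 * per call until the array is drained; 0 means "no more deps, ready to run".
 */
static int job_next_dep(struct job *job, int *fence)
{
	if (!job->deps_count)
		return 0;
	*fence = job->deps[--job->deps_count];
	return 1;
}

int main(void)
{
	struct job job = {0};
	int fence;

	/* e.g. the in_sync fence plus implicit fences gathered from each BO */
	for (int i = 1; i <= 5; i++)
		job_add_dep(&job, i);

	while (job_next_dep(&job, &fence))
		printf("wait on fence %d\n", fence);

	free(job.deps);
	return 0;
}

Popping from the tail hands dependencies back newest-first; since the scheduler only needs every dependency signalled before it runs the job, the order in which they are returned does not matter.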