From 2340c646e6a5da658e114be2f721ff90853588d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Fri, 9 Aug 2019 19:50:30 +0200 Subject: brcm2708: update to latest patches from RPi foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Álvaro Fernández Rojas --- ...-v3d-Add-missing-implicit-synchronization.patch | 279 +++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 target/linux/brcm2708/patches-4.19/950-0587-drm-v3d-Add-missing-implicit-synchronization.patch (limited to 'target/linux/brcm2708/patches-4.19/950-0587-drm-v3d-Add-missing-implicit-synchronization.patch') diff --git a/target/linux/brcm2708/patches-4.19/950-0587-drm-v3d-Add-missing-implicit-synchronization.patch b/target/linux/brcm2708/patches-4.19/950-0587-drm-v3d-Add-missing-implicit-synchronization.patch new file mode 100644 index 0000000000..f4fe613310 --- /dev/null +++ b/target/linux/brcm2708/patches-4.19/950-0587-drm-v3d-Add-missing-implicit-synchronization.patch @@ -0,0 +1,279 @@ +From 9691f6245f13a92f49143602f47f6e7f0b6b5f55 Mon Sep 17 00:00:00 2001 +From: Eric Anholt +Date: Wed, 27 Mar 2019 17:44:40 -0700 +Subject: [PATCH 587/773] drm/v3d: Add missing implicit synchronization. + +It is the expectation of existing userspace (X11 + Mesa, in +particular) that jobs submitted to the kernel against a shared BO will +get implicitly synchronized by their submission order. If we want to +allow clever userspace to disable implicit synchronization, we should +do that under its own submit flag (as amdgpu and lima do). + +Note that we currently only implicitly sync for the rendering pass, +not binning -- if you texture-from-pixmap in the binning vertex shader +(vertex coordinate generation), you'll miss out on synchronization. + +Fixes flickering when multiple clients are running in parallel, +particularly GL apps and compositors. + +Signed-off-by: Eric Anholt +--- + drivers/gpu/drm/v3d/v3d_drv.h | 10 +--- + drivers/gpu/drm/v3d/v3d_gem.c | 98 ++++++++++++++++++++++++++++++--- + drivers/gpu/drm/v3d/v3d_sched.c | 45 ++------------- + 3 files changed, 96 insertions(+), 57 deletions(-) + +--- a/drivers/gpu/drm/v3d/v3d_drv.h ++++ b/drivers/gpu/drm/v3d/v3d_drv.h +@@ -186,8 +186,9 @@ struct v3d_job { + struct v3d_bo **bo; + u32 bo_count; + +- /* An optional fence userspace can pass in for the job to depend on. */ +- struct dma_fence *in_fence; ++ struct dma_fence **deps; ++ int deps_count; ++ int deps_size; + + /* v3d fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *irq_fence; +@@ -219,11 +220,6 @@ struct v3d_bin_job { + struct v3d_render_job { + struct v3d_job base; + +- /* Optional fence for the binner, to depend on before starting +- * our job. +- */ +- struct dma_fence *bin_done_fence; +- + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; + +--- a/drivers/gpu/drm/v3d/v3d_gem.c ++++ b/drivers/gpu/drm/v3d/v3d_gem.c +@@ -218,6 +218,71 @@ v3d_unlock_bo_reservations(struct v3d_bo + ww_acquire_fini(acquire_ctx); + } + ++static int ++v3d_add_dep(struct v3d_job *job, struct dma_fence *fence) ++{ ++ if (!fence) ++ return 0; ++ ++ if (job->deps_size == job->deps_count) { ++ int new_deps_size = max(job->deps_size * 2, 4); ++ struct dma_fence **new_deps = ++ krealloc(job->deps, new_deps_size * sizeof(*new_deps), ++ GFP_KERNEL); ++ if (!new_deps) { ++ dma_fence_put(fence); ++ return -ENOMEM; ++ } ++ ++ job->deps = new_deps; ++ job->deps_size = new_deps_size; ++ } ++ ++ job->deps[job->deps_count++] = fence; ++ ++ return 0; ++} ++ ++/** ++ * Adds the required implicit fences before executing the job ++ * ++ * Userspace (X11 + Mesa) requires that a job submitted against a shared BO ++ * from one fd will implicitly synchronize against previous jobs submitted ++ * against that BO from other fds. ++ * ++ * Currently we don't bother trying to track the shared BOs, and instead just ++ * sync everything. However, our synchronization is only for the render pass ++ * -- the binning stage (VS coordinate calculations) ignores implicit sync, ++ * since using shared buffers for texture coordinates seems unlikely, and ++ * implicitly syncing them would break bin/render parallelism. If we want to ++ * fix that, we should introduce a flag when VS texturing has been used in the ++ * binning stage, or a set of flags for which BOs are sampled during binning. ++ */ ++static int ++v3d_add_implicit_fences(struct v3d_job *job, struct v3d_bo *bo) ++{ ++ int i, ret, nr_fences; ++ struct dma_fence **fences; ++ ++ ret = reservation_object_get_fences_rcu(bo->resv, NULL, ++ &nr_fences, &fences); ++ if (ret || !nr_fences) ++ return ret; ++ ++ for (i = 0; i < nr_fences; i++) { ++ ret = v3d_add_dep(job, fences[i]); ++ if (ret) ++ break; ++ } ++ ++ /* Free any remaining fences after error. */ ++ for (; i < nr_fences; i++) ++ dma_fence_put(fences[i]); ++ kfree(fences); ++ ++ return ret; ++} ++ + /* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * +@@ -226,10 +291,11 @@ v3d_unlock_bo_reservations(struct v3d_bo + * to v3d, so we don't attach dma-buf fences to them. + */ + static int +-v3d_lock_bo_reservations(struct v3d_bo **bos, +- int bo_count, ++v3d_lock_bo_reservations(struct v3d_job *job, + struct ww_acquire_ctx *acquire_ctx) + { ++ struct v3d_bo **bos = job->bo; ++ int bo_count = job->bo_count; + int contended_lock = -1; + int i, ret; + +@@ -281,6 +347,13 @@ retry: + * before we commit the CL to the hardware. + */ + for (i = 0; i < bo_count; i++) { ++ ret = v3d_add_implicit_fences(job, bos[i]); ++ if (ret) { ++ v3d_unlock_bo_reservations(bos, bo_count, ++ acquire_ctx); ++ return ret; ++ } ++ + ret = reservation_object_reserve_shared(bos[i]->resv); + if (ret) { + v3d_unlock_bo_reservations(bos, bo_count, +@@ -383,7 +456,10 @@ v3d_job_free(struct kref *ref) + } + kvfree(job->bo); + +- dma_fence_put(job->in_fence); ++ for (i = 0; i < job->deps_count; i++) ++ dma_fence_put(job->deps[i]); ++ kfree(job->deps); ++ + dma_fence_put(job->irq_fence); + dma_fence_put(job->done_fence); + +@@ -464,15 +540,20 @@ v3d_job_init(struct v3d_dev *v3d, struct + struct v3d_job *job, void (*free)(struct kref *ref), + u32 in_sync) + { ++ struct dma_fence *in_fence = NULL; + int ret; + + job->v3d = v3d; + job->free = free; + +- ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &job->in_fence); ++ ret = drm_syncobj_find_fence(file_priv, in_sync, 0, &in_fence); + if (ret == -EINVAL) + return ret; + ++ ret = v3d_add_dep(job, in_fence); ++ if (ret) ++ return ret; ++ + kref_init(&job->refcount); + + return 0; +@@ -590,8 +671,7 @@ v3d_submit_cl_ioctl(struct drm_device *d + if (ret) + goto fail; + +- ret = v3d_lock_bo_reservations(render->base.bo, render->base.bo_count, +- &acquire_ctx); ++ ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx); + if (ret) + goto fail; + +@@ -601,7 +681,8 @@ v3d_submit_cl_ioctl(struct drm_device *d + if (ret) + goto fail_unreserve; + +- render->bin_done_fence = dma_fence_get(bin->base.done_fence); ++ ret = v3d_add_dep(&render->base, ++ dma_fence_get(bin->base.done_fence)); + } + + ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER); +@@ -692,8 +773,7 @@ v3d_submit_tfu_ioctl(struct drm_device * + } + spin_unlock(&file_priv->table_lock); + +- ret = v3d_lock_bo_reservations(job->base.bo, job->base.bo_count, +- &acquire_ctx); ++ ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx); + if (ret) + goto fail; + +--- a/drivers/gpu/drm/v3d/v3d_sched.c ++++ b/drivers/gpu/drm/v3d/v3d_sched.c +@@ -67,47 +67,10 @@ v3d_job_dependency(struct drm_sched_job + struct drm_sched_entity *s_entity) + { + struct v3d_job *job = to_v3d_job(sched_job); +- struct dma_fence *fence; +- +- fence = job->in_fence; +- if (fence) { +- job->in_fence = NULL; +- return fence; +- } +- +- return NULL; +-} + +-/** +- * Returns the fences that the render job depends on, one by one. +- * v3d_job_run() won't be called until all of them have been signaled. +- */ +-static struct dma_fence * +-v3d_render_job_dependency(struct drm_sched_job *sched_job, +- struct drm_sched_entity *s_entity) +-{ +- struct v3d_render_job *job = to_render_job(sched_job); +- struct dma_fence *fence; +- +- fence = v3d_job_dependency(sched_job, s_entity); +- if (fence) +- return fence; +- +- /* If we had a bin job, the render job definitely depends on +- * it. We first have to wait for bin to be scheduled, so that +- * its done_fence is created. +- */ +- fence = job->bin_done_fence; +- if (fence) { +- job->bin_done_fence = NULL; +- return fence; +- } +- +- /* XXX: Wait on a fence for switching the GMP if necessary, +- * and then do so. +- */ +- +- return fence; ++ if (!job->deps_count) ++ return NULL; ++ return job->deps[--job->deps_count]; + } + + static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job) +@@ -329,7 +292,7 @@ static const struct drm_sched_backend_op + }; + + static const struct drm_sched_backend_ops v3d_render_sched_ops = { +- .dependency = v3d_render_job_dependency, ++ .dependency = v3d_job_dependency, + .run_job = v3d_render_job_run, + .timedout_job = v3d_render_job_timedout, + .free_job = v3d_job_free, -- cgit v1.2.3